#ifdef WINDOWS
# include <winsock.h>
+#else
+# include <netdb.h> /* for h_errno */
#endif
#include "wget.h"
#if USE_DIGEST
# include "md5.h"
#endif
+#ifdef HAVE_SSL
+# include "gen_sslfunc.h"
+#endif /* HAVE_SSL */
extern char *version_string;
extern int errno;
#endif
#ifndef h_errno
+# ifndef __CYGWIN__
extern int h_errno;
+# endif
#endif
\f
http_process_type (const char *hdr, void *arg)
{
char **result = (char **)arg;
- char *p;
-
- p = strrchr (hdr, ';');
- if (p)
- {
- int len = p - hdr;
- *result = (char *)xmalloc (len + 1);
- memcpy (*result, hdr, len);
- (*result)[len] = '\0';
- }
- else
- *result = xstrdup (hdr);
+ /* Locate P on `;' or the terminating zero, whichever comes first. */
+ const char *p = strchr (hdr, ';');
+ if (!p)
+ p = hdr + strlen (hdr);
+ while (p > hdr && ISSPACE (*(p - 1)))
+ --p;
+ *result = strdupdelim (hdr, p);
return 1;
}
return 1;
}
\f
-/* Persistent connections (pc). Currently, we cache the most recently
- used connection as persistent, provided that the HTTP server agrees
- to make it such. The persistence data is stored in the variables
+/* Persistent connections. Currently, we cache the most recently used
+ connection as persistent, provided that the HTTP server agrees to
+ make it such. The persistence data is stored in the variables
below. Ideally, it would be in a structure, and it should be
possible to cache an arbitrary fixed number of these connections.
I think the code is quite easy to extend in that direction. */
-/* Whether the persistent connection is active. */
+/* Whether a persistent connection is active. */
static int pc_active_p;
-
-/* Host and port of the last persistent connection. */
+/* Host and port of currently active persistent connection. */
static unsigned char pc_last_host[4];
static unsigned short pc_last_port;
-/* File descriptor of the last persistent connection. */
+/* File descriptor of the currently active persistent connection. */
static int pc_last_fd;
+#ifdef HAVE_SSL
+/* Whether an SSL handshake has occurred on this connection. */
+static int pc_active_ssl;
+/* SSL connection of the currently active persistent connection. */
+static SSL *pc_last_ssl;
+#endif /* HAVE_SSL */
+
/* Mark the persistent connection as invalid. This is used by the
CLOSE_* macros after they forcefully close a registered persistent
- connection. */
+ connection. This does not close the file descriptor -- it is left
+ to the caller to do that. (Maybe it should, though.) */
static void
invalidate_persistent (void)
{
pc_active_p = 0;
+#ifdef HAVE_SSL
+ pc_active_ssl = 0; /* Only the flags are reset by this function;
+ pc_last_ssl and pc_last_fd keep their old values.
+ NOTE(review): confirm that no later code path
+ acts on a stale pc_last_ssl. */
+#endif /* HAVE_SSL */
DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
}
If a previous connection was persistent, it is closed. */
static void
-register_persistent (const char *host, unsigned short port, int fd)
+register_persistent (const char *host, unsigned short port, int fd
+#ifdef HAVE_SSL
+ , SSL *ssl
+#endif
+ )
{
int success;
persistent connection exists, but we then connect to a
different host, and try to register a persistent
connection to that one. */
+#ifdef HAVE_SSL
+ /* The ssl disconnect has to take place before the closing
+ of pc_last_fd. */
+ if (pc_last_ssl)
+ shutdown_ssl(pc_last_ssl);
+#endif
CLOSE (pc_last_fd);
invalidate_persistent ();
}
pc_last_port = port;
pc_last_fd = fd;
pc_active_p = 1;
+#ifdef HAVE_SSL
+ pc_last_ssl = ssl;
+ pc_active_ssl = ssl ? 1 : 0;
+#endif
DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
}
connecting to HOST:PORT. */
static int
-persistent_available_p (const char *host, unsigned short port)
+persistent_available_p (const char *host, unsigned short port
+#ifdef HAVE_SSL
+ , int ssl
+#endif
+ )
{
unsigned char this_host[4];
+ /* First, check whether a persistent connection is active at all. */
if (!pc_active_p)
return 0;
+ /* Second, check if the active connection pertains to the correct
+ (HOST, PORT) ordered pair. */
if (port != pc_last_port)
return 0;
+#ifdef HAVE_SSL
+ /* Second, a): check if current connection is (not) ssl, too. This
+ test is unlikely to fail because HTTP and HTTPS typically use
+ different ports. Yet it is possible, or so I [Christian
+ Fraenkel] have been told, to run HTTPS and HTTP simultaneously on
+ the same port. */
+ if (ssl != pc_active_ssl)
+ return 0;
+#endif /* HAVE_SSL */
if (!store_hostaddress (this_host, host))
return 0;
if (memcmp (pc_last_host, this_host, 4))
return 0;
+ /* Third: check whether the connection is still open. This is
+ important because most servers implement a liberal (short) timeout
+ on persistent connections. Wget can of course always reconnect
+ if the connection doesn't work out, but it's nicer to know in
+ advance. This test is a logical followup of the first test, but
+ is "expensive" and therefore placed at the end of the list. */
if (!test_socket_open (pc_last_fd))
{
+ /* Oops, the socket is no longer open. Now that we know that,
+ let's invalidate the persistent connection before returning
+ 0. */
CLOSE (pc_last_fd);
invalidate_persistent ();
return 0;
return 1;
}
+#ifdef HAVE_SSL /* SHUTDOWN_SSL (ssl): call shutdown_ssl on SSL unless SSL is null. */
+# define SHUTDOWN_SSL(ssl) do { \
+ if (ssl) \
+ shutdown_ssl (ssl); \
+} while (0)
+#else /* not HAVE_SSL: SHUTDOWN_SSL expands to nothing. */
+# define SHUTDOWN_SSL(ssl)
+#endif /* HAVE_SSL */
+
/* The idea behind these two CLOSE macros is to distinguish between
two cases: one when the job we've been doing is finished, and we
want to close the connection and leave, and two when something is
#define CLOSE_FINISH(fd) do { \
if (!keep_alive) \
{ \
+ SHUTDOWN_SSL (ssl); \
CLOSE (fd); \
if (pc_active_p && (fd) == pc_last_fd) \
invalidate_persistent (); \
} while (0)
#define CLOSE_INVALIDATE(fd) do { \
+ SHUTDOWN_SSL (ssl); /* `ssl' is not a macro parameter; it is looked up where the macro is expanded. */ \
CLOSE (fd); \
if (pc_active_p && (fd) == pc_last_fd) \
invalidate_persistent (); \
} while (0)
-
\f
struct http_stat
{
static time_t http_atotm PARAMS ((char *));
+#define BEGINS_WITH(line, string_constant) \
+ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
+ && (ISSPACE (line[sizeof (string_constant) - 1]) \
+ || !line[sizeof (string_constant) - 1])) /* True when LINE starts with
+ STRING_CONSTANT (compared with strncasecmp) and the next character is
+ whitespace or the terminating zero. STRING_CONSTANT must be a string
+ literal, because its length is taken with sizeof. */
+
/* Retrieve a document through HTTP protocol. It recognizes status
code, and correctly handles redirections. It closes the network
socket. If it receives an error from the functions below it, it
FILE *fp;
int auth_tried_already;
struct rbuf rbuf;
+#ifdef HAVE_SSL
+ static SSL_CTX *ssl_ctx = NULL;
+ SSL *ssl = NULL;
+#endif /* HAVE_SSL */
/* Whether this connection will be kept alive after the HTTP request
is done. */
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
+#ifdef HAVE_SSL
+ /* initialize ssl_ctx on first run */
+ if (!ssl_ctx)
+ {
+ err=init_ssl (&ssl_ctx);
+ if (err != 0)
+ {
+ switch (err)
+ {
+ case SSLERRCTXCREATE:
+ /* this is fatal */
+ logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n"));
+ ssl_printerrors ();
+ return err;
+ case SSLERRCERTFILE:
+ /* try without certfile */
+ logprintf (LOG_NOTQUIET,
+ _("Failed to load certificates from %s\n"),
+ opt.sslcertfile);
+ ssl_printerrors ();
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ case SSLERRCERTKEY:
+ logprintf (LOG_NOTQUIET,
+ _("Failed to get certificate key from %s\n"),
+ opt.sslcertkey);
+ ssl_printerrors ();
+ logprintf (LOG_NOTQUIET,
+ _("Trying without the specified certificate\n"));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+#endif /* HAVE_SSL */
+
if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
know the local filename so we can save to it. */
again:
/* We need to come back here when the initial attempt to retrieve
- without authorization header fails. */
+ without authorization header fails. (Expected to happen at least
+ for the Digest authorization scheme.) */
+
keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0;
/* First: establish the connection. */
if (inhibit_keep_alive
- || !persistent_available_p (u->host, u->port))
+ ||
+#ifndef HAVE_SSL
+ !persistent_available_p (u->host, u->port)
+#else
+ !persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0))
+#endif /* HAVE_SSL */
+ )
{
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
err = make_connection (&sock, u->host, u->port);
- switch (err)
+ switch (err)
{
case HOSTERR:
logputs (LOG_VERBOSE, "\n");
abort ();
break;
}
+#ifdef HAVE_SSL
+ if (u->proto == URLHTTPS)
+ if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
+ {
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
+ CLOSE (sock);
+ return CONSSLERR;
+ }
+#endif /* HAVE_SSL */
}
else
{
/* #### pc_last_fd should be accessed through an accessor
function. */
sock = pc_last_fd;
+#ifdef HAVE_SSL
+ ssl = pc_last_ssl;
+#endif /* HAVE_SSL */
DEBUGP (("Reusing fd %d.\n", sock));
}
passwd = passwd ? passwd : opt.http_passwd;
wwwauth = NULL;
- if (authenticate_h && user && passwd)
+ if (user && passwd)
{
- wwwauth = create_authorization_line (authenticate_h, user, passwd,
- command, ou->path);
+ if (!authenticate_h)
+ {
+ /* We have the username and the password, but haven't tried
+ any authorization yet. Let's see if the "Basic" method
+ works. If not, we'll come back here and construct a
+ proper authorization method with the right challenges.
+
+ If we didn't employ this kind of logic, every URL that
+ requires authorization would have to be processed twice,
+ which is very suboptimal and generates a bunch of false
+ "unauthorized" errors in the server log.
+
+ #### But this logic also has a serious problem when used
+ with stronger authentications: we *first* transmit the
+ username and the password in clear text, and *then*
+ attempt a stronger authentication scheme. That cannot be
+ right! We are only fortunate that almost everyone still
+ uses the `Basic' scheme anyway.
+
+ There should be an option to prevent this from happening,
+ for those who use strong authentication schemes and value
+ their passwords. */
+ wwwauth = basic_authentication_encode (user, passwd, "Authorization");
+ }
+ else
+ {
+ wwwauth = create_authorization_line (authenticate_h, user, passwd,
+ command, ou->path);
+ }
}
proxyauth = NULL;
FREE_MAYBE (proxyauth);
/* Send the request to server. */
- num_written = iwrite (sock, request, strlen (request));
+#ifdef HAVE_SSL
+ if (u->proto == URLHTTPS)
+ num_written = ssl_iwrite (ssl, request, strlen (request));
+ else
+#endif /* HAVE_SSL */
+ num_written = iwrite (sock, request, strlen (request));
+
if (num_written < 0)
{
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
/* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock);
-
+#ifdef HAVE_SSL
+ if (u->proto == URLHTTPS)
+ rbuf.ssl = ssl;
+ else
+ rbuf.ssl = NULL;
+#endif /* HAVE_SSL */
all_headers = NULL;
all_length = 0;
/* Header-fetching loop. */
what you accept." Oh boy. */
logputs (LOG_VERBOSE, "\n");
logputs (LOG_NOTQUIET, _("End of file while parsing headers.\n"));
- free (hdr);
+ xfree (hdr);
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
strerror (errno));
- free (hdr);
+ xfree (hdr);
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
hs->error = xstrdup (_("No data received"));
else
hs->error = xstrdup (_("Malformed status line"));
- free (hdr);
+ xfree (hdr);
break;
}
else if (!*error)
/* Exit on empty header. */
if (!*hdr)
{
- free (hdr);
+ xfree (hdr);
break;
}
}
}
done_header:
- free (hdr);
+ xfree (hdr);
}
logputs (LOG_VERBOSE, "\n");
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
+#ifndef HAVE_SSL
register_persistent (u->host, u->port, sock);
+#else
+ register_persistent (u->host, u->port, sock, ssl);
+#endif /* HAVE_SSL */
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
&& authenticate_h)
{
/* If we have tried it already, then there is not point
retrying it. */
+ failed:
logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- free (authenticate_h);
+ xfree (authenticate_h);
return AUTHFAILED;
}
else if (!known_authentication_scheme_p (authenticate_h))
{
- free (authenticate_h);
+ xfree (authenticate_h);
logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
return AUTHFAILED;
}
+ else if (BEGINS_WITH (authenticate_h, "Basic"))
+ {
+ /* The authentication scheme is basic, the one we try by
+ default, and it failed. There's no sense in trying
+ again. */
+ goto failed;
+ }
else
{
auth_tried_already = 1;
/* We do not need this anymore. */
if (authenticate_h)
{
- free (authenticate_h);
+ xfree (authenticate_h);
authenticate_h = NULL;
}
uerr_t
http_loop (struct urlinfo *u, char **newloc, int *dt)
{
- static int first_retrieval = 1;
-
int count;
int use_ts, got_head = 0; /* time-stamping info */
char *filename_plus_orig_suffix;
if (((suf = suffix (u->local)) != NULL)
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
- free (suf);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (suf);
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
/* Another harmless lie: */
return RETROK;
}
{
/* Increment the pass counter. */
++count;
- /* Wait before the retrieval (unless this is the very first
- retrieval).
- Check if we are retrying or not, wait accordingly - HEH */
- if (!first_retrieval && (opt.wait || (count && opt.waitretry)))
- {
- if (count)
- {
- if (count<opt.waitretry)
- sleep(count);
- else
- sleep(opt.waitretry);
- }
- else
- sleep (opt.wait);
- }
- if (first_retrieval)
- first_retrieval = 0;
+ sleep_between_retrievals (count);
/* Get the current time string. */
tms = time_str (NULL);
/* Print fetch message, if opt.verbose. */
#ifdef WINDOWS
ws_changetitle (hurl, 1);
#endif
- free (hurl);
+ xfree (hurl);
}
/* Default document type is empty. However, if spider mode is
printwhat (count, opt.ntry);
continue;
break;
- case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
+ case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
+ case SSLERRCTXCREATE:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ return err;
+ break;
+ case CONSSLERR:
+ /* Another fatal error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
+ FREEHSTAT (hstat);
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
FREEHSTAT (hstat);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return NEWLOCATION;
break;
case RETRFINISHED:
/* #### Ugly ugly ugly! */
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
- free (hurl);
+ xfree (hurl);
}
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
Server file no newer than local file `%s' -- not retrieving.\n\n"),
local_filename);
FREEHSTAT (hstat);
- free(filename_plus_orig_suffix);/*must precede every return!*/
+ xfree (filename_plus_orig_suffix); /*must precede every return!*/
return RETROK;
}
else if (tml >= tmr)
FREEHSTAT (hstat);
continue;
}
- if (!opt.dfp
- && (tmr != (time_t) (-1))
+ if ((tmr != (time_t) (-1))
&& !opt.spider
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) &&
((hstat.contlen == -1) ||
(hstat.len >= hstat.contlen && !opt.kill_longer)))))
{
- touch (u->local, tmr);
+ /* #### This code repeats in http.c and ftp.c. Move it to a
+ function! */
+ const char *fl = NULL;
+ if (opt.output_document)
+ {
+ if (opt.od_known_regular)
+ fl = opt.output_document;
+ }
+ else
+ fl = u->local;
+ if (fl)
+ touch (fl, tmr);
}
/* End of time-stamping section. */
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.res == 0) /* No read error */
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else /* the same, but not accepted */
break;
}
while (!opt.ntry || (count < opt.ntry));
- free(filename_plus_orig_suffix); /* must precede every return! */
+ xfree (filename_plus_orig_suffix); /* must precede every return! */
return TRYLIMEXC;
}
\f
"^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (a valid result of
strptime()) is considered a failure and 0 is returned. */
static int
-check_end (char *p)
+check_end (const char *p)
{
if (!p)
return 0;
++p;
if (!*p
|| (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
- || ((p[0] == '+' || p[1] == '-') && ISDIGIT (p[1])))
+ || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
return 1;
else
return 0;
sprintf (t1, "%s:%s", user, passwd);
t2 = (char *)alloca (1 + len2);
base64_encode (t1, t2, len1);
- res = (char *)malloc (len2 + 11 + strlen (header));
+ res = (char *)xmalloc (len2 + 11 + strlen (header));
sprintf (res, "%s: Basic %s\r\n", header, t2);
return res;
#endif /* USE_DIGEST */
-#define HACK_O_MATIC(line, string_constant) \
+#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (ISSPACE (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
static int
known_authentication_scheme_p (const char *au)
{
- return HACK_O_MATIC (au, "Basic")
- || HACK_O_MATIC (au, "Digest")
- || HACK_O_MATIC (au, "NTLM");
+ return BEGINS_WITH (au, "Basic")
+ || BEGINS_WITH (au, "Digest")
+ || BEGINS_WITH (au, "NTLM"); /* Non-zero when AU begins with one of
+ these three scheme names, as tested by BEGINS_WITH. */
}
-#undef HACK_O_MATIC
+#undef BEGINS_WITH
/* Create the HTTP authorization request header. When the
`WWW-Authenticate' response header is seen, according to the