/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of Wget.
return 1;
}
+/* Check whether the `Connection' header is set to "keep-alive".
+
+ HDR is compared case-insensitively, as a whole string, against
+ "Keep-Alive". ARG is treated as a pointer to an int flag, which is
+ set to 1 on a match and left untouched otherwise. The function
+ always returns 1.
+
+ NOTE(review): this is handed to header_process as the callback for
+ the "Connection" header, so HDR is presumably the header's value --
+ confirm. Because the comparison is exact, a value that carries
+ several tokens (e.g. "keep-alive, Upgrade") or trailing whitespace
+ would not set the flag. */
+static int
+http_process_connection (const char *hdr, void *arg)
+{
+ int *flag = (int *)arg; /* caller-supplied int that receives the result */
+ if (!strcasecmp (hdr, "Keep-Alive"))
+ *flag = 1;
+ return 1; /* returned unconditionally, match or no match */
+}
+\f
+/* Persistent connections (pc). Currently, we cache the most recently
+ used connection as persistent, provided that the HTTP server agrees
+ to make it such. The persistence data is stored in the variables
+ below. Ideally, it would be in a structure, and it should be
+ possible to cache an arbitrary fixed number of these connections.
+
+ I think the code is quite easy to extend in that direction. */
+
+/* Whether the persistent connection is active. Set to 1 by
+ register_persistent and reset to 0 by invalidate_persistent. */
+static int pc_active_p;
+
+/* Host and port of the last persistent connection. pc_last_host
+ holds the 4 bytes written by store_hostaddress (presumably an IPv4
+ address -- confirm against store_hostaddress); pc_last_port is the
+ port passed to register_persistent. */
+static unsigned char pc_last_host[4];
+static unsigned short pc_last_port;
+
+/* File descriptor of the last persistent connection. It is not
+ reset when the connection is invalidated; pc_active_p is what says
+ whether it may be reused. */
+static int pc_last_fd;
+
+/* Mark the persistent connection as invalid. This is used by the
+ CLOSE_* macros after they forcefully close a registered persistent
+ connection; register_persistent and persistent_available_p call it
+ the same way, right after closing pc_last_fd.
+
+ Only pc_active_p is cleared here. The descriptor itself is NOT
+ closed by this function -- that is the caller's job -- and
+ pc_last_fd keeps its old value, which is reported in the debug
+ message. */
+
+static void
+invalidate_persistent (void)
+{
+ pc_active_p = 0;
+ DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
+}
+
+/* Register FD, which should be a TCP/IP connection to HOST:PORT, as
+ persistent. This will enable someone to use the same connection
+ later. In the context of HTTP, this must be called only AFTER the
+ response has been received and the server has promised that the
+ connection will remain alive.
+
+ If a previous connection was persistent, it is closed.
+
+ If FD is the descriptor that is already registered, the function
+ returns at once and leaves the stored host and port as they are.
+ Otherwise HOST is converted by store_hostaddress into pc_last_host,
+ PORT and FD are remembered in pc_last_port and pc_last_fd, and
+ pc_active_p is set. */
+
+static void
+register_persistent (const char *host, unsigned short port, int fd)
+{
+ int success;
+
+ if (pc_active_p)
+ {
+ if (pc_last_fd == fd)
+ {
+ /* The connection FD is already registered. Nothing to
+ do. */
+ return;
+ }
+ else
+ {
+ /* The old persistent connection is still active; let's
+ close it first. This situation arises whenever a
+ persistent connection exists, but we then connect to a
+ different host, and try to register a persistent
+ connection to that one. */
+ CLOSE (pc_last_fd);
+ invalidate_persistent ();
+ }
+ }
+
+ /* This store_hostaddress call is not expected to fail, because it
+ has the results in the cache (presumably HOST was already resolved
+ when the connection was made -- confirm). The assert below is the
+ only check: if assertions are compiled out, a failure would go
+ unnoticed and FD would still be registered. */
+ success = store_hostaddress (pc_last_host, host);
+ assert (success);
+ pc_last_port = port;
+ pc_last_fd = fd;
+ pc_active_p = 1;
+ DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
+}
+
+/* Return non-zero if a persistent connection is available for
+ connecting to HOST:PORT.
+
+ The checks run in this order: a connection must be registered
+ (pc_active_p), PORT must equal pc_last_port, HOST must be
+ convertible by store_hostaddress, and the resulting 4 bytes must
+ equal pc_last_host. Only then is the socket itself probed with
+ test_socket_open.
+
+ Side effect: if everything matches but test_socket_open returns
+ zero, pc_last_fd is closed and the persistent connection is
+ invalidated before 0 is returned. */
+
+static int
+persistent_available_p (const char *host, unsigned short port)
+{
+ unsigned char this_host[4]; /* HOST in the same form as pc_last_host */
+ if (!pc_active_p)
+ return 0;
+ if (port != pc_last_port)
+ return 0;
+ if (!store_hostaddress (this_host, host))
+ return 0;
+ if (memcmp (pc_last_host, this_host, 4))
+ return 0;
+ if (!test_socket_open (pc_last_fd))
+ {
+ CLOSE (pc_last_fd);
+ invalidate_persistent ();
+ return 0;
+ }
+ return 1;
+}
+
+/* The idea behind these two CLOSE macros is to distinguish between
+ two cases: one when the job we've been doing is finished, and we
+ want to close the connection and leave, and two when something is
+ seriously wrong and we're closing the connection as part of
+ cleanup.
+
+ In case of keep_alive, CLOSE_FINISH should leave the connection
+ open, while CLOSE_INVALIDATE should still close it.
+
+ Note that the semantics of the flag `keep_alive' is "this
+ connection *will* be reused (the server has promised not to close
+ the connection once we're done)", while the semantics of
+ `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an
+ active, registered connection".
+
+ CLOSE_FINISH is not self-contained: `keep_alive' is not a macro
+ parameter, so a variable of that name must be in scope wherever the
+ macro is expanded. The FD argument can be evaluated twice, so it
+ should be free of side effects. */
+
+#define CLOSE_FINISH(fd) do { \
+ if (!keep_alive) /* not promised for reuse: really close */ \
+ { \
+ CLOSE (fd); \
+ if (pc_active_p && (fd) == pc_last_fd) /* we just closed the registered fd */ \
+ invalidate_persistent (); \
+ } \
+} while (0)
+
+#define CLOSE_INVALIDATE(fd) do { /* unconditional close, used on error paths */ \
+ CLOSE (fd); \
+ if (pc_active_p && (fd) == pc_last_fd) /* FD was the registered persistent fd */ \
+ invalidate_persistent (); /* so it must not be offered for reuse */ \
+} while (0) /* FD is evaluated twice; pass an expression without side effects */
+
\f
struct http_stat
{
char *authenticate_h;
char *proxyauth;
char *all_headers;
- char *host_port;
- int host_port_len;
+ char *port_maybe;
+ char *request_keep_alive;
int sock, hcount, num_written, all_length, remport, statcode;
long contlen, contrange;
struct urlinfo *ou;
int auth_tried_already;
struct rbuf rbuf;
+ /* Whether this connection will be kept alive after the HTTP request
+ is done. */
+ int keep_alive;
+
+ /* Flags that detect the two ways of specifying HTTP keep-alive
+ response. */
+ int http_keep_alive_1, http_keep_alive_2;
+
+ /* Whether keep-alive should be inhibited. */
+ int inhibit_keep_alive;
+
if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
know the local filename so we can save to it. */
authenticate_h = 0;
auth_tried_already = 0;
+ inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
+
again:
/* We need to come back here when the initial attempt to retrieve
without authorization header fails. */
+ keep_alive = 0;
+ http_keep_alive_1 = http_keep_alive_2 = 0;
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
ou = u;
/* First: establish the connection. */
- logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
- err = make_connection (&sock, u->host, u->port);
- switch (err)
+ if (inhibit_keep_alive
+ || !persistent_available_p (u->host, u->port))
{
- case HOSTERR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
- return HOSTERR;
- break;
- case CONSOCKERR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "socket: %s\n", strerror (errno));
- return CONSOCKERR;
- break;
- case CONREFUSED:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET,
- _("Connection to %s:%hu refused.\n"), u->host, u->port);
- CLOSE (sock);
- return CONREFUSED;
- case CONERROR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "connect: %s\n", strerror (errno));
- CLOSE (sock);
- return CONERROR;
- break;
- case NOCONERROR:
- /* Everything is fine! */
- logputs (LOG_VERBOSE, _("connected!\n"));
- break;
- default:
- abort ();
- break;
- } /* switch */
+ logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
+ err = make_connection (&sock, u->host, u->port);
+ switch (err)
+ {
+ case HOSTERR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
+ return HOSTERR;
+ break;
+ case CONSOCKERR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "socket: %s\n", strerror (errno));
+ return CONSOCKERR;
+ break;
+ case CONREFUSED:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET,
+ _("Connection to %s:%hu refused.\n"), u->host, u->port);
+ CLOSE (sock);
+ return CONREFUSED;
+ case CONERROR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "connect: %s\n", strerror (errno));
+ CLOSE (sock);
+ return CONERROR;
+ break;
+ case NOCONERROR:
+ /* Everything is fine! */
+ logputs (LOG_VERBOSE, _("connected!\n"));
+ break;
+ default:
+ abort ();
+ break;
+ }
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
+ /* #### pc_last_fd should be accessed through an accessor
+ function. */
+ sock = pc_last_fd;
+ DEBUGP (("Reusing fd %d.\n", sock));
+ }
if (u->proxy)
path = u->proxy->url;
if (hs->restval)
{
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
- /* #### Gag me! Some servers (e.g. WebSitePro) have been known
- to misinterpret the following `Range' format, and return the
- document as multipart/x-byte-ranges MIME type!
-
- #### TODO: Interpret MIME types, recognize bullshits similar
- the one described above, and deal with them! */
+ /* Gag me! Some servers (e.g. WebSitePro) have been known to
+ respond to the following `Range' format by generating a
+ multipart/x-byte-ranges MIME document! This MIME type was
+ present in an old draft of the byteranges specification.
+ HTTP/1.1 specifies a multipart/byte-ranges MIME type, but
+ only if multiple non-overlapping ranges are requested --
+ which Wget never does. */
sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
}
else
remhost = ou->host;
remport = ou->port;
- if (remport == 80)
+ /* String of the form :PORT. Used only for non-standard ports. */
+ port_maybe = NULL;
+ if (remport != 80)
{
- host_port = NULL;
- host_port_len = 0;
+ port_maybe = (char *)alloca (numdigit (remport) + 2);
+ sprintf (port_maybe, ":%d", remport);
}
+
+ if (!inhibit_keep_alive)
+ request_keep_alive = "Connection: Keep-Alive\r\n";
else
- {
- host_port = (char *)alloca (numdigit (remport) + 2);
- host_port_len = sprintf (host_port, ":%d", remport);
- }
+ request_keep_alive = NULL;
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
+ strlen (useragent)
- + strlen (remhost) + host_port_len
+ + strlen (remhost)
+ + (port_maybe ? strlen (port_maybe) : 0)
+ strlen (HTTP_ACCEPT)
+ + (request_keep_alive
+ ? strlen (request_keep_alive) : 0)
+ (referer ? strlen (referer) : 0)
+ (wwwauth ? strlen (wwwauth) : 0)
+ (proxyauth ? strlen (proxyauth) : 0)
User-Agent: %s\r\n\
Host: %s%s\r\n\
Accept: %s\r\n\
-%s%s%s%s%s%s\r\n",
+%s%s%s%s%s%s%s\r\n",
command, path, useragent, remhost,
- host_port ? host_port : "",
- HTTP_ACCEPT, referer ? referer : "",
+ port_maybe ? port_maybe : "",
+ HTTP_ACCEPT,
+ request_keep_alive ? request_keep_alive : "",
+ referer ? referer : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
num_written = iwrite (sock, request, strlen (request));
if (num_written < 0)
{
- logputs (LOG_VERBOSE, _("Failed writing HTTP request.\n"));
- CLOSE (sock);
+ logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return HEOF;
}
else if (status == HG_ERROR)
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return HERR;
}
goto done_header;
}
}
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive)
+ {
+ /* Check for the `Keep-Alive' header. */
+ if (!http_keep_alive_1)
+ {
+ if (header_process (hdr, "Keep-Alive", header_exists,
+ &http_keep_alive_1))
+ goto done_header;
+ }
+ /* Check for `Connection: Keep-Alive'. */
+ if (!http_keep_alive_2)
+ {
+ if (header_process (hdr, "Connection", http_process_connection,
+ &http_keep_alive_2))
+ goto done_header;
+ }
+ }
done_header:
free (hdr);
}
logputs (LOG_VERBOSE, "\n");
+ if (contlen != -1
+ && (http_keep_alive_1 || http_keep_alive_2))
+ {
+ assert (inhibit_keep_alive == 0);
+ keep_alive = 1;
+ }
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (u->host, u->port, sock);
+
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
&& authenticate_h)
{
FREE_MAYBE (type);
type = NULL;
FREEHSTAT (*hs);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
if (auth_tried_already)
{
/* If we have tried it already, then there is not point
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return RANGEERR;
}
_("Location: %s%s\n"),
hs->newloc ? hs->newloc : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- CLOSE (sock);
+ CLOSE_FINISH (sock);
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
return NEWLOCATION;
hs->res = 0;
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
return RETRFINISHED;
}
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
- CLOSE (sock);
+ CLOSE_FINISH (sock);
FREE_MAYBE (all_headers);
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
/* Get the contents of the document. */
hs->res = get_contents (sock, fp, &hs->len, hs->restval,
(contlen != -1 ? contlen : 0),
- &rbuf);
+ &rbuf, keep_alive);
hs->dltime = elapsed_time ();
{
/* Close or flush the file. We have to be careful to check for
hs->res = -2;
}
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
_wasn't_ specified last time, or the server contains files called
*.orig, -N will be back to not operating correctly with -k. */
{
- /* Would a single s[n]printf() call be faster? */
+ /* Would a single s[n]printf() call be faster? --dan
+
+ It wouldn't. sprintf() is horribly slow. At one point I
+ profiled Wget, and found that a measurable and
+ non-negligible amount of time was lost calling sprintf()
+ in url.c. Replacing sprintf with inline calls to
+ strcpy() and long_to_string() made a difference.
+ --hniksic */
strcpy(filename_plus_orig_suffix, u->local);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
tms, u->url, hstat.len, hstat.contlen, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
+ downloaded_increase (hstat.len);
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
tms, u->url, hstat.len, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
+ downloaded_increase (hstat.len);
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
"%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
- opt.downloaded += hstat.len;
+ downloaded_increase (hstat.len);
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
static int
known_authentication_scheme_p (const char *au)
{
- return HACK_O_MATIC (au, "Basic") || HACK_O_MATIC (au, "Digest");
+ return HACK_O_MATIC (au, "Basic")
+ || HACK_O_MATIC (au, "Digest")
+ || HACK_O_MATIC (au, "NTLM"); /* NOTE(review): NTLM is now reported as known, yet the header for it is built with basic_authentication_encode -- confirm this is intended */
}
#undef HACK_O_MATIC
if (!strncasecmp (au, "Basic", 5))
wwwauth = basic_authentication_encode (user, passwd, "Authorization");
+ if (!strncasecmp (au, "NTLM", 4))
+ wwwauth = basic_authentication_encode (user, passwd, "Authorization");
#ifdef USE_DIGEST
else if (!strncasecmp (au, "Digest", 6))
wwwauth = digest_authentication_encode (au, user, passwd, method, path);