/* HTTP support.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of Wget.
return 1;
}
+/* Header callback for `Connection'.  HDR is the header value and ARG
+   points to an int flag.  The flag is set to 1 when HDR is exactly
+   "Keep-Alive", compared without regard to case; any other value
+   leaves the flag as it was.  Always returns 1.  */
+static int
+http_process_connection (const char *hdr, void *arg)
+{
+  int *keep_alive_seen = (int *) arg;
+
+  if (strcasecmp (hdr, "Keep-Alive") == 0)
+    *keep_alive_seen = 1;
+
+  return 1;
+}
+\f
+/* Persistent connections (pc).
+
+   At most one connection is remembered at a time.  A non-zero
+   pc_last_port marks the record as in use, in which case pc_last_fd
+   is the descriptor of that connection; invalidate_persistent()
+   clears the port to mark the record empty.  */
+
+static unsigned char pc_last_host[4];	/* address from store_hostaddress() */
+static unsigned short pc_last_port;	/* 0 when nothing is registered */
+static int pc_last_fd;			/* meaningful only while registered */
+
+/* Mark the record empty.  The descriptor is NOT closed here; whoever
+   calls this is responsible for closing it.  */
+static void
+invalidate_persistent (void)
+{
+  pc_last_port = 0;
+}
+
+/* Remember FD as the persistent connection to HOST:PORT.
+
+   If HOST cannot be converted by store_hostaddress(), nothing is
+   changed.  If a different descriptor is still registered, it is
+   closed first: once the record is overwritten that descriptor would
+   no longer be reachable through it and would simply leak.  */
+static void
+register_persistent (const char *host, unsigned short port, int fd)
+{
+  unsigned char addr[4];
+
+  /* Resolve into a scratch buffer so that a failed lookup cannot
+     disturb the record of a connection that is still registered.  */
+  if (!store_hostaddress (addr, host))
+    return;
+
+  if (pc_last_port != 0 && pc_last_fd != fd)
+    {
+      CLOSE (pc_last_fd);
+      invalidate_persistent ();
+    }
+
+  memcpy (pc_last_host, addr, sizeof (pc_last_host));
+  pc_last_port = port;
+  pc_last_fd = fd;
+}
+
+/* Return non-zero if the registered connection can serve HOST:PORT:
+   a connection must be registered, the port and the converted host
+   address must both match, and test_socket_open() must accept the
+   descriptor.  When the last check fails the record is invalidated
+   (the descriptor is not closed here).  */
+static int
+persistent_available_p (const char *host, unsigned short port)
+{
+  unsigned char this_host[4];
+
+  /* An empty record (port 0) must never match, whatever PORT is.  */
+  if (pc_last_port == 0 || port != pc_last_port)
+    return 0;
+  if (!store_hostaddress (this_host, host))
+    return 0;
+  if (memcmp (pc_last_host, this_host, 4))
+    return 0;
+  if (!test_socket_open (pc_last_fd))
+    {
+      invalidate_persistent ();
+      return 0;
+    }
+  return 1;
+}
+
+/* The idea behind these two CLOSE macros is to distinguish between
+   two cases: one when the job we've been doing is finished, and we
+   want to close the connection and leave, and two when something is
+   seriously wrong and we're closing the connection as part of
+   cleanup.
+
+   In case of keep_alive, CLOSE_FINISH should leave the connection
+   open, while CLOSE_INVALIDATE should still close it.
+
+   Whenever either macro closes a descriptor, it also invalidates the
+   persistent record if that record refers to the same descriptor.
+   Comparing descriptors (rather than looking at `reused_connection')
+   also covers a connection that was registered during the current
+   request and is then closed on an error path; otherwise the record
+   would keep pointing at a closed descriptor.
+
+   The semantic difference between the flags `keep_alive' and
+   `reused_connection' is that keep_alive defines the state of HTTP:
+   whether the connection *will* be preservable.  reused_connection,
+   on the other hand, reflects the present: whether the *current*
+   connection is the result of preserving.
+
+   CLOSE_FINISH expects an int variable `keep_alive' to be in scope
+   where it is expanded.  FD is evaluated exactly once.  */
+
+#define CLOSE_FINISH(fd) do {					\
+  if (!keep_alive)						\
+    {								\
+      int pc_closing_fd = (fd);					\
+      CLOSE (pc_closing_fd);					\
+      if (pc_last_port != 0 && pc_closing_fd == pc_last_fd)	\
+	invalidate_persistent ();				\
+    }								\
+} while (0)
+
+#define CLOSE_INVALIDATE(fd) do {				\
+  int pc_closing_fd = (fd);					\
+  CLOSE (pc_closing_fd);					\
+  if (pc_last_port != 0 && pc_closing_fd == pc_last_fd)	\
+    invalidate_persistent ();					\
+} while (0)
+
\f
struct http_stat
{
FILE *fp;
int auth_tried_already;
struct rbuf rbuf;
+ int keep_alive, http_keep_alive_1, http_keep_alive_2;
+ int reused_connection;
- /* Let the others worry about local filename... */
if (!(*dt & HEAD_ONLY))
+ /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
+ know the local filename so we can save to it. */
assert (u->local != NULL);
authenticate_h = 0;
again:
/* We need to come back here when the initial attempt to retrieve
without authorization header fails. */
+ keep_alive = 0;
+ http_keep_alive_1 = http_keep_alive_2 = 0;
+ reused_connection = 0;
- /* Initialize certain elements of struct hstat. */
+ /* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
hs->res = -1;
ou = u;
/* First: establish the connection. */
- logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
- err = make_connection (&sock, u->host, u->port);
- switch (err)
+ if (u->proxy || !persistent_available_p (u->host, u->port))
{
- case HOSTERR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
- return HOSTERR;
- break;
- case CONSOCKERR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "socket: %s\n", strerror (errno));
- return CONSOCKERR;
- break;
- case CONREFUSED:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET,
- _("Connection to %s:%hu refused.\n"), u->host, u->port);
- CLOSE (sock);
- return CONREFUSED;
- case CONERROR:
- logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, "connect: %s\n", strerror (errno));
- CLOSE (sock);
- return CONERROR;
- break;
- case NOCONERROR:
- /* Everything is fine! */
- logputs (LOG_VERBOSE, _("connected!\n"));
- break;
- default:
- abort ();
- break;
- } /* switch */
+ logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
+ err = make_connection (&sock, u->host, u->port);
+ switch (err)
+ {
+ case HOSTERR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
+ return HOSTERR;
+ break;
+ case CONSOCKERR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "socket: %s\n", strerror (errno));
+ return CONSOCKERR;
+ break;
+ case CONREFUSED:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET,
+ _("Connection to %s:%hu refused.\n"), u->host, u->port);
+ CLOSE (sock);
+ return CONREFUSED;
+ case CONERROR:
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "connect: %s\n", strerror (errno));
+ CLOSE (sock);
+ return CONERROR;
+ break;
+ case NOCONERROR:
+ /* Everything is fine! */
+ logputs (LOG_VERBOSE, _("connected!\n"));
+ break;
+ default:
+ abort ();
+ break;
+ }
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
+ sock = pc_last_fd;
+ reused_connection = 1;
+ }
if (u->proxy)
path = u->proxy->url;
User-Agent: %s\r\n\
Host: %s%s\r\n\
Accept: %s\r\n\
+Connection: Keep-Alive\r\n\
%s%s%s%s%s%s\r\n",
command, path, useragent, remhost,
host_port ? host_port : "",
num_written = iwrite (sock, request, strlen (request));
if (num_written < 0)
{
- logputs (LOG_VERBOSE, _("Failed writing HTTP request.\n"));
- CLOSE (sock);
+ logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return HEOF;
}
else if (status == HG_ERROR)
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return HERR;
}
goto done_header;
}
}
+ /* Check for the `Keep-Alive' header. */
+ if (!http_keep_alive_1)
+ {
+ if (header_process (hdr, "Keep-Alive", header_exists,
+ &http_keep_alive_1))
+ goto done_header;
+ }
+ /* Check for `Connection: Keep-Alive'. */
+ if (!http_keep_alive_2)
+ {
+ if (header_process (hdr, "Connection", http_process_connection,
+ &http_keep_alive_2))
+ goto done_header;
+ }
done_header:
free (hdr);
}
logputs (LOG_VERBOSE, "\n");
+ if (contlen != -1
+ && (http_keep_alive_1 || http_keep_alive_2))
+ keep_alive = 1;
+ if (keep_alive && !reused_connection)
+ register_persistent (u->host, u->port, sock);
+
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
&& authenticate_h)
{
FREE_MAYBE (type);
type = NULL;
FREEHSTAT (*hs);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
if (auth_tried_already)
{
/* If we have tried it already, then there is not point
/* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
+ if (opt.html_extension && (*dt & TEXTHTML))
+ /* -E / --html-extension / html_extension = on was specified, and this is a
+ text/html file. If some case-insensitive variation on ".htm[l]" isn't
+ already the file's suffix, tack on ".html". */
+ {
+ char* last_period_in_local_filename = strrchr(u->local, '.');
+
+ if (last_period_in_local_filename == NULL ||
+ !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
+ strcasecmp(last_period_in_local_filename, ".html") == EQ))
+ {
+ size_t local_filename_len = strlen(u->local);
+
+ u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
+ strcpy(u->local + local_filename_len, ".html");
+
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+ }
+
if (contrange == -1)
hs->restval = 0;
else if (contrange != hs->restval ||
FREE_MAYBE (type);
FREE_MAYBE (hs->newloc);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_INVALIDATE (sock);
return RANGEERR;
}
_("Location: %s%s\n"),
hs->newloc ? hs->newloc : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- CLOSE (sock);
+ CLOSE_FINISH (sock);
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
return NEWLOCATION;
hs->res = 0;
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
return RETRFINISHED;
}
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
- CLOSE (sock);
+ CLOSE_FINISH (sock);
FREE_MAYBE (all_headers);
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
/* Get the contents of the document. */
hs->res = get_contents (sock, fp, &hs->len, hs->restval,
(contlen != -1 ? contlen : 0),
- &rbuf);
+ &rbuf, keep_alive);
hs->dltime = elapsed_time ();
- if (!opt.dfp)
- fclose (fp);
- else
- fflush (fp);
+ {
+ /* Close or flush the file. We have to be careful to check for
+ error here. Checking the result of fwrite() is not enough --
+ errors could go unnoticed! */
+ int flush_res;
+ if (!opt.dfp)
+ flush_res = fclose (fp);
+ else
+ flush_res = fflush (fp);
+ if (flush_res == EOF)
+ hs->res = -2;
+ }
FREE_MAYBE (all_headers);
- CLOSE (sock);
+ CLOSE_FINISH (sock);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
static int first_retrieval = 1;
int count;
- int local_dot_orig_file_exists = FALSE;
int use_ts, got_head = 0; /* time-stamping info */
+ char *filename_plus_orig_suffix;
+ char *local_filename = NULL;
char *tms, *suf, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
+ size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
else
locf = opt.output_document;
+ /* Yuck. Multiple returns suck. We need to remember to free() the space we
+ xmalloc() here before EACH return. This is one reason it's better to set
+ flags that influence flow control and then return once at the end. */
+ filename_len = strlen(u->local);
+ filename_plus_orig_suffix = xmalloc(filename_len + sizeof(".orig"));
+
if (opt.noclobber && file_exists_p (u->local))
{
/* If opt.noclobber is turned on and file already exists, do not
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
free (suf);
+ free(filename_plus_orig_suffix); /* must precede every return! */
/* Another harmless lie: */
return RETROK;
}
use_ts = 0;
if (opt.timestamping)
{
- boolean local_file_exists = FALSE;
+ boolean local_dot_orig_file_exists = FALSE;
if (opt.backup_converted)
/* If -K is specified, we'll act on the assumption that it was specified
_wasn't_ specified last time, or the server contains files called
*.orig, -N will be back to not operating correctly with -k. */
{
- size_t filename_len = strlen(u->local);
- char* filename_plus_orig_suffix = malloc(filename_len +
- sizeof(".orig"));
-
- /* Would a single s[n]printf() call be faster? */
+ /* Would a single s[n]printf() call be faster? --dan
+
+ It wouldn't. sprintf() is horribly slow. At one point I
+ profiled Wget, and found that a measurable and
+ non-negligible amount of time was lost calling sprintf()
+ in url.c. Replacing sprintf with inline calls to
+ strcpy() and long_to_string() made a difference.
+ --hniksic */
strcpy(filename_plus_orig_suffix, u->local);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
/* Try to stat() the .orig file. */
if (stat(filename_plus_orig_suffix, &st) == 0)
{
- local_file_exists = TRUE;
local_dot_orig_file_exists = TRUE;
+ local_filename = filename_plus_orig_suffix;
}
-
- free(filename_plus_orig_suffix);
}
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
if (stat (u->local, &st) == 0)
- local_file_exists = TRUE;
+ local_filename = u->local;
- if (local_file_exists)
+ if (local_filename != NULL)
/* There was a local file, so we'll check later to see if the version
the server has is the same version we already have, allowing us to
skip a download. */
/* Try fetching the document, or at least its head. :-) */
err = gethttp (u, &hstat, dt);
+
+ /* It's unfortunate that wget determines the local filename before finding
+ out the Content-Type of the file. Barring a major restructuring of the
+ code, we need to re-set locf here, since gethttp() may have xrealloc()d
+ u->local to tack on ".html". */
+ if (!opt.output_document)
+ locf = u->local;
+ else
+ locf = opt.output_document;
+
/* Time? */
tms = time_str (NULL);
/* Get the new location (with or without the redirection). */
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return NEWLOCATION;
break;
case RETRFINISHED:
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
if (tml >= tmr &&
(hstat.contlen == -1 || local_size == hstat.contlen))
{
- if (local_dot_orig_file_exists)
- /* We can't collapse this down into just one logprintf()
- call with a variable set to u->local or the .orig
- filename because we have to malloc() space for the
- latter, and because there are multiple returns above (a
- coding style no-no by many measures, for reasons such as
- this) we'd have to remember to free() the string at each
- one to avoid a memory leak. */
- logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
- u->local);
- else
- logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
+ logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s' -- not retrieving.\n\n"),
+ local_filename);
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix);/*must precede every return!*/
return RETROK;
}
else if (tml >= tmr)
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
tms, u->url, hstat.len, hstat.contlen, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.res == 0) /* No read error */
tms, u->url, hstat.len, locf, count);
}
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
"%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
- opt.downloaded += hstat.len;
- downloaded_file(ADD_FILE, locf);
+ downloaded_increase (hstat.len);
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else /* the same, but not accepted */
break;
}
while (!opt.ntry || (count < opt.ntry));
+ free(filename_plus_orig_suffix); /* must precede every return! */
return TRYLIMEXC;
}
\f
static int
known_authentication_scheme_p (const char *au)
{
- return HACK_O_MATIC (au, "Basic") || HACK_O_MATIC (au, "Digest");
+ return HACK_O_MATIC (au, "Basic")
+ || HACK_O_MATIC (au, "Digest")
+ || HACK_O_MATIC (au, "NTLM"); /* NOTE(review): the encoding side of
+    this patch answers an "NTLM" challenge by calling
+    basic_authentication_encode(); no NTLM-specific encoder is visible
+    in this change.  Confirm that is the intended behavior before
+    advertising NTLM as a known scheme.  HACK_O_MATIC is defined
+    outside this hunk.  */
}
#undef HACK_O_MATIC
if (!strncasecmp (au, "Basic", 5))
wwwauth = basic_authentication_encode (user, passwd, "Authorization");
+ if (!strncasecmp (au, "NTLM", 4))
+ wwwauth = basic_authentication_encode (user, passwd, "Authorization");
#ifdef USE_DIGEST
else if (!strncasecmp (au, "Digest", 6))
wwwauth = digest_authentication_encode (au, user, passwd, method, path);