X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=11af939a170fc20d073795988b13ef717d0a1b5b;hp=a882c2d1acd7a8dd1630e8a721c00a486efe66a9;hb=1b28d66fcb583791fb1f92199a29e1063cdd6ed8;hpb=823228830e57766ebabe529b75765816cb2507dc diff --git a/src/http.c b/src/http.c index a882c2d1..11af939a 100644 --- a/src/http.c +++ b/src/http.c @@ -1,11 +1,12 @@ /* HTTP support. - Copyright (C) 1996-2005 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +15,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +along with Wget. If not, see . In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -41,6 +41,7 @@ so, delete this exception statement from your version. */ #include #include "wget.h" +#include "hash.h" #include "http.h" #include "utils.h" #include "url.h" @@ -59,9 +60,22 @@ so, delete this exception statement from your version. */ # include "gen-md5.h" #endif #include "convert.h" +#include "spider.h" + +#ifdef TESTING +#include "test.h" +#endif extern char *version_string; +/* Forward decls. */ +static char *create_authorization_line (const char *, const char *, + const char *, const char *, + const char *, bool *); +static char *basic_authentication_encode (const char *, const char *); +static bool known_authentication_scheme_p (const char *, const char *); +static void load_cookies (void); + #ifndef MIN # define MIN(x, y) ((x) > (y) ? (y) : (x)) #endif @@ -369,6 +383,50 @@ request_free (struct request *req) xfree (req); } +static struct hash_table *basic_authed_hosts; + +/* Find out if this host has issued a Basic challenge yet; if so, give + * it the username, password. A temporary measure until we can get + * proper authentication in place. */ + +static int +maybe_send_basic_creds (const char *hostname, const char *user, + const char *passwd, struct request *req) +{ + int did_challenge = 0; + + if (basic_authed_hosts + && hash_table_contains(basic_authed_hosts, hostname)) + { + DEBUGP(("Found `%s' in basic_authed_hosts.\n", hostname)); + request_set_header (req, "Authorization", + basic_authentication_encode (user, passwd), + rel_value); + did_challenge = 1; + } + else + { + DEBUGP(("Host `%s' has not issued a general basic challenge.\n", + hostname)); + } + return did_challenge; +} + +static void +register_basic_auth_host (const char *hostname) +{ + if (!basic_authed_hosts) + { + basic_authed_hosts = make_nocase_string_hash_table (1); + } + if (!hash_table_contains(basic_authed_hosts, hostname)) + { + hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL); + DEBUGP(("Inserted `%s' into basic_authed_hosts\n", hostname)); + } +} + + /* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ @@ -733,6 +791,20 @@ resp_free (struct response *resp) xfree (resp); } +/* Print a single line of response, the characters [b, e). We tried + getting away with + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b); + but that failed to escape the non-printable characters and, in fact, + caused crashes in UTF-8 locales. */ + +static void +print_response_line(const char *prefix, const char *b, const char *e) +{ + char *copy; + BOUNDED_TO_ALLOCA(b, e, copy); + logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint(copy)); +} + /* Print the server response, line by line, omitting the trailing CRLF from individual header lines, and prefixed with PREFIX. */ @@ -751,9 +823,7 @@ print_server_response (const struct response *resp, const char *prefix) --e; if (b < e && e[-1] == '\r') --e; - /* This is safe even on printfs with broken handling of "%.s" - because resp->headers ends with \0. */ - logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + print_response_line(prefix, b, e); } } @@ -850,6 +920,140 @@ skip_short_body (int fd, wgint contlen) DEBUGP (("] done.\n")); return true; } + +/* Extract a parameter from the string (typically an HTTP header) at + **SOURCE and advance SOURCE to the next parameter. Return false + when there are no more parameters to extract. The name of the + parameter is returned in NAME, and the value in VALUE. If the + parameter has no value, the token's value is zeroed out. + + For example, if *SOURCE points to the string "attachment; + filename=\"foo bar\"", the first call to this function will return + the token named "attachment" and no value, and the second call will + return the token named "filename" and value "foo bar". The third + call will return false, indicating no more valid tokens. */ + +bool +extract_param (const char **source, param_token *name, param_token *value, + char separator) +{ + const char *p = *source; + + while (ISSPACE (*p)) ++p; + if (!*p) + { + *source = p; + return false; /* no error; nothing more to extract */ + } + + /* Extract name. */ + name->b = p; + while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p; + name->e = p; + if (name->b == name->e) + return false; /* empty name: error */ + while (ISSPACE (*p)) ++p; + if (*p == separator || !*p) /* no value */ + { + xzero (*value); + if (*p == separator) ++p; + *source = p; + return true; + } + if (*p != '=') + return false; /* error */ + + /* *p is '=', extract value */ + ++p; + while (ISSPACE (*p)) ++p; + if (*p == '"') /* quoted */ + { + value->b = ++p; + while (*p && *p != '"') ++p; + if (!*p) + return false; + value->e = p++; + /* Currently at closing quote; find the end of param. */ + while (ISSPACE (*p)) ++p; + while (*p && *p != separator) ++p; + if (*p == separator) + ++p; + else if (*p) + /* garbage after closed quote, e.g. foo="bar"baz */ + return false; + } + else /* unquoted */ + { + value->b = p; + while (*p && *p != separator) ++p; + value->e = p; + while (value->e != value->b && ISSPACE (value->e[-1])) + --value->e; + if (*p == separator) ++p; + } + *source = p; + return true; +} + +#undef MAX +#define MAX(p, q) ((p) > (q) ? (p) : (q)) + +/* Parse the contents of the `Content-Disposition' header, extracting + the information useful to Wget. Content-Disposition is a header + borrowed from MIME; when used in HTTP, it typically serves for + specifying the desired file name of the resource. For example: + + Content-Disposition: attachment; filename="flora.jpg" + + Wget will skip the tokens it doesn't care about, such as + "attachment" in the previous example; it will also skip other + unrecognized params. If the header is syntactically correct and + contains a file name, a copy of the file name is stored in + *filename and true is returned. Otherwise, the function returns + false. + + The file name is stripped of directory components and must not be + empty. */ + +static bool +parse_content_disposition (const char *hdr, char **filename) +{ + param_token name, value; + while (extract_param (&hdr, &name, &value, ';')) + if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL) + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + /* Start with the directory prefix, if specified. */ + if (opt.dir_prefix) + { + int prefix_length = strlen (opt.dir_prefix); + bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/'); + int total_length; + + if (add_slash) + ++prefix_length; + total_length = prefix_length + (value.e - value.b); + *filename = xmalloc (total_length + 1); + strcpy (*filename, opt.dir_prefix); + if (add_slash) + (*filename)[prefix_length - 1] = '/'; + memcpy (*filename + prefix_length, value.b, (value.e - value.b)); + (*filename)[total_length] = '\0'; + } + else + *filename = strdupdelim (value.b, value.e); + return true; + } + return false; +} /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -1109,13 +1313,6 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } -static char *create_authorization_line (const char *, const char *, - const char *, const char *, - const char *, bool *); -static char *basic_authentication_encode (const char *, const char *); -static bool known_authentication_scheme_p (const char *, const char *); -static void load_cookies (void); - #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (ISSPACE (line[sizeof (string_constant) - 1]) \ @@ -1162,10 +1359,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) int sock = -1; int flags; - /* Set to 1 when the authorization has failed permanently and should + /* Set to 1 when the authorization has already been sent and should not be tried again. */ bool auth_finished = false; + /* Set to 1 when just globally-set Basic authorization has been sent; + * should prevent further Basic negotiations, but not other + * mechanisms. */ + bool basic_auth_finished = false; + /* Whether NTLM authentication is used for this request. */ bool ntlm_seen = false; @@ -1200,8 +1402,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) bool host_lookup_failed = false; - DEBUGP(("in gethttp 1\n")); - #ifdef HAVE_SSL if (u->scheme == SCHEME_HTTPS) { @@ -1217,9 +1417,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } #endif /* HAVE_SSL */ - DEBUGP(("in gethttp 2\n")); - DEBUGP(("in gethttp 3\n")); - /* Initialize certain elements of struct http_stat. */ hs->len = 0; hs->contlen = -1; @@ -1276,31 +1473,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) user = user ? user : (opt.http_user ? opt.http_user : opt.user); passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - if (user && passwd) + if (user && passwd + && !u->user) /* We only do "site-wide" authentication with "global" + user/password values; URL user/password info overrides. */ { - /* We have the username and the password, but haven't tried - any authorization yet. Let's see if the "Basic" method - works. If not, we'll come back here and construct a - proper authorization method with the right challenges. - - If we didn't employ this kind of logic, every URL that - requires authorization would have to be processed twice, - which is very suboptimal and generates a bunch of false - "unauthorized" errors in the server log. - - #### But this logic also has a serious problem when used - with stronger authentications: we *first* transmit the - username and the password in clear text, and *then* attempt a - stronger authentication scheme. That cannot be right! We - are only fortunate that almost everyone still uses the - `Basic' scheme anyway. - - There should be an option to prevent this from happening, for - those who use strong authentication schemes and value their - passwords. */ - request_set_header (req, "Authorization", - basic_authentication_encode (user, passwd), - rel_value); + /* If this is a host for which we've already received a Basic + * challenge, we'll go ahead and send Basic authentication creds. */ + basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req); } proxyauth = NULL; @@ -1443,19 +1622,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) only hurts us. */ request_remove_header (req, "Authorization"); } - } - - if (sock < 0) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) + else if (host_lookup_failed) { request_free (req); + logprintf(LOG_NOTQUIET, + _("%s: unable to resolve host address `%s'\n"), + exec_name, relevant->host); return HOSTERR; } + } + if (sock < 0) + { sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) { @@ -1609,42 +1787,48 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) print_server_response (resp, " "); } - DEBUGP(("in gethttp 4\n")); - /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ - if (!hs->local_file) + if (!hs->local_file) { - if (resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))) - /* Honor Content-Disposition. */ - { - hs->local_file = xstrdup (hdrval); - } - else - /* Choose filename according to URL name. */ + /* Honor Content-Disposition whether possible. */ + if (!opt.content_disposition + || !resp_header_copy (resp, "Content-Disposition", + hdrval, sizeof (hdrval)) + || !parse_content_disposition (hdrval, &hs->local_file)) { + /* The Content-Disposition header is missing or broken. + * Choose unique file name according to given URL. */ hs->local_file = url_file_name (u); } } - DEBUGP(("in gethttp 5\n")); - /* TODO: perform this check only once. */ - if (opt.noclobber && file_exists_p (hs->local_file)) + if (file_exists_p (hs->local_file)) { - /* If opt.noclobber is turned on and file already exists, do not - retrieve the file */ - logprintf (LOG_VERBOSE, _("\ + if (opt.noclobber) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file */ + logprintf (LOG_VERBOSE, _("\ File `%s' already there; not retrieving.\n\n"), hs->local_file); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (hs->local_file)) + *dt |= TEXTHTML; - return RETROK; + return RETRUNNEEDED; + } + else if (!ALLOW_CLOBBER) + { + char *unique = unique_name (hs->local_file, true); + if (unique != hs->local_file) + xfree (hs->local_file); + hs->local_file = unique; + } } /* Support timestamping */ @@ -1682,7 +1866,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); /* Try to stat() the .orig file. */ if (stat (filename_plus_orig_suffix, &st) == 0) { - local_dot_orig_file_exists = 1; + local_dot_orig_file_exists = true; local_filename = filename_plus_orig_suffix; } } @@ -1715,12 +1899,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); errno = 0; parsed = str_to_wgint (hdrval, NULL, 10); if (parsed == WGINT_MAX && errno == ERANGE) - /* Out of range. - #### If Content-Length is out of range, it most likely - means that the file is larger than 2G and that we're - compiled without LFS. In that case we should probably - refuse to even attempt to download the file. */ - contlen = -1; + { + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; + } + else if (parsed < 0) + { + /* Negative Content-Length; nonsensical, so we can't + assume any information about the content to receive. */ + contlen = -1; + } else contlen = parsed; } @@ -1768,16 +1960,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); } if (!www_authenticate) - /* If the authentication header is missing or - unrecognized, there's no sense in retrying. */ - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - else if (BEGINS_WITH (www_authenticate, "Basic")) - /* If the authentication scheme is "Basic", which we send - by default, there's no sense in retrying either. (This - should be changed when we stop sending "Basic" data by - default.) */ - ; - else + { + /* If the authentication header is missing or + unrecognized, there's no sense in retrying. */ + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else if (!basic_auth_finished + || !BEGINS_WITH (www_authenticate, "Basic")) { char *pth; pth = url_full_path (u); @@ -1790,9 +1979,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); rel_value); if (BEGINS_WITH (www_authenticate, "NTLM")) ntlm_seen = true; + else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) + { + /* Need to register this host as using basic auth, + * so we automatically send creds next time. */ + register_basic_auth_host (u->host); + } xfree (pth); goto retry_with_auth; } + else + { + /* We already did Basic auth, and it failed. Gotta + * give up. */ + } } logputs (LOG_NOTQUIET, _("Authorization failed.\n")); request_free (req); @@ -1890,7 +2090,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || - 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) + 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; @@ -1914,7 +2114,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); strcpy(hs->local_file + local_filename_len, ".html"); /* If clobbering is not allowed and the file, as named, exists, tack on ".NUMBER.html" instead. */ - if (!ALLOW_CLOBBER) + if (!ALLOW_CLOBBER && file_exists_p (hs->local_file)) { int ext_num = 1; do @@ -2049,6 +2249,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); else fp = output_stream; + /* Print fetch message, if opt.verbose. */ + if (opt.verbose) + { + logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), + HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); + } + /* This confuses the timestamping code that checks for file size. #### The timestamping code should be smarter about file size. */ if (opt.save_headers && hs->restval == 0) @@ -2097,16 +2304,16 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, int *dt, struct url *proxy) { int count; - bool got_head = false; /* used for time-stamping */ + bool got_head = false; /* used for time-stamping and filename detection */ + bool time_came_from_head = false; + bool got_name = false; char *tms; const char *tmrate; - uerr_t err; + uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ - wgint local_size = 0; /* the size of the local file */ struct http_stat hstat; /* HTTP status */ - struct_stat st; - - DEBUGP(("in http_loop\n")); + struct_stat st; + bool send_head_first = true; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2133,7 +2340,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, hstat.referer = referer; if (opt.output_document) - hstat.local_file = xstrdup (opt.output_document); + { + hstat.local_file = xstrdup (opt.output_document); + got_name = true; + } /* Reset the counter. */ count = 0; @@ -2141,44 +2351,49 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Reset the document type. */ *dt = 0; + /* Skip preliminary HEAD request if we're not in spider mode AND + * if -O was given or HTTP Content-Disposition support is disabled. */ + if (!opt.spider + && (got_name || !opt.content_disposition)) + send_head_first = false; + + /* Send preliminary HEAD request if -N is given and we have an existing + * destination file. */ + if (opt.timestamping + && !opt.content_disposition + && file_exists_p (url_file_name (u))) + send_head_first = true; + /* THE loop */ do { - DEBUGP(("in http_loop LOOP\n")); - /* Increment the pass counter. */ ++count; sleep_between_retrievals (count); /* Get the current time string. */ - tms = time_str (NULL); + tms = datetime_str (time (NULL)); + if (opt.spider && !got_head) + logprintf (LOG_VERBOSE, _("\ +Spider mode enabled. Check if remote file exists.\n")); + /* Print fetch message, if opt.verbose. */ if (opt.verbose) { - char *hurl = url_string (u, true); - logprintf (LOG_VERBOSE, "--%s-- %s\n", - tms, hurl); + char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD); if (count > 1) { char tmp[256]; sprintf (tmp, _("(try:%2d)"), count); - logprintf (LOG_VERBOSE, " %s", tmp); + logprintf (LOG_NOTQUIET, "--%s-- %s %s\n", + tms, tmp, hurl); } else { - logprintf (LOG_VERBOSE, " "); - } - - if (hstat.local_file) - { - logprintf (LOG_VERBOSE, " => `%s'\n", - HYPHENP (hstat.local_file) ? "STDOUT" : hstat.local_file); - } - else - { - logprintf (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, "--%s-- %s\n", + tms, hurl); } #ifdef WINDOWS @@ -2190,13 +2405,14 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (opt.spider || (opt.timestamping && !got_head)) + if (send_head_first && !got_head) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; /* Decide whether or not to restart. */ if (opt.always_rest + && got_name && stat (hstat.local_file, &st) == 0 && S_ISREG (st.st_mode)) /* When -c is used, continue from on-disk size. (Can't use @@ -2216,8 +2432,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, we require a fresh get. b) caching is explicitly inhibited. */ if ((proxy && count > 1) /* a */ - || !opt.allow_cache /* b */ - ) + || !opt.allow_cache) /* b */ *dt |= SEND_NOCACHE; else *dt &= ~SEND_NOCACHE; @@ -2226,12 +2441,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, err = gethttp (u, &hstat, dt, proxy); /* Time? */ - tms = time_str (NULL); + tms = datetime_str (time (NULL)); /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); - + switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: @@ -2240,26 +2455,23 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ - /* free_hstat (&hstat); */ printwhat (count, opt.ntry); continue; - case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLINITFAILED: case CONTNOTSUPPORTED: - /* Fatal errors just return from the function. */ - free_hstat (&hstat); - return err; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), hstat.local_file, strerror (errno)); - free_hstat (&hstat); - return err; + case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: + case SSLINITFAILED: case CONTNOTSUPPORTED: + /* Fatal errors just return from the function. */ + ret = err; + goto exit; case CONSSLERR: /* Another fatal error. */ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); - free_hstat (&hstat); - return err; + ret = err; + goto exit; case NEWLOCATION: /* Return the new location to the caller. */ if (!*newloc) @@ -2267,15 +2479,17 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, logprintf (LOG_NOTQUIET, _("ERROR: Redirection (%d) without location.\n"), hstat.statcode); - free_hstat (&hstat); - return WRONGCODE; + ret = WRONGCODE; } - free_hstat (&hstat); - return NEWLOCATION; + else + { + ret = NEWLOCATION; + } + goto exit; case RETRUNNEEDED: /* The file was already fully retrieved. */ - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; case RETRFINISHED: /* Deal with you later. */ break; @@ -2286,23 +2500,48 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, if (!(*dt & RETROKF)) { + char *hurl = NULL; if (!opt.verbose) { /* #### Ugly ugly ugly! */ - char *hurl = url_string (u, true); + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); - xfree (hurl); } - logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, escnonprint (hstat.error)); + + /* Fall back to GET if HEAD fails with a 500 or 501 error code. */ + if (*dt & HEAD_ONLY + && (hstat.statcode == 500 || hstat.statcode == 501)) + { + got_head = true; + continue; + } + /* Maybe we should always keep track of broken links, not just in + * spider mode. */ + else if (opt.spider) + { + /* #### Again: ugly ugly ugly! */ + if (!hurl) + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); + nonexisting_url (hurl); + logprintf (LOG_NOTQUIET, _("\ +Remote file does not exist -- broken link!!!\n")); + } + else + { + logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), + tms, hstat.statcode, escnonprint (hstat.error)); + } logputs (LOG_VERBOSE, "\n"); - free_hstat (&hstat); - return WRONGCODE; + ret = WRONGCODE; + xfree_null (hurl); + goto exit; } /* Did we get the time-stamp? */ - if (!got_head) + if (send_head_first && !got_head) { + bool restart_loop = false; + if (opt.timestamping && !hstat.remote_time) { logputs (LOG_NOTQUIET, _("\ @@ -2315,52 +2554,98 @@ Last-modified header missing -- time-stamps turned off.\n")); if (tmr == (time_t) (-1)) logputs (LOG_VERBOSE, _("\ Last-modified header invalid -- time-stamp ignored.\n")); + if (*dt & HEAD_ONLY) + time_came_from_head = true; } - } - - /* The time-stamping section. */ - if (opt.timestamping && !got_head) - { - got_head = true; /* no more time-stamping */ - *dt &= ~HEAD_ONLY; - count = 0; /* the retrieve count for HEAD is reset */ - - if (hstat.remote_time && tmr != (time_t) (-1)) + + /* The time-stamping section. */ + if (opt.timestamping) { - /* Now time-stamping can be used validly. Time-stamping - means that if the sizes of the local and remote file - match, and local file is newer than the remote file, - it will not be retrieved. Otherwise, the normal - download procedure is resumed. */ - if (hstat.orig_file_tstamp >= tmr) + if (hstat.orig_file_name) /* Perform the following checks only + if the file we're supposed to + download already exists. */ { - if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen) + if (hstat.remote_time && + tmr != (time_t) (-1)) { - logprintf (LOG_VERBOSE, _("\ + /* Now time-stamping can be used validly. Time-stamping + means that if the sizes of the local and remote file + match, and local file is newer than the remote file, + it will not be retrieved. Otherwise, the normal + download procedure is resumed. */ + if (hstat.orig_file_tstamp >= tmr) + { + if (hstat.contlen == -1 + || hstat.orig_file_size == hstat.contlen) + { + logprintf (LOG_VERBOSE, _("\ Server file no newer than local file `%s' -- not retrieving.\n\n"), - hstat.orig_file_name); - free_hstat (&hstat); - return RETROK; + hstat.orig_file_name); + ret = RETROK; + goto exit; + } + else + { + logprintf (LOG_VERBOSE, _("\ +The sizes do not match (local %s) -- retrieving.\n"), + number_to_static_string (hstat.orig_file_size)); + } + } + else + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); + + logputs (LOG_VERBOSE, "\n"); } - else + } + + /* free_hstat (&hstat); */ + hstat.timestamp_checked = true; + restart_loop = true; + } + + if (opt.spider) + { + if (opt.recursive) + { + if (*dt & TEXTHTML) + { + logputs (LOG_VERBOSE, _("\ +Remote file exists and could contain links to other resources -- retrieving.\n\n")); + restart_loop = true; + } + else { logprintf (LOG_VERBOSE, _("\ -The sizes do not match (local %s) -- retrieving.\n"), - number_to_static_string (local_size)); +Remote file exists but does not contain any link -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + goto exit; } } else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists but recursion is disabled -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + goto exit; + } + } + + if (send_head_first) + { + got_name = true; + restart_loop = true; } - /* free_hstat (&hstat); */ - hstat.timestamp_checked = true; - continue; + got_head = true; /* no more time-stamping */ + *dt &= ~HEAD_ONLY; + count = 0; /* the retrieve count for HEAD is reset */ + + if (restart_loop) + continue; } - + if ((tmr != (time_t) (-1)) - && !opt.spider && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { @@ -2375,17 +2660,21 @@ The sizes do not match (local %s) -- retrieving.\n"), else fl = hstat.local_file; if (fl) - touch (fl, tmr); + { + time_t newtmr = -1; + /* Reparse time header, in case it's changed. */ + if (time_came_from_head + && hstat.remote_time && hstat.remote_time[0]) + { + newtmr = http_atotm (hstat.remote_time); + if (newtmr != -1) + tmr = newtmr; + } + touch (fl, tmr); + } } /* End of time-stamping section. */ - if (opt.spider) - { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, - escnonprint (hstat.error)); - return RETROK; - } - tmrate = retr_rate (hstat.rd_size, hstat.dltime); total_download_time += hstat.dltime; @@ -2414,8 +2703,8 @@ The sizes do not match (local %s) -- retrieving.\n"), else downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; } else if (hstat.res == 0) /* No read error */ { @@ -2442,8 +2731,8 @@ The sizes do not match (local %s) -- retrieving.\n"), else downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; } else if (hstat.len < hstat.contlen) /* meaning we lost the connection too soon */ @@ -2452,7 +2741,6 @@ The sizes do not match (local %s) -- retrieving.\n"), _("%s (%s) - Connection closed at byte %s. "), tms, tmrate, number_to_static_string (hstat.len)); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } else @@ -2469,7 +2757,6 @@ The sizes do not match (local %s) -- retrieving.\n"), tms, tmrate, number_to_static_string (hstat.len), hstat.rderrmsg); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } else /* hstat.res == -1 and contlen is given */ @@ -2481,15 +2768,19 @@ The sizes do not match (local %s) -- retrieving.\n"), number_to_static_string (hstat.contlen), hstat.rderrmsg); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } } /* not reached */ } while (!opt.ntry || (count < opt.ntry)); + +exit: + if (ret == RETROK) + *local_file = xstrdup (hstat.local_file); + free_hstat (&hstat); - return TRYLIMEXC; + return ret; } /* Check whether the result of strptime() indicates success. @@ -2630,45 +2921,6 @@ basic_authentication_encode (const char *user, const char *passwd) } while (0) #ifdef ENABLE_DIGEST -/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning - of a field in such a header. If the field is the one specified by - ATTR_NAME ("realm", "opaque", and "nonce" are used by the current - digest authorization code), extract its value in the (char*) - variable pointed by RET. Returns negative on a malformed header, - or number of bytes that have been parsed by this call. */ -static int -extract_header_attr (const char *au, const char *attr_name, char **ret) -{ - const char *ep; - const char *cp = au; - - if (strncmp (cp, attr_name, strlen (attr_name)) == 0) - { - cp += strlen (attr_name); - if (!*cp) - return -1; - SKIP_WS (cp); - if (*cp != '=') - return -1; - if (!*++cp) - return -1; - SKIP_WS (cp); - if (*cp != '\"') - return -1; - if (!*++cp) - return -1; - for (ep = cp; *ep && *ep != '\"'; ep++) - ; - if (!*ep) - return -1; - xfree_null (*ret); - *ret = strdupdelim (cp, ep); - return ep - au + 1; - } - else - return 0; -} - /* Dump the hexadecimal representation of HASH to BUF. HASH should be an array of 16 bytes containing the hash keys, and BUF should be a buffer of 33 writable characters (32 for hex digits plus one for @@ -2703,53 +2955,21 @@ digest_authentication_encode (const char *au, const char *user, { "nonce", &nonce } }; char *res; + param_token name, value; realm = opaque = nonce = NULL; au += 6; /* skip over `Digest' */ - while (*au) + while (extract_param (&au, &name, &value, ',')) { int i; - - SKIP_WS (au); for (i = 0; i < countof (options); i++) - { - int skip = extract_header_attr (au, options[i].name, - options[i].variable); - if (skip < 0) - { - xfree_null (realm); - xfree_null (opaque); - xfree_null (nonce); - return NULL; - } - else if (skip) - { - au += skip; - break; - } - } - if (i == countof (options)) - { - while (*au && *au != '=') - au++; - if (*au && *++au) - { - SKIP_WS (au); - if (*au == '\"') - { - au++; - while (*au && *au != '\"') - au++; - if (*au) - au++; - } - } - } - while (*au && *au != ',') - au++; - if (*au) - au++; + if (name.e - name.b == strlen (options[i].name) + && 0 == strncmp (name.b, options[i].name, name.e - name.b)) + { + *options[i].variable = strdupdelim (value.b, value.e); + break; + } } if (!realm || !nonce || !user || !passwd || !path || !method) { @@ -2910,7 +3130,49 @@ http_cleanup (void) cookie_jar_delete (wget_cookie_jar); } + +#ifdef TESTING + +const char * +test_parse_content_disposition() +{ + int i; + struct { + char *hdrval; + char *opt_dir_prefix; + char *filename; + bool result; + } test_array[] = { + { "filename=\"file.ext\"", NULL, "file.ext", true }, + { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, + { "attachment; filename=\"file.ext\"", NULL, "file.ext", true }, + { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true }, + { "attachment", NULL, NULL, false }, + { "attachment", "somedir", NULL, false }, + }; + + for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + { + char *filename; + bool res; + + opt.dir_prefix = test_array[i].opt_dir_prefix; + res = parse_content_disposition (test_array[i].hdrval, &filename); + + mu_assert ("test_parse_content_disposition: wrong result", + res == test_array[i].result + && (res == false + || 0 == strcmp (test_array[i].filename, filename))); + } + + return NULL; +} + +#endif /* TESTING */ + /* - * vim: et ts=2 sw=2 + * vim: et sts=2 sw=2 cino+={s */