X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=d24db5fc38f3393ef6937c32e1ba03e3e6eba24b;hp=d3f6704f87f7af033cf8239aa1101b94cb73ea5d;hb=9dadbf6fe9577a6a6b7e7bab4e4b782fc1a6f86c;hpb=a0d0f332d5f230e40fe7fff8fc76839c4f4704ce diff --git a/src/http.c b/src/http.c index d3f6704f..d24db5fc 100644 --- a/src/http.c +++ b/src/http.c @@ -1,11 +1,12 @@ /* HTTP support. - Copyright (C) 1996-2006 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,20 +15,20 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#include "wget.h" #include #include @@ -40,7 +41,7 @@ so, delete this exception statement from your version. */ #include #include -#include "wget.h" +#include "hash.h" #include "http.h" #include "utils.h" #include "url.h" @@ -65,8 +66,22 @@ so, delete this exception statement from your version. */ #include "test.h" #endif +#ifdef __VMS +# include "vms.h" +#endif /* def __VMS */ + extern char *version_string; +/* Forward decls. */ +struct http_stat; +static char *create_authorization_line (const char *, const char *, + const char *, const char *, + const char *, bool *); +static char *basic_authentication_encode (const char *, const char *); +static bool known_authentication_scheme_p (const char *, const char *); +static void ensure_extension (struct http_stat *, const char *, int *); +static void load_cookies (void); + #ifndef MIN # define MIN(x, y) ((x) > (y) ? (y) : (x)) #endif @@ -131,6 +146,8 @@ struct request { int hcount, hcapacity; }; +extern int numurls; + /* Create a new, empty request. At least request_set_method must be called before the request can be used. */ @@ -271,7 +288,7 @@ request_set_user_header (struct request *req, const char *header) return; BOUNDED_TO_ALLOCA (header, p, name); ++p; - while (ISSPACE (*p)) + while (c_isspace (*p)) ++p; request_set_header (req, xstrdup (name), (char *) p, rel_name); } @@ -375,6 +392,58 @@ request_free (struct request *req) xfree (req); } +static struct hash_table *basic_authed_hosts; + +/* Find out if this host has issued a Basic challenge yet; if so, give + * it the username, password. A temporary measure until we can get + * proper authentication in place. */ + +static bool +maybe_send_basic_creds (const char *hostname, const char *user, + const char *passwd, struct request *req) +{ + bool do_challenge = false; + + if (opt.auth_without_challenge) + { + DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n")); + do_challenge = true; + } + else if (basic_authed_hosts + && hash_table_contains(basic_authed_hosts, hostname)) + { + DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname))); + do_challenge = true; + } + else + { + DEBUGP(("Host %s has not issued a general basic challenge.\n", + quote (hostname))); + } + if (do_challenge) + { + request_set_header (req, "Authorization", + basic_authentication_encode (user, passwd), + rel_value); + } + return do_challenge; +} + +static void +register_basic_auth_host (const char *hostname) +{ + if (!basic_authed_hosts) + { + basic_authed_hosts = make_nocase_string_hash_table (1); + } + if (!hash_table_contains(basic_authed_hosts, hostname)) + { + hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL); + DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname))); + } +} + + /* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ @@ -601,9 +670,9 @@ resp_header_locate (const struct response *resp, const char *name, int start, && 0 == strncasecmp (b, name, name_len)) { b += name_len + 1; - while (b < e && ISSPACE (*b)) + while (b < e && c_isspace (*b)) ++b; - while (b < e && ISSPACE (e[-1])) + while (b < e && c_isspace (e[-1])) --e; *begptr = b; *endptr = e; @@ -702,17 +771,17 @@ resp_status (const struct response *resp, char **message) if (p < end && *p == '/') { ++p; - while (p < end && ISDIGIT (*p)) + while (p < end && c_isdigit (*p)) ++p; if (p < end && *p == '.') ++p; - while (p < end && ISDIGIT (*p)) + while (p < end && c_isdigit (*p)) ++p; } - while (p < end && ISSPACE (*p)) + while (p < end && c_isspace (*p)) ++p; - if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2])) + if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2])) return -1; status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0'); @@ -720,9 +789,9 @@ resp_status (const struct response *resp, char **message) if (message) { - while (p < end && ISSPACE (*p)) + while (p < end && c_isspace (*p)) ++p; - while (p < end && ISSPACE (end[-1])) + while (p < end && c_isspace (end[-1])) --end; *message = strdupdelim (p, end); } @@ -739,6 +808,21 @@ resp_free (struct response *resp) xfree (resp); } +/* Print a single line of response, the characters [b, e). We tried + getting away with + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b); + but that failed to escape the non-printable characters and, in fact, + caused crashes in UTF-8 locales. */ + +static void +print_response_line(const char *prefix, const char *b, const char *e) +{ + char *copy; + BOUNDED_TO_ALLOCA(b, e, copy); + logprintf (LOG_ALWAYS, "%s%s\n", prefix, + quotearg_style (escape_quoting_style, copy)); +} + /* Print the server response, line by line, omitting the trailing CRLF from individual header lines, and prefixed with PREFIX. */ @@ -757,9 +841,7 @@ print_server_response (const struct response *resp, const char *prefix) --e; if (b < e && e[-1] == '\r') --e; - /* This is safe even on printfs with broken handling of "%.s" - because resp->headers ends with \0. */ - logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b); + print_response_line(prefix, b, e); } } @@ -781,27 +863,30 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, HTTP spec. */ if (*hdr == ':') ++hdr; - while (ISSPACE (*hdr)) + while (c_isspace (*hdr)) ++hdr; if (!*hdr) return false; } - if (!ISDIGIT (*hdr)) + if (!c_isdigit (*hdr)) return false; - for (num = 0; ISDIGIT (*hdr); hdr++) + for (num = 0; c_isdigit (*hdr); hdr++) num = 10 * num + (*hdr - '0'); - if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) + if (*hdr != '-' || !c_isdigit (*(hdr + 1))) return false; *first_byte_ptr = num; ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) + for (num = 0; c_isdigit (*hdr); hdr++) num = 10 * num + (*hdr - '0'); - if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) + if (*hdr != '/' || !c_isdigit (*(hdr + 1))) return false; *last_byte_ptr = num; ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) - num = 10 * num + (*hdr - '0'); + if (*hdr == '*') + num = -1; + else + for (num = 0; c_isdigit (*hdr); hdr++) + num = 10 * num + (*hdr - '0'); *entity_length_ptr = num; return true; } @@ -871,25 +956,25 @@ skip_short_body (int fd, wgint contlen) bool extract_param (const char **source, param_token *name, param_token *value, - char separator) + char separator) { const char *p = *source; - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; if (!*p) { *source = p; - return false; /* no error; nothing more to extract */ + return false; /* no error; nothing more to extract */ } /* Extract name. */ name->b = p; - while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p; + while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p; name->e = p; if (name->b == name->e) - return false; /* empty name: error */ - while (ISSPACE (*p)) ++p; - if (*p == separator || !*p) /* no value */ + return false; /* empty name: error */ + while (c_isspace (*p)) ++p; + if (*p == separator || !*p) /* no value */ { xzero (*value); if (*p == separator) ++p; @@ -897,12 +982,12 @@ extract_param (const char **source, param_token *name, param_token *value, return true; } if (*p != '=') - return false; /* error */ + return false; /* error */ /* *p is '=', extract value */ ++p; - while (ISSPACE (*p)) ++p; - if (*p == '"') /* quoted */ + while (c_isspace (*p)) ++p; + if (*p == '"') /* quoted */ { value->b = ++p; while (*p && *p != '"') ++p; @@ -910,20 +995,20 @@ extract_param (const char **source, param_token *name, param_token *value, return false; value->e = p++; /* Currently at closing quote; find the end of param. */ - while (ISSPACE (*p)) ++p; + while (c_isspace (*p)) ++p; while (*p && *p != separator) ++p; if (*p == separator) - ++p; + ++p; else if (*p) - /* garbage after closed quote, e.g. foo="bar"baz */ - return false; + /* garbage after closed quote, e.g. foo="bar"baz */ + return false; } - else /* unquoted */ + else /* unquoted */ { value->b = p; while (*p && *p != separator) ++p; value->e = p; - while (value->e != value->b && ISSPACE (value->e[-1])) + while (value->e != value->b && c_isspace (value->e[-1])) --value->e; if (*p == separator) ++p; } @@ -958,16 +1043,34 @@ parse_content_disposition (const char *hdr, char **filename) while (extract_param (&hdr, &name, &value, ';')) if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL) { - /* Make the file name begin at the last slash or backslash. */ + /* Make the file name begin at the last slash or backslash. */ const char *last_slash = memrchr (value.b, '/', value.e - value.b); const char *last_bs = memrchr (value.b, '\\', value.e - value.b); if (last_slash && last_bs) value.b = 1 + MAX (last_slash, last_bs); else if (last_slash || last_bs) value.b = 1 + (last_slash ? last_slash : last_bs); - if (value.b == value.e) - continue; - *filename = strdupdelim (value.b, value.e); + if (value.b == value.e) + continue; + /* Start with the directory prefix, if specified. */ + if (opt.dir_prefix) + { + int prefix_length = strlen (opt.dir_prefix); + bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/'); + int total_length; + + if (add_slash) + ++prefix_length; + total_length = prefix_length + (value.e - value.b); + *filename = xmalloc (total_length + 1); + strcpy (*filename, opt.dir_prefix); + if (add_slash) + (*filename)[prefix_length - 1] = '/'; + memcpy (*filename + prefix_length, value.b, (value.e - value.b)); + (*filename)[total_length] = '\0'; + } + else + *filename = strdupdelim (value.b, value.e); return true; } return false; @@ -1202,10 +1305,15 @@ struct http_stat char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ + char *message; /* status message */ wgint rd_size; /* amount of data read from socket */ double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ char *local_file; /* local file name. */ + bool existence_checked; /* true if we already checked for a file's + existence after having begun to download + (needed in gethttp for when connection is + interrupted/restarted. */ bool timestamp_checked; /* true if pre-download time-stamping checks * have already been performed */ char *orig_file_name; /* name of file to compare for time-stamping @@ -1224,6 +1332,7 @@ free_hstat (struct http_stat *hs) xfree_null (hs->rderrmsg); xfree_null (hs->local_file); xfree_null (hs->orig_file_name); + xfree_null (hs->message); /* Guard against being called twice. */ hs->newloc = NULL; @@ -1231,26 +1340,32 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } -static char *create_authorization_line (const char *, const char *, - const char *, const char *, - const char *, bool *); -static char *basic_authentication_encode (const char *, const char *); -static bool known_authentication_scheme_p (const char *, const char *); -static void ensure_extension (struct http_stat *, const char *, int *); -static void load_cookies (void); - #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ + && (c_isspace (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#ifdef __VMS +#define SET_USER_AGENT(req) do { \ + if (!opt.useragent) \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s (VMS %s %s)", \ + version_string, vms_arch(), vms_vers()), \ + rel_value); \ + else if (*opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ +} while (0) +#else /* def __VMS */ #define SET_USER_AGENT(req) do { \ if (!opt.useragent) \ request_set_header (req, "User-Agent", \ - aprintf ("Wget/%s", version_string), rel_value); \ + aprintf ("Wget/%s (%s)", \ + version_string, OS_TYPE), \ + rel_value); \ else if (*opt.useragent) \ request_set_header (req, "User-Agent", opt.useragent, rel_none); \ } while (0) +#endif /* def __VMS [else] */ /* The flags that allow clobbering the file (opening with "wb"). Defined here to avoid repetition later. #### This will require @@ -1269,7 +1384,8 @@ static void load_cookies (void); If PROXY is non-NULL, the connection will be made to the proxy server, and u->url will be requested. */ static uerr_t -gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) +gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, + struct iri *iri) { struct request *req; @@ -1285,10 +1401,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) int sock = -1; int flags; - /* Set to 1 when the authorization has failed permanently and should + /* Set to 1 when the authorization has already been sent and should not be tried again. */ bool auth_finished = false; + /* Set to 1 when just globally-set Basic authorization has been sent; + * should prevent further Basic negotiations, but not other + * mechanisms. */ + bool basic_auth_finished = false; + /* Whether NTLM authentication is used for this request. */ bool ntlm_seen = false; @@ -1346,6 +1467,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; + hs->message = NULL; conn = u; @@ -1394,66 +1516,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) user = user ? user : (opt.http_user ? opt.http_user : opt.user); passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - if (user && passwd) - { - /* We have the username and the password, but haven't tried - any authorization yet. Let's see if the "Basic" method - works. If not, we'll come back here and construct a - proper authorization method with the right challenges. - - If we didn't employ this kind of logic, every URL that - requires authorization would have to be processed twice, - which is very suboptimal and generates a bunch of false - "unauthorized" errors in the server log. - - #### But this logic also has a serious problem when used - with stronger authentications: we *first* transmit the - username and the password in clear text, and *then* attempt a - stronger authentication scheme. That cannot be right! We - are only fortunate that almost everyone still uses the - `Basic' scheme anyway. - - There should be an option to prevent this from happening, for - those who use strong authentication schemes and value their - passwords. */ - request_set_header (req, "Authorization", - basic_authentication_encode (user, passwd), - rel_value); - } - - proxyauth = NULL; - if (proxy) + /* We only do "site-wide" authentication with "global" user/password + * values unless --auth-no-challange has been requested; URL user/password + * info overrides. */ + if (user && passwd && (!u->user || opt.auth_without_challenge)) { - char *proxy_user, *proxy_passwd; - /* For normal username and password, URL components override - command-line/wgetrc parameters. With proxy - authentication, it's the reverse, because proxy URLs are - normally the "permanent" ones, so command-line args - should take precedence. */ - if (opt.proxy_user && opt.proxy_passwd) - { - proxy_user = opt.proxy_user; - proxy_passwd = opt.proxy_passwd; - } - else - { - proxy_user = proxy->user; - proxy_passwd = proxy->passwd; - } - /* #### This does not appear right. Can't the proxy request, - say, `Digest' authentication? */ - if (proxy_user && proxy_passwd) - proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); - - /* If we're using a proxy, we will be connecting to the proxy - server. */ - conn = proxy; - - /* Proxy authorization over SSL is handled below. */ -#ifdef HAVE_SSL - if (u->scheme != SCHEME_HTTPS) -#endif - request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); + /* If this is a host for which we've already received a Basic + * challenge, we'll go ahead and send Basic authentication creds. */ + basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req); } /* Generate the Host header, HOST:PORT. Take into account that: @@ -1503,8 +1573,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) post_data_size = file_size (opt.post_file_name); if (post_data_size == -1) { - logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"), - opt.post_file_name, strerror (errno)); + logprintf (LOG_NOTQUIET, _("POST data file %s missing: %s\n"), + quote (opt.post_file_name), strerror (errno)); post_data_size = 0; } } @@ -1526,6 +1596,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) without authorization header fails. (Expected to happen at least for the Digest authorization scheme.) */ + proxyauth = NULL; + if (proxy) + { + char *proxy_user, *proxy_passwd; + /* For normal username and password, URL components override + command-line/wgetrc parameters. With proxy + authentication, it's the reverse, because proxy URLs are + normally the "permanent" ones, so command-line args + should take precedence. */ + if (opt.proxy_user && opt.proxy_passwd) + { + proxy_user = opt.proxy_user; + proxy_passwd = opt.proxy_passwd; + } + else + { + proxy_user = proxy->user; + proxy_passwd = proxy->passwd; + } + /* #### This does not appear right. Can't the proxy request, + say, `Digest' authentication? */ + if (proxy_user && proxy_passwd) + proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); + + /* If we're using a proxy, we will be connecting to the proxy + server. */ + conn = proxy; + + /* Proxy authorization over SSL is handled below. */ +#ifdef HAVE_SSL + if (u->scheme != SCHEME_HTTPS) +#endif + request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); + } + keep_alive = false; /* Establish the connection. */ @@ -1553,7 +1658,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) sock = pconn.socket; using_ssl = pconn.ssl; logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - escnonprint (pconn.host), pconn.port); + quotearg_style (escape_quoting_style, pconn.host), + pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); if (pconn.authorized) /* If the connection is already authorized, the "Basic" @@ -1561,19 +1667,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) only hurts us. */ request_remove_header (req, "Authorization"); } - } - - if (sock < 0) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) + else if (host_lookup_failed) { request_free (req); + logprintf(LOG_NOTQUIET, + _("%s: unable to resolve host address %s\n"), + exec_name, quote (relevant->host)); return HOSTERR; } + } + if (sock < 0) + { sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) { @@ -1636,13 +1741,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) resp = resp_new (head); statcode = resp_status (resp, &message); + hs->message = xstrdup (message); resp_free (resp); xfree (head); if (statcode != 200) { failed_tunnel: logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), - message ? escnonprint (message) : "?"); + message ? quotearg_style (escape_quoting_style, message) : "?"); xfree_null (message); return CONSSLERR; } @@ -1656,7 +1762,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (conn->scheme == SCHEME_HTTPS) { - if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host)) + if (!ssl_connect_wget (sock) || !ssl_check_certificate (sock, u->host)) { fd_close (sock); return CONSSLERR; @@ -1718,15 +1824,111 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Check for status line. */ message = NULL; statcode = resp_status (resp, &message); + hs->message = xstrdup (message); if (!opt.server_response) logprintf (LOG_VERBOSE, "%2d %s\n", statcode, - message ? escnonprint (message) : ""); + message ? quotearg_style (escape_quoting_style, message) : ""); else { logprintf (LOG_VERBOSE, "\n"); print_server_response (resp, " "); } + /* Check for keep-alive related responses. */ + if (!inhibit_keep_alive && contlen != -1) + { + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) + keep_alive = true; + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) + { + if (0 == strcasecmp (hdrval, "Keep-Alive")) + keep_alive = true; + } + } + + if (keep_alive) + /* The server has promised that it will not close the connection + when we're done. This means that we can register it. */ + register_persistent (conn->host, conn->port, sock, using_ssl); + + if (statcode == HTTP_STATUS_UNAUTHORIZED) + { + /* Authorization is required. */ + if (keep_alive && !head_only && skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + pconn.authorized = false; + if (!auth_finished && (user && passwd)) + { + /* IIS sends multiple copies of WWW-Authenticate, one with + the value "negotiate", and other(s) with data. Loop over + all the occurrences and pick the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); + break; + } + + if (!www_authenticate) + { + /* If the authentication header is missing or + unrecognized, there's no sense in retrying. */ + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else if (!basic_auth_finished + || !BEGINS_WITH (www_authenticate, "Basic")) + { + char *pth; + pth = url_full_path (u); + request_set_header (req, "Authorization", + create_authorization_line (www_authenticate, + user, passwd, + request_method (req), + pth, + &auth_finished), + rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = true; + else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) + { + /* Need to register this host as using basic auth, + * so we automatically send creds next time. */ + register_basic_auth_host (u->host); + } + xfree (pth); + xfree_null (message); + resp_free (resp); + xfree (head); + goto retry_with_auth; + } + else + { + /* We already did Basic auth, and it failed. Gotta + * give up. */ + } + } + logputs (LOG_NOTQUIET, _("Authorization failed.\n")); + request_free (req); + xfree_null (message); + resp_free (resp); + xfree (head); + return AUTHFAILED; + } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = true; + } + /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ if (!hs->local_file) @@ -1742,16 +1944,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->local_file = url_file_name (u); } } - + /* TODO: perform this check only once. */ - if (file_exists_p (hs->local_file)) + if (!hs->existence_checked && file_exists_p (hs->local_file)) { - if (opt.noclobber) + if (opt.noclobber && !opt.output_document) { /* If opt.noclobber is turned on and file already exists, do not - retrieve the file */ + retrieve the file. But if the output_document was given, then this + test was already done and the file didn't exist. Hence the !opt.output_document */ logprintf (LOG_VERBOSE, _("\ -File `%s' already there; not retrieving.\n\n"), hs->local_file); +File %s already there; not retrieving.\n\n"), quote (hs->local_file)); /* If the file is there, we suppose it's retrieved OK. */ *dt |= RETROKF; @@ -1760,7 +1963,9 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); if (has_html_suffix_p (hs->local_file)) *dt |= TEXTHTML; - return RETROK; + xfree (head); + xfree_null (message); + return RETRUNNEEDED; } else if (!ALLOW_CLOBBER) { @@ -1770,13 +1975,14 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); hs->local_file = unique; } } + hs->existence_checked = true; /* Support timestamping */ /* TODO: move this code out of gethttp. */ if (opt.timestamping && !hs->timestamp_checked) { size_t filename_len = strlen (hs->local_file); - char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig")); + char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX)); bool local_dot_orig_file_exists = false; char *local_filename = NULL; struct_stat st; @@ -1801,7 +2007,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); --hniksic */ memcpy (filename_plus_orig_suffix, hs->local_file, filename_len); memcpy (filename_plus_orig_suffix + filename_len, - ".orig", sizeof (".orig")); + ORIG_SFX, sizeof (ORIG_SFX)); /* Try to stat() the .orig file. */ if (stat (filename_plus_orig_suffix, &st) == 0) @@ -1809,7 +2015,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); local_dot_orig_file_exists = true; local_filename = filename_plus_orig_suffix; } - } + } if (!local_dot_orig_file_exists) /* Couldn't stat() .orig, so try to stat() . */ @@ -1839,95 +2045,24 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); errno = 0; parsed = str_to_wgint (hdrval, NULL, 10); if (parsed == WGINT_MAX && errno == ERANGE) - /* Out of range. - #### If Content-Length is out of range, it most likely - means that the file is larger than 2G and that we're - compiled without LFS. In that case we should probably - refuse to even attempt to download the file. */ - contlen = -1; - else - contlen = parsed; - } - - /* Check for keep-alive related responses. */ - if (!inhibit_keep_alive && contlen != -1) - { - if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) - keep_alive = true; - else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { - if (0 == strcasecmp (hdrval, "Keep-Alive")) - keep_alive = true; + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; } - } - if (keep_alive) - /* The server has promised that it will not close the connection - when we're done. This means that we can register it. */ - register_persistent (conn->host, conn->port, sock, using_ssl); - - if (statcode == HTTP_STATUS_UNAUTHORIZED) - { - /* Authorization is required. */ - if (keep_alive && !head_only && skip_short_body (sock, contlen)) - CLOSE_FINISH (sock); - else - CLOSE_INVALIDATE (sock); - pconn.authorized = false; - if (!auth_finished && (user && passwd)) + else if (parsed < 0) { - /* IIS sends multiple copies of WWW-Authenticate, one with - the value "negotiate", and other(s) with data. Loop over - all the occurrences and pick the one we recognize. */ - int wapos; - const char *wabeg, *waend; - char *www_authenticate = NULL; - for (wapos = 0; - (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, - &wabeg, &waend)) != -1; - ++wapos) - if (known_authentication_scheme_p (wabeg, waend)) - { - BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); - break; - } - - if (!www_authenticate) - /* If the authentication header is missing or - unrecognized, there's no sense in retrying. */ - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - else if (BEGINS_WITH (www_authenticate, "Basic")) - /* If the authentication scheme is "Basic", which we send - by default, there's no sense in retrying either. (This - should be changed when we stop sending "Basic" data by - default.) */ - ; - else - { - char *pth; - pth = url_full_path (u); - request_set_header (req, "Authorization", - create_authorization_line (www_authenticate, - user, passwd, - request_method (req), - pth, - &auth_finished), - rel_value); - if (BEGINS_WITH (www_authenticate, "NTLM")) - ntlm_seen = true; - xfree (pth); - goto retry_with_auth; - } + /* Negative Content-Length; nonsensical, so we can't + assume any information about the content to receive. */ + contlen = -1; } - logputs (LOG_NOTQUIET, _("Authorization failed.\n")); - request_free (req); - return AUTHFAILED; - } - else /* statcode != HTTP_STATUS_UNAUTHORIZED */ - { - /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ - if (ntlm_seen) - pconn.authorized = true; + else + contlen = parsed; } + request_free (req); hs->statcode = statcode; @@ -1945,9 +2080,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); char *tmp = strchr (type, ';'); if (tmp) { - while (tmp > type && ISSPACE (tmp[-1])) + /* sXXXav: only needed if IRI support is enabled */ + char *tmp2 = tmp + 1; + + while (tmp > type && c_isspace (tmp[-1])) --tmp; *tmp = '\0'; + + /* Try to get remote encoding if needed */ + if (opt.enable_iri && !opt.encoding_remote) + { + tmp = parse_charset (tmp2); + if (tmp) + set_content_encoding (iri, tmp); + } } } hs->newloc = resp_header_strdup (resp, "Location"); @@ -1976,7 +2122,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); wgint first_byte_pos, last_byte_pos, entity_length; if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, &entity_length)) - contrange = first_byte_pos; + { + contrange = first_byte_pos; + contlen = last_byte_pos - first_byte_pos + 1; + } } resp_free (resp); @@ -2005,6 +2154,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); else CLOSE_INVALIDATE (sock); xfree_null (type); + xfree (head); return NEWLOCATION; } } @@ -2025,10 +2175,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); else *dt &= ~TEXTCSS; - if (opt.html_extension) + if (opt.adjust_extension) { if (*dt & TEXTHTML) - /* -E / --html-extension / html_extension = on was specified, + /* -E / --adjust-extension / adjust_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't already the file's suffix, tack on ".html". */ @@ -2041,11 +2191,15 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); } } - if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE) + if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE + || (hs->restval > 0 && statcode == HTTP_STATUS_OK + && contrange == 0 && hs->restval >= contlen) + ) { /* If `-c' is in use and the file has been fully downloaded (or the remote file has shrunk), Wget effectively requests bytes - after the end of file and the server response with 416. */ + after the end of file and the server response with 416 + (or 200 with a <= Content-Length. */ logputs (LOG_VERBOSE, _("\ \n The file is already fully retrieved; nothing to do.\n\n")); /* In case the caller inspects. */ @@ -2056,6 +2210,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); xfree_null (type); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ + xfree (head); return RETRUNNEEDED; } if ((contrange != 0 && contrange != hs->restval) @@ -2065,9 +2220,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); Bail out. */ xfree_null (type); CLOSE_INVALIDATE (sock); + xfree (head); return RANGEERR; } - hs->contlen = contlen + contrange; + if (contlen == -1) + hs->contlen = -1; + else + hs->contlen = contlen + contrange; if (opt.verbose) { @@ -2098,7 +2257,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); + logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type)); else logputs (LOG_VERBOSE, "\n"); } @@ -2125,9 +2284,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); + xfree (head); return RETRFINISHED; } +/* 2005-06-17 SMS. + For VMS, define common fopen() optional arguments. +*/ +#ifdef __VMS +# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id +# define FOPEN_BIN_FLAG 3 +#else /* def __VMS */ +# define FOPEN_BIN_FLAG 1 +#endif /* def __VMS [else] */ + /* Open the local file. */ if (!output_stream) { @@ -2135,9 +2305,27 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); if (opt.backups) rotate_backups (hs->local_file); if (hs->restval) - fp = fopen (hs->local_file, "ab"); + { +#ifdef __VMS + int open_id; + + open_id = 21; + fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS); +#else /* def __VMS */ + fp = fopen (hs->local_file, "ab"); +#endif /* def __VMS [else] */ + } else if (ALLOW_CLOBBER) - fp = fopen (hs->local_file, "wb"); + { +#ifdef __VMS + int open_id; + + open_id = 22; + fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS); +#else /* def __VMS */ + fp = fopen (hs->local_file, "wb"); +#endif /* def __VMS [else] */ + } else { fp = fopen_excl (hs->local_file, true); @@ -2151,6 +2339,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); _("%s has sprung into existence.\n"), hs->local_file); CLOSE_INVALIDATE (sock); + xfree (head); return FOPEN_EXCL_ERR; } } @@ -2158,6 +2347,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); { logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno)); CLOSE_INVALIDATE (sock); + xfree (head); return FOPENERR; } } @@ -2167,8 +2357,8 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); /* Print fetch message, if opt.verbose. */ if (opt.verbose) { - logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), - HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); + logprintf (LOG_NOTQUIET, _("Saving to: %s\n"), + HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file)); } /* This confuses the timestamping code that checks for file size. @@ -2216,26 +2406,28 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); retried, and retried, and retried, and... */ uerr_t http_loop (struct url *u, char **newloc, char **local_file, const char *referer, - int *dt, struct url *proxy) + int *dt, struct url *proxy, struct iri *iri) { int count; bool got_head = false; /* used for time-stamping and filename detection */ + bool time_came_from_head = false; bool got_name = false; char *tms; const char *tmrate; uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ - wgint local_size = 0; /* the size of the local file */ struct http_stat hstat; /* HTTP status */ - struct_stat st; + struct_stat st; + bool send_head_first = true; + char *file_name; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); - + /* Set LOCAL_FILE parameter. */ if (local_file && opt.output_document) *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); - + /* Reset NEWLOC parameter. */ *newloc = NULL; @@ -2258,12 +2450,54 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, hstat.local_file = xstrdup (opt.output_document); got_name = true; } + else if (!opt.content_disposition) + { + hstat.local_file = url_file_name (u); + got_name = true; + } + + /* TODO: Ick! This code is now in both gethttp and http_loop, and is + * screaming for some refactoring. */ + if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file. But if the output_document was given, then this + test was already done and the file didn't exist. Hence the !opt.output_document */ + logprintf (LOG_VERBOSE, _("\ +File %s already there; not retrieving.\n\n"), + quote (hstat.local_file)); + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; + + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (hstat.local_file)) + *dt |= TEXTHTML; + + ret = RETROK; + goto exit; + } /* Reset the counter. */ count = 0; - + /* Reset the document type. */ *dt = 0; + + /* Skip preliminary HEAD request if we're not in spider mode AND + * if -O was given or HTTP Content-Disposition support is disabled. */ + if (!opt.spider + && (got_name || !opt.content_disposition)) + send_head_first = false; + + /* Send preliminary HEAD request if -N is given and we have an existing + * destination file. */ + file_name = url_file_name (u); + if (opt.timestamping + && !opt.content_disposition + && file_exists_p (file_name)) + send_head_first = true; + xfree (file_name); /* THE loop */ do @@ -2271,10 +2505,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Increment the pass counter. */ ++count; sleep_between_retrievals (count); - + /* Get the current time string. */ - tms = time_str (time (NULL)); - + tms = datetime_str (time (NULL)); + if (opt.spider && !got_head) logprintf (LOG_VERBOSE, _("\ Spider mode enabled. Check if remote file exists.\n")); @@ -2282,21 +2516,21 @@ Spider mode enabled. Check if remote file exists.\n")); /* Print fetch message, if opt.verbose. */ if (opt.verbose) { - char *hurl = url_string (u, true); - - if (count > 1) + char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD); + + if (count > 1) { char tmp[256]; sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_NOTQUIET, "--%s-- %s %s\n", tms, tmp, hurl); } - else + else { logprintf (LOG_NOTQUIET, "--%s-- %s\n", tms, hurl); } - + #ifdef WINDOWS ws_changetitle (hurl); #endif @@ -2306,8 +2540,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (((opt.spider || opt.timestamping) && !got_head) - || (opt.always_rest && !got_name)) + if (send_head_first && !got_head) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; @@ -2340,15 +2573,15 @@ Spider mode enabled. Check if remote file exists.\n")); *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ - err = gethttp (u, &hstat, dt, proxy); + err = gethttp (u, &hstat, dt, proxy, iri); /* Time? */ - tms = time_str (time (NULL)); - + tms = datetime_str (time (NULL)); + /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); - + switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: @@ -2362,8 +2595,8 @@ Spider mode enabled. Check if remote file exists.\n")); case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), - hstat.local_file, strerror (errno)); + logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"), + quote (hstat.local_file), strerror (errno)); case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ @@ -2383,7 +2616,7 @@ Spider mode enabled. Check if remote file exists.\n")); hstat.statcode); ret = WRONGCODE; } - else + else { ret = NEWLOCATION; } @@ -2399,23 +2632,33 @@ Spider mode enabled. Check if remote file exists.\n")); /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { char *hurl = NULL; if (!opt.verbose) { /* #### Ugly ugly ugly! */ - hurl = url_string (u, true); + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); } + + /* Fall back to GET if HEAD fails with a 500 or 501 error code. */ + if (*dt & HEAD_ONLY + && (hstat.statcode == 500 || hstat.statcode == 501)) + { + got_head = true; + continue; + } /* Maybe we should always keep track of broken links, not just in - * spider mode. */ - if (opt.spider) + * spider mode. + * Don't log error if it was UTF-8 encoded because we will try + * once unencoded. */ + else if (opt.spider && !iri->utf8_encode) { /* #### Again: ugly ugly ugly! */ - if (!hurl) - hurl = url_string (u, true); + if (!hurl) + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); nonexisting_url (hurl); logprintf (LOG_NOTQUIET, _("\ Remote file does not exist -- broken link!!!\n")); @@ -2423,7 +2666,8 @@ Remote file does not exist -- broken link!!!\n")); else { logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, escnonprint (hstat.error)); + tms, hstat.statcode, + quotearg_style (escape_quoting_style, hstat.error)); } logputs (LOG_VERBOSE, "\n"); ret = WRONGCODE; @@ -2434,7 +2678,7 @@ Remote file does not exist -- broken link!!!\n")); /* Did we get the time-stamp? */ if (!got_head) { - bool restart_loop = false; + got_head = true; /* no more time-stamping */ if (opt.timestamping && !hstat.remote_time) { @@ -2448,111 +2692,129 @@ Last-modified header missing -- time-stamps turned off.\n")); if (tmr == (time_t) (-1)) logputs (LOG_VERBOSE, _("\ Last-modified header invalid -- time-stamp ignored.\n")); + if (*dt & HEAD_ONLY) + time_came_from_head = true; } - /* The time-stamping section. */ - if (opt.timestamping) + if (send_head_first) { - if (hstat.orig_file_name) /* Perform the following checks only - if the file we're supposed to - download already exists. */ + /* The time-stamping section. */ + if (opt.timestamping) { - if (hstat.remote_time && - tmr != (time_t) (-1)) + if (hstat.orig_file_name) /* Perform the following + checks only if the file + we're supposed to + download already exists. */ { - /* Now time-stamping can be used validly. Time-stamping - means that if the sizes of the local and remote file - match, and local file is newer than the remote file, - it will not be retrieved. Otherwise, the normal - download procedure is resumed. */ - if (hstat.orig_file_tstamp >= tmr) + if (hstat.remote_time && + tmr != (time_t) (-1)) { - if (hstat.contlen == -1 - || hstat.orig_file_size == hstat.contlen) - { - logprintf (LOG_VERBOSE, _("\ -Server file no newer than local file `%s' -- not retrieving.\n\n"), - hstat.orig_file_name); - ret = RETROK; - goto exit; - } - else + /* Now time-stamping can be used validly. + Time-stamping means that if the sizes of + the local and remote file match, and local + file is newer than the remote file, it will + not be retrieved. Otherwise, the normal + download procedure is resumed. */ + if (hstat.orig_file_tstamp >= tmr) { - logprintf (LOG_VERBOSE, _("\ + if (hstat.contlen == -1 + || hstat.orig_file_size == hstat.contlen) + { + logprintf (LOG_VERBOSE, _("\ +Server file no newer than local file %s -- not retrieving.\n\n"), + quote (hstat.orig_file_name)); + ret = RETROK; + goto exit; + } + else + { + logprintf (LOG_VERBOSE, _("\ The sizes do not match (local %s) -- retrieving.\n"), - number_to_static_string (local_size)); + number_to_static_string (hstat.orig_file_size)); + } } - } - else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + else + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); - logputs (LOG_VERBOSE, "\n"); + logputs (LOG_VERBOSE, "\n"); + } } + + /* free_hstat (&hstat); */ + hstat.timestamp_checked = true; } - /* free_hstat (&hstat); */ - hstat.timestamp_checked = true; - restart_loop = true; - } - - if (opt.always_rest) - { - got_name = true; - restart_loop = true; - } - - if (opt.spider) - { - if (opt.recursive) + if (opt.spider) { - if (*dt & TEXTHTML) + bool finished = true; + if (opt.recursive) { - logputs (LOG_VERBOSE, _("\ + if (*dt & TEXTHTML) + { + logputs (LOG_VERBOSE, _("\ Remote file exists and could contain links to other resources -- retrieving.\n\n")); - restart_loop = true; + finished = false; + } + else + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists but does not contain any link -- not retrieving.\n\n")); + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + } } - else + else { - logprintf (LOG_VERBOSE, _("\ -Remote file exists but does not contain any link -- not retrieving.\n\n")); - ret = RETRUNNEEDED; + if (*dt & TEXTHTML) + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists and could contain further links,\n\ +but recursion is disabled -- not retrieving.\n\n")); + } + else + { + logprintf (LOG_VERBOSE, _("\ +Remote file exists.\n\n")); + } + ret = RETROK; /* RETRUNNEEDED is not for caller. */ + } + + if (finished) + { + logprintf (LOG_NONVERBOSE, + _("%s URL: %s %2d %s\n"), + tms, u->url, hstat.statcode, + hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : ""); goto exit; } } - else - { - logprintf (LOG_VERBOSE, _("\ -Remote file exists but recursion is disabled -- not retrieving.\n\n")); - ret = RETRUNNEEDED; - goto exit; - } - } - got_head = true; /* no more time-stamping */ - *dt &= ~HEAD_ONLY; - count = 0; /* the retrieve count for HEAD is reset */ - - if (restart_loop) - continue; - } + got_name = true; + *dt &= ~HEAD_ONLY; + count = 0; /* the retrieve count for HEAD is reset */ + continue; + } /* send_head_first */ + } /* !got_head */ if ((tmr != (time_t) (-1)) && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { - /* #### This code repeats in http.c and ftp.c. Move it to a - function! */ const char *fl = NULL; - if (opt.output_document) + set_local_file (&fl, hstat.local_file); + if (fl) { - if (output_stream_regular) - fl = opt.output_document; + time_t newtmr = -1; + /* Reparse time header, in case it's changed. */ + if (time_came_from_head + && hstat.remote_time && hstat.remote_time[0]) + { + newtmr = http_atotm (hstat.remote_time); + if (newtmr != (time_t)-1) + tmr = newtmr; + } + touch (fl, tmr); } - else - fl = hstat.local_file; - if (fl) - touch (fl, tmr); } /* End of time-stamping section. */ @@ -2563,9 +2825,14 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%s/%s]\n\n"), - tms, tmrate, hstat.local_file, + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s/%s]\n\n") + : _("%s (%s) - %s saved [%s/%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len), number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, @@ -2575,7 +2842,7 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); number_to_static_string (hstat.contlen), hstat.local_file, count); } - ++opt.numurls; + ++numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ @@ -2594,16 +2861,21 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%s]\n\n"), - tms, tmrate, hstat.local_file, + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s]\n\n") + : _("%s (%s) - %s saved [%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, "%s URL:%s [%s] -> \"%s\" [%d]\n", tms, u->url, number_to_static_string (hstat.len), hstat.local_file, count); } - ++opt.numurls; + ++numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ @@ -2624,10 +2896,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); printwhat (count, opt.ntry); continue; } - else + else if (hstat.len != hstat.restval) /* Getting here would mean reading more data than requested with content-length, which we never do. */ abort (); + else + { + /* Getting here probably means that the content-length was + * _less_ than the original, local size. We should probably + * truncate or re-read, or something. FIXME */ + ret = RETROK; + goto exit; + } } else /* from now on hstat.res can only be -1 */ { @@ -2677,11 +2957,11 @@ check_end (const char *p) { if (!p) return false; - while (ISSPACE (*p)) + while (c_isspace (*p)) ++p; if (!*p || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T') - || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1]))) + || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1]))) return true; else return false; @@ -2732,13 +3012,24 @@ http_atotm (const char *time_string) Netscape cookie specification.) */ }; const char *oldlocale; - int i; + char savedlocale[256]; + size_t i; time_t ret = (time_t) -1; /* Solaris strptime fails to recognize English month names in non-English locales, which we work around by temporarily setting locale to C before invoking strptime. */ oldlocale = setlocale (LC_TIME, NULL); + if (oldlocale) + { + size_t l = strlen (oldlocale); + if (l >= sizeof savedlocale) + savedlocale[0] = '\0'; + else + memcpy (savedlocale, oldlocale, l); + } + else savedlocale[0] = '\0'; + setlocale (LC_TIME, "C"); for (i = 0; i < countof (time_formats); i++) @@ -2758,7 +3049,8 @@ http_atotm (const char *time_string) } /* Restore the previous locale. */ - setlocale (LC_TIME, oldlocale); + if (savedlocale[0]) + setlocale (LC_TIME, savedlocale); return ret; } @@ -2797,7 +3089,7 @@ basic_authentication_encode (const char *user, const char *passwd) } #define SKIP_WS(x) do { \ - while (ISSPACE (*(x))) \ + while (c_isspace (*(x))) \ ++(x); \ } while (0) @@ -2843,14 +3135,16 @@ digest_authentication_encode (const char *au, const char *user, au += 6; /* skip over `Digest' */ while (extract_param (&au, &name, &value, ',')) { - int i; + size_t i; + size_t namelen = name.e - name.b; for (i = 0; i < countof (options); i++) - if (name.e - name.b == strlen (options[i].name) - && 0 == strncmp (name.b, options[i].name, name.e - name.b)) - { - *options[i].variable = strdupdelim (value.b, value.e); - break; - } + if (namelen == strlen (options[i].name) + && 0 == strncmp (name.b, options[i].name, + namelen)) + { + *options[i].variable = strdupdelim (value.b, value.e); + break; + } } if (!realm || !nonce || !user || !passwd || !path || !method) { @@ -2926,10 +3220,11 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", first argument and are followed by whitespace or terminating \0. The comparison is case-insensitive. */ #define STARTS(literal, b, e) \ - ((e) - (b) >= STRSIZE (literal) \ + ((e > b) \ + && ((size_t) ((e) - (b))) >= STRSIZE (literal) \ && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ - && ((e) - (b) == STRSIZE (literal) \ - || ISSPACE (b[STRSIZE (literal)]))) + && ((size_t) ((e) - (b)) == STRSIZE (literal) \ + || c_isspace (b[STRSIZE (literal)]))) static bool known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) @@ -2958,7 +3253,7 @@ create_authorization_line (const char *au, const char *user, { /* We are called only with known schemes, so we can dispatch on the first letter. */ - switch (TOUPPER (*au)) + switch (c_toupper (*au)) { case 'B': /* Basic */ *finished = true; @@ -3056,19 +3351,27 @@ test_parse_content_disposition() int i; struct { char *hdrval; + char *opt_dir_prefix; char *filename; bool result; } test_array[] = { - { "filename=\"file.ext\"", "file.ext", true }, - { "attachment; filename=\"file.ext\"", "file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", "file.ext", true }, - { "attachment", NULL, false }, + { "filename=\"file.ext\"", NULL, "file.ext", true }, + { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, + { "attachment; filename=\"file.ext\"", NULL, "file.ext", true }, + { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true }, + { "attachment", NULL, NULL, false }, + { "attachment", "somedir", NULL, false }, }; for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) { char *filename; - bool res = parse_content_disposition (test_array[i].hdrval, &filename); + bool res; + + opt.dir_prefix = test_array[i].opt_dir_prefix; + res = parse_content_disposition (test_array[i].hdrval, &filename); mu_assert ("test_parse_content_disposition: wrong result", res == test_array[i].result @@ -3082,6 +3385,6 @@ test_parse_content_disposition() #endif /* TESTING */ /* - * vim: et ts=2 sw=2 + * vim: et sts=2 sw=2 cino+={s */