X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=11af939a170fc20d073795988b13ef717d0a1b5b;hp=0ff820961716c90a319a45b91444dcc2fb5f0b86;hb=1b28d66fcb583791fb1f92199a29e1063cdd6ed8;hpb=dfeb089f3c3c8f895258058bfcf49ac9b0dee23f diff --git a/src/http.c b/src/http.c index 0ff82096..11af939a 100644 --- a/src/http.c +++ b/src/http.c @@ -1,11 +1,12 @@ /* HTTP support. - Copyright (C) 1996-2006 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +15,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +along with Wget. If not, see <http://www.gnu.org/licenses/>. In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -41,6 +41,7 @@ so, delete this exception statement from your version. */ #include #include "wget.h" +#include "hash.h" #include "http.h" #include "utils.h" #include "url.h" @@ -67,6 +68,14 @@ so, delete this exception statement from your version. */ extern char *version_string; +/* Forward decls. 
*/ +static char *create_authorization_line (const char *, const char *, + const char *, const char *, + const char *, bool *); +static char *basic_authentication_encode (const char *, const char *); +static bool known_authentication_scheme_p (const char *, const char *); +static void load_cookies (void); + #ifndef MIN # define MIN(x, y) ((x) > (y) ? (y) : (x)) #endif @@ -374,6 +383,50 @@ request_free (struct request *req) xfree (req); } +static struct hash_table *basic_authed_hosts; + +/* Find out if this host has issued a Basic challenge yet; if so, give + * it the username, password. A temporary measure until we can get + * proper authentication in place. */ + +static int +maybe_send_basic_creds (const char *hostname, const char *user, + const char *passwd, struct request *req) +{ + int did_challenge = 0; + + if (basic_authed_hosts + && hash_table_contains(basic_authed_hosts, hostname)) + { + DEBUGP(("Found `%s' in basic_authed_hosts.\n", hostname)); + request_set_header (req, "Authorization", + basic_authentication_encode (user, passwd), + rel_value); + did_challenge = 1; + } + else + { + DEBUGP(("Host `%s' has not issued a general basic challenge.\n", + hostname)); + } + return did_challenge; +} + +static void +register_basic_auth_host (const char *hostname) +{ + if (!basic_authed_hosts) + { + basic_authed_hosts = make_nocase_string_hash_table (1); + } + if (!hash_table_contains(basic_authed_hosts, hostname)) + { + hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL); + DEBUGP(("Inserted `%s' into basic_authed_hosts\n", hostname)); + } +} + + /* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. 
*/ @@ -1260,13 +1313,6 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } -static char *create_authorization_line (const char *, const char *, - const char *, const char *, - const char *, bool *); -static char *basic_authentication_encode (const char *, const char *); -static bool known_authentication_scheme_p (const char *, const char *); -static void load_cookies (void); - #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (ISSPACE (line[sizeof (string_constant) - 1]) \ @@ -1313,10 +1359,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) int sock = -1; int flags; - /* Set to 1 when the authorization has failed permanently and should + /* Set to 1 when the authorization has already been sent and should not be tried again. */ bool auth_finished = false; + /* Set to 1 when just globally-set Basic authorization has been sent; + * should prevent further Basic negotiations, but not other + * mechanisms. */ + bool basic_auth_finished = false; + /* Whether NTLM authentication is used for this request. */ bool ntlm_seen = false; @@ -1422,31 +1473,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) user = user ? user : (opt.http_user ? opt.http_user : opt.user); passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - if (user && passwd) + if (user && passwd + && !u->user) /* We only do "site-wide" authentication with "global" + user/password values; URL user/password info overrides. */ { - /* We have the username and the password, but haven't tried - any authorization yet. Let's see if the "Basic" method - works. If not, we'll come back here and construct a - proper authorization method with the right challenges. 
- - If we didn't employ this kind of logic, every URL that - requires authorization would have to be processed twice, - which is very suboptimal and generates a bunch of false - "unauthorized" errors in the server log. - - #### But this logic also has a serious problem when used - with stronger authentications: we *first* transmit the - username and the password in clear text, and *then* attempt a - stronger authentication scheme. That cannot be right! We - are only fortunate that almost everyone still uses the - `Basic' scheme anyway. - - There should be an option to prevent this from happening, for - those who use strong authentication schemes and value their - passwords. */ - request_set_header (req, "Authorization", - basic_authentication_encode (user, passwd), - rel_value); + /* If this is a host for which we've already received a Basic + * challenge, we'll go ahead and send Basic authentication creds. */ + basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req); } proxyauth = NULL; @@ -1589,19 +1622,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) only hurts us. */ request_remove_header (req, "Authorization"); } - } - - if (sock < 0) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. 
*/ - if (host_lookup_failed) + else if (host_lookup_failed) { request_free (req); + logprintf(LOG_NOTQUIET, + _("%s: unable to resolve host address `%s'\n"), + exec_name, relevant->host); return HOSTERR; } + } + if (sock < 0) + { sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) { @@ -1788,7 +1820,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); if (has_html_suffix_p (hs->local_file)) *dt |= TEXTHTML; - return RETROK; + return RETRUNNEEDED; } else if (!ALLOW_CLOBBER) { @@ -1867,12 +1899,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); errno = 0; parsed = str_to_wgint (hdrval, NULL, 10); if (parsed == WGINT_MAX && errno == ERANGE) - /* Out of range. - #### If Content-Length is out of range, it most likely - means that the file is larger than 2G and that we're - compiled without LFS. In that case we should probably - refuse to even attempt to download the file. */ - contlen = -1; + { + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; + } + else if (parsed < 0) + { + /* Negative Content-Length; nonsensical, so we can't + assume any information about the content to receive. */ + contlen = -1; + } else contlen = parsed; } @@ -1920,16 +1960,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); } if (!www_authenticate) - /* If the authentication header is missing or - unrecognized, there's no sense in retrying. */ - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - else if (BEGINS_WITH (www_authenticate, "Basic")) - /* If the authentication scheme is "Basic", which we send - by default, there's no sense in retrying either. (This - should be changed when we stop sending "Basic" data by - default.) 
*/ - ; - else + { + /* If the authentication header is missing or + unrecognized, there's no sense in retrying. */ + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else if (!basic_auth_finished + || !BEGINS_WITH (www_authenticate, "Basic")) { char *pth; pth = url_full_path (u); @@ -1942,9 +1979,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); rel_value); if (BEGINS_WITH (www_authenticate, "NTLM")) ntlm_seen = true; + else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) + { + /* Need to register this host as using basic auth, + * so we automatically send creds next time. */ + register_basic_auth_host (u->host); + } xfree (pth); goto retry_with_auth; } + else + { + /* We already did Basic auth, and it failed. Gotta + * give up. */ + } } logputs (LOG_NOTQUIET, _("Authorization failed.\n")); request_free (req); @@ -2257,14 +2305,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, { int count; bool got_head = false; /* used for time-stamping and filename detection */ + bool time_came_from_head = false; bool got_name = false; char *tms; const char *tmrate; uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ - wgint local_size = 0; /* the size of the local file */ struct http_stat hstat; /* HTTP status */ struct_stat st; + bool send_head_first = true; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2302,6 +2351,19 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Reset the document type. */ *dt = 0; + /* Skip preliminary HEAD request if we're not in spider mode AND + * if -O was given or HTTP Content-Disposition support is disabled. */ + if (!opt.spider + && (got_name || !opt.content_disposition)) + send_head_first = false; + + /* Send preliminary HEAD request if -N is given and we have an existing + * destination file. 
*/ + if (opt.timestamping + && !opt.content_disposition + && file_exists_p (url_file_name (u))) + send_head_first = true; + /* THE loop */ do { @@ -2310,7 +2372,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, sleep_between_retrievals (count); /* Get the current time string. */ - tms = time_str (time (NULL)); + tms = datetime_str (time (NULL)); if (opt.spider && !got_head) logprintf (LOG_VERBOSE, _("\ @@ -2319,7 +2381,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* Print fetch message, if opt.verbose. */ if (opt.verbose) { - char *hurl = url_string (u, true); + char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD); if (count > 1) { @@ -2343,7 +2405,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (((opt.spider || opt.timestamping) && !got_head) || !got_name) + if (send_head_first && !got_head) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; @@ -2379,12 +2441,12 @@ Spider mode enabled. Check if remote file exists.\n")); err = gethttp (u, &hstat, dt, proxy); /* Time? */ - tms = time_str (time (NULL)); + tms = datetime_str (time (NULL)); /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); - + switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: @@ -2435,23 +2497,31 @@ Spider mode enabled. Check if remote file exists.\n")); /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { char *hurl = NULL; if (!opt.verbose) { /* #### Ugly ugly ugly! */ - hurl = url_string (u, true); + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); } + + /* Fall back to GET if HEAD fails with a 500 or 501 error code. 
*/ + if (*dt & HEAD_ONLY + && (hstat.statcode == 500 || hstat.statcode == 501)) + { + got_head = true; + continue; + } /* Maybe we should always keep track of broken links, not just in * spider mode. */ - if (opt.spider) + else if (opt.spider) { /* #### Again: ugly ugly ugly! */ if (!hurl) - hurl = url_string (u, true); + hurl = url_string (u, URL_AUTH_HIDE_PASSWD); nonexisting_url (hurl); logprintf (LOG_NOTQUIET, _("\ Remote file does not exist -- broken link!!!\n")); @@ -2468,7 +2538,7 @@ Remote file does not exist -- broken link!!!\n")); } /* Did we get the time-stamp? */ - if (!got_head) + if (send_head_first && !got_head) { bool restart_loop = false; @@ -2484,6 +2554,8 @@ Last-modified header missing -- time-stamps turned off.\n")); if (tmr == (time_t) (-1)) logputs (LOG_VERBOSE, _("\ Last-modified header invalid -- time-stamp ignored.\n")); + if (*dt & HEAD_ONLY) + time_came_from_head = true; } /* The time-stamping section. */ @@ -2516,7 +2588,7 @@ Server file no newer than local file `%s' -- not retrieving.\n\n"), { logprintf (LOG_VERBOSE, _("\ The sizes do not match (local %s) -- retrieving.\n"), - number_to_static_string (local_size)); + number_to_static_string (hstat.orig_file_size)); } } else @@ -2532,12 +2604,6 @@ The sizes do not match (local %s) -- retrieving.\n"), restart_loop = true; } - if (opt.always_rest) - { - got_name = true; - restart_loop = true; - } - if (opt.spider) { if (opt.recursive) @@ -2552,7 +2618,7 @@ Remote file exists and could contain links to other resources -- retrieving.\n\n { logprintf (LOG_VERBOSE, _("\ Remote file exists but does not contain any link -- not retrieving.\n\n")); - ret = RETRUNNEEDED; + ret = RETROK; /* RETRUNNEEDED is not for caller. 
*/ goto exit; } } @@ -2560,11 +2626,17 @@ Remote file exists but does not contain any link -- not retrieving.\n\n")); { logprintf (LOG_VERBOSE, _("\ Remote file exists but recursion is disabled -- not retrieving.\n\n")); - ret = RETRUNNEEDED; + ret = RETROK; /* RETRUNNEEDED is not for caller. */ goto exit; } } + if (send_head_first) + { + got_name = true; + restart_loop = true; + } + got_head = true; /* no more time-stamping */ *dt &= ~HEAD_ONLY; count = 0; /* the retrieve count for HEAD is reset */ @@ -2588,7 +2660,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n")); else fl = hstat.local_file; if (fl) - touch (fl, tmr); + { + time_t newtmr = -1; + /* Reparse time header, in case it's changed. */ + if (time_came_from_head + && hstat.remote_time && hstat.remote_time[0]) + { + newtmr = http_atotm (hstat.remote_time); + if (newtmr != -1) + tmr = newtmr; + } + touch (fl, tmr); + } } /* End of time-stamping section. */ @@ -3090,6 +3173,6 @@ test_parse_content_disposition() #endif /* TESTING */ /* - * vim: et ts=2 sw=2 + * vim: et sts=2 sw=2 cino+={s */