X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=c3adbf40755150d2ab158f674941302b8dd521cb;hb=766df9d4e9392045a4e5c730ed81e599b509557a;hp=58e9b14a94453ff5477b745a779be2d1411595d3;hpb=48b53471e89938fde4588bea068578ab2ae7864f;p=wget diff --git a/src/http.c b/src/http.c index 58e9b14a..c3adbf40 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,6 @@ /* HTTP support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -69,11 +69,13 @@ as that of the covered work. */ extern char *version_string; /* Forward decls. */ +struct http_stat; static char *create_authorization_line (const char *, const char *, const char *, const char *, const char *, bool *); static char *basic_authentication_encode (const char *, const char *); static bool known_authentication_scheme_p (const char *, const char *); +static void ensure_extension (struct http_stat *, const char *, int *); static void load_cookies (void); #ifndef MIN @@ -86,6 +88,7 @@ static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" +#define TEXTCSS_S "text/css" /* Some status code validation macros: */ #define H_20X(x) (((x) >= 200) && ((x) < 300)) @@ -139,6 +142,8 @@ struct request { int hcount, hcapacity; }; +extern int numurls; + /* Create a new, empty request. At least request_set_method must be called before the request can be used. */ @@ -1296,6 +1301,7 @@ struct http_stat char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ + char *message; /* status message */ wgint rd_size; /* amount of data read from socket */ double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ @@ -1322,6 +1328,7 @@ free_hstat (struct http_stat *hs) xfree_null (hs->rderrmsg); xfree_null (hs->local_file); xfree_null (hs->orig_file_name); + xfree_null (hs->message); /* Guard against being called twice. */ hs->newloc = NULL; @@ -1441,6 +1448,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; + hs->message = NULL; conn = u; @@ -1489,9 +1497,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) user = user ? user : (opt.http_user ? opt.http_user : opt.user); passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - if (user && passwd - && !u->user) /* We only do "site-wide" authentication with "global" - user/password values; URL user/password info overrides. */ + /* We only do "site-wide" authentication with "global" user/password + * values unless --auth-no-challange has been requested; URL user/password + * info overrides. */ + if (user && passwd && (!u->user || opt.auth_without_challenge)) { /* If this is a host for which we've already received a Basic * challenge, we'll go ahead and send Basic authentication creds. */ @@ -1713,6 +1722,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) resp = resp_new (head); statcode = resp_status (resp, &message); + hs->message = xstrdup (message); resp_free (resp); xfree (head); if (statcode != 200) @@ -1795,6 +1805,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Check for status line. */ message = NULL; statcode = resp_status (resp, &message); + hs->message = xstrdup (message); if (!opt.server_response) logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? quotearg_style (escape_quoting_style, message) : ""); @@ -1804,6 +1815,95 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) print_server_response (resp, " "); } + /* Check for keep-alive related responses. */ + if (!inhibit_keep_alive && contlen != -1) + { + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) + keep_alive = true; + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) + { + if (0 == strcasecmp (hdrval, "Keep-Alive")) + keep_alive = true; + } + } + + if (keep_alive) + /* The server has promised that it will not close the connection + when we're done. This means that we can register it. */ + register_persistent (conn->host, conn->port, sock, using_ssl); + + if (statcode == HTTP_STATUS_UNAUTHORIZED) + { + /* Authorization is required. */ + if (keep_alive && !head_only && skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + pconn.authorized = false; + if (!auth_finished && (user && passwd)) + { + /* IIS sends multiple copies of WWW-Authenticate, one with + the value "negotiate", and other(s) with data. Loop over + all the occurrences and pick the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); + break; + } + + if (!www_authenticate) + { + /* If the authentication header is missing or + unrecognized, there's no sense in retrying. */ + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else if (!basic_auth_finished + || !BEGINS_WITH (www_authenticate, "Basic")) + { + char *pth; + pth = url_full_path (u); + request_set_header (req, "Authorization", + create_authorization_line (www_authenticate, + user, passwd, + request_method (req), + pth, + &auth_finished), + rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = true; + else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) + { + /* Need to register this host as using basic auth, + * so we automatically send creds next time. */ + register_basic_auth_host (u->host); + } + xfree (pth); + goto retry_with_auth; + } + else + { + /* We already did Basic auth, and it failed. Gotta + * give up. */ + } + } + logputs (LOG_NOTQUIET, _("Authorization failed.\n")); + request_free (req); + return AUTHFAILED; + } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = true; + } + /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ if (!hs->local_file) @@ -1936,93 +2036,6 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); contlen = parsed; } - /* Check for keep-alive related responses. */ - if (!inhibit_keep_alive && contlen != -1) - { - if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) - keep_alive = true; - else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) - { - if (0 == strcasecmp (hdrval, "Keep-Alive")) - keep_alive = true; - } - } - if (keep_alive) - /* The server has promised that it will not close the connection - when we're done. This means that we can register it. */ - register_persistent (conn->host, conn->port, sock, using_ssl); - - if (statcode == HTTP_STATUS_UNAUTHORIZED) - { - /* Authorization is required. */ - if (keep_alive && !head_only && skip_short_body (sock, contlen)) - CLOSE_FINISH (sock); - else - CLOSE_INVALIDATE (sock); - pconn.authorized = false; - if (!auth_finished && (user && passwd)) - { - /* IIS sends multiple copies of WWW-Authenticate, one with - the value "negotiate", and other(s) with data. Loop over - all the occurrences and pick the one we recognize. */ - int wapos; - const char *wabeg, *waend; - char *www_authenticate = NULL; - for (wapos = 0; - (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, - &wabeg, &waend)) != -1; - ++wapos) - if (known_authentication_scheme_p (wabeg, waend)) - { - BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); - break; - } - - if (!www_authenticate) - { - /* If the authentication header is missing or - unrecognized, there's no sense in retrying. */ - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - } - else if (!basic_auth_finished - || !BEGINS_WITH (www_authenticate, "Basic")) - { - char *pth; - pth = url_full_path (u); - request_set_header (req, "Authorization", - create_authorization_line (www_authenticate, - user, passwd, - request_method (req), - pth, - &auth_finished), - rel_value); - if (BEGINS_WITH (www_authenticate, "NTLM")) - ntlm_seen = true; - else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) - { - /* Need to register this host as using basic auth, - * so we automatically send creds next time. */ - register_basic_auth_host (u->host); - } - xfree (pth); - goto retry_with_auth; - } - else - { - /* We already did Basic auth, and it failed. Gotta - * give up. */ - } - } - logputs (LOG_NOTQUIET, _("Authorization failed.\n")); - request_free (req); - return AUTHFAILED; - } - else /* statcode != HTTP_STATUS_UNAUTHORIZED */ - { - /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ - if (ntlm_seen) - pconn.authorized = true; - } request_free (req); hs->statcode = statcode; @@ -2117,42 +2130,37 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); else *dt &= ~TEXTHTML; - if (opt.html_extension && (*dt & TEXTHTML)) - /* -E / --html-extension / html_extension = on was specified, and this is a - text/html file. If some case-insensitive variation on ".htm[l]" isn't - already the file's suffix, tack on ".html". */ - { - char *last_period_in_local_filename = strrchr (hs->local_file, '.'); + if (type && + 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S))) + *dt |= TEXTCSS; + else + *dt &= ~TEXTCSS; - if (last_period_in_local_filename == NULL - || !(0 == strcasecmp (last_period_in_local_filename, ".htm") - || 0 == strcasecmp (last_period_in_local_filename, ".html"))) + if (opt.html_extension) + { + if (*dt & TEXTHTML) + /* -E / --html-extension / html_extension = on was specified, + and this is a text/html file. If some case-insensitive + variation on ".htm[l]" isn't already the file's suffix, + tack on ".html". */ { - int local_filename_len = strlen (hs->local_file); - /* Resize the local file, allowing for ".html" preceded by - optional ".NUMBER". */ - hs->local_file = xrealloc (hs->local_file, - local_filename_len + 24 + sizeof (".html")); - strcpy(hs->local_file + local_filename_len, ".html"); - /* If clobbering is not allowed and the file, as named, - exists, tack on ".NUMBER.html" instead. */ - if (!ALLOW_CLOBBER && file_exists_p (hs->local_file)) - { - int ext_num = 1; - do - sprintf (hs->local_file + local_filename_len, - ".%d.html", ext_num++); - while (file_exists_p (hs->local_file)); - } - *dt |= ADDED_HTML_EXTENSION; + ensure_extension (hs, ".html", dt); + } + else if (*dt & TEXTCSS) + { + ensure_extension (hs, ".css", dt); } } - if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE) + if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE + || (hs->restval > 0 && statcode == HTTP_STATUS_OK + && contrange == 0 && hs->restval >= contlen) + ) { /* If `-c' is in use and the file has been fully downloaded (or the remote file has shrunk), Wget effectively requests bytes - after the end of file and the server response with 416. */ + after the end of file and the server response with 416 + (or 200 with a <= Content-Length. */ logputs (LOG_VERBOSE, _("\ \n The file is already fully retrieved; nothing to do.\n\n")); /* In case the caller inspects. */ @@ -2662,19 +2670,20 @@ The sizes do not match (local %s) -- retrieving.\n"), if (opt.spider) { + bool finished = true; if (opt.recursive) { if (*dt & TEXTHTML) { logputs (LOG_VERBOSE, _("\ Remote file exists and could contain links to other resources -- retrieving.\n\n")); + finished = false; } else { logprintf (LOG_VERBOSE, _("\ Remote file exists but does not contain any link -- not retrieving.\n\n")); ret = RETROK; /* RETRUNNEEDED is not for caller. */ - goto exit; } } else @@ -2691,6 +2700,14 @@ but recursion is disabled -- not retrieving.\n\n")); Remote file exists.\n\n")); } ret = RETROK; /* RETRUNNEEDED is not for caller. */ + } + + if (finished) + { + logprintf (LOG_NONVERBOSE, + _("%s URL:%s %2d %s\n"), + tms, u->url, hstat.statcode, + hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : ""); goto exit; } } @@ -2706,16 +2723,8 @@ Remote file exists.\n\n")); && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { - /* #### This code repeats in http.c and ftp.c. Move it to a - function! */ const char *fl = NULL; - if (opt.output_document) - { - if (output_stream_regular) - fl = opt.output_document; - } - else - fl = hstat.local_file; + set_local_file (&fl, hstat.local_file); if (fl) { time_t newtmr = -1; @@ -2739,9 +2748,14 @@ Remote file exists.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s/%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s/%s]\n\n") + : _("%s (%s) - %s saved [%s/%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len), number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, @@ -2751,7 +2765,7 @@ Remote file exists.\n\n")); number_to_static_string (hstat.contlen), hstat.local_file, count); } - ++opt.numurls; + ++numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ @@ -2770,16 +2784,21 @@ Remote file exists.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s]\n\n") + : _("%s (%s) - %s saved [%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, "%s URL:%s [%s] -> \"%s\" [%d]\n", tms, u->url, number_to_static_string (hstat.len), hstat.local_file, count); } - ++opt.numurls; + ++numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ @@ -2800,10 +2819,18 @@ Remote file exists.\n\n")); printwhat (count, opt.ntry); continue; } - else + else if (hstat.len != hstat.restval) /* Getting here would mean reading more data than requested with content-length, which we never do. */ abort (); + else + { + /* Getting here probably means that the content-length was + * _less_ than the original, local size. We should probably + * truncate or re-read, or something. FIXME */ + ret = RETROK; + goto exit; + } } else /* from now on hstat.res can only be -1 */ { @@ -3020,10 +3047,11 @@ digest_authentication_encode (const char *au, const char *user, while (extract_param (&au, &name, &value, ',')) { size_t i; + size_t namelen = name.e - name.b; for (i = 0; i < countof (options); i++) - if ((size_t) (name.e - name.b) == strlen (options[i].name) + if (namelen == strlen (options[i].name) && 0 == strncmp (name.b, options[i].name, - (size_t) (name.e - name.b))) + namelen)) { *options[i].variable = strdupdelim (value.b, value.e); break; @@ -3103,9 +3131,10 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", first argument and are followed by whitespace or terminating \0. The comparison is case-insensitive. */ #define STARTS(literal, b, e) \ - (((size_t) ((e) - (b))) >= STRSIZE (literal) \ + ((e > b) \ + && ((size_t) ((e) - (b))) >= STRSIZE (literal) \ && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ - && ((e) - (b) == STRSIZE (literal) \ + && ((size_t) ((e) - (b)) == STRSIZE (literal) \ || c_isspace (b[STRSIZE (literal)]))) static bool @@ -3188,6 +3217,42 @@ http_cleanup (void) cookie_jar_delete (wget_cookie_jar); } +void +ensure_extension (struct http_stat *hs, const char *ext, int *dt) +{ + char *last_period_in_local_filename = strrchr (hs->local_file, '.'); + char shortext[8]; + int len = strlen (ext); + if (len == 5) + { + strncpy (shortext, ext, len - 1); + shortext[len - 2] = '\0'; + } + + if (last_period_in_local_filename == NULL + || !(0 == strcasecmp (last_period_in_local_filename, shortext) + || 0 == strcasecmp (last_period_in_local_filename, ext))) + { + int local_filename_len = strlen (hs->local_file); + /* Resize the local file, allowing for ".html" preceded by + optional ".NUMBER". */ + hs->local_file = xrealloc (hs->local_file, + local_filename_len + 24 + len); + strcpy (hs->local_file + local_filename_len, ext); + /* If clobbering is not allowed and the file, as named, + exists, tack on ".NUMBER.html" instead. */ + if (!ALLOW_CLOBBER && file_exists_p (hs->local_file)) + { + int ext_num = 1; + do + sprintf (hs->local_file + local_filename_len, + ".%d%s", ext_num++, ext); + while (file_exists_p (hs->local_file)); + } + *dt |= ADDED_HTML_EXTENSION; + } +} + #ifdef TESTING