X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=331c2e7923ecf4ef14c495c20a7c0b42ce640d7c;hb=b7f54921f6618df7750c07ff8807b9f4b0baff06;hp=b0a9f2464d0f26744b69be9c615212472f47b325;hpb=7585b7019d5909428a477002579366a507e42b58;p=wget diff --git a/src/http.c b/src/http.c index b0a9f246..331c2e79 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,6 @@ /* HTTP support. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -56,9 +56,7 @@ as that of the covered work. */ # include "http-ntlm.h" #endif #include "cookies.h" -#ifdef ENABLE_DIGEST -# include "gen-md5.h" -#endif +#include "md5.h" #include "convert.h" #include "spider.h" @@ -95,6 +93,7 @@ static struct cookie_jar *wget_cookie_jar; #define TEXTCSS_S "text/css" /* Some status code validation macros: */ +#define H_10X(x) (((x) >= 100) && ((x) < 200)) #define H_20X(x) (((x) >= 200) && ((x) < 300)) #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS) #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ @@ -352,7 +351,7 @@ request_send (const struct request *req, int fd) APPEND (p, req->method); *p++ = ' '; APPEND (p, req->arg); *p++ = ' '; - memcpy (p, "HTTP/1.0\r\n", 10); p += 10; + memcpy (p, "HTTP/1.1\r\n", 10); p += 10; for (i = 0; i < req->hcount; i++) { @@ -406,18 +405,18 @@ maybe_send_basic_creds (const char *hostname, const char *user, if (opt.auth_without_challenge) { - DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n")); + DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n")); do_challenge = true; } else if (basic_authed_hosts && hash_table_contains(basic_authed_hosts, hostname)) { - DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname))); + DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname))); do_challenge = true; } else { - DEBUGP(("Host %s has not issued a general basic challenge.\n", + DEBUGP (("Host %s has not issued a general basic challenge.\n", quote (hostname))); } if (do_challenge) @@ -439,7 +438,7 @@ register_basic_auth_host (const char *hostname) if (!hash_table_contains(basic_authed_hosts, hostname)) { hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL); - DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname))); + DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname))); } } @@ -901,29 +900,54 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, mode, the body is displayed for debugging purposes. */ static bool -skip_short_body (int fd, wgint contlen) +skip_short_body (int fd, wgint contlen, bool chunked) { enum { SKIP_SIZE = 512, /* size of the download buffer */ SKIP_THRESHOLD = 4096 /* the largest size we read */ }; + wgint remaining_chunk_size = 0; char dlbuf[SKIP_SIZE + 1]; dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ - /* We shouldn't get here with unknown contlen. (This will change - with HTTP/1.1, which supports "chunked" transfer.) */ - assert (contlen != -1); + assert (contlen != -1 || contlen); /* If the body is too large, it makes more sense to simply close the connection than to try to read the body. */ if (contlen > SKIP_THRESHOLD) return false; - DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); - - while (contlen > 0) + while (contlen > 0 || chunked) { - int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1); + int ret; + if (chunked) + { + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + char *endl; + if (line == NULL) + { + ret = -1; + break; + } + + remaining_chunk_size = strtol (line, &endl, 16); + if (remaining_chunk_size == 0) + { + ret = 0; + if (fd_read_line (fd) == NULL) + ret = -1; + break; + } + } + + contlen = MIN (remaining_chunk_size, SKIP_SIZE); + } + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); + + ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1); if (ret <= 0) { /* Don't normally report the error since this is an @@ -933,6 +957,15 @@ skip_short_body (int fd, wgint contlen) return false; } contlen -= ret; + + if (chunked) + { + remaining_chunk_size -= ret; + if (remaining_chunk_size == 0) + if (fd_read_line (fd) == NULL) + return false; + } + /* Safe even if %.*s bogusly expects terminating \0 because we've zero-terminated dlbuf above. */ DEBUGP (("%.*s", ret, dlbuf)); @@ -1537,6 +1570,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, is done. */ bool keep_alive; + /* Is the server using the chunked transfer encoding? */ + bool chunked_transfer_encoding = false; + /* Whether keep-alive should be inhibited. RFC 2068 requests that 1.0 clients not send keep-alive requests @@ -1658,18 +1694,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, if (!inhibit_keep_alive) request_set_header (req, "Connection", "Keep-Alive", rel_none); - if (opt.cookies) - request_set_header (req, "Cookie", - cookie_header (wget_cookie_jar, - u->host, u->port, u->path, -#ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS -#else - 0 -#endif - ), - rel_value); - if (opt.post_data || opt.post_file_name) { request_set_header (req, "Content-Type", @@ -1691,6 +1715,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, rel_value); } + retry_with_auth: + /* We need to come back here when the initial attempt to retrieve + without authorization header fails. (Expected to happen at least + for the Digest authorization scheme.) */ + + if (opt.cookies) + request_set_header (req, "Cookie", + cookie_header (wget_cookie_jar, + u->host, u->port, u->path, +#ifdef HAVE_SSL + u->scheme == SCHEME_HTTPS +#else + 0 +#endif + ), + rel_value); + /* Add the user headers. */ if (opt.user_headers) { @@ -1699,11 +1740,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, request_set_user_header (req, opt.user_headers[i]); } - retry_with_auth: - /* We need to come back here when the initial attempt to retrieve - without authorization header fails. (Expected to happen at least - for the Digest authorization scheme.) */ - proxyauth = NULL; if (proxy) { @@ -1739,11 +1775,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); } - keep_alive = false; + keep_alive = true; /* Establish the connection. */ - if (!inhibit_keep_alive) + if (inhibit_keep_alive) + keep_alive = false; + else { /* Look for a persistent connection to target host, unless a proxy is used. The exception is when SSL is in use, in which @@ -1849,6 +1887,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, resp = resp_new (head); statcode = resp_status (resp, &message); + if (statcode < 0) + { + char *tms = datetime_str (time (NULL)); + logprintf (LOG_VERBOSE, "%d\n", statcode); + logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode, + quotearg_style (escape_quoting_style, + _("Malformed status line"))); + xfree (head); + return HERR; + } hs->message = xstrdup (message); resp_free (resp); xfree (head); @@ -1911,6 +1959,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, contrange = 0; *dt &= ~RETROKF; +read_header: head = read_http_response_head (sock); if (!head) { @@ -1937,6 +1986,24 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, /* Check for status line. */ message = NULL; statcode = resp_status (resp, &message); + if (statcode < 0) + { + char *tms = datetime_str (time (NULL)); + logprintf (LOG_VERBOSE, "%d\n", statcode); + logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode, + quotearg_style (escape_quoting_style, + _("Malformed status line"))); + CLOSE_INVALIDATE (sock); + request_free (req); + return HERR; + } + + if (H_10X (statcode)) + { + DEBUGP (("Ignoring response\n")); + goto read_header; + } + hs->message = xstrdup (message); if (!opt.server_response) logprintf (LOG_VERBOSE, "%2d %s\n", statcode, @@ -1975,15 +2042,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, /* Check for keep-alive related responses. */ if (!inhibit_keep_alive && contlen != -1) { - if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) - keep_alive = true; - else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) + if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { - if (0 == strcasecmp (hdrval, "Keep-Alive")) - keep_alive = true; + if (0 == strcasecmp (hdrval, "Close")) + keep_alive = false; } } + resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)); + if (0 == strcasecmp (hdrval, "chunked")) + chunked_transfer_encoding = true; + /* Handle (possibly multiple instances of) the Set-Cookie header. */ if (opt.cookies) { @@ -2010,7 +2079,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - if (keep_alive && !head_only && skip_short_body (sock, contlen)) + if (keep_alive && !head_only + && skip_short_body (sock, contlen, chunked_transfer_encoding)) CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); @@ -2262,7 +2332,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); _("Location: %s%s\n"), hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); - if (keep_alive && !head_only && skip_short_body (sock, contlen)) + if (keep_alive && !head_only + && skip_short_body (sock, contlen, chunked_transfer_encoding)) CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); @@ -2392,7 +2463,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); If not, they can be worked around using `--no-http-keep-alive'. */ CLOSE_FINISH (sock); - else if (keep_alive && skip_short_body (sock, contlen)) + else if (keep_alive + && skip_short_body (sock, contlen, chunked_transfer_encoding)) /* Successfully skipped the body; also keep using the socket. */ CLOSE_FINISH (sock); else @@ -2493,6 +2565,10 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); /* If the server ignored our range request, instruct fd_read_body to skip the first RESTVAL bytes of body. */ flags |= rb_skip_startpos; + + if (chunked_transfer_encoding) + flags |= rb_chunked_transfer_encoding; + hs->len = hs->restval; hs->rd_size = 0; hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, @@ -2533,6 +2609,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, struct_stat st; bool send_head_first = true; char *file_name; + bool force_full_retrieve = false; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2548,7 +2625,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, here so that we don't go through the hoops if we're just using FTP or whatever. */ if (opt.cookies) - load_cookies(); + load_cookies (); /* Warn on (likely bogus) wildcard usage in HTTP. */ if (opt.ftp_glob && has_wildcards_p (u->path)) @@ -2597,18 +2674,15 @@ File %s already there; not retrieving.\n\n"), /* Reset the document type. */ *dt = 0; - /* Skip preliminary HEAD request if we're not in spider mode AND - * if -O was given or HTTP Content-Disposition support is disabled. */ - if (!opt.spider - && (got_name || !opt.content_disposition)) + /* Skip preliminary HEAD request if we're not in spider mode. */ + if (!opt.spider) send_head_first = false; /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ file_name = url_file_name (u); - if (opt.timestamping - && !opt.content_disposition - && file_exists_p (file_name)) + if (opt.timestamping && (file_exists_p (file_name) + || opt.content_disposition)) send_head_first = true; xfree (file_name); @@ -2659,7 +2733,9 @@ Spider mode enabled. Check if remote file exists.\n")); *dt &= ~HEAD_ONLY; /* Decide whether or not to restart. */ - if (opt.always_rest + if (force_full_retrieve) + hstat.restval = hstat.len; + else if (opt.always_rest && got_name && stat (hstat.local_file, &st) == 0 && S_ISREG (st.st_mode)) @@ -2847,8 +2923,11 @@ The sizes do not match (local %s) -- retrieving.\n"), } } else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + { + force_full_retrieve = true; + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); + } logputs (LOG_VERBOSE, "\n"); } @@ -2957,7 +3036,7 @@ Remote file exists.\n\n")); hstat.local_file, count); } ++numurls; - total_downloaded_bytes += hstat.len; + total_downloaded_bytes += hstat.rd_size; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) @@ -2990,7 +3069,7 @@ Remote file exists.\n\n")); hstat.local_file, count); } ++numurls; - total_downloaded_bytes += hstat.len; + total_downloaded_bytes += hstat.rd_size; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) @@ -3136,7 +3215,7 @@ http_atotm (const char *time_string) oldlocale = setlocale (LC_TIME, NULL); if (oldlocale) { - size_t l = strlen (oldlocale); + size_t l = strlen (oldlocale) + 1; if (l >= sizeof savedlocale) savedlocale[0] = '\0'; else @@ -3217,7 +3296,7 @@ dump_hash (char *buf, const unsigned char *hash) { int i; - for (i = 0; i < MD5_HASHLEN; i++, hash++) + for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++) { *buf++ = XNUM_TO_digit (*hash >> 4); *buf++ = XNUM_TO_digit (*hash & 0xf); @@ -3270,37 +3349,37 @@ digest_authentication_encode (const char *au, const char *user, /* Calculate the digest value. */ { - ALLOCA_MD5_CONTEXT (ctx); - unsigned char hash[MD5_HASHLEN]; - char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1]; - char response_digest[MD5_HASHLEN * 2 + 1]; + struct md5_ctx ctx; + unsigned char hash[MD5_DIGEST_SIZE]; + char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1]; + char response_digest[MD5_DIGEST_SIZE * 2 + 1]; /* A1BUF = H(user ":" realm ":" password) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)user, strlen (user), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)realm, strlen (realm), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)user, strlen (user), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (a1buf, hash); /* A2BUF = H(method ":" path) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)method, strlen (method), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)path, strlen (path), ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)method, strlen (method), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)path, strlen (path), &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (a2buf, hash); /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (response_digest, hash); res = xmalloc (strlen (user) @@ -3308,7 +3387,7 @@ digest_authentication_encode (const char *au, const char *user, + strlen (realm) + strlen (nonce) + strlen (path) - + 2 * MD5_HASHLEN /*strlen (response_digest)*/ + + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/ + (opaque ? strlen (opaque) : 0) + 128); sprintf (res, "Digest \