X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=748b4e8142a99bf28eed6d3248fd89fb61b89241;hp=331c2e7923ecf4ef14c495c20a7c0b42ce640d7c;hb=19ef7249b8679e7a88067dca3db37e947408e15f;hpb=b7f54921f6618df7750c07ff8807b9f4b0baff06 diff --git a/src/http.c b/src/http.c index 331c2e79..748b4e81 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,7 @@ /* HTTP support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -33,9 +34,7 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include #include #include #include @@ -927,17 +926,12 @@ skip_short_body (int fd, wgint contlen, bool chunked) char *line = fd_read_line (fd); char *endl; if (line == NULL) - { - ret = -1; - break; - } + break; remaining_chunk_size = strtol (line, &endl, 16); if (remaining_chunk_size == 0) { - ret = 0; - if (fd_read_line (fd) == NULL) - ret = -1; + fd_read_line (fd); break; } } @@ -1149,71 +1143,44 @@ append_value_to_filename (char **filename, param_token const * const value) false. The file name is stripped of directory components and must not be - empty. */ + empty. + + Historically, this function returned filename prefixed with opt.dir_prefix, + now that logic is handled by the caller, new code should pay attention, + changed by crq, Sep 2010. + +*/ static bool parse_content_disposition (const char *hdr, char **filename) { - *filename = NULL; param_token name, value; + *filename = NULL; while (extract_param (&hdr, &name, &value, ';')) { int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" ); if ( isFilename && value.b != NULL) - { - /* Make the file name begin at the last slash or backslash. */ - const char *last_slash = memrchr (value.b, '/', value.e - value.b); - const char *last_bs = memrchr (value.b, '\\', value.e - value.b); - if (last_slash && last_bs) - value.b = 1 + MAX (last_slash, last_bs); - else if (last_slash || last_bs) - value.b = 1 + (last_slash ? last_slash : last_bs); - if (value.b == value.e) - continue; - /* Start with the directory prefix, if specified. */ - if (opt.dir_prefix) - { - if (!(*filename)) - { - int prefix_length = strlen (opt.dir_prefix); - bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/'); - int total_length; - - if (add_slash) - ++prefix_length; - total_length = prefix_length + (value.e - value.b); - *filename = xmalloc (total_length + 1); - strcpy (*filename, opt.dir_prefix); - if (add_slash) - (*filename)[prefix_length - 1] = '/'; - memcpy (*filename + prefix_length, value.b, (value.e - value.b)); - (*filename)[total_length] = '\0'; - } - else - { - append_value_to_filename (filename, &value); - } - } - else - { - if (*filename) - { - append_value_to_filename (filename, &value); - } - else - { - *filename = strdupdelim (value.b, value.e); - } - } - } + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + + if (*filename) + append_value_to_filename (filename, &value); + else + *filename = strdupdelim (value.b, value.e); + } } + if (*filename) - { - return true; - } + return true; else - { - return false; - } + return false; } @@ -1481,6 +1448,20 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } +static void +get_file_flags (const char *filename, int *dt) +{ + logprintf (LOG_VERBOSE, _("\ +File %s already there; not retrieving.\n\n"), quote (filename)); + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; + + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (filename)) + *dt |= TEXTHTML; +} + #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (c_isspace (line[sizeof (string_constant) - 1]) \ @@ -1526,7 +1507,7 @@ free_hstat (struct http_stat *hs) server, and u->url will be requested. */ static uerr_t gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, - struct iri *iri) + struct iri *iri, int count) { struct request *req; @@ -1573,15 +1554,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, /* Is the server using the chunked transfer encoding? */ bool chunked_transfer_encoding = false; - /* Whether keep-alive should be inhibited. - - RFC 2068 requests that 1.0 clients not send keep-alive requests - to proxies. This is because many 1.0 proxies do not interpret - the Connection header and transfer it to the remote server, - causing it to not close the connection and leave both the proxy - and the client hanging. */ + /* Whether keep-alive should be inhibited. */ bool inhibit_keep_alive = - !opt.http_keep_alive || opt.ignore_length || proxy != NULL; + !opt.http_keep_alive || opt.ignore_length; /* Headers sent when using POST. */ wgint post_data_size = 0; @@ -1644,7 +1619,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) - request_set_header (req, "Pragma", "no-cache", rel_none); + { + /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */ + request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none); + + /* ... but some HTTP/1.0 caches doesn't implement Cache-Control. */ + request_set_header (req, "Pragma", "no-cache", rel_none); + } if (hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", @@ -1691,8 +1672,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, rel_value); } - if (!inhibit_keep_alive) - request_set_header (req, "Connection", "Keep-Alive", rel_none); + if (inhibit_keep_alive) + request_set_header (req, "Connection", "Close", rel_none); + else + { + if (proxy == NULL) + request_set_header (req, "Connection", "Keep-Alive", rel_none); + else + { + request_set_header (req, "Connection", "Close", rel_none); + request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none); + } + } if (opt.post_data || opt.post_file_name) { @@ -1995,12 +1986,14 @@ read_header: _("Malformed status line"))); CLOSE_INVALIDATE (sock); request_free (req); + xfree (head); return HERR; } if (H_10X (statcode)) { DEBUGP (("Ignoring response\n")); + xfree (head); goto read_header; } @@ -2049,8 +2042,9 @@ read_header: } } - resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)); - if (0 == strcasecmp (hdrval, "chunked")) + chunked_transfer_encoding = false; + if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)) + && 0 == strcasecmp (hdrval, "chunked")) chunked_transfer_encoding = true; /* Handle (possibly multiple instances of) the Set-Cookie header. */ @@ -2159,15 +2153,23 @@ read_header: * hstat.local_file is set by http_loop to the argument of -O. */ if (!hs->local_file) { + char *local_file = NULL; + /* Honor Content-Disposition whether possible. */ if (!opt.content_disposition || !resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) - || !parse_content_disposition (hdrval, &hs->local_file)) + || !parse_content_disposition (hdrval, &local_file)) { /* The Content-Disposition header is missing or broken. * Choose unique file name according to given URL. */ - hs->local_file = url_file_name (u); + hs->local_file = url_file_name (u, NULL); + } + else + { + DEBUGP (("Parsed filename from Content-Disposition: %s\n", + local_file)); + hs->local_file = url_file_name (u, local_file); } } @@ -2179,16 +2181,7 @@ read_header: /* If opt.noclobber is turned on and file already exists, do not retrieve the file. But if the output_document was given, then this test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), quote (hs->local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; - + get_file_flags (hs->local_file, dt); xfree (head); xfree_null (message); return RETRUNNEEDED; @@ -2339,6 +2332,15 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); CLOSE_INVALIDATE (sock); xfree_null (type); xfree (head); + /* From RFC2616: The status codes 303 and 307 have + been added for servers that wish to make unambiguously + clear which kind of reaction is expected of the client. + + A 307 should be redirected using the same method, + in other words, a POST should be preserved and not + converted to a GET in that case. */ + if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT) + return NEWLOCATION_KEEP_POST; return NEWLOCATION; } } @@ -2376,9 +2378,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); } if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE - || (hs->restval > 0 && statcode == HTTP_STATUS_OK - && contrange == 0 && hs->restval >= contlen) - ) + || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK + && contrange == 0 && contlen >= 0 && hs->restval >= contlen)) { /* If `-c' is in use and the file has been fully downloaded (or the remote file has shrunk), Wget effectively requests bytes @@ -2500,8 +2501,21 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); fp = fopen (hs->local_file, "ab"); #endif /* def __VMS [else] */ } - else if (ALLOW_CLOBBER) + else if (ALLOW_CLOBBER || count > 0) { + if (opt.unlink && file_exists_p (hs->local_file)) + { + int res = unlink (hs->local_file); + if (res < 0) + { + logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, + strerror (errno)); + CLOSE_INVALIDATE (sock); + xfree (head); + return UNLINKERR; + } + } + #ifdef __VMS int open_id; @@ -2594,8 +2608,9 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); /* The genuine HTTP loop! This is the part where the retrieval is retried, and retried, and retried, and... */ uerr_t -http_loop (struct url *u, char **newloc, char **local_file, const char *referer, - int *dt, struct url *proxy, struct iri *iri) +http_loop (struct url *u, struct url *original_url, char **newloc, + char **local_file, const char *referer, int *dt, struct url *proxy, + struct iri *iri) { int count; bool got_head = false; /* used for time-stamping and filename detection */ @@ -2642,28 +2657,17 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, } else if (!opt.content_disposition) { - hstat.local_file = url_file_name (u); + hstat.local_file = + url_file_name (opt.trustservernames ? u : original_url, NULL); got_name = true; } - /* TODO: Ick! This code is now in both gethttp and http_loop, and is - * screaming for some refactoring. */ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document) { /* If opt.noclobber is turned on and file already exists, do not retrieve the file. But if the output_document was given, then this test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), - quote (hstat.local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hstat.local_file)) - *dt |= TEXTHTML; - + get_file_flags (hstat.local_file, dt); ret = RETROK; goto exit; } @@ -2680,7 +2684,7 @@ File %s already there; not retrieving.\n\n"), /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ - file_name = url_file_name (u); + file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); if (opt.timestamping && (file_exists_p (file_name) || opt.content_disposition)) send_head_first = true; @@ -2762,7 +2766,7 @@ Spider mode enabled. Check if remote file exists.\n")); *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ - err = gethttp (u, &hstat, dt, proxy, iri); + err = gethttp (u, &hstat, dt, proxy, iri, count); /* Time? */ tms = datetime_str (time (NULL)); @@ -2796,7 +2800,15 @@ Spider mode enabled. Check if remote file exists.\n")); logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); ret = err; goto exit; + case UNLINKERR: + /* Another fatal error. */ + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"), + quote (hstat.local_file), strerror (errno)); + ret = err; + goto exit; case NEWLOCATION: + case NEWLOCATION_KEEP_POST: /* Return the new location to the caller. */ if (!*newloc) { @@ -2807,7 +2819,7 @@ Spider mode enabled. Check if remote file exists.\n")); } else { - ret = NEWLOCATION; + ret = err; } goto exit; case RETRUNNEEDED: @@ -3040,9 +3052,9 @@ Remote file exists.\n\n")); /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) - downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); else - downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file); ret = RETROK; goto exit; @@ -3073,9 +3085,9 @@ Remote file exists.\n\n")); /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) - downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); else - downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file); ret = RETROK; goto exit; @@ -3130,7 +3142,7 @@ Remote file exists.\n\n")); while (!opt.ntry || (count < opt.ntry)); exit: - if (ret == RETROK) + if (ret == RETROK && local_file) *local_file = xstrdup (hstat.local_file); free_hstat (&hstat); @@ -3508,7 +3520,7 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt) if (len == 5) { strncpy (shortext, ext, len - 1); - shortext[len - 2] = '\0'; + shortext[len - 1] = '\0'; } if (last_period_in_local_filename == NULL @@ -3544,20 +3556,15 @@ test_parse_content_disposition() int i; struct { char *hdrval; - char *opt_dir_prefix; char *filename; bool result; } test_array[] = { - { "filename=\"file.ext\"", NULL, "file.ext", true }, - { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true }, - { "attachment", NULL, NULL, false }, - { "attachment", "somedir", NULL, false }, - { "attachement; filename*=UTF-8'en-US'hello.txt", NULL, "hello.txt", true }, - { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", NULL, "helloworld.txt", true }, + { "filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", "file.ext", true }, + { "attachment", NULL, false }, + { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true }, + { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true }, }; for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) @@ -3565,7 +3572,6 @@ test_parse_content_disposition() char *filename; bool res; - opt.dir_prefix = test_array[i].opt_dir_prefix; res = parse_content_disposition (test_array[i].hdrval, &filename); mu_assert ("test_parse_content_disposition: wrong result",