X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=748b4e8142a99bf28eed6d3248fd89fb61b89241;hp=7b27ae67b7439b0aa588e43a14fa7349e00617da;hb=19ef7249b8679e7a88067dca3db37e947408e15f;hpb=87ad80cacc499d664f8e5ef11b0be3a97595ac42 diff --git a/src/http.c b/src/http.c index 7b27ae67..748b4e81 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,7 @@ /* HTTP support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -33,9 +34,7 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include #include #include #include @@ -927,17 +926,12 @@ skip_short_body (int fd, wgint contlen, bool chunked) char *line = fd_read_line (fd); char *endl; if (line == NULL) - { - ret = -1; - break; - } + break; remaining_chunk_size = strtol (line, &endl, 16); if (remaining_chunk_size == 0) { - ret = 0; - if (fd_read_line (fd) == NULL) - ret = -1; + fd_read_line (fd); break; } } @@ -1149,71 +1143,44 @@ append_value_to_filename (char **filename, param_token const * const value) false. The file name is stripped of directory components and must not be - empty. */ + empty. + + Historically, this function returned filename prefixed with opt.dir_prefix, + now that logic is handled by the caller, new code should pay attention, + changed by crq, Sep 2010. + +*/ static bool parse_content_disposition (const char *hdr, char **filename) { - *filename = NULL; param_token name, value; + *filename = NULL; while (extract_param (&hdr, &name, &value, ';')) { int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" ); if ( isFilename && value.b != NULL) - { - /* Make the file name begin at the last slash or backslash. */ - const char *last_slash = memrchr (value.b, '/', value.e - value.b); - const char *last_bs = memrchr (value.b, '\\', value.e - value.b); - if (last_slash && last_bs) - value.b = 1 + MAX (last_slash, last_bs); - else if (last_slash || last_bs) - value.b = 1 + (last_slash ? last_slash : last_bs); - if (value.b == value.e) - continue; - /* Start with the directory prefix, if specified. */ - if (opt.dir_prefix) - { - if (!(*filename)) - { - int prefix_length = strlen (opt.dir_prefix); - bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/'); - int total_length; - - if (add_slash) - ++prefix_length; - total_length = prefix_length + (value.e - value.b); - *filename = xmalloc (total_length + 1); - strcpy (*filename, opt.dir_prefix); - if (add_slash) - (*filename)[prefix_length - 1] = '/'; - memcpy (*filename + prefix_length, value.b, (value.e - value.b)); - (*filename)[total_length] = '\0'; - } - else - { - append_value_to_filename (filename, &value); - } - } - else - { - if (*filename) - { - append_value_to_filename (filename, &value); - } - else - { - *filename = strdupdelim (value.b, value.e); - } - } - } + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + + if (*filename) + append_value_to_filename (filename, &value); + else + *filename = strdupdelim (value.b, value.e); + } } + if (*filename) - { - return true; - } + return true; else - { - return false; - } + return false; } @@ -1481,6 +1448,20 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } +static void +get_file_flags (const char *filename, int *dt) +{ + logprintf (LOG_VERBOSE, _("\ +File %s already there; not retrieving.\n\n"), quote (filename)); + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; + + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (filename)) + *dt |= TEXTHTML; +} + #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (c_isspace (line[sizeof (string_constant) - 1]) \ @@ -1526,7 +1507,7 @@ free_hstat (struct http_stat *hs) server, and u->url will be requested. */ static uerr_t gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, - struct iri *iri) + struct iri *iri, int count) { struct request *req; @@ -1573,15 +1554,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, /* Is the server using the chunked transfer encoding? */ bool chunked_transfer_encoding = false; - /* Whether keep-alive should be inhibited. - - RFC 2068 requests that 1.0 clients not send keep-alive requests - to proxies. This is because many 1.0 proxies do not interpret - the Connection header and transfer it to the remote server, - causing it to not close the connection and leave both the proxy - and the client hanging. */ + /* Whether keep-alive should be inhibited. */ bool inhibit_keep_alive = - !opt.http_keep_alive || opt.ignore_length || proxy != NULL; + !opt.http_keep_alive || opt.ignore_length; /* Headers sent when using POST. */ wgint post_data_size = 0; @@ -1644,8 +1619,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) - request_set_header (req, "Pragma", "no-cache", rel_none); - if (hs->restval && !opt.timestamping) + { + /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */ + request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none); + + /* ... but some HTTP/1.0 caches doesn't implement Cache-Control. */ + request_set_header (req, "Pragma", "no-cache", rel_none); + } + if (hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", number_to_static_string (hs->restval)), @@ -1694,7 +1675,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, if (inhibit_keep_alive) request_set_header (req, "Connection", "Close", rel_none); else - request_set_header (req, "Connection", "Keep-Alive", rel_none); + { + if (proxy == NULL) + request_set_header (req, "Connection", "Keep-Alive", rel_none); + else + { + request_set_header (req, "Connection", "Close", rel_none); + request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none); + } + } if (opt.post_data || opt.post_file_name) { @@ -1997,12 +1986,14 @@ read_header: _("Malformed status line"))); CLOSE_INVALIDATE (sock); request_free (req); + xfree (head); return HERR; } if (H_10X (statcode)) { DEBUGP (("Ignoring response\n")); + xfree (head); goto read_header; } @@ -2051,8 +2042,9 @@ read_header: } } - resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)); - if (0 == strcasecmp (hdrval, "chunked")) + chunked_transfer_encoding = false; + if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)) + && 0 == strcasecmp (hdrval, "chunked")) chunked_transfer_encoding = true; /* Handle (possibly multiple instances of) the Set-Cookie header. */ @@ -2161,15 +2153,23 @@ read_header: * hstat.local_file is set by http_loop to the argument of -O. */ if (!hs->local_file) { + char *local_file = NULL; + /* Honor Content-Disposition whether possible. */ if (!opt.content_disposition || !resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) - || !parse_content_disposition (hdrval, &hs->local_file)) + || !parse_content_disposition (hdrval, &local_file)) { /* The Content-Disposition header is missing or broken. * Choose unique file name according to given URL. */ - hs->local_file = url_file_name (u); + hs->local_file = url_file_name (u, NULL); + } + else + { + DEBUGP (("Parsed filename from Content-Disposition: %s\n", + local_file)); + hs->local_file = url_file_name (u, local_file); } } @@ -2181,16 +2181,7 @@ read_header: /* If opt.noclobber is turned on and file already exists, do not retrieve the file. But if the output_document was given, then this test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), quote (hs->local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; - + get_file_flags (hs->local_file, dt); xfree (head); xfree_null (message); return RETRUNNEEDED; @@ -2341,6 +2332,15 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); CLOSE_INVALIDATE (sock); xfree_null (type); xfree (head); + /* From RFC2616: The status codes 303 and 307 have + been added for servers that wish to make unambiguously + clear which kind of reaction is expected of the client. + + A 307 should be redirected using the same method, + in other words, a POST should be preserved and not + converted to a GET in that case. */ + if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT) + return NEWLOCATION_KEEP_POST; return NEWLOCATION; } } @@ -2501,8 +2501,21 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); fp = fopen (hs->local_file, "ab"); #endif /* def __VMS [else] */ } - else if (ALLOW_CLOBBER) + else if (ALLOW_CLOBBER || count > 0) { + if (opt.unlink && file_exists_p (hs->local_file)) + { + int res = unlink (hs->local_file); + if (res < 0) + { + logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, + strerror (errno)); + CLOSE_INVALIDATE (sock); + xfree (head); + return UNLINKERR; + } + } + #ifdef __VMS int open_id; @@ -2645,28 +2658,16 @@ http_loop (struct url *u, struct url *original_url, char **newloc, else if (!opt.content_disposition) { hstat.local_file = - url_file_name (opt.trustservernames ? u : original_url); + url_file_name (opt.trustservernames ? u : original_url, NULL); got_name = true; } - /* TODO: Ick! This code is now in both gethttp and http_loop, and is - * screaming for some refactoring. */ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document) { /* If opt.noclobber is turned on and file already exists, do not retrieve the file. But if the output_document was given, then this test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), - quote (hstat.local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hstat.local_file)) - *dt |= TEXTHTML; - + get_file_flags (hstat.local_file, dt); ret = RETROK; goto exit; } @@ -2683,7 +2684,7 @@ File %s already there; not retrieving.\n\n"), /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ - file_name = url_file_name (opt.trustservernames ? u : original_url); + file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); if (opt.timestamping && (file_exists_p (file_name) || opt.content_disposition)) send_head_first = true; @@ -2765,7 +2766,7 @@ Spider mode enabled. Check if remote file exists.\n")); *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ - err = gethttp (u, &hstat, dt, proxy, iri); + err = gethttp (u, &hstat, dt, proxy, iri, count); /* Time? */ tms = datetime_str (time (NULL)); @@ -2799,7 +2800,15 @@ Spider mode enabled. Check if remote file exists.\n")); logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); ret = err; goto exit; + case UNLINKERR: + /* Another fatal error. */ + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"), + quote (hstat.local_file), strerror (errno)); + ret = err; + goto exit; case NEWLOCATION: + case NEWLOCATION_KEEP_POST: /* Return the new location to the caller. */ if (!*newloc) { @@ -2810,7 +2819,7 @@ Spider mode enabled. Check if remote file exists.\n")); } else { - ret = NEWLOCATION; + ret = err; } goto exit; case RETRUNNEEDED: @@ -3133,7 +3142,7 @@ Remote file exists.\n\n")); while (!opt.ntry || (count < opt.ntry)); exit: - if (ret == RETROK) + if (ret == RETROK && local_file) *local_file = xstrdup (hstat.local_file); free_hstat (&hstat); @@ -3511,7 +3520,7 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt) if (len == 5) { strncpy (shortext, ext, len - 1); - shortext[len - 2] = '\0'; + shortext[len - 1] = '\0'; } if (last_period_in_local_filename == NULL @@ -3547,20 +3556,15 @@ test_parse_content_disposition() int i; struct { char *hdrval; - char *opt_dir_prefix; char *filename; bool result; } test_array[] = { - { "filename=\"file.ext\"", NULL, "file.ext", true }, - { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true }, - { "attachment", NULL, NULL, false }, - { "attachment", "somedir", NULL, false }, - { "attachement; filename*=UTF-8'en-US'hello.txt", NULL, "hello.txt", true }, - { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", NULL, "helloworld.txt", true }, + { "filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", "file.ext", true }, + { "attachment", NULL, false }, + { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true }, + { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true }, }; for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) @@ -3568,7 +3572,6 @@ test_parse_content_disposition() char *filename; bool res; - opt.dir_prefix = test_array[i].opt_dir_prefix; res = parse_content_disposition (test_array[i].hdrval, &filename); mu_assert ("test_parse_content_disposition: wrong result",