X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=77f9797232cdb648dde8b641c4eb2d023ca0e7bb;hb=8566a727674ab3c2b0df03c31c6085a0d5d5bf81;hp=4752ce3d7eec4ef7910681f63929aa513a22a27b;hpb=46c94e5f262351556f9559148cfad57cccbeec3f;p=wget diff --git a/src/http.c b/src/http.c index 4752ce3d..77f97972 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* HTTP support. - Copyright (C) 1996-2005 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -757,7 +757,7 @@ print_server_response (const struct response *resp, const char *prefix) --e; /* This is safe even on printfs with broken handling of "%.s" because resp->headers ends with \0. */ - logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b); } } @@ -932,6 +932,23 @@ extract_param (const char **source, param_token *name, param_token *value, #undef MAX #define MAX(p, q) ((p) > (q) ? (p) : (q)) +/* Parse the contents of the `Content-Disposition' header, extracting + the information useful to Wget. Content-Disposition is a header + borrowed from MIME; when used in HTTP, it typically serves for + specifying the desired file name of the resource. For example: + + Content-Disposition: attachment; filename="flora.jpg" + + Wget will skip the tokens it doesn't care about, such as + "attachment" in the previous example; it will also skip other + unrecognized params. If the header is syntactically correct and + contains a file name, a copy of the file name is stored in + *filename and true is returned. Otherwise, the function returns + false. + + The file name is stripped of directory components and must not be + empty. */ + static bool parse_content_disposition (const char *hdr, char **filename) { @@ -1709,33 +1726,49 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ - if (!hs->local_file) + if (!hs->local_file) { /* Honor Content-Disposition whether possible. */ - if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) + if (!opt.content_disposition + || !resp_header_copy (resp, "Content-Disposition", + hdrval, sizeof (hdrval)) || !parse_content_disposition (hdrval, &hs->local_file)) { - /* Choose filename according to URL name. */ + /* The Content-Disposition header is missing or broken. + * Choose unique file name according to given URL. */ hs->local_file = url_file_name (u); } } + DEBUGP (("hs->local_file is: %s %s\n", hs->local_file, + file_exists_p (hs->local_file) ? "(existing)" : "(not existing)")); + /* TODO: perform this check only once. */ - if (opt.noclobber && file_exists_p (hs->local_file)) + if (file_exists_p (hs->local_file)) { - /* If opt.noclobber is turned on and file already exists, do not - retrieve the file */ - logprintf (LOG_VERBOSE, _("\ + if (opt.noclobber) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file */ + logprintf (LOG_VERBOSE, _("\ File `%s' already there; not retrieving.\n\n"), hs->local_file); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (hs->local_file)) + *dt |= TEXTHTML; - return RETROK; + return RETROK; + } + else + { + char *unique = unique_name (hs->local_file, true); + if (unique != hs->local_file) + xfree (hs->local_file); + hs->local_file = unique; + } } /* Support timestamping */ @@ -1981,11 +2014,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || - 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) + 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; + DEBUGP (("TEXTHTML is %s.\n", *dt | TEXTHTML ? "on": "off")); + if (opt.html_extension && (*dt & TEXTHTML)) /* -E / --html-extension / html_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't @@ -2104,13 +2139,6 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); return RETRFINISHED; } - /* Print fetch message, if opt.verbose. */ - if (opt.verbose) - { - logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), - HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); - } - /* Open the local file. */ if (!output_stream) { @@ -2147,6 +2175,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); else fp = output_stream; + /* Print fetch message, if opt.verbose. */ + if (opt.verbose) + { + logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), + HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); + } + /* This confuses the timestamping code that checks for file size. #### The timestamping code should be smarter about file size. */ if (opt.save_headers && hs->restval == 0) @@ -2245,7 +2280,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, sleep_between_retrievals (count); /* Get the current time string. */ - tms = time_str (NULL); + tms = time_str (time (NULL)); /* Print fetch message, if opt.verbose. */ if (opt.verbose) @@ -2274,7 +2309,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (opt.spider || (opt.timestamping && !got_head)) + if ((opt.spider && !opt.recursive) || (opt.timestamping && !got_head)) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; @@ -2309,7 +2344,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, err = gethttp (u, &hstat, dt, proxy); /* Time? */ - tms = time_str (NULL); + tms = time_str (time (NULL)); /* Get the new location (with or without the redirection). */ if (hstat.newloc) @@ -2365,20 +2400,26 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { + char *hurl = NULL; if (!opt.verbose) { /* #### Ugly ugly ugly! */ - char *hurl = url_string (u, true); + hurl = url_string (u, true); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); - xfree (hurl); + } + if (opt.spider && opt.recursive) + { + if (!hurl) hurl = url_string (u, true); + nonexisting_url (hurl, referer); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); ret = WRONGCODE; + xfree_null (hurl); goto exit; } @@ -2444,7 +2485,7 @@ The sizes do not match (local %s) -- retrieving.\n"), } if ((tmr != (time_t) (-1)) - && !opt.spider + && (!opt.spider || opt.recursive) && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { @@ -2463,7 +2504,7 @@ The sizes do not match (local %s) -- retrieving.\n"), } /* End of time-stamping section. */ - if (opt.spider) + if (opt.spider && !opt.recursive) { logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, escnonprint (hstat.error)); @@ -2929,7 +2970,7 @@ http_cleanup (void) #ifdef TESTING -char * +const char * test_parse_content_disposition() { int i; @@ -2953,8 +2994,6 @@ test_parse_content_disposition() res == test_array[i].result && (res == false || 0 == strcmp (test_array[i].filename, filename))); - - /* printf ("test %d: %s\n", i, res == false ? "false" : filename); */ } return NULL;