X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=77f9797232cdb648dde8b641c4eb2d023ca0e7bb;hp=c5827275896fce8283e5e9b42e70e40e3253d22e;hb=8566a727674ab3c2b0df03c31c6085a0d5d5bf81;hpb=e8b61e46abd8471f0ea45a827a53b0eb5b3220cc diff --git a/src/http.c b/src/http.c index c5827275..77f97972 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* HTTP support. - Copyright (C) 1996-2005 Free Software Foundation, Inc. + Copyright (C) 1996-2006 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -757,7 +757,7 @@ print_server_response (const struct response *resp, const char *prefix) --e; /* This is safe even on printfs with broken handling of "%.s" because resp->headers ends with \0. */ - logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b); } } @@ -855,76 +855,119 @@ skip_short_body (int fd, wgint contlen) return true; } -static bool -extract_param_value_delim (const char *begin, const char *end, - const char *param_name, char **param_value) +/* Extract a parameter from the string (typically an HTTP header) at + **SOURCE and advance SOURCE to the next parameter. Return false + when there are no more parameters to extract. The name of the + parameter is returned in NAME, and the value in VALUE. If the + parameter has no value, the token's value is zeroed out. + + For example, if *SOURCE points to the string "attachment; + filename=\"foo bar\"", the first call to this function will return + the token named "attachment" and no value, and the second call will + return the token named "filename" and value "foo bar". The third + call will return false, indicating no more valid tokens. */ + +bool +extract_param (const char **source, param_token *name, param_token *value, + char separator) { - const char *p; - int len; - - assert (begin); - assert (end); - assert (param_name); - assert (param_value); - - len = strlen (param_name); + const char *p = *source; - /* skip initial whitespaces */ - p = begin; - while (*p && ISSPACE (*p) && p < end) ++p; - - if (end - p > len - && 0 == strncasecmp (p, param_name, len)) + while (ISSPACE (*p)) ++p; + if (!*p) { - const char *e; - - /* skip white spaces, equal sign and inital quote */ - p += len; - while (*p && (ISSPACE (*p) || *p == '\"' || *p == '=') && p < end) ++p; + *source = p; + return false; /* no error; nothing more to extract */ + } - /* find last quote */ - e = p; - while (*e && *e != '\"' && e < end) ++e; - - *param_value = strdupdelim (p, e); - + /* Extract name. */ + name->b = p; + while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p; + name->e = p; + if (name->b == name->e) + return false; /* empty name: error */ + while (ISSPACE (*p)) ++p; + if (*p == separator || !*p) /* no value */ + { + xzero (*value); + if (*p == separator) ++p; + *source = p; return true; } + if (*p != '=') + return false; /* error */ - return false; + /* *p is '=', extract value */ + ++p; + while (ISSPACE (*p)) ++p; + if (*p == '"') /* quoted */ + { + value->b = ++p; + while (*p && *p != '"') ++p; + if (!*p) + return false; + value->e = p++; + /* Currently at closing quote; find the end of param. */ + while (ISSPACE (*p)) ++p; + while (*p && *p != separator) ++p; + if (*p == separator) + ++p; + else if (*p) + /* garbage after closed quote, e.g. foo="bar"baz */ + return false; + } + else /* unquoted */ + { + value->b = p; + while (*p && *p != separator) ++p; + value->e = p; + while (value->e != value->b && ISSPACE (value->e[-1])) + --value->e; + if (*p == separator) ++p; + } + *source = p; + return true; } -/* Parse the `Content-Disposition' header and extract the information it - contains. Returns true if successful, false otherwise. */ -static bool -parse_content_disposition (const char *hdrval, char **filename) -{ - const char *b = hdrval; /* b - begin */ - const char *e = hdrval; /* e - end */ +#undef MAX +#define MAX(p, q) ((p) > (q) ? (p) : (q)) - assert (hdrval); - assert (filename); +/* Parse the contents of the `Content-Disposition' header, extracting + the information useful to Wget. Content-Disposition is a header + borrowed from MIME; when used in HTTP, it typically serves for + specifying the desired file name of the resource. For example: - for (; *e; ++e) - { - if (*e == ';' - && e > b) - { - /* process chars b->e-1 */ - if (true == extract_param_value_delim (b, e - 1, "filename", filename)) - return true; + Content-Disposition: attachment; filename="flora.jpg" - b = e + 1; - } - } + Wget will skip the tokens it doesn't care about, such as + "attachment" in the previous example; it will also skip other + unrecognized params. If the header is syntactically correct and + contains a file name, a copy of the file name is stored in + *filename and true is returned. Otherwise, the function returns + false. - if (b != e) - { - /* process chars b->e */ - if (true == extract_param_value_delim (b, e, "filename", filename)) - return true; - } + The file name is stripped of directory components and must not be + empty. */ +static bool +parse_content_disposition (const char *hdr, char **filename) +{ + param_token name, value; + while (extract_param (&hdr, &name, &value, ';')) + if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL) + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + *filename = strdupdelim (value.b, value.e); + return true; + } return false; } @@ -1683,33 +1726,49 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ - if (!hs->local_file) + if (!hs->local_file) { /* Honor Content-Disposition whether possible. */ - if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) - || false == parse_content_disposition (hdrval, &hs->local_file)) + if (!opt.content_disposition + || !resp_header_copy (resp, "Content-Disposition", + hdrval, sizeof (hdrval)) + || !parse_content_disposition (hdrval, &hs->local_file)) { - /* Choose filename according to URL name. */ + /* The Content-Disposition header is missing or broken. + * Choose unique file name according to given URL. */ hs->local_file = url_file_name (u); } } + DEBUGP (("hs->local_file is: %s %s\n", hs->local_file, + file_exists_p (hs->local_file) ? "(existing)" : "(not existing)")); + /* TODO: perform this check only once. */ - if (opt.noclobber && file_exists_p (hs->local_file)) + if (file_exists_p (hs->local_file)) { - /* If opt.noclobber is turned on and file already exists, do not - retrieve the file */ - logprintf (LOG_VERBOSE, _("\ + if (opt.noclobber) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file */ + logprintf (LOG_VERBOSE, _("\ File `%s' already there; not retrieving.\n\n"), hs->local_file); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (hs->local_file)) + *dt |= TEXTHTML; - return RETROK; + return RETROK; + } + else + { + char *unique = unique_name (hs->local_file, true); + if (unique != hs->local_file) + xfree (hs->local_file); + hs->local_file = unique; + } } /* Support timestamping */ @@ -1955,11 +2014,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || - 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) + 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; + DEBUGP (("TEXTHTML is %s.\n", *dt | TEXTHTML ? "on": "off")); + if (opt.html_extension && (*dt & TEXTHTML)) /* -E / --html-extension / html_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't @@ -2078,13 +2139,6 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); return RETRFINISHED; } - /* Print fetch message, if opt.verbose. */ - if (opt.verbose) - { - logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), - HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); - } - /* Open the local file. */ if (!output_stream) { @@ -2121,6 +2175,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); else fp = output_stream; + /* Print fetch message, if opt.verbose. */ + if (opt.verbose) + { + logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), + HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); + } + /* This confuses the timestamping code that checks for file size. #### The timestamping code should be smarter about file size. */ if (opt.save_headers && hs->restval == 0) @@ -2172,11 +2233,11 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, bool got_head = false; /* used for time-stamping */ char *tms; const char *tmrate; - uerr_t err; + uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ wgint local_size = 0; /* the size of the local file */ struct http_stat hstat; /* HTTP status */ - struct_stat st; + struct_stat st; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2219,7 +2280,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, sleep_between_retrievals (count); /* Get the current time string. */ - tms = time_str (NULL); + tms = time_str (time (NULL)); /* Print fetch message, if opt.verbose. */ if (opt.verbose) @@ -2248,7 +2309,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (opt.spider || (opt.timestamping && !got_head)) + if ((opt.spider && !opt.recursive) || (opt.timestamping && !got_head)) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; @@ -2274,8 +2335,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, we require a fresh get. b) caching is explicitly inhibited. */ if ((proxy && count > 1) /* a */ - || !opt.allow_cache /* b */ - ) + || !opt.allow_cache) /* b */ *dt |= SEND_NOCACHE; else *dt &= ~SEND_NOCACHE; @@ -2284,7 +2344,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, err = gethttp (u, &hstat, dt, proxy); /* Time? */ - tms = time_str (NULL); + tms = time_str (time (NULL)); /* Get the new location (with or without the redirection). */ if (hstat.newloc) @@ -2298,26 +2358,23 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ - /* free_hstat (&hstat); */ printwhat (count, opt.ntry); continue; - case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLINITFAILED: case CONTNOTSUPPORTED: - /* Fatal errors just return from the function. */ - free_hstat (&hstat); - return err; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), hstat.local_file, strerror (errno)); - free_hstat (&hstat); - return err; + case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: + case SSLINITFAILED: case CONTNOTSUPPORTED: + /* Fatal errors just return from the function. */ + ret = err; + goto exit; case CONSSLERR: /* Another fatal error. */ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); - free_hstat (&hstat); - return err; + ret = err; + goto exit; case NEWLOCATION: /* Return the new location to the caller. */ if (!*newloc) @@ -2325,15 +2382,17 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, logprintf (LOG_NOTQUIET, _("ERROR: Redirection (%d) without location.\n"), hstat.statcode); - free_hstat (&hstat); - return WRONGCODE; + ret = WRONGCODE; } - free_hstat (&hstat); - return NEWLOCATION; + else + { + ret = NEWLOCATION; + } + goto exit; case RETRUNNEEDED: /* The file was already fully retrieved. */ - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; case RETRFINISHED: /* Deal with you later. */ break; @@ -2341,21 +2400,27 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { + char *hurl = NULL; if (!opt.verbose) { /* #### Ugly ugly ugly! */ - char *hurl = url_string (u, true); + hurl = url_string (u, true); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); - xfree (hurl); + } + if (opt.spider && opt.recursive) + { + if (!hurl) hurl = url_string (u, true); + nonexisting_url (hurl, referer); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); - free_hstat (&hstat); - return WRONGCODE; + ret = WRONGCODE; + xfree_null (hurl); + goto exit; } /* Did we get the time-stamp? */ @@ -2397,8 +2462,8 @@ Last-modified header invalid -- time-stamp ignored.\n")); logprintf (LOG_VERBOSE, _("\ Server file no newer than local file `%s' -- not retrieving.\n\n"), hstat.orig_file_name); - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; } else { @@ -2420,7 +2485,7 @@ The sizes do not match (local %s) -- retrieving.\n"), } if ((tmr != (time_t) (-1)) - && !opt.spider + && (!opt.spider || opt.recursive) && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { @@ -2439,11 +2504,12 @@ The sizes do not match (local %s) -- retrieving.\n"), } /* End of time-stamping section. */ - if (opt.spider) + if (opt.spider && !opt.recursive) { logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, escnonprint (hstat.error)); - return RETROK; + ret = RETROK; + goto exit; } tmrate = retr_rate (hstat.rd_size, hstat.dltime); @@ -2474,8 +2540,8 @@ The sizes do not match (local %s) -- retrieving.\n"), else downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; } else if (hstat.res == 0) /* No read error */ { @@ -2502,8 +2568,8 @@ The sizes do not match (local %s) -- retrieving.\n"), else downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); - free_hstat (&hstat); - return RETROK; + ret = RETROK; + goto exit; } else if (hstat.len < hstat.contlen) /* meaning we lost the connection too soon */ @@ -2512,7 +2578,6 @@ The sizes do not match (local %s) -- retrieving.\n"), _("%s (%s) - Connection closed at byte %s. "), tms, tmrate, number_to_static_string (hstat.len)); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } else @@ -2529,7 +2594,6 @@ The sizes do not match (local %s) -- retrieving.\n"), tms, tmrate, number_to_static_string (hstat.len), hstat.rderrmsg); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } else /* hstat.res == -1 and contlen is given */ @@ -2541,15 +2605,19 @@ The sizes do not match (local %s) -- retrieving.\n"), number_to_static_string (hstat.contlen), hstat.rderrmsg); printwhat (count, opt.ntry); - /* free_hstat (&hstat); */ continue; } } /* not reached */ } while (!opt.ntry || (count < opt.ntry)); + +exit: + if (ret == RETROK) + *local_file = xstrdup (hstat.local_file); + free_hstat (&hstat); - return TRYLIMEXC; + return ret; } /* Check whether the result of strptime() indicates success. @@ -2690,45 +2758,6 @@ basic_authentication_encode (const char *user, const char *passwd) } while (0) #ifdef ENABLE_DIGEST -/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning - of a field in such a header. If the field is the one specified by - ATTR_NAME ("realm", "opaque", and "nonce" are used by the current - digest authorization code), extract its value in the (char*) - variable pointed by RET. Returns negative on a malformed header, - or number of bytes that have been parsed by this call. */ -static int -extract_header_attr (const char *au, const char *attr_name, char **ret) -{ - const char *ep; - const char *cp = au; - - if (strncmp (cp, attr_name, strlen (attr_name)) == 0) - { - cp += strlen (attr_name); - if (!*cp) - return -1; - SKIP_WS (cp); - if (*cp != '=') - return -1; - if (!*++cp) - return -1; - SKIP_WS (cp); - if (*cp != '\"') - return -1; - if (!*++cp) - return -1; - for (ep = cp; *ep && *ep != '\"'; ep++) - ; - if (!*ep) - return -1; - xfree_null (*ret); - *ret = strdupdelim (cp, ep); - return ep - au + 1; - } - else - return 0; -} - /* Dump the hexadecimal representation of HASH to BUF. HASH should be an array of 16 bytes containing the hash keys, and BUF should be a buffer of 33 writable characters (32 for hex digits plus one for @@ -2763,53 +2792,21 @@ digest_authentication_encode (const char *au, const char *user, { "nonce", &nonce } }; char *res; + param_token name, value; realm = opaque = nonce = NULL; au += 6; /* skip over `Digest' */ - while (*au) + while (extract_param (&au, &name, &value, ',')) { int i; - - SKIP_WS (au); for (i = 0; i < countof (options); i++) - { - int skip = extract_header_attr (au, options[i].name, - options[i].variable); - if (skip < 0) - { - xfree_null (realm); - xfree_null (opaque); - xfree_null (nonce); - return NULL; - } - else if (skip) - { - au += skip; - break; - } - } - if (i == countof (options)) - { - while (*au && *au != '=') - au++; - if (*au && *++au) - { - SKIP_WS (au); - if (*au == '\"') - { - au++; - while (*au && *au != '\"') - au++; - if (*au) - au++; - } - } - } - while (*au && *au != ',') - au++; - if (*au) - au++; + if (name.e - name.b == strlen (options[i].name) + && 0 == strncmp (name.b, options[i].name, name.e - name.b)) + { + *options[i].variable = strdupdelim (value.b, value.e); + break; + } } if (!realm || !nonce || !user || !passwd || !path || !method) { @@ -2973,7 +2970,7 @@ http_cleanup (void) #ifdef TESTING -char * +const char * test_parse_content_disposition() { int i; @@ -2997,8 +2994,6 @@ test_parse_content_disposition() res == test_array[i].result && (res == false || 0 == strcmp (test_array[i].filename, filename))); - - /* printf ("test %d: %s\n", i, res == false ? "false" : filename); */ } return NULL;