X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=b50acf48ea0ac601190d5920d13488d1f8a78bf9;hp=8917fa558bd2f9183222858681d9b3d92c73529b;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=aed7d4163a9e2083d294a9471e1347ab13d6f2ab diff --git a/src/http.c b/src/http.c index 8917fa55..b50acf48 100644 --- a/src/http.c +++ b/src/http.c @@ -1040,7 +1040,7 @@ modify_param_name(param_token *name) static void modify_param_value (param_token *value, int encoding_type ) { - if (RFC2231_ENCODING == encoding_type) + if (encoding_type == RFC2231_ENCODING) { const char *delim = memrchr (value->b, '\'', value->e - value->b); if ( delim != NULL ) @@ -1060,13 +1060,22 @@ modify_param_value (param_token *value, int encoding_type ) filename=\"foo bar\"", the first call to this function will return the token named "attachment" and no value, and the second call will return the token named "filename" and value "foo bar". The third - call will return false, indicating no more valid tokens. */ + call will return false, indicating no more valid tokens. + + is_url_encoded is an out parameter. If not NULL, a boolean value will be + stored into it, letting the caller know whether or not the extracted value is + URL-encoded. The caller can then decode it with url_unescape(), which however + performs decoding in-place. URL-encoding is used by RFC 2231 to support + non-US-ASCII characters in HTTP header values. */ bool extract_param (const char **source, param_token *name, param_token *value, - char separator) + char separator, bool *is_url_encoded) { const char *p = *source; + int param_type; + if (is_url_encoded) + *is_url_encoded = false; /* initializing the out parameter */ while (c_isspace (*p)) ++p; if (!*p) @@ -1122,9 +1131,11 @@ extract_param (const char **source, param_token *name, param_token *value, } *source = p; - int param_type = modify_param_name(name); - if (NOT_RFC2231 != param_type) + param_type = modify_param_name(name); + if (param_type != NOT_RFC2231) { + if (param_type == RFC2231_ENCODING && is_url_encoded) + *is_url_encoded = true; modify_param_value(value, param_type); } return true; @@ -1137,13 +1148,16 @@ extract_param (const char **source, param_token *name, param_token *value, /* Appends the string represented by VALUE to FILENAME */ static void -append_value_to_filename (char **filename, param_token const * const value) +append_value_to_filename (char **filename, param_token const * const value, + bool is_url_encoded) { int original_length = strlen(*filename); int new_length = strlen(*filename) + (value->e - value->b); *filename = xrealloc (*filename, new_length+1); memcpy (*filename + original_length, value->b, (value->e - value->b)); (*filename)[new_length] = '\0'; + if (is_url_encoded) + url_unescape (*filename + original_length); } #undef MAX @@ -1176,7 +1190,9 @@ parse_content_disposition (const char *hdr, char **filename) { param_token name, value; *filename = NULL; - while (extract_param (&hdr, &name, &value, ';')) + bool is_url_encoded = false; + for ( ; extract_param (&hdr, &name, &value, ';', &is_url_encoded); + is_url_encoded = false) { int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" ); if ( isFilename && value.b != NULL) @@ -1192,9 +1208,13 @@ parse_content_disposition (const char *hdr, char **filename) continue; if (*filename) - append_value_to_filename (filename, &value); + append_value_to_filename (filename, &value, is_url_encoded); else - *filename = strdupdelim (value.b, value.e); + { + *filename = strdupdelim (value.b, value.e); + if (is_url_encoded) + url_unescape (*filename); + } } } @@ -1566,7 +1586,7 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen, /* Download the response body and write it to fp. If we are working on a WARC file, we simultaneously write the response body to warc_tmp. */ - hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, + hs->res = fd_read_body (hs->local_file, sock, fp, contlen != -1 ? contlen : 0, hs->restval, &hs->rd_size, &hs->len, &hs->dltime, flags, warc_tmp); if (hs->res >= 0) @@ -1688,7 +1708,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, char *head; struct response *resp; - char hdrval[256]; + char hdrval[512]; char *message; /* Declare WARC variables. */ @@ -2315,23 +2335,23 @@ read_header: But if we are writing a WARC file we are: we like to keep everyting. */ if (warc_enabled) { - int err; + int _err; type = resp_header_strdup (resp, "Content-Type"); - err = read_response_body (hs, sock, NULL, contlen, 0, + _err = read_response_body (hs, sock, NULL, contlen, 0, chunked_transfer_encoding, u->url, warc_timestamp_str, warc_request_uuid, warc_ip, type, statcode, head); xfree_null (type); - if (err != RETRFINISHED || hs->res < 0) + if (_err != RETRFINISHED || hs->res < 0) { CLOSE_INVALIDATE (sock); request_free (req); xfree_null (message); resp_free (resp); xfree (head); - return err; + return _err; } else CLOSE_FINISH (sock); @@ -2576,6 +2596,7 @@ read_header: tmp = parse_charset (tmp2); if (tmp) set_content_encoding (iri, tmp); + xfree_null(tmp); } } } @@ -2598,6 +2619,22 @@ read_header: if (H_20X (statcode)) *dt |= RETROKF; + if (statcode == HTTP_STATUS_NO_CONTENT) + { + /* 204 response has no body (RFC 2616, 4.3) */ + + /* In case the caller cares to look... */ + hs->len = 0; + hs->res = 0; + hs->restval = 0; + + CLOSE_FINISH (sock); + xfree_null (type); + xfree (head); + + return RETRFINISHED; + } + /* Return if redirected. */ if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES) { @@ -2624,18 +2661,18 @@ read_header: But if we are writing a WARC file we are: we like to keep everyting. */ if (warc_enabled) { - int err = read_response_body (hs, sock, NULL, contlen, 0, + int _err = read_response_body (hs, sock, NULL, contlen, 0, chunked_transfer_encoding, u->url, warc_timestamp_str, warc_request_uuid, warc_ip, type, statcode, head); - if (err != RETRFINISHED || hs->res < 0) + if (_err != RETRFINISHED || hs->res < 0) { CLOSE_INVALIDATE (sock); xfree_null (type); xfree (head); - return err; + return _err; } else CLOSE_FINISH (sock); @@ -2671,7 +2708,6 @@ read_header: { case HTTP_STATUS_TEMPORARY_REDIRECT: return NEWLOCATION_KEEP_POST; - break; case HTTP_STATUS_MOVED_PERMANENTLY: if (opt.method && strcasecmp (opt.method, "post") != 0) return NEWLOCATION_KEEP_POST; @@ -2682,7 +2718,6 @@ read_header: break; default: return NEWLOCATION; - break; } return NEWLOCATION; } @@ -2803,18 +2838,18 @@ read_header: But if we are writing a WARC file we are: we like to keep everyting. */ if (warc_enabled) { - int err = read_response_body (hs, sock, NULL, contlen, 0, + int _err = read_response_body (hs, sock, NULL, contlen, 0, chunked_transfer_encoding, u->url, warc_timestamp_str, warc_request_uuid, warc_ip, type, statcode, head); - if (err != RETRFINISHED || hs->res < 0) + if (_err != RETRFINISHED || hs->res < 0) { CLOSE_INVALIDATE (sock); xfree (head); xfree_null (type); - return err; + return _err; } else CLOSE_FINISH (sock); @@ -2925,11 +2960,8 @@ read_header: fp = output_stream; /* Print fetch message, if opt.verbose. */ - if (opt.verbose) - { - logprintf (LOG_NOTQUIET, _("Saving to: %s\n"), + logprintf (LOG_VERBOSE, _("Saving to: %s\n"), HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file)); - } err = read_response_body (hs, sock, fp, contlen, contrange, @@ -3043,7 +3075,10 @@ http_loop (struct url *u, struct url *original_url, char **newloc, /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ - file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); + if (!opt.output_document) + file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); + else + file_name = xstrdup (opt.output_document); if (opt.timestamping && (file_exists_p (file_name) || opt.content_disposition)) send_head_first = true; @@ -3098,6 +3133,8 @@ Spider mode enabled. Check if remote file exists.\n")); /* Decide whether or not to restart. */ if (force_full_retrieve) hstat.restval = hstat.len; + else if (opt.start_pos >= 0) + hstat.restval = opt.start_pos; else if (opt.always_rest && got_name && stat (hstat.local_file, &st) == 0 @@ -3727,7 +3764,7 @@ digest_authentication_encode (const char *au, const char *user, realm = opaque = nonce = algorithm = qop = NULL; au += 6; /* skip over `Digest' */ - while (extract_param (&au, &name, &value, ',')) + while (extract_param (&au, &name, &value, ',', NULL)) { size_t i; size_t namelen = name.e - name.b; @@ -4031,12 +4068,12 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt) #ifdef TESTING const char * -test_parse_content_disposition() +test_parse_content_disposition(void) { - int i; - struct { - char *hdrval; - char *filename; + unsigned i; + static const struct { + const char *hdrval; + const char *filename; bool result; } test_array[] = { { "filename=\"file.ext\"", "file.ext", true }, @@ -4047,7 +4084,7 @@ test_parse_content_disposition() { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true }, }; - for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + for (i = 0; i < countof(test_array); ++i) { char *filename; bool res;