X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=cf901929435679d914345745f41c040e792dd600;hp=33b9a53c46af12ea1f49b9aa21c8edc38957660f;hb=0fcd1bb235fd3a29df49a70683ac4156658b4e17;hpb=d8c11a82ca9e8fab6689b53dc12199da4cc6058d diff --git a/src/http.c b/src/http.c index 33b9a53c..cf901929 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,7 @@ /* HTTP support. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -33,9 +34,7 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include #include #include #include @@ -56,16 +55,19 @@ as that of the covered work. */ # include "http-ntlm.h" #endif #include "cookies.h" -#ifdef ENABLE_DIGEST -# include "gen-md5.h" -#endif +#include "md5.h" #include "convert.h" #include "spider.h" +#include "warc.h" #ifdef TESTING #include "test.h" #endif +#ifdef __VMS +# include "vms.h" +#endif /* def __VMS */ + extern char *version_string; /* Forward decls. */ @@ -91,6 +93,7 @@ static struct cookie_jar *wget_cookie_jar; #define TEXTCSS_S "text/css" /* Some status code validation macros: */ +#define H_10X(x) (((x) >= 100) && ((x) < 200)) #define H_20X(x) (((x) >= 200) && ((x) < 300)) #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS) #define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ @@ -318,10 +321,12 @@ request_remove_header (struct request *req, char *name) p += A_len; \ } while (0) -/* Construct the request and write it to FD using fd_write. */ +/* Construct the request and write it to FD using fd_write. + If warc_tmp is set to a file pointer, the request string will + also be written to that file. */ static int -request_send (const struct request *req, int fd) +request_send (const struct request *req, int fd, FILE *warc_tmp) { char *request_string, *p; int i, size, write_error; @@ -348,7 +353,7 @@ request_send (const struct request *req, int fd) APPEND (p, req->method); *p++ = ' '; APPEND (p, req->arg); *p++ = ' '; - memcpy (p, "HTTP/1.0\r\n", 10); p += 10; + memcpy (p, "HTTP/1.1\r\n", 10); p += 10; for (i = 0; i < req->hcount; i++) { @@ -372,6 +377,13 @@ request_send (const struct request *req, int fd) if (write_error < 0) logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), fd_errstr (fd)); + else if (warc_tmp != NULL) + { + /* Write a copy of the data to the WARC record. */ + int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp); + if (warc_tmp_written != size - 1) + return -2; + } return write_error; } @@ -402,18 +414,18 @@ maybe_send_basic_creds (const char *hostname, const char *user, if (opt.auth_without_challenge) { - DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n")); + DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n")); do_challenge = true; } else if (basic_authed_hosts && hash_table_contains(basic_authed_hosts, hostname)) { - DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname))); + DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname))); do_challenge = true; } else { - DEBUGP(("Host %s has not issued a general basic challenge.\n", + DEBUGP (("Host %s has not issued a general basic challenge.\n", quote (hostname))); } if (do_challenge) @@ -435,17 +447,19 @@ register_basic_auth_host (const char *hostname) if (!hash_table_contains(basic_authed_hosts, hostname)) { hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL); - DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname))); + DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname))); } } /* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is - longer, read only that much; if the file is shorter, report an error. */ + longer, read only that much; if the file is shorter, report an error. + If warc_tmp is set to a file pointer, the post data will + also be written to that file. */ static int -post_file (int sock, const char *file_name, wgint promised_size) +post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp) { static char chunk[8192]; wgint written = 0; @@ -470,6 +484,16 @@ post_file (int sock, const char *file_name, wgint promised_size) fclose (fp); return -1; } + if (warc_tmp != NULL) + { + /* Write a copy of the data to the WARC record. */ + int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp); + if (warc_tmp_written != towrite) + { + fclose (fp); + return -2; + } + } written += towrite; } fclose (fp); @@ -770,7 +794,7 @@ resp_status (const struct response *resp, char **message) while (p < end && c_isdigit (*p)) ++p; if (p < end && *p == '.') - ++p; + ++p; while (p < end && c_isdigit (*p)) ++p; } @@ -815,7 +839,7 @@ print_response_line(const char *prefix, const char *b, const char *e) { char *copy; BOUNDED_TO_ALLOCA(b, e, copy); - logprintf (LOG_ALWAYS, "%s%s\n", prefix, + logprintf (LOG_ALWAYS, "%s%s\n", prefix, quotearg_style (escape_quoting_style, copy)); } @@ -897,29 +921,52 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, mode, the body is displayed for debugging purposes. */ static bool -skip_short_body (int fd, wgint contlen) +skip_short_body (int fd, wgint contlen, bool chunked) { enum { SKIP_SIZE = 512, /* size of the download buffer */ SKIP_THRESHOLD = 4096 /* the largest size we read */ }; + wgint remaining_chunk_size = 0; char dlbuf[SKIP_SIZE + 1]; dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ - /* We shouldn't get here with unknown contlen. (This will change - with HTTP/1.1, which supports "chunked" transfer.) */ - assert (contlen != -1); + assert (contlen != -1 || contlen); /* If the body is too large, it makes more sense to simply close the connection than to try to read the body. */ if (contlen > SKIP_THRESHOLD) return false; - DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); - - while (contlen > 0) + while (contlen > 0 || chunked) { - int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1); + int ret; + if (chunked) + { + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + char *endl; + if (line == NULL) + break; + + remaining_chunk_size = strtol (line, &endl, 16); + xfree (line); + + if (remaining_chunk_size == 0) + { + line = fd_read_line (fd); + xfree_null (line); + break; + } + } + + contlen = MIN (remaining_chunk_size, SKIP_SIZE); + } + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); + + ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1); if (ret <= 0) { /* Don't normally report the error since this is an @@ -929,6 +976,20 @@ skip_short_body (int fd, wgint contlen) return false; } contlen -= ret; + + if (chunked) + { + remaining_chunk_size -= ret; + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + if (line == NULL) + return false; + else + xfree (line); + } + } + /* Safe even if %.*s bogusly expects terminating \0 because we've zero-terminated dlbuf above. */ DEBUGP (("%.*s", ret, dlbuf)); @@ -938,6 +999,66 @@ skip_short_body (int fd, wgint contlen) return true; } +#define NOT_RFC2231 0 +#define RFC2231_NOENCODING 1 +#define RFC2231_ENCODING 2 + +/* extract_param extracts the parameter name into NAME. + However, if the parameter name is in RFC2231 format then + this function adjusts NAME by stripping of the trailing + characters that are not part of the name but are present to + indicate the presence of encoding information in the value + or a fragment of a long parameter value +*/ +static int +modify_param_name(param_token *name) +{ + const char *delim1 = memchr (name->b, '*', name->e - name->b); + const char *delim2 = memrchr (name->b, '*', name->e - name->b); + + int result; + + if(delim1 == NULL) + { + result = NOT_RFC2231; + } + else if(delim1 == delim2) + { + if ((name->e - 1) == delim1) + { + result = RFC2231_ENCODING; + } + else + { + result = RFC2231_NOENCODING; + } + name->e = delim1; + } + else + { + name->e = delim1; + result = RFC2231_ENCODING; + } + return result; +} + +/* extract_param extract the paramater value into VALUE. + Like modify_param_name this function modifies VALUE by + stripping off the encoding information from the actual value +*/ +static void +modify_param_value (param_token *value, int encoding_type ) +{ + if (RFC2231_ENCODING == encoding_type) + { + const char *delim = memrchr (value->b, '\'', value->e - value->b); + if ( delim != NULL ) + { + value->b = (delim+1); + } + } +} + /* Extract a parameter from the string (typically an HTTP header) at **SOURCE and advance SOURCE to the next parameter. Return false when there are no more parameters to extract. The name of the @@ -1009,9 +1130,31 @@ extract_param (const char **source, param_token *name, param_token *value, if (*p == separator) ++p; } *source = p; + + int param_type = modify_param_name(name); + if (NOT_RFC2231 != param_type) + { + modify_param_value(value, param_type); + } return true; } +#undef NOT_RFC2231 +#undef RFC2231_NOENCODING +#undef RFC2231_ENCODING + +/* Appends the string represented by VALUE to FILENAME */ + +static void +append_value_to_filename (char **filename, param_token const * const value) +{ + int original_length = strlen(*filename); + int new_length = strlen(*filename) + (value->e - value->b); + *filename = xrealloc (*filename, new_length+1); + memcpy (*filename + original_length, value->b, (value->e - value->b)); + (*filename)[new_length] = '\0'; +} + #undef MAX #define MAX(p, q) ((p) > (q) ? (p) : (q)) @@ -1030,47 +1173,46 @@ extract_param (const char **source, param_token *name, param_token *value, false. The file name is stripped of directory components and must not be - empty. */ + empty. + Historically, this function returned filename prefixed with opt.dir_prefix, + now that logic is handled by the caller, new code should pay attention, + changed by crq, Sep 2010. + +*/ static bool parse_content_disposition (const char *hdr, char **filename) { param_token name, value; + *filename = NULL; while (extract_param (&hdr, &name, &value, ';')) - if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL) - { - /* Make the file name begin at the last slash or backslash. */ - const char *last_slash = memrchr (value.b, '/', value.e - value.b); - const char *last_bs = memrchr (value.b, '\\', value.e - value.b); - if (last_slash && last_bs) - value.b = 1 + MAX (last_slash, last_bs); - else if (last_slash || last_bs) - value.b = 1 + (last_slash ? last_slash : last_bs); - if (value.b == value.e) - continue; - /* Start with the directory prefix, if specified. */ - if (opt.dir_prefix) - { - int prefix_length = strlen (opt.dir_prefix); - bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/'); - int total_length; - - if (add_slash) - ++prefix_length; - total_length = prefix_length + (value.e - value.b); - *filename = xmalloc (total_length + 1); - strcpy (*filename, opt.dir_prefix); - if (add_slash) - (*filename)[prefix_length - 1] = '/'; - memcpy (*filename + prefix_length, value.b, (value.e - value.b)); - (*filename)[total_length] = '\0'; - } - else - *filename = strdupdelim (value.b, value.e); - return true; - } - return false; + { + int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" ); + if ( isFilename && value.b != NULL) + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + + if (*filename) + append_value_to_filename (filename, &value); + else + *filename = strdupdelim (value.b, value.e); + } + } + + if (*filename) + return true; + else + return false; } + /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -1310,12 +1452,12 @@ struct http_stat existence after having begun to download (needed in gethttp for when connection is interrupted/restarted. */ - bool timestamp_checked; /* true if pre-download time-stamping checks + bool timestamp_checked; /* true if pre-download time-stamping checks * have already been performed */ char *orig_file_name; /* name of file to compare for time-stamping * (might be != local_file if -K is set) */ wgint orig_file_size; /* size of file to compare for time-stamping */ - time_t orig_file_tstamp; /* time-stamp of file to compare for + time_t orig_file_tstamp; /* time-stamp of file to compare for * time-stamping */ }; @@ -1336,18 +1478,175 @@ free_hstat (struct http_stat *hs) hs->error = NULL; } +static void +get_file_flags (const char *filename, int *dt) +{ + logprintf (LOG_VERBOSE, _("\ +File %s already there; not retrieving.\n\n"), quote (filename)); + /* If the file is there, we suppose it's retrieved OK. */ + *dt |= RETROKF; + + /* #### Bogusness alert. */ + /* If its suffix is "html" or "htm" or similar, assume text/html. */ + if (has_html_suffix_p (filename)) + *dt |= TEXTHTML; +} + +/* Download the response body from the socket and writes it to + an output file. The headers have already been read from the + socket. If WARC is enabled, the response body will also be + written to a WARC response record. + + hs, contlen, contrange, chunked_transfer_encoding and url are + parameters from the gethttp method. fp is a pointer to the + output file. + + url, warc_timestamp_str, warc_request_uuid, warc_ip, type + and statcode will be saved in the headers of the WARC record. + The head parameter contains the HTTP headers of the response. + + If fp is NULL and WARC is enabled, the response body will be + written only to the WARC file. If WARC is disabled and fp + is a file pointer, the data will be written to the file. + If fp is a file pointer and WARC is enabled, the body will + be written to both destinations. + + Returns the error code. */ +static int +read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen, + wgint contrange, bool chunked_transfer_encoding, + char *url, char *warc_timestamp_str, char *warc_request_uuid, + ip_address *warc_ip, char *type, int statcode, char *head) +{ + int warc_payload_offset = 0; + FILE *warc_tmp = NULL; + int warcerr = 0; + + if (opt.warc_filename != NULL) + { + /* Open a temporary file where we can write the response before we + add it to the WARC record. */ + warc_tmp = warc_tempfile (); + if (warc_tmp == NULL) + warcerr = WARC_TMP_FOPENERR; + + if (warcerr == 0) + { + /* We should keep the response headers for the WARC record. */ + int head_len = strlen (head); + int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp); + if (warc_tmp_written != head_len) + warcerr = WARC_TMP_FWRITEERR; + warc_payload_offset = head_len; + } + + if (warcerr != 0) + { + if (warc_tmp != NULL) + fclose (warc_tmp); + return warcerr; + } + } + + if (fp != NULL) + { + /* This confuses the timestamping code that checks for file size. + #### The timestamping code should be smarter about file size. */ + if (opt.save_headers && hs->restval == 0) + fwrite (head, 1, strlen (head), fp); + } + + /* Read the response body. */ + int flags = 0; + if (contlen != -1) + /* If content-length is present, read that much; otherwise, read + until EOF. The HTTP spec doesn't require the server to + actually close the connection when it's done sending data. */ + flags |= rb_read_exactly; + if (fp != NULL && hs->restval > 0 && contrange == 0) + /* If the server ignored our range request, instruct fd_read_body + to skip the first RESTVAL bytes of body. */ + flags |= rb_skip_startpos; + if (chunked_transfer_encoding) + flags |= rb_chunked_transfer_encoding; + + hs->len = hs->restval; + hs->rd_size = 0; + /* Download the response body and write it to fp. + If we are working on a WARC file, we simultaneously write the + response body to warc_tmp. */ + hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, + hs->restval, &hs->rd_size, &hs->len, &hs->dltime, + flags, warc_tmp); + if (hs->res >= 0) + { + if (warc_tmp != NULL) + { + /* Create a response record and write it to the WARC file. + Note: per the WARC standard, the request and response should share + the same date header. We re-use the timestamp of the request. + The response record should also refer to the uuid of the request. */ + bool r = warc_write_response_record (url, warc_timestamp_str, + warc_request_uuid, warc_ip, + warc_tmp, warc_payload_offset, + type, statcode, hs->newloc); + + /* warc_write_response_record has closed warc_tmp. */ + + if (! r) + return WARC_ERR; + } + + return RETRFINISHED; + } + + if (warc_tmp != NULL) + fclose (warc_tmp); + + if (hs->res == -2) + { + /* Error while writing to fd. */ + return FWRITEERR; + } + else if (hs->res == -3) + { + /* Error while writing to warc_tmp. */ + return WARC_TMP_FWRITEERR; + } + else + { + /* A read error! */ + hs->rderrmsg = xstrdup (fd_errstr (sock)); + return RETRFINISHED; + } +} + #define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (c_isspace (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#ifdef __VMS +#define SET_USER_AGENT(req) do { \ + if (!opt.useragent) \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s (VMS %s %s)", \ + version_string, vms_arch(), vms_vers()), \ + rel_value); \ + else if (*opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ +} while (0) +#else /* def __VMS */ #define SET_USER_AGENT(req) do { \ if (!opt.useragent) \ request_set_header (req, "User-Agent", \ - aprintf ("Wget/%s", version_string), rel_value); \ + aprintf ("Wget/%s (%s)", \ + version_string, OS_TYPE), \ + rel_value); \ else if (*opt.useragent) \ request_set_header (req, "User-Agent", opt.useragent, rel_none); \ } while (0) +#endif /* def __VMS [else] */ /* The flags that allow clobbering the file (opening with "wb"). Defined here to avoid repetition later. #### This will require @@ -1366,7 +1665,8 @@ free_hstat (struct http_stat *hs) If PROXY is non-NULL, the connection will be made to the proxy server, and u->url will be requested. */ static uerr_t -gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) +gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, + struct iri *iri, int count) { struct request *req; @@ -1378,9 +1678,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) wgint contlen, contrange; struct url *conn; FILE *fp; + int err; int sock = -1; - int flags; /* Set to 1 when the authorization has already been sent and should not be tried again. */ @@ -1406,19 +1706,24 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) char hdrval[256]; char *message; + /* Declare WARC variables. */ + bool warc_enabled = (opt.warc_filename != NULL); + FILE *warc_tmp = NULL; + char warc_timestamp_str [21]; + char warc_request_uuid [48]; + ip_address *warc_ip = NULL; + off_t warc_payload_offset = -1; + /* Whether this connection will be kept alive after the HTTP request is done. */ bool keep_alive; - /* Whether keep-alive should be inhibited. + /* Is the server using the chunked transfer encoding? */ + bool chunked_transfer_encoding = false; - RFC 2068 requests that 1.0 clients not send keep-alive requests - to proxies. This is because many 1.0 proxies do not interpret - the Connection header and transfer it to the remote server, - causing it to not close the connection and leave both the proxy - and the client hanging. */ + /* Whether keep-alive should be inhibited. */ bool inhibit_keep_alive = - !opt.http_keep_alive || opt.ignore_length || proxy != NULL; + !opt.http_keep_alive || opt.ignore_length; /* Headers sent when using POST. */ wgint post_data_size = 0; @@ -1481,7 +1786,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) - request_set_header (req, "Pragma", "no-cache", rel_none); + { + /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */ + request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none); + + /* ... but some HTTP/1.0 caches doesn't implement Cache-Control. */ + request_set_header (req, "Pragma", "no-cache", rel_none); + } if (hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", @@ -1528,20 +1839,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) rel_value); } - if (!inhibit_keep_alive) - request_set_header (req, "Connection", "Keep-Alive", rel_none); - - if (opt.cookies) - request_set_header (req, "Cookie", - cookie_header (wget_cookie_jar, - u->host, u->port, u->path, -#ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS -#else - 0 -#endif - ), - rel_value); + if (inhibit_keep_alive) + request_set_header (req, "Connection", "Close", rel_none); + else + { + if (proxy == NULL) + request_set_header (req, "Connection", "Keep-Alive", rel_none); + else + { + request_set_header (req, "Connection", "Close", rel_none); + request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none); + } + } if (opt.post_data || opt.post_file_name) { @@ -1564,6 +1873,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) rel_value); } + retry_with_auth: + /* We need to come back here when the initial attempt to retrieve + without authorization header fails. (Expected to happen at least + for the Digest authorization scheme.) */ + + if (opt.cookies) + request_set_header (req, "Cookie", + cookie_header (wget_cookie_jar, + u->host, u->port, u->path, +#ifdef HAVE_SSL + u->scheme == SCHEME_HTTPS +#else + 0 +#endif + ), + rel_value); + /* Add the user headers. */ if (opt.user_headers) { @@ -1572,11 +1898,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_set_user_header (req, opt.user_headers[i]); } - retry_with_auth: - /* We need to come back here when the initial attempt to retrieve - without authorization header fails. (Expected to happen at least - for the Digest authorization scheme.) */ - proxyauth = NULL; if (proxy) { @@ -1612,11 +1933,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); } - keep_alive = false; + keep_alive = true; /* Establish the connection. */ - if (!inhibit_keep_alive) + if (inhibit_keep_alive) + keep_alive = false; + else { /* Look for a persistent connection to target host, unless a proxy is used. The exception is when SSL is in use, in which @@ -1636,11 +1959,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) #endif &host_lookup_failed)) { + int family = socket_family (pconn.socket, ENDPOINT_PEER); sock = pconn.socket; using_ssl = pconn.ssl; - logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - quotearg_style (escape_quoting_style, pconn.host), - pconn.port); + if (family == AF_INET6) + logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"), + quotearg_style (escape_quoting_style, pconn.host), + pconn.port); + else + logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), + quotearg_style (escape_quoting_style, pconn.host), + pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); if (pconn.authorized) /* If the connection is already authorized, the "Basic" @@ -1696,11 +2025,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) that the contents of Host would be exactly the same as the contents of CONNECT. */ - write_error = request_send (connreq, sock); + write_error = request_send (connreq, sock, 0); request_free (connreq); if (write_error < 0) { CLOSE_INVALIDATE (sock); + request_free (req); return WRITEFAILED; } @@ -1710,6 +2040,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"), fd_errstr (sock)); CLOSE_INVALIDATE (sock); + request_free (req); return HERR; } message = NULL; @@ -1722,6 +2053,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) resp = resp_new (head); statcode = resp_status (resp, &message); + if (statcode < 0) + { + char *tms = datetime_str (time (NULL)); + logprintf (LOG_VERBOSE, "%d\n", statcode); + logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode, + quotearg_style (escape_quoting_style, + _("Malformed status line"))); + xfree (head); + request_free (req); + return HERR; + } hs->message = xstrdup (message); resp_free (resp); xfree (head); @@ -1731,6 +2073,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), message ? quotearg_style (escape_quoting_style, message) : "?"); xfree_null (message); + request_free (req); return CONSSLERR; } xfree_null (message); @@ -1743,18 +2086,43 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (conn->scheme == SCHEME_HTTPS) { - if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host)) + if (!ssl_connect_wget (sock, u->host)) { fd_close (sock); + request_free (req); return CONSSLERR; } + else if (!ssl_check_certificate (sock, u->host)) + { + fd_close (sock); + request_free (req); + return VERIFCERTERR; + } using_ssl = true; } #endif /* HAVE_SSL */ } + /* Open the temporary file where we will write the request. */ + if (warc_enabled) + { + warc_tmp = warc_tempfile (); + if (warc_tmp == NULL) + { + CLOSE_INVALIDATE (sock); + request_free (req); + return WARC_TMP_FOPENERR; + } + + if (! proxy) + { + warc_ip = (ip_address *) alloca (sizeof (ip_address)); + socket_ip_address (sock, warc_ip, ENDPOINT_PEER); + } + } + /* Send the request to server. */ - write_error = request_send (req, sock); + write_error = request_send (req, sock, warc_tmp); if (write_error >= 0) { @@ -1762,16 +2130,39 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { DEBUGP (("[POST data: %s]\n", opt.post_data)); write_error = fd_write (sock, opt.post_data, post_data_size, -1); + if (write_error >= 0 && warc_tmp != NULL) + { + /* Remember end of headers / start of payload. */ + warc_payload_offset = ftello (warc_tmp); + + /* Write a copy of the data to the WARC record. */ + int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp); + if (warc_tmp_written != post_data_size) + write_error = -2; + } } else if (opt.post_file_name && post_data_size != 0) - write_error = post_file (sock, opt.post_file_name, post_data_size); + { + if (warc_tmp != NULL) + /* Remember end of headers / start of payload. */ + warc_payload_offset = ftello (warc_tmp); + + write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp); + } } if (write_error < 0) { CLOSE_INVALIDATE (sock); request_free (req); - return WRITEFAILED; + + if (warc_tmp != NULL) + fclose (warc_tmp); + + if (write_error == -2) + return WARC_TMP_FWRITEERR; + else + return WRITEFAILED; } logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "), proxy ? "Proxy" : "HTTP"); @@ -1779,6 +2170,30 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) contrange = 0; *dt &= ~RETROKF; + + if (warc_enabled) + { + bool warc_result; + /* Generate a timestamp and uuid for this request. */ + warc_timestamp (warc_timestamp_str); + warc_uuid_str (warc_request_uuid); + + /* Create a request record and store it in the WARC file. */ + warc_result = warc_write_request_record (u->url, warc_timestamp_str, + warc_request_uuid, warc_ip, + warc_tmp, warc_payload_offset); + if (! warc_result) + { + CLOSE_INVALIDATE (sock); + request_free (req); + return WARC_ERR; + } + + /* warc_write_request_record has also closed warc_tmp. */ + } + + +read_header: head = read_http_response_head (sock); if (!head) { @@ -1805,6 +2220,28 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Check for status line. */ message = NULL; statcode = resp_status (resp, &message); + if (statcode < 0) + { + char *tms = datetime_str (time (NULL)); + logprintf (LOG_VERBOSE, "%d\n", statcode); + logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode, + quotearg_style (escape_quoting_style, + _("Malformed status line"))); + CLOSE_INVALIDATE (sock); + resp_free (resp); + request_free (req); + xfree (head); + return HERR; + } + + if (H_10X (statcode)) + { + DEBUGP (("Ignoring response\n")); + resp_free (resp); + xfree (head); + goto read_header; + } + hs->message = xstrdup (message); if (!opt.server_response) logprintf (LOG_VERBOSE, "%2d %s\n", statcode, @@ -1815,129 +2252,22 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) print_server_response (resp, " "); } - /* Determine the local filename if needed. Notice that if -O is used - * hstat.local_file is set by http_loop to the argument of -O. */ - if (!hs->local_file) + if (!opt.ignore_length + && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) { - /* Honor Content-Disposition whether possible. */ - if (!opt.content_disposition - || !resp_header_copy (resp, "Content-Disposition", - hdrval, sizeof (hdrval)) - || !parse_content_disposition (hdrval, &hs->local_file)) + wgint parsed; + errno = 0; + parsed = str_to_wgint (hdrval, NULL, 10); + if (parsed == WGINT_MAX && errno == ERANGE) { - /* The Content-Disposition header is missing or broken. - * Choose unique file name according to given URL. */ - hs->local_file = url_file_name (u); + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; } - } - - /* TODO: perform this check only once. */ - if (!hs->existence_checked && file_exists_p (hs->local_file)) - { - if (opt.noclobber && !opt.output_document) - { - /* If opt.noclobber is turned on and file already exists, do not - retrieve the file. But if the output_document was given, then this - test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), quote (hs->local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hs->local_file)) - *dt |= TEXTHTML; - - return RETRUNNEEDED; - } - else if (!ALLOW_CLOBBER) - { - char *unique = unique_name (hs->local_file, true); - if (unique != hs->local_file) - xfree (hs->local_file); - hs->local_file = unique; - } - } - hs->existence_checked = true; - - /* Support timestamping */ - /* TODO: move this code out of gethttp. */ - if (opt.timestamping && !hs->timestamp_checked) - { - size_t filename_len = strlen (hs->local_file); - char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig")); - bool local_dot_orig_file_exists = false; - char *local_filename = NULL; - struct_stat st; - - if (opt.backup_converted) - /* If -K is specified, we'll act on the assumption that it was specified - last time these files were downloaded as well, and instead of just - comparing local file X against server file X, we'll compare local - file X.orig (if extant, else X) against server file X. If -K - _wasn't_ specified last time, or the server contains files called - *.orig, -N will be back to not operating correctly with -k. */ - { - /* Would a single s[n]printf() call be faster? --dan - - Definitely not. sprintf() is horribly slow. It's a - different question whether the difference between the two - affects a program. Usually I'd say "no", but at one - point I profiled Wget, and found that a measurable and - non-negligible amount of time was lost calling sprintf() - in url.c. Replacing sprintf with inline calls to - strcpy() and number_to_string() made a difference. - --hniksic */ - memcpy (filename_plus_orig_suffix, hs->local_file, filename_len); - memcpy (filename_plus_orig_suffix + filename_len, - ".orig", sizeof (".orig")); - - /* Try to stat() the .orig file. */ - if (stat (filename_plus_orig_suffix, &st) == 0) - { - local_dot_orig_file_exists = true; - local_filename = filename_plus_orig_suffix; - } - } - - if (!local_dot_orig_file_exists) - /* Couldn't stat() .orig, so try to stat() . */ - if (stat (hs->local_file, &st) == 0) - local_filename = hs->local_file; - - if (local_filename != NULL) - /* There was a local file, so we'll check later to see if the version - the server has is the same version we already have, allowing us to - skip a download. */ - { - hs->orig_file_name = xstrdup (local_filename); - hs->orig_file_size = st.st_size; - hs->orig_file_tstamp = st.st_mtime; -#ifdef WINDOWS - /* Modification time granularity is 2 seconds for Windows, so - increase local time by 1 second for later comparison. */ - ++hs->orig_file_tstamp; -#endif - } - } - - if (!opt.ignore_length - && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) - { - wgint parsed; - errno = 0; - parsed = str_to_wgint (hdrval, NULL, 10); - if (parsed == WGINT_MAX && errno == ERANGE) - { - /* Out of range. - #### If Content-Length is out of range, it most likely - means that the file is larger than 2G and that we're - compiled without LFS. In that case we should probably - refuse to even attempt to download the file. */ - contlen = -1; - } - else if (parsed < 0) + else if (parsed < 0) { /* Negative Content-Length; nonsensical, so we can't assume any information about the content to receive. */ @@ -1950,14 +2280,36 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); /* Check for keep-alive related responses. */ if (!inhibit_keep_alive && contlen != -1) { - if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) - keep_alive = true; - else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) + if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { - if (0 == strcasecmp (hdrval, "Keep-Alive")) - keep_alive = true; + if (0 == strcasecmp (hdrval, "Close")) + keep_alive = false; } } + + chunked_transfer_encoding = false; + if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval)) + && 0 == strcasecmp (hdrval, "chunked")) + chunked_transfer_encoding = true; + + /* Handle (possibly multiple instances of) the Set-Cookie header. */ + if (opt.cookies) + { + int scpos; + const char *scbeg, *scend; + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + for (scpos = 0; + (scpos = resp_header_locate (resp, "Set-Cookie", scpos, + &scbeg, &scend)) != -1; + ++scpos) + { + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, + u->path, set_cookie); + } + } + if (keep_alive) /* The server has promised that it will not close the connection when we're done. This means that we can register it. */ @@ -1966,10 +2318,42 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - if (keep_alive && !head_only && skip_short_body (sock, contlen)) - CLOSE_FINISH (sock); + + /* Normally we are not interested in the response body. + But if we are writing a WARC file we are: we like to keep everyting. */ + if (warc_enabled) + { + int err; + type = resp_header_strdup (resp, "Content-Type"); + err = read_response_body (hs, sock, NULL, contlen, 0, + chunked_transfer_encoding, + u->url, warc_timestamp_str, + warc_request_uuid, warc_ip, type, + statcode, head); + xfree_null (type); + + if (err != RETRFINISHED || hs->res < 0) + { + CLOSE_INVALIDATE (sock); + request_free (req); + xfree_null (message); + resp_free (resp); + xfree (head); + return err; + } + else + CLOSE_FINISH (sock); + } else - CLOSE_INVALIDATE (sock); + { + /* Since WARC is disabled, we are not interested in the response body. */ + if (keep_alive && !head_only + && skip_short_body (sock, contlen, chunked_transfer_encoding)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + pconn.authorized = false; if (!auth_finished && (user && passwd)) { @@ -2016,6 +2400,9 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); register_basic_auth_host (u->host); } xfree (pth); + xfree_null (message); + resp_free (resp); + xfree (head); goto retry_with_auth; } else @@ -2026,6 +2413,9 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); } logputs (LOG_NOTQUIET, _("Authorization failed.\n")); request_free (req); + xfree_null (message); + resp_free (resp); + xfree (head); return AUTHFAILED; } else /* statcode != HTTP_STATUS_UNAUTHORIZED */ @@ -2034,6 +2424,117 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); if (ntlm_seen) pconn.authorized = true; } + + /* Determine the local filename if needed. Notice that if -O is used + * hstat.local_file is set by http_loop to the argument of -O. */ + if (!hs->local_file) + { + char *local_file = NULL; + + /* Honor Content-Disposition whether possible. */ + if (!opt.content_disposition + || !resp_header_copy (resp, "Content-Disposition", + hdrval, sizeof (hdrval)) + || !parse_content_disposition (hdrval, &local_file)) + { + /* The Content-Disposition header is missing or broken. + * Choose unique file name according to given URL. */ + hs->local_file = url_file_name (u, NULL); + } + else + { + DEBUGP (("Parsed filename from Content-Disposition: %s\n", + local_file)); + hs->local_file = url_file_name (u, local_file); + } + } + + /* TODO: perform this check only once. */ + if (!hs->existence_checked && file_exists_p (hs->local_file)) + { + if (opt.noclobber && !opt.output_document) + { + /* If opt.noclobber is turned on and file already exists, do not + retrieve the file. But if the output_document was given, then this + test was already done and the file didn't exist. Hence the !opt.output_document */ + get_file_flags (hs->local_file, dt); + request_free (req); + resp_free (resp); + xfree (head); + xfree_null (message); + return RETRUNNEEDED; + } + else if (!ALLOW_CLOBBER) + { + char *unique = unique_name (hs->local_file, true); + if (unique != hs->local_file) + xfree (hs->local_file); + hs->local_file = unique; + } + } + hs->existence_checked = true; + + /* Support timestamping */ + /* TODO: move this code out of gethttp. */ + if (opt.timestamping && !hs->timestamp_checked) + { + size_t filename_len = strlen (hs->local_file); + char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX)); + bool local_dot_orig_file_exists = false; + char *local_filename = NULL; + struct_stat st; + + if (opt.backup_converted) + /* If -K is specified, we'll act on the assumption that it was specified + last time these files were downloaded as well, and instead of just + comparing local file X against server file X, we'll compare local + file X.orig (if extant, else X) against server file X. If -K + _wasn't_ specified last time, or the server contains files called + *.orig, -N will be back to not operating correctly with -k. */ + { + /* Would a single s[n]printf() call be faster? --dan + + Definitely not. sprintf() is horribly slow. It's a + different question whether the difference between the two + affects a program. Usually I'd say "no", but at one + point I profiled Wget, and found that a measurable and + non-negligible amount of time was lost calling sprintf() + in url.c. Replacing sprintf with inline calls to + strcpy() and number_to_string() made a difference. + --hniksic */ + memcpy (filename_plus_orig_suffix, hs->local_file, filename_len); + memcpy (filename_plus_orig_suffix + filename_len, + ORIG_SFX, sizeof (ORIG_SFX)); + + /* Try to stat() the .orig file. */ + if (stat (filename_plus_orig_suffix, &st) == 0) + { + local_dot_orig_file_exists = true; + local_filename = filename_plus_orig_suffix; + } + } + + if (!local_dot_orig_file_exists) + /* Couldn't stat() .orig, so try to stat() . */ + if (stat (hs->local_file, &st) == 0) + local_filename = hs->local_file; + + if (local_filename != NULL) + /* There was a local file, so we'll check later to see if the version + the server has is the same version we already have, allowing us to + skip a download. */ + { + hs->orig_file_name = xstrdup (local_filename); + hs->orig_file_size = st.st_size; + hs->orig_file_tstamp = st.st_mtime; +#ifdef WINDOWS + /* Modification time granularity is 2 seconds for Windows, so + increase local time by 1 second for later comparison. */ + ++hs->orig_file_tstamp; +#endif + } + } + request_free (req); hs->statcode = statcode; @@ -2051,32 +2552,25 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); char *tmp = strchr (type, ';'); if (tmp) { + /* sXXXav: only needed if IRI support is enabled */ + char *tmp2 = tmp + 1; + while (tmp > type && c_isspace (tmp[-1])) --tmp; *tmp = '\0'; + + /* Try to get remote encoding if needed */ + if (opt.enable_iri && !opt.encoding_remote) + { + tmp = parse_charset (tmp2); + if (tmp) + set_content_encoding (iri, tmp); + } } } hs->newloc = resp_header_strdup (resp, "Location"); hs->remote_time = resp_header_strdup (resp, "Last-Modified"); - /* Handle (possibly multiple instances of) the Set-Cookie header. */ - if (opt.cookies) - { - int scpos; - const char *scbeg, *scend; - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); - for (scpos = 0; - (scpos = resp_header_locate (resp, "Set-Cookie", scpos, - &scbeg, &scend)) != -1; - ++scpos) - { - char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, - u->path, set_cookie); - } - } - if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) { wgint first_byte_pos, last_byte_pos, entity_length; @@ -2109,11 +2603,53 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); _("Location: %s%s\n"), hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); - if (keep_alive && !head_only && skip_short_body (sock, contlen)) - CLOSE_FINISH (sock); + + /* In case the caller cares to look... */ + hs->len = 0; + hs->res = 0; + hs->restval = 0; + + /* Normally we are not interested in the response body of a redirect. + But if we are writing a WARC file we are: we like to keep everyting. */ + if (warc_enabled) + { + int err = read_response_body (hs, sock, NULL, contlen, 0, + chunked_transfer_encoding, + u->url, warc_timestamp_str, + warc_request_uuid, warc_ip, type, + statcode, head); + + if (err != RETRFINISHED || hs->res < 0) + { + CLOSE_INVALIDATE (sock); + xfree_null (type); + xfree (head); + return err; + } + else + CLOSE_FINISH (sock); + } else - CLOSE_INVALIDATE (sock); + { + /* Since WARC is disabled, we are not interested in the response body. */ + if (keep_alive && !head_only + && skip_short_body (sock, contlen, chunked_transfer_encoding)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + xfree_null (type); + xfree (head); + /* From RFC2616: The status codes 303 and 307 have + been added for servers that wish to make unambiguously + clear which kind of reaction is expected of the client. + + A 307 should be redirected using the same method, + in other words, a POST should be preserved and not + converted to a GET in that case. */ + if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT) + return NEWLOCATION_KEEP_POST; return NEWLOCATION; } } @@ -2123,7 +2659,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || - 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) + 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; @@ -2134,10 +2670,10 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); else *dt &= ~TEXTCSS; - if (opt.html_extension) + if (opt.adjust_extension) { if (*dt & TEXTHTML) - /* -E / --html-extension / html_extension = on was specified, + /* -E / --adjust-extension / adjust_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't already the file's suffix, tack on ".html". */ @@ -2151,9 +2687,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); } if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE - || (hs->restval > 0 && statcode == HTTP_STATUS_OK - && contrange == 0 && hs->restval >= contlen) - ) + || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK + && contrange == 0 && contlen >= 0 && hs->restval >= contlen)) { /* If `-c' is in use and the file has been fully downloaded (or the remote file has shrunk), Wget effectively requests bytes @@ -2169,6 +2704,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); xfree_null (type); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ + xfree (head); return RETRUNNEEDED; } if ((contrange != 0 && contrange != hs->restval) @@ -2178,6 +2714,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); Bail out. */ xfree_null (type); CLOSE_INVALIDATE (sock); + xfree (head); return RANGEERR; } if (contlen == -1) @@ -2219,31 +2756,68 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); logputs (LOG_VERBOSE, "\n"); } } - xfree_null (type); - type = NULL; /* We don't need it any more. */ /* Return if we have no intention of further downloading. */ - if (!(*dt & RETROKF) || head_only) + if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only) { /* In case the caller cares to look... */ hs->len = 0; hs->res = 0; - xfree_null (type); - if (head_only) - /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the - servers not to send body in response to a HEAD request, and - those that do will likely be caught by test_socket_open. - If not, they can be worked around using - `--no-http-keep-alive'. */ - CLOSE_FINISH (sock); - else if (keep_alive && skip_short_body (sock, contlen)) - /* Successfully skipped the body; also keep using the socket. */ - CLOSE_FINISH (sock); + hs->restval = 0; + + /* Normally we are not interested in the response body of a error responses. + But if we are writing a WARC file we are: we like to keep everyting. */ + if (warc_enabled) + { + int err = read_response_body (hs, sock, NULL, contlen, 0, + chunked_transfer_encoding, + u->url, warc_timestamp_str, + warc_request_uuid, warc_ip, type, + statcode, head); + + if (err != RETRFINISHED || hs->res < 0) + { + CLOSE_INVALIDATE (sock); + xfree (head); + xfree_null (type); + return err; + } + else + CLOSE_FINISH (sock); + } else - CLOSE_INVALIDATE (sock); + { + /* Since WARC is disabled, we are not interested in the response body. */ + if (head_only) + /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the + servers not to send body in response to a HEAD request, and + those that do will likely be caught by test_socket_open. + If not, they can be worked around using + `--no-http-keep-alive'. */ + CLOSE_FINISH (sock); + else if (keep_alive + && skip_short_body (sock, contlen, chunked_transfer_encoding)) + /* Successfully skipped the body; also keep using the socket. */ + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + + xfree (head); + xfree_null (type); return RETRFINISHED; } +/* 2005-06-17 SMS. + For VMS, define common fopen() optional arguments. +*/ +#ifdef __VMS +# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id +# define FOPEN_BIN_FLAG 3 +#else /* def __VMS */ +# define FOPEN_BIN_FLAG true +#endif /* def __VMS [else] */ + /* Open the local file. */ if (!output_stream) { @@ -2251,12 +2825,44 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); if (opt.backups) rotate_backups (hs->local_file); if (hs->restval) - fp = fopen (hs->local_file, "ab"); - else if (ALLOW_CLOBBER) - fp = fopen (hs->local_file, "wb"); + { +#ifdef __VMS + int open_id; + + open_id = 21; + fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS); +#else /* def __VMS */ + fp = fopen (hs->local_file, "ab"); +#endif /* def __VMS [else] */ + } + else if (ALLOW_CLOBBER || count > 0) + { + if (opt.unlink && file_exists_p (hs->local_file)) + { + int res = unlink (hs->local_file); + if (res < 0) + { + logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, + strerror (errno)); + CLOSE_INVALIDATE (sock); + xfree (head); + xfree_null (type); + return UNLINKERR; + } + } + +#ifdef __VMS + int open_id; + + open_id = 22; + fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS); +#else /* def __VMS */ + fp = fopen (hs->local_file, "wb"); +#endif /* def __VMS [else] */ + } else { - fp = fopen_excl (hs->local_file, true); + fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG); if (!fp && errno == EEXIST) { /* We cannot just invent a new name and use it (which is @@ -2267,6 +2873,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); _("%s has sprung into existence.\n"), hs->local_file); CLOSE_INVALIDATE (sock); + xfree (head); + xfree_null (type); return FOPEN_EXCL_ERR; } } @@ -2274,6 +2882,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); { logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno)); CLOSE_INVALIDATE (sock); + xfree (head); + xfree_null (type); return FOPENERR; } } @@ -2283,56 +2893,38 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); /* Print fetch message, if opt.verbose. */ if (opt.verbose) { - logprintf (LOG_NOTQUIET, _("Saving to: %s\n"), + logprintf (LOG_NOTQUIET, _("Saving to: %s\n"), HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file)); } - - /* This confuses the timestamping code that checks for file size. - #### The timestamping code should be smarter about file size. */ - if (opt.save_headers && hs->restval == 0) - fwrite (head, 1, strlen (head), fp); + + + err = read_response_body (hs, sock, fp, contlen, contrange, + chunked_transfer_encoding, + u->url, warc_timestamp_str, + warc_request_uuid, warc_ip, type, + statcode, head); /* Now we no longer need to store the response header. */ xfree (head); - - /* Download the request body. */ - flags = 0; - if (contlen != -1) - /* If content-length is present, read that much; otherwise, read - until EOF. The HTTP spec doesn't require the server to - actually close the connection when it's done sending data. */ - flags |= rb_read_exactly; - if (hs->restval > 0 && contrange == 0) - /* If the server ignored our range request, instruct fd_read_body - to skip the first RESTVAL bytes of body. */ - flags |= rb_skip_startpos; - hs->len = hs->restval; - hs->rd_size = 0; - hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, - hs->restval, &hs->rd_size, &hs->len, &hs->dltime, - flags); + xfree_null (type); if (hs->res >= 0) CLOSE_FINISH (sock); else - { - if (hs->res < 0) - hs->rderrmsg = xstrdup (fd_errstr (sock)); - CLOSE_INVALIDATE (sock); - } + CLOSE_INVALIDATE (sock); if (!output_stream) fclose (fp); - if (hs->res == -2) - return FWRITEERR; - return RETRFINISHED; + + return err; } /* The genuine HTTP loop! This is the part where the retrieval is retried, and retried, and retried, and... */ uerr_t -http_loop (struct url *u, char **newloc, char **local_file, const char *referer, - int *dt, struct url *proxy) +http_loop (struct url *u, struct url *original_url, char **newloc, + char **local_file, const char *referer, int *dt, struct url *proxy, + struct iri *iri) { int count; bool got_head = false; /* used for time-stamping and filename detection */ @@ -2343,16 +2935,24 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, uerr_t err, ret = TRYLIMEXC; time_t tmr = -1; /* remote time-stamp */ struct http_stat hstat; /* HTTP status */ - struct_stat st; + struct_stat st; bool send_head_first = true; + char *file_name; + bool force_full_retrieve = false; + + + /* If we are writing to a WARC file: always retrieve the whole file. */ + if (opt.warc_filename != NULL) + force_full_retrieve = true; + /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); - + /* Set LOCAL_FILE parameter. */ if (local_file && opt.output_document) *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); - + /* Reset NEWLOC parameter. */ *newloc = NULL; @@ -2360,7 +2960,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, here so that we don't go through the hoops if we're just using FTP or whatever. */ if (opt.cookies) - load_cookies(); + load_cookies (); /* Warn on (likely bogus) wildcard usage in HTTP. */ if (opt.ftp_glob && has_wildcards_p (u->path)) @@ -2377,61 +2977,49 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, } else if (!opt.content_disposition) { - hstat.local_file = url_file_name (u); + hstat.local_file = + url_file_name (opt.trustservernames ? u : original_url, NULL); got_name = true; } - /* TODO: Ick! This code is now in both gethttp and http_loop, and is - * screaming for some refactoring. */ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document) { /* If opt.noclobber is turned on and file already exists, do not retrieve the file. But if the output_document was given, then this test was already done and the file didn't exist. Hence the !opt.output_document */ - logprintf (LOG_VERBOSE, _("\ -File %s already there; not retrieving.\n\n"), - quote (hstat.local_file)); - /* If the file is there, we suppose it's retrieved OK. */ - *dt |= RETROKF; - - /* #### Bogusness alert. */ - /* If its suffix is "html" or "htm" or similar, assume text/html. */ - if (has_html_suffix_p (hstat.local_file)) - *dt |= TEXTHTML; - + get_file_flags (hstat.local_file, dt); ret = RETROK; goto exit; } /* Reset the counter. */ count = 0; - + /* Reset the document type. */ *dt = 0; - - /* Skip preliminary HEAD request if we're not in spider mode AND - * if -O was given or HTTP Content-Disposition support is disabled. */ - if (!opt.spider - && (got_name || !opt.content_disposition)) + + /* Skip preliminary HEAD request if we're not in spider mode. */ + if (!opt.spider) send_head_first = false; - /* Send preliminary HEAD request if -N is given and we have an existing + /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ - if (opt.timestamping - && !opt.content_disposition - && file_exists_p (url_file_name (u))) + file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); + if (opt.timestamping && (file_exists_p (file_name) + || opt.content_disposition)) send_head_first = true; - + xfree (file_name); + /* THE loop */ do { /* Increment the pass counter. */ ++count; sleep_between_retrievals (count); - + /* Get the current time string. */ tms = datetime_str (time (NULL)); - + if (opt.spider && !got_head) logprintf (LOG_VERBOSE, _("\ Spider mode enabled. Check if remote file exists.\n")); @@ -2440,20 +3028,20 @@ Spider mode enabled. Check if remote file exists.\n")); if (opt.verbose) { char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD); - - if (count > 1) + + if (count > 1) { char tmp[256]; sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_NOTQUIET, "--%s-- %s %s\n", tms, tmp, hurl); } - else + else { logprintf (LOG_NOTQUIET, "--%s-- %s\n", tms, hurl); } - + #ifdef WINDOWS ws_changetitle (hurl); #endif @@ -2463,13 +3051,15 @@ Spider mode enabled. Check if remote file exists.\n")); /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ - if (send_head_first && !got_head) + if (send_head_first && !got_head) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; /* Decide whether or not to restart. */ - if (opt.always_rest + if (force_full_retrieve) + hstat.restval = hstat.len; + else if (opt.always_rest && got_name && stat (hstat.local_file, &st) == 0 && S_ISREG (st.st_mode)) @@ -2496,11 +3086,11 @@ Spider mode enabled. Check if remote file exists.\n")); *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ - err = gethttp (u, &hstat, dt, proxy); + err = gethttp (u, &hstat, dt, proxy, iri, count); /* Time? */ tms = datetime_str (time (NULL)); - + /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); @@ -2520,17 +3110,37 @@ Spider mode enabled. Check if remote file exists.\n")); logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"), quote (hstat.local_file), strerror (errno)); - case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLINITFAILED: case CONTNOTSUPPORTED: + case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: + case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR: /* Fatal errors just return from the function. */ ret = err; goto exit; + case WARC_ERR: + /* A fatal WARC error. */ + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("Cannot write to WARC file..\n")); + ret = err; + goto exit; + case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR: + /* A fatal WARC error. */ + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n")); + ret = err; + goto exit; case CONSSLERR: /* Another fatal error. */ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); ret = err; goto exit; + case UNLINKERR: + /* Another fatal error. */ + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"), + quote (hstat.local_file), strerror (errno)); + ret = err; + goto exit; case NEWLOCATION: + case NEWLOCATION_KEEP_POST: /* Return the new location to the caller. */ if (!*newloc) { @@ -2539,9 +3149,9 @@ Spider mode enabled. Check if remote file exists.\n")); hstat.statcode); ret = WRONGCODE; } - else + else { - ret = NEWLOCATION; + ret = err; } goto exit; case RETRUNNEEDED: @@ -2555,7 +3165,7 @@ Spider mode enabled. Check if remote file exists.\n")); /* All possibilities should have been exhausted. */ abort (); } - + if (!(*dt & RETROKF)) { char *hurl = NULL; @@ -2574,11 +3184,13 @@ Spider mode enabled. Check if remote file exists.\n")); continue; } /* Maybe we should always keep track of broken links, not just in - * spider mode. */ - else if (opt.spider) + * spider mode. + * Don't log error if it was UTF-8 encoded because we will try + * once unencoded. */ + else if (opt.spider && !iri->utf8_encode) { /* #### Again: ugly ugly ugly! */ - if (!hurl) + if (!hurl) hurl = url_string (u, URL_AUTH_HIDE_PASSWD); nonexisting_url (hurl); logprintf (LOG_NOTQUIET, _("\ @@ -2587,7 +3199,7 @@ Remote file does not exist -- broken link!!!\n")); else { logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, + tms, hstat.statcode, quotearg_style (escape_quoting_style, hstat.error)); } logputs (LOG_VERBOSE, "\n"); @@ -2616,7 +3228,7 @@ Last-modified header invalid -- time-stamp ignored.\n")); if (*dt & HEAD_ONLY) time_came_from_head = true; } - + if (send_head_first) { /* The time-stamping section. */ @@ -2627,7 +3239,7 @@ Last-modified header invalid -- time-stamp ignored.\n")); we're supposed to download already exists. */ { - if (hstat.remote_time && + if (hstat.remote_time && tmr != (time_t) (-1)) { /* Now time-stamping can be used validly. @@ -2638,7 +3250,7 @@ Last-modified header invalid -- time-stamp ignored.\n")); download procedure is resumed. */ if (hstat.orig_file_tstamp >= tmr) { - if (hstat.contlen == -1 + if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen) { logprintf (LOG_VERBOSE, _("\ @@ -2655,17 +3267,20 @@ The sizes do not match (local %s) -- retrieving.\n"), } } else - logputs (LOG_VERBOSE, - _("Remote file is newer, retrieving.\n")); + { + force_full_retrieve = true; + logputs (LOG_VERBOSE, + _("Remote file is newer, retrieving.\n")); + } logputs (LOG_VERBOSE, "\n"); } } - + /* free_hstat (&hstat); */ hstat.timestamp_checked = true; } - + if (opt.spider) { bool finished = true; @@ -2677,7 +3292,7 @@ The sizes do not match (local %s) -- retrieving.\n"), Remote file exists and could contain links to other resources -- retrieving.\n\n")); finished = false; } - else + else { logprintf (LOG_VERBOSE, _("\ Remote file exists but does not contain any link -- not retrieving.\n\n")); @@ -2692,18 +3307,18 @@ Remote file exists but does not contain any link -- not retrieving.\n\n")); Remote file exists and could contain further links,\n\ but recursion is disabled -- not retrieving.\n\n")); } - else + else { logprintf (LOG_VERBOSE, _("\ Remote file exists.\n\n")); } ret = RETROK; /* RETRUNNEEDED is not for caller. */ } - + if (finished) { - logprintf (LOG_NONVERBOSE, - _("%s URL:%s %2d %s\n"), + logprintf (LOG_NONVERBOSE, + _("%s URL: %s %2d %s\n"), tms, u->url, hstat.statcode, hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : ""); goto exit; @@ -2716,21 +3331,14 @@ Remote file exists.\n\n")); continue; } /* send_head_first */ } /* !got_head */ - - if ((tmr != (time_t) (-1)) + + if (opt.useservertimestamps + && (tmr != (time_t) (-1)) && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { - /* #### This code repeats in http.c and ftp.c. Move it to a - function! */ const char *fl = NULL; - if (opt.output_document) - { - if (output_stream_regular) - fl = opt.output_document; - } - else - fl = hstat.local_file; + set_local_file (&fl, hstat.local_file); if (fl) { time_t newtmr = -1; @@ -2739,7 +3347,7 @@ Remote file exists.\n\n")); && hstat.remote_time && hstat.remote_time[0]) { newtmr = http_atotm (hstat.remote_time); - if (newtmr != -1) + if (newtmr != (time_t)-1) tmr = newtmr; } touch (fl, tmr); @@ -2754,9 +3362,14 @@ Remote file exists.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s/%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s/%s]\n\n") + : _("%s (%s) - %s saved [%s/%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len), number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, @@ -2767,13 +3380,13 @@ Remote file exists.\n\n")); hstat.local_file, count); } ++numurls; - total_downloaded_bytes += hstat.len; + total_downloaded_bytes += hstat.rd_size; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) - downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); else - downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file); ret = RETROK; goto exit; @@ -2781,13 +3394,18 @@ Remote file exists.\n\n")); else if (hstat.res == 0) /* No read error */ { if (hstat.contlen == -1) /* We don't know how much we were supposed - to get, so assume we succeeded. */ + to get, so assume we succeeded. */ { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s]\n\n") + : _("%s (%s) - %s saved [%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, "%s URL:%s [%s] -> \"%s\" [%d]\n", @@ -2795,14 +3413,14 @@ Remote file exists.\n\n")); hstat.local_file, count); } ++numurls; - total_downloaded_bytes += hstat.len; + total_downloaded_bytes += hstat.rd_size; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) - downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); + downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file); else - downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file); - + downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file); + ret = RETROK; goto exit; } @@ -2856,10 +3474,10 @@ Remote file exists.\n\n")); while (!opt.ntry || (count < opt.ntry)); exit: - if (ret == RETROK) + if (ret == RETROK && local_file) *local_file = xstrdup (hstat.local_file); free_hstat (&hstat); - + return ret; } @@ -2931,6 +3549,7 @@ http_atotm (const char *time_string) Netscape cookie specification.) */ }; const char *oldlocale; + char savedlocale[256]; size_t i; time_t ret = (time_t) -1; @@ -2938,6 +3557,16 @@ http_atotm (const char *time_string) non-English locales, which we work around by temporarily setting locale to C before invoking strptime. */ oldlocale = setlocale (LC_TIME, NULL); + if (oldlocale) + { + size_t l = strlen (oldlocale) + 1; + if (l >= sizeof savedlocale) + savedlocale[0] = '\0'; + else + memcpy (savedlocale, oldlocale, l); + } + else savedlocale[0] = '\0'; + setlocale (LC_TIME, "C"); for (i = 0; i < countof (time_formats); i++) @@ -2957,7 +3586,8 @@ http_atotm (const char *time_string) } /* Restore the previous locale. */ - setlocale (LC_TIME, oldlocale); + if (savedlocale[0]) + setlocale (LC_TIME, savedlocale); return ret; } @@ -3010,7 +3640,7 @@ dump_hash (char *buf, const unsigned char *hash) { int i; - for (i = 0; i < MD5_HASHLEN; i++, hash++) + for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++) { *buf++ = XNUM_TO_digit (*hash >> 4); *buf++ = XNUM_TO_digit (*hash & 0xf); @@ -3063,37 +3693,37 @@ digest_authentication_encode (const char *au, const char *user, /* Calculate the digest value. */ { - ALLOCA_MD5_CONTEXT (ctx); - unsigned char hash[MD5_HASHLEN]; - char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1]; - char response_digest[MD5_HASHLEN * 2 + 1]; + struct md5_ctx ctx; + unsigned char hash[MD5_DIGEST_SIZE]; + char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1]; + char response_digest[MD5_DIGEST_SIZE * 2 + 1]; /* A1BUF = H(user ":" realm ":" password) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)user, strlen (user), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)realm, strlen (realm), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)user, strlen (user), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (a1buf, hash); /* A2BUF = H(method ":" path) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)method, strlen (method), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)path, strlen (path), ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)method, strlen (method), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)path, strlen (path), &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (a2buf, hash); /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */ - gen_md5_init (ctx); - gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx); - gen_md5_update ((unsigned char *)":", 1, ctx); - gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx); - gen_md5_finish (ctx, hash); + md5_init_ctx (&ctx); + md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx); + md5_process_bytes ((unsigned char *)":", 1, &ctx); + md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx); + md5_finish_ctx (&ctx, hash); dump_hash (response_digest, hash); res = xmalloc (strlen (user) @@ -3101,7 +3731,7 @@ digest_authentication_encode (const char *au, const char *user, + strlen (realm) + strlen (nonce) + strlen (path) - + 2 * MD5_HASHLEN /*strlen (response_digest)*/ + + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/ + (opaque ? strlen (opaque) : 0) + 128); sprintf (res, "Digest \ @@ -3222,7 +3852,7 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt) if (len == 5) { strncpy (shortext, ext, len - 1); - shortext[len - 2] = '\0'; + shortext[len - 1] = '\0'; } if (last_period_in_local_filename == NULL @@ -3257,32 +3887,28 @@ test_parse_content_disposition() { int i; struct { - char *hdrval; - char *opt_dir_prefix; + char *hdrval; char *filename; bool result; } test_array[] = { - { "filename=\"file.ext\"", NULL, "file.ext", true }, - { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true }, - { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true }, - { "attachment", NULL, NULL, false }, - { "attachment", "somedir", NULL, false }, + { "filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"", "file.ext", true }, + { "attachment; filename=\"file.ext\"; dummy", "file.ext", true }, + { "attachment", NULL, false }, + { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true }, + { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true }, }; - - for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + + for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) { char *filename; bool res; - opt.dir_prefix = test_array[i].opt_dir_prefix; res = parse_content_disposition (test_array[i].hdrval, &filename); - mu_assert ("test_parse_content_disposition: wrong result", + mu_assert ("test_parse_content_disposition: wrong result", res == test_array[i].result - && (res == false + && (res == false || 0 == strcmp (test_array[i].filename, filename))); }