X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=97a773a1e2d720abe45ac5041c3245e0477d49a7;hb=0967c21094580317353f0742c4836c5bbea34059;hp=de1cb084dbb1d15baabb8266332d5558d54c06cc;hpb=d9fea91a0a319e348adb504bd3edff148ff3d8a0;p=wget diff --git a/src/http.c b/src/http.c index de1cb084..97a773a1 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,5 @@ /* HTTP support. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002 - Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -62,14 +61,16 @@ extern int errno; #include "url.h" #include "host.h" #include "retr.h" -#include "headers.h" #include "connect.h" #include "netrc.h" #ifdef HAVE_SSL -# include "gen_sslfunc.h" -#endif /* HAVE_SSL */ +# include "ssl.h" +#endif +#ifdef ENABLE_NTLM +# include "http-ntlm.h" +#endif #include "cookies.h" -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST # include "gen-md5.h" #endif #include "convert.h" @@ -77,19 +78,26 @@ extern int errno; extern char *version_string; extern LARGE_INT total_downloaded_bytes; +extern FILE *output_stream; +extern int output_stream_regular; + +#ifndef MIN +# define MIN(x, y) ((x) > (y) ? (y) : (x)) +#endif + static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" -#define HTTP_ACCEPT "*/*" /* Some status code validation macros: */ #define H_20X(x) (((x) >= 200) && ((x) < 300)) #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS) -#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ - || (x) == HTTP_STATUS_MOVED_TEMPORARILY \ +#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ + || (x) == HTTP_STATUS_MOVED_TEMPORARILY \ + || (x) == HTTP_STATUS_SEE_OTHER \ || (x) == HTTP_STATUS_TEMPORARY_REDIRECT) /* HTTP/1.0 status codes from RFC1945, provided for reference. 
*/ @@ -104,97 +112,291 @@ struct cookie_jar *wget_cookie_jar; #define HTTP_STATUS_MULTIPLE_CHOICES 300 #define HTTP_STATUS_MOVED_PERMANENTLY 301 #define HTTP_STATUS_MOVED_TEMPORARILY 302 +#define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */ #define HTTP_STATUS_NOT_MODIFIED 304 -#define HTTP_STATUS_TEMPORARY_REDIRECT 307 +#define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */ /* Client error 4xx. */ #define HTTP_STATUS_BAD_REQUEST 400 #define HTTP_STATUS_UNAUTHORIZED 401 #define HTTP_STATUS_FORBIDDEN 403 #define HTTP_STATUS_NOT_FOUND 404 +#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416 /* Server errors 5xx. */ #define HTTP_STATUS_INTERNAL 500 #define HTTP_STATUS_NOT_IMPLEMENTED 501 #define HTTP_STATUS_BAD_GATEWAY 502 #define HTTP_STATUS_UNAVAILABLE 503 - -/* Parse the HTTP status line, which is of format: +enum rp { + rel_none, rel_name, rel_value, rel_both +}; - HTTP-Version SP Status-Code SP Reason-Phrase +struct request { + const char *method; + char *arg; + + struct request_header { + char *name, *value; + enum rp release_policy; + } *headers; + int hcount, hcapacity; +}; + +/* Create a new, empty request. At least request_set_method must be + called before the request can be used. */ + +static struct request * +request_new () +{ + struct request *req = xnew0 (struct request); + req->hcapacity = 8; + req->headers = xnew_array (struct request_header, req->hcapacity); + return req; +} + +/* Set the request's method and its arguments. METH should be a + literal string (or it should outlive the request) because it will + not be freed. ARG will be freed by request_free. */ + +static void +request_set_method (struct request *req, const char *meth, char *arg) +{ + req->method = meth; + req->arg = arg; +} + +/* Return the method string passed with the last call to + request_set_method. 
*/ + +static const char * +request_method (const struct request *req) +{ + return req->method; +} + +/* Free one header according to the release policy specified with + request_set_header. */ + +static void +release_header (struct request_header *hdr) +{ + switch (hdr->release_policy) + { + case rel_none: + break; + case rel_name: + xfree (hdr->name); + break; + case rel_value: + xfree (hdr->value); + break; + case rel_both: + xfree (hdr->name); + xfree (hdr->value); + break; + } +} + +/* Set the request named NAME to VALUE. Specifically, this means that + a "NAME: VALUE\r\n" header line will be used in the request. If a + header with the same name previously existed in the request, its + value will be replaced by this one. A NULL value means do nothing. + + RELEASE_POLICY determines whether NAME and VALUE should be released + (freed) with request_free. Allowed values are: + + - rel_none - don't free NAME or VALUE + - rel_name - free NAME when done + - rel_value - free VALUE when done + - rel_both - free both NAME and VALUE when done + + Setting release policy is useful when arguments come from different + sources. For example: + + // Don't free literal strings! + request_set_header (req, "Pragma", "no-cache", rel_none); + + // Don't free a global variable, we'll need it later. + request_set_header (req, "Referer", opt.referer, rel_none); + + // Value freshly allocated, free it when done. + request_set_header (req, "Range", + aprintf ("bytes=%s-", number_to_static_string (hs->restval)), + rel_value); + */ + +static void +request_set_header (struct request *req, char *name, char *value, + enum rp release_policy) +{ + struct request_header *hdr; + int i; + + if (!value) + { + /* A NULL value is a no-op; if freeing the name is requested, + free it now to avoid leaks. 
*/ + if (release_policy == rel_name || release_policy == rel_both) + xfree (name); + return; + } + + for (i = 0; i < req->hcount; i++) + { + hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + /* Replace existing header. */ + release_header (hdr); + hdr->name = name; + hdr->value = value; + hdr->release_policy = release_policy; + return; + } + } + + /* Install new header. */ + + if (req->hcount >= req->hcapacity) + { + req->hcapacity <<= 1; + req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr)); + } + hdr = &req->headers[req->hcount++]; + hdr->name = name; + hdr->value = value; + hdr->release_policy = release_policy; +} + +/* Like request_set_header, but sets the whole header line, as + provided by the user using the `--header' option. For example, + request_set_user_header (req, "Foo: bar") works just like + request_set_header (req, "Foo", "bar"). */ + +static void +request_set_user_header (struct request *req, const char *header) +{ + char *name; + const char *p = strchr (header, ':'); + if (!p) + return; + BOUNDED_TO_ALLOCA (header, p, name); + ++p; + while (ISSPACE (*p)) + ++p; + request_set_header (req, xstrdup (name), (char *) p, rel_name); +} + +/* Remove the header with specified name from REQ. Returns 1 if the + header was actually removed, 0 otherwise. */ + +static int +request_remove_header (struct request *req, char *name) +{ + int i; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + release_header (hdr); + /* Move the remaining headers by one. */ + if (i < req->hcount - 1) + memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr)); + --req->hcount; + return 1; + } + } + return 0; +} + +#define APPEND(p, str) do { \ + int A_len = strlen (str); \ + memcpy (p, str, A_len); \ + p += A_len; \ +} while (0) + +/* Construct the request and write it to FD using fd_write. 
*/ - The function returns the status-code, or -1 if the status line is - malformed. The pointer to reason-phrase is returned in RP. */ static int -parse_http_status_line (const char *line, const char **reason_phrase_ptr) +request_send (const struct request *req, int fd) { - /* (the variables must not be named `major' and `minor', because - that breaks compilation with SunOS4 cc.) */ - int mjr, mnr, statcode; - const char *p; + char *request_string, *p; + int i, size, write_error; - *reason_phrase_ptr = NULL; + /* Count the request size. */ + size = 0; - /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is - major version, and Y is minor version. */ - if (strncmp (line, "HTTP/", 5) != 0) - return -1; - line += 5; + /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */ + size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2; - /* Calculate major HTTP version. */ - p = line; - for (mjr = 0; ISDIGIT (*line); line++) - mjr = 10 * mjr + (*line - '0'); - if (*line != '.' || p == line) - return -1; - ++line; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + /* NAME ": " VALUE "\r\n" */ + size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2; + } - /* Calculate minor HTTP version. */ - p = line; - for (mnr = 0; ISDIGIT (*line); line++) - mnr = 10 * mnr + (*line - '0'); - if (*line != ' ' || p == line) - return -1; - /* Wget will accept only 1.0 and higher HTTP-versions. The value of - minor version can be safely ignored. */ - if (mjr < 1) - return -1; - ++line; + /* "\r\n\0" */ + size += 3; - /* Calculate status code. */ - if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2]))) - return -1; - statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0'); + p = request_string = alloca_array (char, size); - /* Set up the reason phrase pointer. */ - line += 3; - /* RFC2068 requires SPC here, but we allow the string to finish - here, in case no reason-phrase is present. 
*/ - if (*line != ' ') + /* Generate the request. */ + + APPEND (p, req->method); *p++ = ' '; + APPEND (p, req->arg); *p++ = ' '; + memcpy (p, "HTTP/1.0\r\n", 10); p += 10; + + for (i = 0; i < req->hcount; i++) { - if (!*line) - *reason_phrase_ptr = line; - else - return -1; + struct request_header *hdr = &req->headers[i]; + APPEND (p, hdr->name); + *p++ = ':', *p++ = ' '; + APPEND (p, hdr->value); + *p++ = '\r', *p++ = '\n'; } - else - *reason_phrase_ptr = line + 1; - return statcode; + *p++ = '\r', *p++ = '\n', *p++ = '\0'; + assert (p - request_string == size); + +#undef APPEND + + DEBUGP (("\n---request begin---\n%s---request end---\n", request_string)); + + /* Send the request to the server. */ + + write_error = fd_write (fd, request_string, size - 1, -1.0); + if (write_error < 0) + logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), + strerror (errno)); + return write_error; } - -#define WMIN(x, y) ((x) > (y) ? (y) : (x)) -/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly +/* Release the resources used by REQ. */ + +static void +request_free (struct request *req) +{ + int i; + xfree_null (req->arg); + for (i = 0; i < req->hcount; i++) + release_header (&req->headers[i]); + xfree_null (req->headers); + xfree (req); +} + +/* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. 
*/ static int -post_file (int sock, const char *file_name, long promised_size) +post_file (int sock, const char *file_name, wgint promised_size) { static char chunk[8192]; - long written = 0; + wgint written = 0; int write_error; FILE *fp; @@ -209,8 +411,8 @@ post_file (int sock, const char *file_name, long promised_size) int length = fread (chunk, 1, sizeof (chunk), fp); if (length == 0) break; - towrite = WMIN (promised_size - written, length); - write_error = fd_write (sock, chunk, towrite, -1); + towrite = MIN (promised_size - written, length); + write_error = fd_write (sock, chunk, towrite, -1.0); if (write_error < 0) { fclose (fp); @@ -234,44 +436,346 @@ post_file (int sock, const char *file_name, long promised_size) } static const char * -next_header (const char *h) +response_head_terminator (const char *hunk, int oldlen, int peeklen) { - const char *end = NULL; - const char *p = h; - do + const char *start, *end; + + /* If at first peek, verify whether HUNK starts with "HTTP". If + not, this is a HTTP/0.9 request and we must bail out without + reading anything. */ + if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4))) + return hunk; + + if (oldlen < 4) + start = hunk; + else + start = hunk + oldlen - 4; + end = hunk + oldlen + peeklen; + + for (; start < end - 1; start++) + if (*start == '\n') + { + if (start < end - 2 + && start[1] == '\r' + && start[2] == '\n') + return start + 3; + if (start[1] == '\n') + return start + 2; + } + return NULL; +} + +/* The maximum size of a single HTTP response we care to read. This + is not meant to impose an arbitrary limit, but to protect the user + from Wget slurping up available memory upon encountering malicious + or buggy server output. Define it to 0 to remove the limit. */ + +#define HTTP_RESPONSE_MAX_SIZE 65536 + +/* Read the HTTP request head from FD and return it. The error + conditions are the same as with fd_read_hunk. 
+ + To support HTTP/0.9 responses, this function tries to make sure + that the data begins with "HTTP". If this is not the case, no data + is read and an empty request is returned, so that the remaining + data can be treated as body. */ + +static char * +read_http_response_head (int fd) +{ + return fd_read_hunk (fd, response_head_terminator, 512, + HTTP_RESPONSE_MAX_SIZE); +} + +struct response { + /* The response data. */ + const char *data; + + /* The array of pointers that indicate where each header starts. + For example, given this HTTP response: + + HTTP/1.0 200 Ok + Description: some + text + Etag: x + + The headers are located like this: + + "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n" + ^ ^ ^ ^ + headers[0] headers[1] headers[2] headers[3] + + I.e. headers[0] points to the beginning of the request, + headers[1] points to the end of the first header and the + beginning of the second one, etc. */ + + const char **headers; +}; + +/* Create a new response object from the text of the HTTP response, + available in HEAD. That text is automatically split into + constituent header lines for fast retrieval using + resp_header_*. */ + +static struct response * +resp_new (const char *head) +{ + const char *hdr; + int count, size; + + struct response *resp = xnew0 (struct response); + resp->data = head; + + if (*head == '\0') { - p = strchr (p, '\n'); - if (!p) - return end; - end = ++p; + /* Empty head means that we're dealing with a headerless + (HTTP/0.9) response. In that case, don't set HEADERS at + all. */ + return resp; } - while (*p == ' ' || *p == '\t'); - return end; + /* Split HEAD into header lines, so that resp_header_* functions + don't need to do this over and over again. */ + + size = count = 0; + hdr = head; + while (1) + { + DO_REALLOC (resp->headers, size, count + 1, const char *); + resp->headers[count++] = hdr; + + /* Break upon encountering an empty line. 
*/ + if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n') + break; + + /* Find the end of HDR, including continuations. */ + do + { + const char *end = strchr (hdr, '\n'); + if (end) + hdr = end + 1; + else + hdr += strlen (hdr); + } + while (*hdr == ' ' || *hdr == '\t'); + } + DO_REALLOC (resp->headers, size, count + 1, const char *); + resp->headers[count] = NULL; + + return resp; } - -/* Functions to be used as arguments to header_process(): */ +/* Locate the header named NAME in the request data, starting with + position START. This allows the code to loop through the request + data, filtering for all requests of a given name. Returns the + found position, or -1 for failure. The code that uses this + function typically looks like this: -struct http_process_range_closure { - long first_byte_pos; - long last_byte_pos; - long entity_length; -}; + for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++) + ... do something with header ... + + If you only care about one header, use resp_header_get instead of + this function. */ + +static int +resp_header_locate (const struct response *resp, const char *name, int start, + const char **begptr, const char **endptr) +{ + int i; + const char **headers = resp->headers; + int name_len; + + if (!headers || !headers[1]) + return -1; + + name_len = strlen (name); + if (start > 0) + i = start; + else + i = 1; + + for (; headers[i + 1]; i++) + { + const char *b = headers[i]; + const char *e = headers[i + 1]; + if (e - b > name_len + && b[name_len] == ':' + && 0 == strncasecmp (b, name, name_len)) + { + b += name_len + 1; + while (b < e && ISSPACE (*b)) + ++b; + while (b < e && ISSPACE (e[-1])) + --e; + *begptr = b; + *endptr = e; + return i; + } + } + return -1; +} + +/* Find and retrieve the header named NAME in the request data. If + found, set *BEGPTR to its starting, and *ENDPTR to its ending + position, and return 1. Otherwise return 0. 
+ + This function is used as a building block for resp_header_copy + and resp_header_strdup. */ + +static int +resp_header_get (const struct response *resp, const char *name, + const char **begptr, const char **endptr) +{ + int pos = resp_header_locate (resp, name, 0, begptr, endptr); + return pos != -1; +} + +/* Copy the response header named NAME to buffer BUF, no longer than + BUFSIZE (BUFSIZE includes the terminating 0). If the header + exists, 1 is returned, otherwise 0. If there should be no limit on + the size of the header, use resp_header_strdup instead. + + If BUFSIZE is 0, no data is copied, but the boolean indication of + whether the header is present is still returned. */ + +static int +resp_header_copy (const struct response *resp, const char *name, + char *buf, int bufsize) +{ + const char *b, *e; + if (!resp_header_get (resp, name, &b, &e)) + return 0; + if (bufsize) + { + int len = MIN (e - b, bufsize - 1); + memcpy (buf, b, len); + buf[len] = '\0'; + } + return 1; +} + +/* Return the value of header named NAME in RESP, allocated with + malloc. If such a header does not exist in RESP, return NULL. */ + +static char * +resp_header_strdup (const struct response *resp, const char *name) +{ + const char *b, *e; + if (!resp_header_get (resp, name, &b, &e)) + return NULL; + return strdupdelim (b, e); +} + +/* Parse the HTTP status line, which is of format: + + HTTP-Version SP Status-Code SP Reason-Phrase + + The function returns the status-code, or -1 if the status line + appears malformed. The pointer to "reason-phrase" message is + returned in *MESSAGE. */ + +static int +resp_status (const struct response *resp, char **message) +{ + int status; + const char *p, *end; + + if (!resp->headers) + { + /* For a HTTP/0.9 response, assume status 200. 
*/ + if (message) + *message = xstrdup (_("No headers, assuming HTTP/0.9")); + return 200; + } + + p = resp->headers[0]; + end = resp->headers[1]; + + if (!end) + return -1; + + /* "HTTP" */ + if (end - p < 4 || 0 != strncmp (p, "HTTP", 4)) + return -1; + p += 4; + + /* Match the HTTP version. This is optional because Gnutella + servers have been reported to not specify HTTP version. */ + if (p < end && *p == '/') + { + ++p; + while (p < end && ISDIGIT (*p)) + ++p; + if (p < end && *p == '.') + ++p; + while (p < end && ISDIGIT (*p)) + ++p; + } + + while (p < end && ISSPACE (*p)) + ++p; + if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2])) + return -1; + + status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0'); + p += 3; + + if (message) + { + while (p < end && ISSPACE (*p)) + ++p; + while (p < end && ISSPACE (end[-1])) + --end; + *message = strdupdelim (p, end); + } + + return status; +} + +/* Release the resources used by RESP. */ + +static void +resp_free (struct response *resp) +{ + xfree_null (resp->headers); + xfree (resp); +} + +/* Print the server response, line by line, omitting the trailing CRLF + from individual header lines, and prefixed with PREFIX. */ + +static void +print_server_response (const struct response *resp, const char *prefix) +{ + int i; + if (!resp->headers) + return; + for (i = 0; resp->headers[i + 1]; i++) + { + const char *b = resp->headers[i]; + const char *e = resp->headers[i + 1]; + /* Skip CRLF */ + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + /* This is safe even on printfs with broken handling of "%.s" + because resp->headers ends with \0. */ + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + } +} /* Parse the `Content-Range' header and extract the information it contains. Returns 1 if successful, -1 otherwise. 
*/ static int -http_process_range (const char *hdr, void *arg) +parse_content_range (const char *hdr, wgint *first_byte_ptr, + wgint *last_byte_ptr, wgint *entity_length_ptr) { - struct http_process_range_closure *closure - = (struct http_process_range_closure *)arg; - long num; - - /* Certain versions of Nutscape proxy server send out - `Content-Length' without "bytes" specifier, which is a breach of - RFC2068 (as well as the HTTP/1.1 draft which was current at the - time). But hell, I must support it... */ + wgint num; + + /* Ancient versions of Netscape proxy server, presumably predating + rfc2068, sent out `Content-Range' without the "bytes" + specifier. */ if (!strncasecmp (hdr, "bytes", 5)) { hdr += 5; @@ -279,7 +783,8 @@ http_process_range (const char *hdr, void *arg) HTTP spec. */ if (*hdr == ':') ++hdr; - hdr += skip_lws (hdr); + while (ISSPACE (*hdr)) + ++hdr; if (!*hdr) return 0; } @@ -289,73 +794,70 @@ http_process_range (const char *hdr, void *arg) num = 10 * num + (*hdr - '0'); if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) return 0; - closure->first_byte_pos = num; + *first_byte_ptr = num; ++hdr; for (num = 0; ISDIGIT (*hdr); hdr++) num = 10 * num + (*hdr - '0'); if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) return 0; - closure->last_byte_pos = num; + *last_byte_ptr = num; ++hdr; for (num = 0; ISDIGIT (*hdr); hdr++) num = 10 * num + (*hdr - '0'); - closure->entity_length = num; + *entity_length_ptr = num; return 1; } -/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise. - Used for `Accept-Ranges'. */ -static int -http_process_none (const char *hdr, void *arg) -{ - int *where = (int *)arg; +/* Read the body of the request, but don't store it anywhere and don't + display a progress gauge. This is useful for reading the bodies of + administrative responses to which we will soon issue another + request. The response is not useful to the user, but reading it + allows us to continue using the same connection to the server. 
- if (strstr (hdr, "none")) - *where = 1; - else - *where = 0; - return 1; -} + If reading fails, 0 is returned, non-zero otherwise. In debug + mode, the body is displayed for debugging purposes. */ -/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */ static int -http_process_type (const char *hdr, void *arg) +skip_short_body (int fd, wgint contlen) { - char **result = (char **)arg; - /* Locate P on `;' or the terminating zero, whichever comes first. */ - const char *p = strchr (hdr, ';'); - if (!p) - p = hdr + strlen (hdr); - while (p > hdr && ISSPACE (*(p - 1))) - --p; - *result = strdupdelim (hdr, p); - return 1; -} + enum { + SKIP_SIZE = 512, /* size of the download buffer */ + SKIP_THRESHOLD = 4096 /* the largest size we read */ + }; + char dlbuf[SKIP_SIZE + 1]; + dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ -/* Check whether the `Connection' header is set to "keep-alive". */ -static int -http_process_connection (const char *hdr, void *arg) -{ - int *flag = (int *)arg; - if (!strcasecmp (hdr, "Keep-Alive")) - *flag = 1; - return 1; -} + /* We shouldn't get here with unknown contlen. (This will change + with HTTP/1.1, which supports "chunked" transfer.) */ + assert (contlen != -1); -/* Commit the cookie to the cookie jar. */ + /* If the body is too large, it makes more sense to simply close the + connection than to try to read the body. */ + if (contlen > SKIP_THRESHOLD) + return 0; -int -http_process_set_cookie (const char *hdr, void *arg) -{ - struct url *u = (struct url *)arg; + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); + while (contlen > 0) + { + int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0); + if (ret <= 0) + { + /* Don't normally report the error since this is an + optimization that should be invisible to the user. */ + DEBUGP (("] aborting (%s).\n", + ret < 0 ? 
strerror (errno) : "EOF received")); + return 0; + } + contlen -= ret; + /* Safe even if %.*s bogusly expects terminating \0 because + we've zero-terminated dlbuf above. */ + DEBUGP (("%.*s", ret, dlbuf)); + } - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr); + DEBUGP (("] done.\n")); return 1; } - /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -376,6 +878,17 @@ static struct { /* Whether a ssl handshake has occoured on this connection. */ int ssl; + + /* Whether the connection was authorized. This is only done by + NTLM, which authorizes *connections* rather than individual + requests. (That practice is peculiar for HTTP, but it is a + useful optimization.) */ + int authorized; + +#ifdef ENABLE_NTLM + /* NTLM data of the current connection. */ + struct ntlmdata ntlm; +#endif } pconn; /* Mark the persistent connection as invalid and free the resources it @@ -426,6 +939,7 @@ register_persistent (const char *host, int port, int fd, int ssl) pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; + pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd)); } @@ -542,7 +1056,10 @@ persistent_available_p (const char *host, int port, int ssl, if (pconn_active && (fd) == pconn.socket) \ invalidate_persistent (); \ else \ - fd_close (fd); \ + { \ + fd_close (fd); \ + fd = -1; \ + } \ } \ } while (0) @@ -551,21 +1068,21 @@ persistent_available_p (const char *host, int port, int ssl, invalidate_persistent (); \ else \ fd_close (fd); \ + fd = -1; \ } while (0) struct http_stat { - long len; /* received length */ - long contlen; /* expected length */ - long restval; /* the restart value */ + wgint len; /* received length */ + wgint contlen; /* expected length */ + wgint restval; /* the restart value */ int res; /* the result of last read */ char *newloc; /* new location (redirection) */ char *remote_time; /* remote 
time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ - double dltime; /* time of the download in msecs */ - int no_truncate; /* whether truncating the file is - forbidden. */ + wgint rd_size; /* amount of data read from socket */ + double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ char **local_file; /* local file. */ }; @@ -585,10 +1102,9 @@ free_hstat (struct http_stat *hs) static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, - const char *)); -static char *basic_authentication_encode PARAMS ((const char *, const char *, - const char *)); -static int known_authentication_scheme_p PARAMS ((const char *)); + const char *, int *)); +static char *basic_authentication_encode PARAMS ((const char *, const char *)); +static int known_authentication_scheme_p PARAMS ((const char *, const char *)); time_t http_atotm PARAMS ((const char *)); @@ -597,6 +1113,14 @@ time_t http_atotm PARAMS ((const char *)); && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#define SET_USER_AGENT(req) \ + if (opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + else \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s", version_string), rel_value); + + /* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. 
If it receives an error from the functions below it, it @@ -610,73 +1134,66 @@ time_t http_atotm PARAMS ((const char *)); static uerr_t gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { - char *request, *type, *command, *full_path; + struct request *req; + + char *type; char *user, *passwd; - char *pragma_h, *referer, *useragent, *range, *wwwauth; - char *authenticate_h; char *proxyauth; - char *port_maybe; - char *request_keep_alive; - int sock, hcount, statcode; + int statcode; int write_error; - long contlen, contrange; + wgint contlen, contrange; struct url *conn; FILE *fp; - int auth_tried_already; + + int sock = -1; + int flags; + + /* Set to 1 when the authorization has failed permanently and should + not be tried again. */ + int auth_finished = 0; + + /* Whether NTLM authentication is used for this request. */ + int ntlm_seen = 0; + + /* Whether our connection to the remote host is through SSL. */ int using_ssl = 0; - char *cookies = NULL; char *head; - const char *hdr_beg, *hdr_end; + struct response *resp; + char hdrval[256]; + char *message; /* Whether this connection will be kept alive after the HTTP request is done. */ int keep_alive; - /* Flags that detect the two ways of specifying HTTP keep-alive - response. */ - int http_keep_alive_1, http_keep_alive_2; + /* Whether keep-alive should be inhibited. - /* Whether keep-alive should be inhibited. */ - int inhibit_keep_alive; - - /* Whether we need to print the host header with braces around host, - e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual - "Host: symbolic-name:1234". */ - int squares_around_host = 0; + RFC 2068 requests that 1.0 clients not send keep-alive requests + to proxies. This is because many 1.0 proxies do not interpret + the Connection header and transfer it to the remote server, + causing it to not close the connection and leave both the proxy + and the client hanging. 
*/ + int inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ - char *post_content_type, *post_content_length; - long post_data_size = 0; + wgint post_data_size = 0; - int host_lookup_failed; + int host_lookup_failed = 0; #ifdef HAVE_SSL - /* Initialize the SSL context. After the first run, this is a - no-op. */ - switch (ssl_init ()) + if (u->scheme == SCHEME_HTTPS) { - case SSLERRCTXCREATE: - /* this is fatal */ - logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n")); - return SSLERRCTXCREATE; - case SSLERRCERTFILE: - /* try without certfile */ - logprintf (LOG_NOTQUIET, - _("Failed to load certificates from %s\n"), - opt.sslcertfile); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - case SSLERRCERTKEY: - logprintf (LOG_NOTQUIET, - _("Failed to get certificate key from %s\n"), - opt.sslcertkey); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - default: - break; + /* Initialize the SSL context. After this has once been done, + it becomes a no-op. */ + if (!ssl_init ()) + { + scheme_disable (SCHEME_HTTPS); + logprintf (LOG_NOTQUIET, + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; + } } #endif /* HAVE_SSL */ @@ -685,167 +1202,75 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - authenticate_h = 0; - auth_tried_already = 0; - - inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL; - - again: - /* We need to come back here when the initial attempt to retrieve - without authorization header fails. (Expected to happen at least - for the Digest authorization scheme.) */ - - keep_alive = 0; - http_keep_alive_1 = http_keep_alive_2 = 0; - - post_content_type = NULL; - post_content_length = NULL; - /* Initialize certain elements of struct http_stat. 
*/ - hs->len = 0L; + hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; - /* If we're using a proxy, we will be connecting to the proxy - server. */ - conn = proxy ? proxy : u; - - host_lookup_failed = 0; - - /* First: establish the connection. */ - if (inhibit_keep_alive - || !persistent_available_p (conn->host, conn->port, -#ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS -#else - 0 -#endif - , &host_lookup_failed)) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) - return HOSTERR; - - sock = connect_to_host (conn->host, conn->port); - if (sock == E_HOST) - return HOSTERR; - else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + conn = u; -#ifdef HAVE_SSL - if (conn->scheme == SCHEME_HTTPS) - { - if (!ssl_connect (sock)) - { - logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_NOTQUIET, - _("Unable to establish SSL connection.\n")); - fd_close (sock); - return CONSSLERR; - } - using_ssl = 1; - } -#endif /* HAVE_SSL */ - } - else - { - logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - pconn.host, pconn.port); - sock = pconn.socket; - using_ssl = pconn.ssl; - DEBUGP (("Reusing fd %d.\n", sock)); - } - - if (*dt & HEAD_ONLY) - command = "HEAD"; - else if (opt.post_file_name || opt.post_data) - command = "POST"; - else - command = "GET"; + /* Prepare the request to send. */ - referer = NULL; - if (hs->referer) - { - referer = (char *)alloca (9 + strlen (hs->referer) + 3); - sprintf (referer, "Referer: %s\r\n", hs->referer); - } + req = request_new (); + { + const char *meth = "GET"; + if (*dt & HEAD_ONLY) + meth = "HEAD"; + else if (opt.post_file_name || opt.post_data) + meth = "POST"; + /* Use the full path, i.e. one that includes the leading slash and + the query string. E.g. 
if u->path is "foo/bar" and u->query is + "param=value", full_path will be "/foo/bar?param=value". */ + request_set_method (req, meth, + proxy ? xstrdup (u->url) : url_full_path (u)); + } + request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) - pragma_h = "Pragma: no-cache\r\n"; - else - pragma_h = ""; - + request_set_header (req, "Pragma", "no-cache", rel_none); if (hs->restval) - { - range = (char *)alloca (13 + numdigit (hs->restval) + 4); - /* Gag me! Some servers (e.g. WebSitePro) have been known to - respond to the following `Range' format by generating a - multipart/x-byte-ranges MIME document! This MIME type was - present in an old draft of the byteranges specification. - HTTP/1.1 specifies a multipart/byte-ranges MIME type, but - only if multiple non-overlapping ranges are requested -- - which Wget never does. */ - sprintf (range, "Range: bytes=%ld-\r\n", hs->restval); - } - else - range = NULL; - if (opt.useragent) - STRDUP_ALLOCA (useragent, opt.useragent); - else - { - useragent = (char *)alloca (10 + strlen (version_string)); - sprintf (useragent, "Wget/%s", version_string); - } - /* Construct the authentication, if userid is present. */ + request_set_header (req, "Range", + aprintf ("bytes=%s-", + number_to_static_string (hs->restval)), + rel_value); + SET_USER_AGENT (req); + request_set_header (req, "Accept", "*/*", rel_none); + + /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); - user = user ? user : opt.http_user; - passwd = passwd ? passwd : opt.http_passwd; + user = user ? user : (opt.http_user ? opt.http_user : opt.user); + passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - wwwauth = NULL; if (user && passwd) { - if (!authenticate_h) - { - /* We have the username and the password, but haven't tried - any authorization yet. Let's see if the "Basic" method - works. 
If not, we'll come back here and construct a - proper authorization method with the right challenges. - - If we didn't employ this kind of logic, every URL that - requires authorization would have to be processed twice, - which is very suboptimal and generates a bunch of false - "unauthorized" errors in the server log. - - #### But this logic also has a serious problem when used - with stronger authentications: we *first* transmit the - username and the password in clear text, and *then* - attempt a stronger authentication scheme. That cannot be - right! We are only fortunate that almost everyone still - uses the `Basic' scheme anyway. - - There should be an option to prevent this from happening, - for those who use strong authentication schemes and value - their passwords. */ - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); - } - else - { - /* Use the full path, i.e. one that includes the leading - slash and the query string, but is independent of proxy - setting. */ - char *pth = url_full_path (u); - wwwauth = create_authorization_line (authenticate_h, user, passwd, - command, pth); - xfree (pth); - } + /* We have the username and the password, but haven't tried + any authorization yet. Let's see if the "Basic" method + works. If not, we'll come back here and construct a + proper authorization method with the right challenges. + + If we didn't employ this kind of logic, every URL that + requires authorization would have to be processed twice, + which is very suboptimal and generates a bunch of false + "unauthorized" errors in the server log. + + #### But this logic also has a serious problem when used + with stronger authentications: we *first* transmit the + username and the password in clear text, and *then* attempt a + stronger authentication scheme. That cannot be right! We + are only fortunate that almost everyone still uses the + `Basic' scheme anyway. 
+ + There should be an option to prevent this from happening, for + those who use strong authentication schemes and value their + passwords. */ + request_set_header (req, "Authorization", + basic_authentication_encode (user, passwd), + rel_value); } proxyauth = NULL; @@ -853,10 +1278,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { char *proxy_user, *proxy_passwd; /* For normal username and password, URL components override - command-line/wgetrc parameters. With proxy authentication, - it's the reverse, because proxy URLs are normally the - "permanent" ones, so command-line args should take - precedence. */ + command-line/wgetrc parameters. With proxy + authentication, it's the reverse, because proxy URLs are + normally the "permanent" ones, so command-line args + should take precedence. */ if (opt.proxy_user && opt.proxy_passwd) { proxy_user = opt.proxy_user; @@ -868,37 +1293,56 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) proxy_passwd = proxy->passwd; } /* #### This does not appear right. Can't the proxy request, - say, `Digest' authentication? */ + say, `Digest' authentication? */ if (proxy_user && proxy_passwd) - proxyauth = basic_authentication_encode (proxy_user, proxy_passwd, - "Proxy-Authorization"); - } + proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); - /* String of the form :PORT. Used only for non-standard ports. */ - port_maybe = NULL; - if (u->port != scheme_default_port (u->scheme)) - { - port_maybe = (char *)alloca (numdigit (u->port) + 2); - sprintf (port_maybe, ":%d", u->port); + /* If we're using a proxy, we will be connecting to the proxy + server. */ + conn = proxy; + + /* Proxy authorization over SSL is handled below. */ +#ifdef HAVE_SSL + if (u->scheme != SCHEME_HTTPS) +#endif + request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); } + { + /* Whether we need to print the host header with braces around + host, e.g. 
"Host: [3ffe:8100:200:2::2]:1234" instead of the + usual "Host: symbolic-name:1234". */ + int squares = strchr (u->host, ':') != NULL; + if (u->port == scheme_default_port (u->scheme)) + request_set_header (req, "Host", + aprintf (squares ? "[%s]" : "%s", u->host), + rel_value); + else + request_set_header (req, "Host", + aprintf (squares ? "[%s]:%d" : "%s:%d", + u->host, u->port), + rel_value); + } + if (!inhibit_keep_alive) - request_keep_alive = "Connection: Keep-Alive\r\n"; - else - request_keep_alive = NULL; + request_set_header (req, "Connection", "Keep-Alive", rel_none); if (opt.cookies) - cookies = cookie_header (wget_cookie_jar, u->host, u->port, u->path, + request_set_header (req, "Cookie", + cookie_header (wget_cookie_jar, + u->host, u->port, u->path, #ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS + u->scheme == SCHEME_HTTPS #else - 0 + 0 #endif - ); + ), + rel_value); if (opt.post_data || opt.post_file_name) { - post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n"; + request_set_header (req, "Content-Type", + "application/x-www-form-urlencoded", rel_none); if (opt.post_data) post_data_size = strlen (opt.post_data); else @@ -911,112 +1355,202 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) post_data_size = 0; } } - post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1); - sprintf (post_content_length, - "Content-Length: %ld\r\n", post_data_size); + request_set_header (req, "Content-Length", + xstrdup (number_to_static_string (post_data_size)), + rel_value); } - if (proxy) - full_path = xstrdup (u->url); - else - /* Use the full path, i.e. one that includes the leading slash and - the query string. E.g. if u->path is "foo/bar" and u->query is - "param=value", full_path will be "/foo/bar?param=value". */ - full_path = url_full_path (u); - - if (strchr (u->host, ':')) - squares_around_host = 1; - - /* Allocate the memory for the request. 
*/ - request = (char *)alloca (strlen (command) - + strlen (full_path) - + strlen (useragent) - + strlen (u->host) - + (port_maybe ? strlen (port_maybe) : 0) - + strlen (HTTP_ACCEPT) - + (request_keep_alive - ? strlen (request_keep_alive) : 0) - + (referer ? strlen (referer) : 0) - + (cookies ? strlen (cookies) : 0) - + (wwwauth ? strlen (wwwauth) : 0) - + (proxyauth ? strlen (proxyauth) : 0) - + (range ? strlen (range) : 0) - + strlen (pragma_h) - + (post_content_type - ? strlen (post_content_type) : 0) - + (post_content_length - ? strlen (post_content_length) : 0) - + (opt.user_header ? strlen (opt.user_header) : 0) - + 64); - /* Construct the request. */ - sprintf (request, "\ -%s %s HTTP/1.0\r\n\ -User-Agent: %s\r\n\ -Host: %s%s%s%s\r\n\ -Accept: %s\r\n\ -%s%s%s%s%s%s%s%s%s%s\r\n", - command, full_path, - useragent, - squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "", - port_maybe ? port_maybe : "", - HTTP_ACCEPT, - request_keep_alive ? request_keep_alive : "", - referer ? referer : "", - cookies ? cookies : "", - wwwauth ? wwwauth : "", - proxyauth ? proxyauth : "", - range ? range : "", - pragma_h, - post_content_type ? post_content_type : "", - post_content_length ? post_content_length : "", - opt.user_header ? opt.user_header : ""); - DEBUGP (("\n---request begin---\n%s", request)); - - /* Free the temporary memory. */ - xfree_null (wwwauth); - xfree_null (proxyauth); - xfree_null (cookies); - xfree (full_path); + /* Add the user headers. */ + if (opt.user_headers) + { + int i; + for (i = 0; opt.user_headers[i]; i++) + request_set_user_header (req, opt.user_headers[i]); + } + + retry_with_auth: + /* We need to come back here when the initial attempt to retrieve + without authorization header fails. (Expected to happen at least + for the Digest authorization scheme.) */ + + keep_alive = 0; + + /* Establish the connection. */ + + if (!inhibit_keep_alive) + { + /* Look for a persistent connection to target host, unless a + proxy is used. 
The exception is when SSL is in use, in which + case the proxy is nothing but a passthrough to the target + host, registered as a connection to the latter. */ + struct url *relevant = conn; +#ifdef HAVE_SSL + if (u->scheme == SCHEME_HTTPS) + relevant = u; +#endif + + if (persistent_available_p (relevant->host, relevant->port, +#ifdef HAVE_SSL + relevant->scheme == SCHEME_HTTPS, +#else + 0, +#endif + &host_lookup_failed)) + { + sock = pconn.socket; + using_ssl = pconn.ssl; + logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), + escnonprint (pconn.host), pconn.port); + DEBUGP (("Reusing fd %d.\n", sock)); + if (pconn.authorized) + /* If the connection is already authorized, the "Basic" + authorization added by code above is unnecessary and + only hurts us. */ + request_remove_header (req, "Authorization"); + } + } + + if (sock < 0) + { + /* In its current implementation, persistent_available_p will + look up conn->host in some cases. If that lookup failed, we + don't need to bother with connect_to_host. */ + if (host_lookup_failed) + { + request_free (req); + return HOSTERR; + } + + sock = connect_to_host (conn->host, conn->port); + if (sock == E_HOST) + { + request_free (req); + return HOSTERR; + } + else if (sock < 0) + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } + +#ifdef HAVE_SSL + if (proxy && u->scheme == SCHEME_HTTPS) + { + /* When requesting SSL URLs through proxies, use the + CONNECT method to request passthrough. */ + struct request *connreq = request_new (); + request_set_method (connreq, "CONNECT", + aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); + if (proxyauth) + { + request_set_header (connreq, "Proxy-Authorization", + proxyauth, rel_value); + /* Now that PROXYAUTH is part of the CONNECT request, + zero it out so we don't send proxy authorization with + the regular request below. 
*/ + proxyauth = NULL; + } + /* Examples in rfc2817 use the Host header in CONNECT + requests. I don't see how that gains anything, given + that the contents of Host would be exactly the same as + the contents of CONNECT. */ + + write_error = request_send (connreq, sock); + request_free (connreq); + if (write_error < 0) + { + logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"), + strerror (errno)); + CLOSE_INVALIDATE (sock); + return WRITEFAILED; + } + + head = read_http_response_head (sock); + if (!head) + { + logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"), + strerror (errno)); + CLOSE_INVALIDATE (sock); + return HERR; + } + message = NULL; + if (!*head) + { + xfree (head); + goto failed_tunnel; + } + DEBUGP (("proxy responded with: [%s]\n", head)); + + resp = resp_new (head); + statcode = resp_status (resp, &message); + resp_free (resp); + xfree (head); + if (statcode != 200) + { + failed_tunnel: + logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), + message ? escnonprint (message) : "?"); + xfree_null (message); + return CONSSLERR; + } + xfree_null (message); + + /* SOCK is now *really* connected to u->host, so update CONN + to reflect this. That way register_persistent will + register SOCK as being connected to u->host:u->port. */ + conn = u; + } + + if (conn->scheme == SCHEME_HTTPS) + { + if (!ssl_connect (sock)) + { + fd_close (sock); + return CONSSLERR; + } + using_ssl = 1; + } +#endif /* HAVE_SSL */ + } /* Send the request to server. 
*/ - write_error = fd_write (sock, request, strlen (request), -1); + write_error = request_send (req, sock); if (write_error >= 0) { if (opt.post_data) { DEBUGP (("[POST data: %s]\n", opt.post_data)); - write_error = fd_write (sock, opt.post_data, post_data_size, -1); + write_error = fd_write (sock, opt.post_data, post_data_size, -1.0); } else if (opt.post_file_name && post_data_size != 0) write_error = post_file (sock, opt.post_file_name, post_data_size); } - DEBUGP (("---request end---\n")); if (write_error < 0) { logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); CLOSE_INVALIDATE (sock); + request_free (req); return WRITEFAILED; } logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "), proxy ? "Proxy" : "HTTP"); - contlen = contrange = -1; - type = NULL; - statcode = -1; + contlen = -1; + contrange = 0; *dt &= ~RETROKF; - DEBUGP (("\n---response begin---\n")); - - head = fd_read_head (sock); + head = read_http_response_head (sock); if (!head) { - logputs (LOG_VERBOSE, "\n"); if (errno == 0) { logputs (LOG_NOTQUIET, _("No data received.\n")); CLOSE_INVALIDATE (sock); + request_free (req); return HEOF; } else @@ -1024,206 +1558,189 @@ Accept: %s\r\n\ logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"), strerror (errno)); CLOSE_INVALIDATE (sock); + request_free (req); return HERR; } } + DEBUGP (("\n---response begin---\n%s---response end---\n", head)); - /* Loop through the headers and process them. */ + resp = resp_new (head); - hcount = 0; - for (hdr_beg = head; - (hdr_end = next_header (hdr_beg)); - hdr_beg = hdr_end) + /* Check for status line. */ + message = NULL; + statcode = resp_status (resp, &message); + if (!opt.server_response) + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, + message ? 
escnonprint (message) : ""); + else { - char *hdr = strdupdelim (hdr_beg, hdr_end); - { - char *tmp = hdr + strlen (hdr); - if (tmp > hdr && tmp[-1] == '\n') - *--tmp = '\0'; - if (tmp > hdr && tmp[-1] == '\r') - *--tmp = '\0'; - } - ++hcount; - - /* Check for status line. */ - if (hcount == 1) - { - const char *error; - /* Parse the first line of server response. */ - statcode = parse_http_status_line (hdr, &error); - hs->statcode = statcode; - /* Store the descriptive response. */ - if (statcode == -1) /* malformed response */ - { - /* A common reason for "malformed response" error is the - case when no data was actually received. Handle this - special case. */ - if (!*hdr) - hs->error = xstrdup (_("No data received")); - else - hs->error = xstrdup (_("Malformed status line")); - xfree (hdr); - break; - } - else if (!*error) - hs->error = xstrdup (_("(no description)")); - else - hs->error = xstrdup (error); - - if ((statcode != -1) -#ifdef ENABLE_DEBUG - && !opt.debug -#endif - ) - { - if (opt.server_response) - logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr); - else - logprintf (LOG_VERBOSE, "%2d %s", statcode, error); - } + logprintf (LOG_VERBOSE, "\n"); + print_server_response (resp, " "); + } - goto done_header; - } + if (!opt.ignore_length + && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) + { + wgint parsed; + errno = 0; + parsed = str_to_wgint (hdrval, NULL, 10); + if (parsed == WGINT_MAX && errno == ERANGE) + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; + else + contlen = parsed; + } - /* Exit on empty header. */ - if (!*hdr) + /* Check for keep-alive related responses. 
*/ + if (!inhibit_keep_alive && contlen != -1) + { + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) + keep_alive = 1; + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { - xfree (hdr); - break; + if (0 == strcasecmp (hdrval, "Keep-Alive")) + keep_alive = 1; } + } + if (keep_alive) + /* The server has promised that it will not close the connection + when we're done. This means that we can register it. */ + register_persistent (conn->host, conn->port, sock, using_ssl); - /* Print the header if requested. */ - if (opt.server_response && hcount != 1) - logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr); - - /* Try getting content-length. */ - if (contlen == -1 && !opt.ignore_length) - if (header_process (hdr, "Content-Length", header_extract_number, - &contlen)) - goto done_header; - /* Try getting content-type. */ - if (!type) - if (header_process (hdr, "Content-Type", http_process_type, &type)) - goto done_header; - /* Try getting location. */ - if (!hs->newloc) - if (header_process (hdr, "Location", header_strdup, &hs->newloc)) - goto done_header; - /* Try getting last-modified. */ - if (!hs->remote_time) - if (header_process (hdr, "Last-Modified", header_strdup, - &hs->remote_time)) - goto done_header; - /* Try getting cookies. */ - if (opt.cookies) - if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u)) - goto done_header; - /* Try getting www-authentication. */ - if (!authenticate_h) - if (header_process (hdr, "WWW-Authenticate", header_strdup, - &authenticate_h)) - goto done_header; - /* Check for accept-ranges header. If it contains the word - `none', disable the ranges. */ - if (*dt & ACCEPTRANGES) + if (statcode == HTTP_STATUS_UNAUTHORIZED) + { + /* Authorization is required. 
*/ + if (keep_alive) { - int nonep; - if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep)) - { - if (nonep) - *dt &= ~ACCEPTRANGES; - goto done_header; - } + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); } - /* Try getting content-range. */ - if (contrange == -1) + pconn.authorized = 0; + if (auth_finished || !(user && passwd)) { - struct http_process_range_closure closure; - if (header_process (hdr, "Content-Range", http_process_range, &closure)) - { - contrange = closure.first_byte_pos; - goto done_header; - } + /* If we have tried it already, then there is no point + retrying it. */ + logputs (LOG_NOTQUIET, _("Authorization failed.\n")); } - /* Check for keep-alive related responses. */ - if (!inhibit_keep_alive) + else { - /* Check for the `Keep-Alive' header. */ - if (!http_keep_alive_1) + /* IIS sometimes sends two instances of WWW-Authenticate + header, one with the keyword "negotiate", and other with + useful data. Loop over all occurrences of this header + and use the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + www_authenticate = strdupdelim (wabeg, waend); + break; + } + /* If the authentication header is missing or unrecognized, or + if the authentication scheme is "Basic" (which we send by + default), there's no sense in retrying. */ + if (!www_authenticate + || BEGINS_WITH (www_authenticate, "Basic")) { - if (header_process (hdr, "Keep-Alive", header_exists, - &http_keep_alive_1)) - goto done_header; + xfree_null (www_authenticate); + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); } - /* Check for `Connection: Keep-Alive'.
*/ - if (!http_keep_alive_2) + else { - if (header_process (hdr, "Connection", http_process_connection, - &http_keep_alive_2)) - goto done_header; + char *pth; + pth = url_full_path (u); + request_set_header (req, "Authorization", + create_authorization_line (www_authenticate, + user, passwd, + request_method (req), + pth, + &auth_finished), + rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = 1; + xfree (pth); + xfree (www_authenticate); + goto retry_with_auth; } } - done_header: - xfree (hdr); + request_free (req); + return AUTHFAILED; } - DEBUGP (("---response end---\n")); - - logputs (LOG_VERBOSE, "\n"); - - if (contlen != -1 - && (http_keep_alive_1 || http_keep_alive_2)) + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ { - assert (inhibit_keep_alive == 0); - keep_alive = 1; + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = 1; } - if (keep_alive) - /* The server has promised that it will not close the connection - when we're done. This means that we can register it. */ - register_persistent (conn->host, conn->port, sock, using_ssl); + request_free (req); + + hs->statcode = statcode; + if (statcode == -1) + hs->error = xstrdup (_("Malformed status line")); + else if (!*message) + hs->error = xstrdup (_("(no description)")); + else + hs->error = xstrdup (message); + xfree (message); - if ((statcode == HTTP_STATUS_UNAUTHORIZED) - && authenticate_h) + type = resp_header_strdup (resp, "Content-Type"); + if (type) { - /* Authorization is required. */ - xfree_null (type); - type = NULL; - free_hstat (hs); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ - if (auth_tried_already) - { - /* If we have tried it already, then there is not point - retrying it. 
*/ - failed: - logputs (LOG_NOTQUIET, _("Authorization failed.\n")); - xfree (authenticate_h); - return AUTHFAILED; - } - else if (!known_authentication_scheme_p (authenticate_h)) + char *tmp = strchr (type, ';'); + if (tmp) { - xfree (authenticate_h); - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - return AUTHFAILED; - } - else if (BEGINS_WITH (authenticate_h, "Basic")) - { - /* The authentication scheme is basic, the one we try by - default, and it failed. There's no sense in trying - again. */ - goto failed; - } - else - { - auth_tried_already = 1; - goto again; + while (tmp > type && ISSPACE (tmp[-1])) + --tmp; + *tmp = '\0'; } } - /* We do not need this anymore. */ - if (authenticate_h) + hs->newloc = resp_header_strdup (resp, "Location"); + hs->remote_time = resp_header_strdup (resp, "Last-Modified"); + + /* Handle (possibly multiple instances of) the Set-Cookie header. */ + { + char *pth = NULL; + int scpos; + const char *scbeg, *scend; + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + for (scpos = 0; + (scpos = resp_header_locate (resp, "Set-Cookie", scpos, + &scbeg, &scend)) != -1; + ++scpos) + { + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + if (pth == NULL) + { + /* u->path doesn't begin with /, which cookies.c expects. */ + pth = (char *) alloca (1 + strlen (u->path) + 1); + pth[0] = '/'; + strcpy (pth + 1, u->path); + } + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth, + set_cookie); + } + } + + if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) { - xfree (authenticate_h); - authenticate_h = NULL; + wgint first_byte_pos, last_byte_pos, entity_length; + if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, + &entity_length)) + contrange = first_byte_pos; } + resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. 
*/ if (H_20X (statcode)) @@ -1243,10 +1760,15 @@ Accept: %s\r\n\ { logprintf (LOG_VERBOSE, _("Location: %s%s\n"), - hs->newloc ? hs->newloc : _("unspecified"), + hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } xfree_null (type); return NEWLOCATION; } @@ -1283,87 +1805,37 @@ Accept: %s\r\n\ } } - if (contrange == -1) + if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE) { - /* We did not get a content-range header. This means that the - server did not honor our `Range' request. Normally, this - means we should reset hs->restval and continue normally. */ - - /* However, if `-c' is used, we need to be a bit more careful: - - 1. If `-c' is specified and the file already existed when - Wget was started, it would be a bad idea for us to start - downloading it from scratch, effectively truncating it. I - believe this cannot happen unless `-c' was specified. - - 2. If `-c' is used on a file that is already fully - downloaded, we're requesting bytes after the end of file, - which can result in server not honoring `Range'. If this is - the case, `Content-Length' will be equal to the length of the - file. */ - if (opt.always_rest) - { - /* Check for condition #2. */ - if (hs->restval > 0 /* restart was requested. */ - && contlen != -1 /* we got content-length. */ - && hs->restval >= contlen /* file fully downloaded - or has shrunk. */ - ) - { - logputs (LOG_VERBOSE, _("\ + /* If `-c' is in use and the file has been fully downloaded (or + the remote file has shrunk), Wget effectively requests bytes + after the end of file and the server responds with 416. */ + logputs (LOG_VERBOSE, _("\ \n The file is already fully retrieved; nothing to do.\n\n")); - /* In case the caller inspects.
*/ - hs->len = contlen; - hs->res = 0; - /* Mark as successfully retrieved. */ - *dt |= RETROKF; - xfree_null (type); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ - return RETRUNNEEDED; - } - - /* Check for condition #1. */ - if (hs->no_truncate) - { - logprintf (LOG_NOTQUIET, - _("\ -\n\ -Continued download failed on this file, which conflicts with `-c'.\n\ -Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); - xfree_null (type); - CLOSE_INVALIDATE (sock); - return CONTNOTSUPPORTED; - } - - /* Fallthrough */ - } - - hs->restval = 0; + /* In case the caller inspects. */ + hs->len = contlen; + hs->res = 0; + /* Mark as successfully retrieved. */ + *dt |= RETROKF; + xfree_null (type); + CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there + might be more bytes in the body. */ + return RETRUNNEEDED; } - else if (contrange != hs->restval || - (H_PARTIAL (statcode) && contrange == -1)) + if ((contrange != 0 && contrange != hs->restval) + || (H_PARTIAL (statcode) && !contrange)) { - /* This means the whole request was somehow misunderstood by the - server. Bail out. */ + /* The Range request was somehow misunderstood by the server. + Bail out. */ xfree_null (type); CLOSE_INVALIDATE (sock); return RANGEERR; } - - if (hs->restval) - { - if (contlen != -1) - contlen += contrange; - else - contrange = -1; /* If conent-length was not sent, - content-range will be ignored. 
*/ - } - hs->contlen = contlen; + hs->contlen = contlen + contrange; if (opt.verbose) { - if ((*dt & RETROKF) && !opt.server_response) + if (*dt & RETROKF) { /* No need to print this output if the body won't be downloaded at all, or if the original server response is @@ -1371,16 +1843,26 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { - logputs (LOG_VERBOSE, legible (contlen)); - if (contrange != -1) - logprintf (LOG_VERBOSE, _(" (%s to go)"), - legible (contlen - contrange)); + logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); + if (contlen + contrange >= 1024) + logprintf (LOG_VERBOSE, " (%s)", + human_readable (contlen + contrange)); + if (contrange) + { + if (contlen >= 1024) + logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), + with_thousand_seps (contlen), + human_readable (contlen)); + else + logprintf (LOG_VERBOSE, _(", %s remaining"), + with_thousand_seps (contlen)); + } } else logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", type); + logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } @@ -1392,71 +1874,72 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); if (!(*dt & RETROKF) || (*dt & HEAD_ONLY)) { /* In case the caller cares to look... */ - hs->len = 0L; + hs->len = 0; hs->res = 0; xfree_null (type); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the + servers not to send body in response to a HEAD request. If + you encounter such a server (more likely a broken CGI), use + `--no-http-keep-alive'. */ + CLOSE_FINISH (sock); return RETRFINISHED; } /* Open the local file. 
*/ - if (!opt.dfp) + if (!output_stream) { mkalldirs (*hs->local_file); if (opt.backups) rotate_backups (*hs->local_file); - fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb"); + if (hs->restval) + fp = fopen (*hs->local_file, "ab"); + else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct + || opt.output_document) + fp = fopen (*hs->local_file, "wb"); + else + { + fp = fopen_excl (*hs->local_file, 1); + if (!fp && errno == EEXIST) + { + /* We cannot just invent a new name and use it (which is + what functions like unique_create typically do) + because we told the user we'd use this name. + Instead, return and retry the download. */ + logprintf (LOG_NOTQUIET, + _("%s has sprung into existence.\n"), + *hs->local_file); + CLOSE_INVALIDATE (sock); + return FOPEN_EXCL_ERR; + } + } if (!fp) { logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno)); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + CLOSE_INVALIDATE (sock); return FOPENERR; } } - else /* opt.dfp */ - { - extern int global_download_count; - fp = opt.dfp; - /* To ensure that repeated "from scratch" downloads work for -O - files, we rewind the file pointer, unless restval is - non-zero. (This works only when -O is used on regular files, - but it's still a valuable feature.) - - However, this loses when more than one URL is specified on - the command line the second rewinds eradicates the contents - of the first download. Thus we disable the above trick for - all the downloads except the very first one. - - #### A possible solution to this would be to remember the - file position in the output document and to seek to that - position, instead of rewinding. - - We don't truncate stdout, since that breaks - "wget -O - [...] >> foo". - */ - if (!hs->restval && global_download_count == 0 && opt.dfp != stdout) - { - /* This will silently fail for streams that don't correspond - to regular files, but that's OK. 
*/ - rewind (fp); - /* ftruncate is needed because opt.dfp is opened in append - mode if opt.always_rest is set. */ - ftruncate (fileno (fp), 0); - clearerr (fp); - } - } + else + fp = output_stream; - /* #### This confuses the code that checks for file size. There - should be some overhead information. */ + /* #### This confuses the timestamping code that checks for file + size. Maybe we should save some additional information? */ if (opt.save_headers) fwrite (head, 1, strlen (head), fp); - /* Get the contents of the document. */ - hs->res = fd_read_body (sock, fp, &hs->len, hs->restval, - (contlen != -1 ? contlen : 0), - keep_alive, &hs->dltime); + /* Download the request body. */ + flags = 0; + if (keep_alive) + flags |= rb_read_exactly; + if (hs->restval > 0 && contrange == 0) + /* If the server ignored our range request, instruct fd_read_body + to skip the first RESTVAL bytes of body. */ + flags |= rb_skip_startpos; + hs->len = hs->restval; + hs->rd_size = 0; + hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, + hs->restval, &hs->rd_size, &hs->len, &hs->dltime, + flags); if (hs->res >= 0) CLOSE_FINISH (sock); @@ -1468,7 +1951,7 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); error here. Checking the result of fwrite() is not enough -- errors could go unnoticed! 
*/ int flush_res; - if (!opt.dfp) + if (!output_stream) flush_res = fclose (fp); else flush_res = fflush (fp); @@ -1493,10 +1976,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, char *tms, *locf, *tmrate; uerr_t err; time_t tml = -1, tmr = -1; /* local and remote time-stamps */ - long local_size = 0; /* the size of the local file */ + wgint local_size = 0; /* the size of the local file */ size_t filename_len; struct http_stat hstat; /* HTTP status */ - struct stat st; + struct_stat st; char *dummy = NULL; /* This used to be done in main(), but it's a better idea to do it @@ -1521,10 +2004,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, if (strchr (u->url, '*')) logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n")); + xzero (hstat); + /* Determine the local filename. */ if (local_file && *local_file) hstat.local_file = local_file; - else if (local_file) + else if (local_file && !opt.output_document) { *local_file = url_file_name (u); hstat.local_file = local_file; @@ -1533,6 +2018,9 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, { dummy = url_file_name (u); hstat.local_file = &dummy; + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); } if (!opt.output_document) @@ -1584,7 +2072,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); point I profiled Wget, and found that a measurable and non-negligible amount of time was lost calling sprintf() in url.c. Replacing sprintf with inline calls to - strcpy() and long_to_string() made a difference. + strcpy() and number_to_string() made a difference. 
--hniksic */ memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len); memcpy (filename_plus_orig_suffix + filename_len, @@ -1621,7 +2109,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); } /* Reset the counter. */ count = 0; - *dt = 0 | ACCEPTRANGES; + *dt = 0; /* THE loop */ do { @@ -1634,14 +2122,14 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); if (opt.verbose) { char *hurl = url_string (u, 1); - char tmp[15]; + char tmp[256]; strcpy (tmp, " "); if (count > 1) sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n", tms, hurl, tmp, locf); #ifdef WINDOWS - ws_changetitle (hurl, 1); + ws_changetitle (hurl); #endif xfree (hurl); } @@ -1653,21 +2141,15 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; - /* Assume no restarting. */ - hstat.restval = 0L; + /* Decide whether or not to restart. */ - if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest) - /* #### this calls access() and then stat(); could be optimized. */ - && file_exists_p (locf)) - if (stat (locf, &st) == 0 && S_ISREG (st.st_mode)) - hstat.restval = st.st_size; - - /* In `-c' is used and the file is existing and non-empty, - refuse to truncate it if the server doesn't support continued - downloads. */ - hstat.no_truncate = 0; - if (opt.always_rest && hstat.restval) - hstat.no_truncate = 1; + hstat.restval = 0; + if (count > 1) + hstat.restval = hstat.len; /* continue where we left off */ + else if (opt.always_rest + && stat (locf, &st) == 0 + && S_ISREG (st.st_mode)) + hstat.restval = st.st_size; /* Decide whether to send the no-cache directive. We send it in two cases: @@ -1691,8 +2173,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *hstat.local_file to tack on ".html". */ if (!opt.output_document) locf = *hstat.local_file; - else - locf = opt.output_document; /* Time? 
*/ tms = time_str (NULL); @@ -1703,21 +2183,42 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: case CONERROR: case READERR: case WRITEFAILED: - case RANGEERR: + case RANGEERR: case FOPEN_EXCL_ERR: /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ free_hstat (&hstat); printwhat (count, opt.ntry); + if (err == FOPEN_EXCL_ERR) + { + /* Re-determine the file name. */ + if (local_file && *local_file) + { + xfree (*local_file); + *local_file = url_file_name (u); + hstat.local_file = local_file; + } + else + { + xfree (dummy); + dummy = url_file_name (u); + hstat.local_file = &dummy; + } + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); + if (!opt.output_document) + locf = *hstat.local_file; + else + locf = opt.output_document; + } continue; - break; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLERRCTXCREATE: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; - break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1726,7 +2227,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1734,7 +2234,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case NEWLOCATION: /* Return the new location to the caller. 
*/ if (!hstat.newloc) @@ -1749,13 +2248,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return NEWLOCATION; - break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); xfree_null (dummy); return RETROK; - break; case RETRFINISHED: /* Deal with you later. */ break; @@ -1773,7 +2270,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, hstat.error); + tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); xfree_null (dummy); @@ -1825,7 +2322,8 @@ Server file no newer than local file `%s' -- not retrieving.\n\n"), } else if (tml >= tmr) logprintf (LOG_VERBOSE, _("\ -The sizes do not match (local %ld) -- retrieving.\n"), local_size); +The sizes do not match (local %s) -- retrieving.\n"), + number_to_static_string (local_size)); else logputs (LOG_VERBOSE, _("Remote file is newer, retrieving.\n")); @@ -1845,7 +2343,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); const char *fl = NULL; if (opt.output_document) { - if (opt.od_known_regular) + if (output_stream_regular) fl = opt.output_document; } else @@ -1857,23 +2355,29 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (opt.spider) { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); + logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, + escnonprint (hstat.error)); xfree_null (dummy); return RETROK; } - tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0); + tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0); if (hstat.len == hstat.contlen) { if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld]\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + 
number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1896,11 +2400,13 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld]\n\n"), - tms, tmrate, locf, hstat.len); + _("%s (%s) - `%s' saved [%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, locf, count); + "%s URL:%s [%s] -> \"%s\" [%d]\n", + tms, u->url, number_to_static_string (hstat.len), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1919,8 +2425,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); connection too soon */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld. "), - tms, tmrate, hstat.len); + _("%s (%s) - Connection closed at byte %s. 
"), + tms, tmrate, number_to_static_string (hstat.len)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -1928,11 +2434,16 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else if (!opt.kill_longer) /* meaning we got more than expected */ { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld])\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s])\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1949,8 +2460,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* the same, but not accepted */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld/%ld. "), - tms, tmrate, hstat.len, hstat.contlen); + _("%s (%s) - Connection closed at byte %s/%s. "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -1961,8 +2474,9 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (hstat.contlen == -1) { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld (%s)."), - tms, tmrate, hstat.len, strerror (errno)); + _("%s (%s) - Read error at byte %s (%s)."), + tms, tmrate, number_to_static_string (hstat.len), + strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -1970,8 +2484,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* hstat.res == -1 and contlen is given */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld/%ld (%s). 
"), - tms, tmrate, hstat.len, hstat.contlen, + _("%s (%s) - Read error at byte %s/%s (%s). "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); @@ -1979,7 +2495,6 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); } } /* not reached */ - break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC; @@ -2147,77 +2662,45 @@ http_atotm (const char *time_string) return -1; } -/* Authorization support: We support two authorization schemes: +/* Authorization support: We support three authorization schemes: * `Basic' scheme, consisting of base64-ing USER:PASSWORD string; * `Digest' scheme, added by Junio Hamano , consisting of answering to the server's challenge with the proper - MD5 digests. */ + MD5 digests. -/* How many bytes it will take to store LEN bytes in base64. */ -#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3)) - -/* Encode the string S of length LENGTH to base64 format and place it - to STORE. STORE will be 0-terminated, and must point to a writable - buffer of at least 1+BASE64_LENGTH(length) bytes. */ -static void -base64_encode (const char *s, char *store, int length) -{ - /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' - }; - int i; - unsigned char *p = (unsigned char *)store; - - /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) - { - *p++ = tbl[s[0] >> 2]; - *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; - *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; - *p++ = tbl[s[2] & 0x3f]; - s += 3; - } - /* Pad the result if necessary... 
*/ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - /* ...and zero-terminate it. */ - *p = '\0'; -} + * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel + Stenberg for libcurl. Like digest, NTLM is based on a + challenge-response mechanism, but unlike digest, it is non-standard + (authenticates TCP connections rather than requests), undocumented + and Microsoft-specific. */ /* Create the authentication header contents for the `Basic' scheme. This is done by encoding the string `USER:PASS' in base64 and prepending `HEADER: Basic ' to it. */ + static char * -basic_authentication_encode (const char *user, const char *passwd, - const char *header) +basic_authentication_encode (const char *user, const char *passwd) { - char *t1, *t2, *res; + char *t1, *t2; int len1 = strlen (user) + 1 + strlen (passwd); - int len2 = BASE64_LENGTH (len1); t1 = (char *)alloca (len1 + 1); sprintf (t1, "%s:%s", user, passwd); - t2 = (char *)alloca (1 + len2); - base64_encode (t1, t2, len1); - res = (char *)xmalloc (len2 + 11 + strlen (header)); - sprintf (res, "%s: Basic %s\r\n", header, t2); - return res; + t2 = (char *)alloca (BASE64_LENGTH (len1) + 1); + base64_encode (t1, len1, t2); + + return concat_strings ("Basic ", t2, (char *) 0); } -#ifdef USE_DIGEST +#define SKIP_WS(x) do { \ + while (ISSPACE (*(x))) \ + ++(x); \ +} while (0) + +#ifdef ENABLE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. 
If the field is the one specified by ATTR_NAME ("realm", "opaque", and "nonce" are used by the current @@ -2227,21 +2710,20 @@ basic_authentication_encode (const char *user, const char *passwd, static int extract_header_attr (const char *au, const char *attr_name, char **ret) { - const char *cp, *ep; - - ep = cp = au; + const char *ep; + const char *cp = au; if (strncmp (cp, attr_name, strlen (attr_name)) == 0) { cp += strlen (attr_name); if (!*cp) return -1; - cp += skip_lws (cp); + SKIP_WS (cp); if (*cp != '=') return -1; if (!*++cp) return -1; - cp += skip_lws (cp); + SKIP_WS (cp); if (*cp != '\"') return -1; if (!*++cp) @@ -2300,7 +2782,7 @@ digest_authentication_encode (const char *au, const char *user, { int i; - au += skip_lws (au); + SKIP_WS (au); for (i = 0; i < countof (options); i++) { int skip = extract_header_attr (au, options[i].name, @@ -2324,7 +2806,7 @@ digest_authentication_encode (const char *au, const char *user, au++; if (*au && *++au) { - au += skip_lws (au); + SKIP_WS (au); if (*au == '\"') { au++; @@ -2391,7 +2873,7 @@ digest_authentication_encode (const char *au, const char *user, + 2 * MD5_HASHLEN /*strlen (response_digest)*/ + (opaque ? strlen (opaque) : 0) + 128); - sprintf (res, "Authorization: Digest \ + sprintf (res, "Digest \ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", user, realm, nonce, path, response_digest); if (opaque) @@ -2401,27 +2883,38 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", strcat (p, opaque); strcat (p, "\""); } - strcat (res, "\r\n"); } return res; } -#endif /* USE_DIGEST */ +#endif /* ENABLE_DIGEST */ +/* Computing the size of a string literal must take into account that + value returned by sizeof includes the terminating \0. 
*/ +#define STRSIZE(literal) (sizeof (literal) - 1) -#define BEGINS_WITH(line, string_constant) \ - (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ - || !line[sizeof (string_constant) - 1])) +/* Whether chars in [b, e) begin with the literal string provided as + first argument and are followed by whitespace or terminating \0. + The comparison is case-insensitive. */ +#define STARTS(literal, b, e) \ + ((e) - (b) >= STRSIZE (literal) \ + && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ + && ((e) - (b) == STRSIZE (literal) \ + || ISSPACE (b[STRSIZE (literal)]))) static int -known_authentication_scheme_p (const char *au) +known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) { - return BEGINS_WITH (au, "Basic") - || BEGINS_WITH (au, "Digest") - || BEGINS_WITH (au, "NTLM"); + return STARTS ("Basic", hdrbeg, hdrend) +#ifdef ENABLE_DIGEST + || STARTS ("Digest", hdrbeg, hdrend) +#endif +#ifdef ENABLE_NTLM + || STARTS ("NTLM", hdrbeg, hdrend) +#endif + ; } -#undef BEGINS_WITH +#undef STARTS /* Create the HTTP authorization request header. When the `WWW-Authenticate' response header is seen, according to the @@ -2431,22 +2924,47 @@ known_authentication_scheme_p (const char *au) static char * create_authorization_line (const char *au, const char *user, const char *passwd, const char *method, - const char *path) + const char *path, int *finished) { - char *wwwauth = NULL; - - if (!strncasecmp (au, "Basic", 5)) - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); - if (!strncasecmp (au, "NTLM", 4)) - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); -#ifdef USE_DIGEST - else if (!strncasecmp (au, "Digest", 6)) - wwwauth = digest_authentication_encode (au, user, passwd, method, path); -#endif /* USE_DIGEST */ - return wwwauth; + /* We are called only with known schemes, so we can dispatch on the + first letter. 
*/ + switch (TOUPPER (*au)) + { + case 'B': /* Basic */ + *finished = 1; + return basic_authentication_encode (user, passwd); +#ifdef ENABLE_DIGEST + case 'D': /* Digest */ + *finished = 1; + return digest_authentication_encode (au, user, passwd, method, path); +#endif +#ifdef ENABLE_NTLM + case 'N': /* NTLM */ + if (!ntlm_input (&pconn.ntlm, au)) + { + *finished = 1; + return NULL; + } + return ntlm_output (&pconn.ntlm, user, passwd, finished); +#endif + default: + /* We shouldn't get here -- this function should be only called + with values approved by known_authentication_scheme_p. */ + abort (); + } } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); }