X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=97a773a1e2d720abe45ac5041c3245e0477d49a7;hb=0967c21094580317353f0742c4836c5bbea34059;hp=8bdfbbc1eca2345bec81c0079fc3e79cf0ebd0af;hpb=dfe1f43dc0bbdd63efa1dd4649152cc9f6e3999c;p=wget diff --git a/src/http.c b/src/http.c index 8bdfbbc1..97a773a1 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,5 @@ /* HTTP support. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002 - Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -65,10 +64,13 @@ extern int errno; #include "connect.h" #include "netrc.h" #ifdef HAVE_SSL -# include "gen_sslfunc.h" -#endif /* HAVE_SSL */ +# include "ssl.h" +#endif +#ifdef ENABLE_NTLM +# include "http-ntlm.h" +#endif #include "cookies.h" -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST # include "gen-md5.h" #endif #include "convert.h" @@ -76,23 +78,26 @@ extern int errno; extern char *version_string; extern LARGE_INT total_downloaded_bytes; +extern FILE *output_stream; +extern int output_stream_regular; + #ifndef MIN # define MIN(x, y) ((x) > (y) ? (y) : (x)) #endif static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" -#define HTTP_ACCEPT "*/*" /* Some status code validation macros: */ #define H_20X(x) (((x) >= 200) && ((x) < 300)) #define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS) -#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ - || (x) == HTTP_STATUS_MOVED_TEMPORARILY \ +#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ + || (x) == HTTP_STATUS_MOVED_TEMPORARILY \ + || (x) == HTTP_STATUS_SEE_OTHER \ || (x) == HTTP_STATUS_TEMPORARY_REDIRECT) /* HTTP/1.0 status codes from RFC1945, provided for reference. 
*/ @@ -107,23 +112,331 @@ struct cookie_jar *wget_cookie_jar; #define HTTP_STATUS_MULTIPLE_CHOICES 300 #define HTTP_STATUS_MOVED_PERMANENTLY 301 #define HTTP_STATUS_MOVED_TEMPORARILY 302 +#define HTTP_STATUS_SEE_OTHER 303 /* from HTTP/1.1 */ #define HTTP_STATUS_NOT_MODIFIED 304 -#define HTTP_STATUS_TEMPORARY_REDIRECT 307 +#define HTTP_STATUS_TEMPORARY_REDIRECT 307 /* from HTTP/1.1 */ /* Client error 4xx. */ #define HTTP_STATUS_BAD_REQUEST 400 #define HTTP_STATUS_UNAUTHORIZED 401 #define HTTP_STATUS_FORBIDDEN 403 #define HTTP_STATUS_NOT_FOUND 404 +#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416 /* Server errors 5xx. */ #define HTTP_STATUS_INTERNAL 500 #define HTTP_STATUS_NOT_IMPLEMENTED 501 #define HTTP_STATUS_BAD_GATEWAY 502 #define HTTP_STATUS_UNAVAILABLE 503 + +enum rp { + rel_none, rel_name, rel_value, rel_both +}; + +struct request { + const char *method; + char *arg; + + struct request_header { + char *name, *value; + enum rp release_policy; + } *headers; + int hcount, hcapacity; +}; + +/* Create a new, empty request. At least request_set_method must be + called before the request can be used. */ + +static struct request * +request_new () +{ + struct request *req = xnew0 (struct request); + req->hcapacity = 8; + req->headers = xnew_array (struct request_header, req->hcapacity); + return req; +} + +/* Set the request's method and its arguments. METH should be a + literal string (or it should outlive the request) because it will + not be freed. ARG will be freed by request_free. */ + +static void +request_set_method (struct request *req, const char *meth, char *arg) +{ + req->method = meth; + req->arg = arg; +} + +/* Return the method string passed with the last call to + request_set_method. */ static const char * -head_terminator (const char *hunk, int oldlen, int peeklen) +request_method (const struct request *req) +{ + return req->method; +} + +/* Free one header according to the release policy specified with + request_set_header. 
*/ + +static void +release_header (struct request_header *hdr) +{ + switch (hdr->release_policy) + { + case rel_none: + break; + case rel_name: + xfree (hdr->name); + break; + case rel_value: + xfree (hdr->value); + break; + case rel_both: + xfree (hdr->name); + xfree (hdr->value); + break; + } +} + +/* Set the request named NAME to VALUE. Specifically, this means that + a "NAME: VALUE\r\n" header line will be used in the request. If a + header with the same name previously existed in the request, its + value will be replaced by this one. A NULL value means do nothing. + + RELEASE_POLICY determines whether NAME and VALUE should be released + (freed) with request_free. Allowed values are: + + - rel_none - don't free NAME or VALUE + - rel_name - free NAME when done + - rel_value - free VALUE when done + - rel_both - free both NAME and VALUE when done + + Setting release policy is useful when arguments come from different + sources. For example: + + // Don't free literal strings! + request_set_header (req, "Pragma", "no-cache", rel_none); + + // Don't free a global variable, we'll need it later. + request_set_header (req, "Referer", opt.referer, rel_none); + + // Value freshly allocated, free it when done. + request_set_header (req, "Range", + aprintf ("bytes=%s-", number_to_static_string (hs->restval)), + rel_value); + */ + +static void +request_set_header (struct request *req, char *name, char *value, + enum rp release_policy) +{ + struct request_header *hdr; + int i; + + if (!value) + { + /* A NULL value is a no-op; if freeing the name is requested, + free it now to avoid leaks. */ + if (release_policy == rel_name || release_policy == rel_both) + xfree (name); + return; + } + + for (i = 0; i < req->hcount; i++) + { + hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + /* Replace existing header. */ + release_header (hdr); + hdr->name = name; + hdr->value = value; + hdr->release_policy = release_policy; + return; + } + } + + /* Install new header. 
*/ + + if (req->hcount >= req->hcapacity) + { + req->hcapacity <<= 1; + req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr)); + } + hdr = &req->headers[req->hcount++]; + hdr->name = name; + hdr->value = value; + hdr->release_policy = release_policy; +} + +/* Like request_set_header, but sets the whole header line, as + provided by the user using the `--header' option. For example, + request_set_user_header (req, "Foo: bar") works just like + request_set_header (req, "Foo", "bar"). */ + +static void +request_set_user_header (struct request *req, const char *header) +{ + char *name; + const char *p = strchr (header, ':'); + if (!p) + return; + BOUNDED_TO_ALLOCA (header, p, name); + ++p; + while (ISSPACE (*p)) + ++p; + request_set_header (req, xstrdup (name), (char *) p, rel_name); +} + +/* Remove the header with specified name from REQ. Returns 1 if the + header was actually removed, 0 otherwise. */ + +static int +request_remove_header (struct request *req, char *name) +{ + int i; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + release_header (hdr); + /* Move the remaining headers by one. */ + if (i < req->hcount - 1) + memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr)); + --req->hcount; + return 1; + } + } + return 0; +} + +#define APPEND(p, str) do { \ + int A_len = strlen (str); \ + memcpy (p, str, A_len); \ + p += A_len; \ +} while (0) + +/* Construct the request and write it to FD using fd_write. */ + +static int +request_send (const struct request *req, int fd) +{ + char *request_string, *p; + int i, size, write_error; + + /* Count the request size. 
*/ + size = 0; + + /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */ + size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2; + + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + /* NAME ": " VALUE "\r\n" */ + size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2; + } + + /* "\r\n\0" */ + size += 3; + + p = request_string = alloca_array (char, size); + + /* Generate the request. */ + + APPEND (p, req->method); *p++ = ' '; + APPEND (p, req->arg); *p++ = ' '; + memcpy (p, "HTTP/1.0\r\n", 10); p += 10; + + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + APPEND (p, hdr->name); + *p++ = ':', *p++ = ' '; + APPEND (p, hdr->value); + *p++ = '\r', *p++ = '\n'; + } + + *p++ = '\r', *p++ = '\n', *p++ = '\0'; + assert (p - request_string == size); + +#undef APPEND + + DEBUGP (("\n---request begin---\n%s---request end---\n", request_string)); + + /* Send the request to the server. */ + + write_error = fd_write (fd, request_string, size - 1, -1.0); + if (write_error < 0) + logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), + strerror (errno)); + return write_error; +} + +/* Release the resources used by REQ. */ + +static void +request_free (struct request *req) +{ + int i; + xfree_null (req->arg); + for (i = 0; i < req->hcount; i++) + release_header (&req->headers[i]); + xfree_null (req->headers); + xfree (req); +} + +/* Send the contents of FILE_NAME to SOCK. Make sure that exactly + PROMISED_SIZE bytes are sent over the wire -- if the file is + longer, read only that much; if the file is shorter, report an error. */ + +static int +post_file (int sock, const char *file_name, wgint promised_size) +{ + static char chunk[8192]; + wgint written = 0; + int write_error; + FILE *fp; + + DEBUGP (("[writing POST file %s ... 
", file_name)); + + fp = fopen (file_name, "rb"); + if (!fp) + return -1; + while (!feof (fp) && written < promised_size) + { + int towrite; + int length = fread (chunk, 1, sizeof (chunk), fp); + if (length == 0) + break; + towrite = MIN (promised_size - written, length); + write_error = fd_write (sock, chunk, towrite, -1.0); + if (write_error < 0) + { + fclose (fp); + return -1; + } + written += towrite; + } + fclose (fp); + + /* If we've written less than was promised, report a (probably + nonsensical) error rather than break the promise. */ + if (written < promised_size) + { + errno = EINVAL; + return -1; + } + + assert (written == promised_size); + DEBUGP (("done]\n")); + return 0; +} + +static const char * +response_head_terminator (const char *hunk, int oldlen, int peeklen) { const char *start, *end; @@ -152,6 +465,13 @@ head_terminator (const char *hunk, int oldlen, int peeklen) return NULL; } +/* The maximum size of a single HTTP response we care to read. This + is not meant to impose an arbitrary limit, but to protect the user + from Wget slurping up available memory upon encountering malicious + or buggy server output. Define it to 0 to remove the limit. */ + +#define HTTP_RESPONSE_MAX_SIZE 65536 + /* Read the HTTP request head from FD and return it. The error conditions are the same as with fd_read_hunk. @@ -161,9 +481,10 @@ head_terminator (const char *hunk, int oldlen, int peeklen) data can be treated as body. */ static char * -fd_read_http_head (int fd) +read_http_response_head (int fd) { - return fd_read_hunk (fd, head_terminator, 512); + return fd_read_hunk (fd, response_head_terminator, 512, + HTTP_RESPONSE_MAX_SIZE); } struct response { @@ -171,16 +492,33 @@ struct response { const char *data; /* The array of pointers that indicate where each header starts. - For example, given three headers "foo", "bar", and "baz": - foo: value\r\nbar: value\r\nbaz: value\r\n\r\n - 0 1 2 3 - I.e. 
headers[0] points to the beginning of foo, headers[1] points - to the end of foo and the beginning of bar, etc. */ + For example, given this HTTP response: + + HTTP/1.0 200 Ok + Description: some + text + Etag: x + + The headers are located like this: + + "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n" + ^ ^ ^ ^ + headers[0] headers[1] headers[2] headers[3] + + I.e. headers[0] points to the beginning of the request, + headers[1] points to the end of the first header and the + beginning of the second one, etc. */ + const char **headers; }; +/* Create a new response object from the text of the HTTP response, + available in HEAD. That text is automatically split into + constituent header lines for fast retrieval using + resp_header_*. */ + static struct response * -response_new (const char *head) +resp_new (const char *head) { const char *hdr; int count, size; @@ -196,7 +534,7 @@ response_new (const char *head) return resp; } - /* Split HEAD into header lines, so that response_header_* functions + /* Split HEAD into header lines, so that resp_header_* functions don't need to do this over and over again. */ size = count = 0; @@ -222,25 +560,41 @@ response_new (const char *head) while (*hdr == ' ' || *hdr == '\t'); } DO_REALLOC (resp->headers, size, count + 1, const char *); - resp->headers[count++] = NULL; + resp->headers[count] = NULL; return resp; } +/* Locate the header named NAME in the request data, starting with + position START. This allows the code to loop through the request + data, filtering for all requests of a given name. Returns the + found position, or -1 for failure. The code that uses this + function typically looks like this: + + for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++) + ... do something with header ... + + If you only care about one header, use resp_header_get instead of + this function. 
*/ + static int -response_header_bounds (const struct response *resp, const char *name, - const char **begptr, const char **endptr) +resp_header_locate (const struct response *resp, const char *name, int start, + const char **begptr, const char **endptr) { int i; const char **headers = resp->headers; int name_len; if (!headers || !headers[1]) - return 0; + return -1; name_len = strlen (name); + if (start > 0) + i = start; + else + i = 1; - for (i = 1; headers[i + 1]; i++) + for (; headers[i + 1]; i++) { const char *b = headers[i]; const char *e = headers[i + 1]; @@ -255,33 +609,59 @@ response_header_bounds (const struct response *resp, const char *name, --e; *begptr = b; *endptr = e; - return 1; + return i; } } - return 0; + return -1; +} + +/* Find and retrieve the header named NAME in the request data. If + found, set *BEGPTR to its starting, and *ENDPTR to its ending + position, and return 1. Otherwise return 0. + + This function is used as a building block for resp_header_copy + and resp_header_strdup. */ + +static int +resp_header_get (const struct response *resp, const char *name, + const char **begptr, const char **endptr) +{ + int pos = resp_header_locate (resp, name, 0, begptr, endptr); + return pos != -1; } +/* Copy the response header named NAME to buffer BUF, no longer than + BUFSIZE (BUFSIZE includes the terminating 0). If the header + exists, 1 is returned, otherwise 0. If there should be no limit on + the size of the header, use resp_header_strdup instead. + + If BUFSIZE is 0, no data is copied, but the boolean indication of + whether the header is present is still returned. 
*/ + static int -response_header_copy (const struct response *resp, const char *name, - char *buf, int bufsize) +resp_header_copy (const struct response *resp, const char *name, + char *buf, int bufsize) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return 0; if (bufsize) { - int len = MIN (e - b, bufsize); - strncpy (buf, b, len); + int len = MIN (e - b, bufsize - 1); + memcpy (buf, b, len); buf[len] = '\0'; } return 1; } +/* Return the value of header named NAME in RESP, allocated with + malloc. If such a header does not exist in RESP, return NULL. */ + static char * -response_header_strdup (const struct response *resp, const char *name) +resp_header_strdup (const struct response *resp, const char *name) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return NULL; return strdupdelim (b, e); } @@ -295,16 +675,16 @@ response_header_strdup (const struct response *resp, const char *name) returned in *MESSAGE. */ static int -response_status (const struct response *resp, char **message) +resp_status (const struct response *resp, char **message) { int status; const char *p, *end; if (!resp->headers) { - /* For a HTTP/0.9 response, always assume 200 response. */ + /* For a HTTP/0.9 response, assume status 200. */ if (message) - *message = xstrdup ("OK"); + *message = xstrdup (_("No headers, assuming HTTP/0.9")); return 200; } @@ -319,8 +699,8 @@ response_status (const struct response *resp, char **message) return -1; p += 4; - /* "/x.x" (optional because some Gnutella servers have been reported - as not sending the "/x.x" part. */ + /* Match the HTTP version. This is optional because Gnutella + servers have been reported to not specify HTTP version. */ if (p < end && *p == '/') { ++p; @@ -352,42 +732,46 @@ response_status (const struct response *resp, char **message) return status; } +/* Release the resources used by RESP. 
*/ + static void -response_free (struct response *resp) +resp_free (struct response *resp) { xfree_null (resp->headers); xfree (resp); } -static void -print_server_response_1 (const char *b, const char *e) -{ - char *ln; - if (b < e && e[-1] == '\n') - --e; - if (b < e && e[-1] == '\r') - --e; - BOUNDED_TO_ALLOCA (b, e, ln); - logprintf (LOG_VERBOSE, " %s\n", ln); -} +/* Print the server response, line by line, omitting the trailing CRLF + from individual header lines, and prefixed with PREFIX. */ static void -print_server_response (const struct response *resp) +print_server_response (const struct response *resp, const char *prefix) { int i; if (!resp->headers) return; for (i = 0; resp->headers[i + 1]; i++) - print_server_response_1 (resp->headers[i], resp->headers[i + 1]); + { + const char *b = resp->headers[i]; + const char *e = resp->headers[i + 1]; + /* Skip CRLF */ + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + /* This is safe even on printfs with broken handling of "%.s" + because resp->headers ends with \0. */ + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + } } /* Parse the `Content-Range' header and extract the information it contains. Returns 1 if successful, -1 otherwise. */ static int -parse_content_range (const char *hdr, long *first_byte_ptr, - long *last_byte_ptr, long *entity_length_ptr) +parse_content_range (const char *hdr, wgint *first_byte_ptr, + wgint *last_byte_ptr, wgint *entity_length_ptr) { - long num; + wgint num; /* Ancient versions of Netscape proxy server, presumably predating rfc2068, sent out `Content-Range' without the "bytes" @@ -423,52 +807,56 @@ parse_content_range (const char *hdr, long *first_byte_ptr, *entity_length_ptr = num; return 1; } - -/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly - PROMISED_SIZE bytes are sent over the wire -- if the file is - longer, read only that much; if the file is shorter, report an error. 
*/ + +/* Read the body of the request, but don't store it anywhere and don't + display a progress gauge. This is useful for reading the bodies of + administrative responses to which we will soon issue another + request. The response is not useful to the user, but reading it + allows us to continue using the same connection to the server. + + If reading fails, 0 is returned, non-zero otherwise. In debug + mode, the body is displayed for debugging purposes. */ static int -post_file (int sock, const char *file_name, long promised_size) +skip_short_body (int fd, wgint contlen) { - static char chunk[8192]; - long written = 0; - int write_error; - FILE *fp; + enum { + SKIP_SIZE = 512, /* size of the download buffer */ + SKIP_THRESHOLD = 4096 /* the largest size we read */ + }; + char dlbuf[SKIP_SIZE + 1]; + dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ - DEBUGP (("[writing POST file %s ... ", file_name)); + /* We shouldn't get here with unknown contlen. (This will change + with HTTP/1.1, which supports "chunked" transfer.) */ + assert (contlen != -1); - fp = fopen (file_name, "rb"); - if (!fp) - return -1; - while (!feof (fp) && written < promised_size) + /* If the body is too large, it makes more sense to simply close the + connection than to try to read the body. */ + if (contlen > SKIP_THRESHOLD) + return 0; + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); + + while (contlen > 0) { - int towrite; - int length = fread (chunk, 1, sizeof (chunk), fp); - if (length == 0) - break; - towrite = MIN (promised_size - written, length); - write_error = fd_write (sock, chunk, towrite, -1); - if (write_error < 0) + int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0); + if (ret <= 0) { - fclose (fp); - return -1; + /* Don't normally report the error since this is an + optimization that should be invisible to the user. */ + DEBUGP (("] aborting (%s).\n", + ret < 0 ? 
strerror (errno) : "EOF received")); + return 0; } - written += towrite; - } - fclose (fp); - - /* If we've written less than was promised, report a (probably - nonsensical) error rather than break the promise. */ - if (written < promised_size) - { - errno = EINVAL; - return -1; + contlen -= ret; + /* Safe even if %.*s bogusly expects terminating \0 because + we've zero-terminated dlbuf above. */ + DEBUGP (("%.*s", ret, dlbuf)); } - assert (written == promised_size); - DEBUGP (("done]\n")); - return 0; + DEBUGP (("] done.\n")); + return 1; } /* Persistent connections. Currently, we cache the most recently used @@ -490,6 +878,17 @@ static struct { /* Whether a ssl handshake has occoured on this connection. */ int ssl; + + /* Whether the connection was authorized. This is only done by + NTLM, which authorizes *connections* rather than individual + requests. (That practice is peculiar for HTTP, but it is a + useful optimization.) */ + int authorized; + +#ifdef ENABLE_NTLM + /* NTLM data of the current connection. 
*/ + struct ntlmdata ntlm; +#endif } pconn; /* Mark the persistent connection as invalid and free the resources it @@ -540,6 +939,7 @@ register_persistent (const char *host, int port, int fd, int ssl) pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; + pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd)); } @@ -656,7 +1056,10 @@ persistent_available_p (const char *host, int port, int ssl, if (pconn_active && (fd) == pconn.socket) \ invalidate_persistent (); \ else \ - fd_close (fd); \ + { \ + fd_close (fd); \ + fd = -1; \ + } \ } \ } while (0) @@ -665,21 +1068,21 @@ persistent_available_p (const char *host, int port, int ssl, invalidate_persistent (); \ else \ fd_close (fd); \ + fd = -1; \ } while (0) struct http_stat { - long len; /* received length */ - long contlen; /* expected length */ - long restval; /* the restart value */ + wgint len; /* received length */ + wgint contlen; /* expected length */ + wgint restval; /* the restart value */ int res; /* the result of last read */ char *newloc; /* new location (redirection) */ char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ - double dltime; /* time of the download in msecs */ - int no_truncate; /* whether truncating the file is - forbidden. */ + wgint rd_size; /* amount of data read from socket */ + double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ char **local_file; /* local file. 
*/ }; @@ -699,10 +1102,9 @@ free_hstat (struct http_stat *hs) static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, - const char *)); -static char *basic_authentication_encode PARAMS ((const char *, const char *, - const char *)); -static int known_authentication_scheme_p PARAMS ((const char *)); + const char *, int *)); +static char *basic_authentication_encode PARAMS ((const char *, const char *)); +static int known_authentication_scheme_p PARAMS ((const char *, const char *)); time_t http_atotm PARAMS ((const char *)); @@ -711,6 +1113,14 @@ time_t http_atotm PARAMS ((const char *)); && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#define SET_USER_AGENT(req) \ + if (opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + else \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s", version_string), rel_value); + + /* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it @@ -724,75 +1134,66 @@ time_t http_atotm PARAMS ((const char *)); static uerr_t gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { - char *request, *type, *command, *full_path; + struct request *req; + + char *type; char *user, *passwd; - char *pragma_h, *referer, *useragent, *range, *wwwauth; - char *authenticate_h; char *proxyauth; - char *port_maybe; - char *request_keep_alive; - int sock, statcode; + int statcode; int write_error; - long contlen, contrange; + wgint contlen, contrange; struct url *conn; FILE *fp; - int auth_tried_already; + + int sock = -1; + int flags; + + /* Set to 1 when the authorization has failed permanently and should + not be tried again. */ + int auth_finished = 0; + + /* Whether NTLM authentication is used for this request. 
*/ + int ntlm_seen = 0; + + /* Whether our connection to the remote host is through SSL. */ int using_ssl = 0; - char *cookies = NULL; char *head; struct response *resp; char hdrval[256]; char *message; - char *set_cookie; /* Whether this connection will be kept alive after the HTTP request is done. */ int keep_alive; - /* Flag that detects having received a keep-alive response. */ - int keep_alive_confirmed; - - /* Whether keep-alive should be inhibited. */ - int inhibit_keep_alive; + /* Whether keep-alive should be inhibited. - /* Whether we need to print the host header with braces around host, - e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual - "Host: symbolic-name:1234". */ - int squares_around_host = 0; + RFC 2068 requests that 1.0 clients not send keep-alive requests + to proxies. This is because many 1.0 proxies do not interpret + the Connection header and transfer it to the remote server, + causing it to not close the connection and leave both the proxy + and the client hanging. */ + int inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ - char *post_content_type, *post_content_length; - long post_data_size = 0; + wgint post_data_size = 0; - int host_lookup_failed; + int host_lookup_failed = 0; #ifdef HAVE_SSL - /* Initialize the SSL context. After the first run, this is a - no-op. 
*/ - switch (ssl_init ()) + if (u->scheme == SCHEME_HTTPS) { - case SSLERRCTXCREATE: - /* this is fatal */ - logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n")); - return SSLERRCTXCREATE; - case SSLERRCERTFILE: - /* try without certfile */ - logprintf (LOG_NOTQUIET, - _("Failed to load certificates from %s\n"), - opt.sslcertfile); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - case SSLERRCERTKEY: - logprintf (LOG_NOTQUIET, - _("Failed to get certificate key from %s\n"), - opt.sslcertkey); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - default: - break; + /* Initialize the SSL context. After this has once been done, + it becomes a no-op. */ + if (!ssl_init ()) + { + scheme_disable (SCHEME_HTTPS); + logprintf (LOG_NOTQUIET, + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; + } } #endif /* HAVE_SSL */ @@ -801,167 +1202,75 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - authenticate_h = NULL; - auth_tried_already = 0; - - inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL; - - again: - /* We need to come back here when the initial attempt to retrieve - without authorization header fails. (Expected to happen at least - for the Digest authorization scheme.) */ - - keep_alive = 0; - keep_alive_confirmed = 0; - - post_content_type = NULL; - post_content_length = NULL; - /* Initialize certain elements of struct http_stat. */ - hs->len = 0L; + hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; - /* If we're using a proxy, we will be connecting to the proxy - server. */ - conn = proxy ? proxy : u; - - host_lookup_failed = 0; - - /* First: establish the connection. 
*/ - if (inhibit_keep_alive - || !persistent_available_p (conn->host, conn->port, -#ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS -#else - 0 -#endif - , &host_lookup_failed)) - { - /* In its current implementation, persistent_available_p will - look up conn->host in some cases. If that lookup failed, we - don't need to bother with connect_to_host. */ - if (host_lookup_failed) - return HOSTERR; - - sock = connect_to_host (conn->host, conn->port); - if (sock == E_HOST) - return HOSTERR; - else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + conn = u; -#ifdef HAVE_SSL - if (conn->scheme == SCHEME_HTTPS) - { - if (!ssl_connect (sock)) - { - logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_NOTQUIET, - _("Unable to establish SSL connection.\n")); - fd_close (sock); - return CONSSLERR; - } - using_ssl = 1; - } -#endif /* HAVE_SSL */ - } - else - { - logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - pconn.host, pconn.port); - sock = pconn.socket; - using_ssl = pconn.ssl; - DEBUGP (("Reusing fd %d.\n", sock)); - } - - if (*dt & HEAD_ONLY) - command = "HEAD"; - else if (opt.post_file_name || opt.post_data) - command = "POST"; - else - command = "GET"; + /* Prepare the request to send. */ - referer = NULL; - if (hs->referer) - { - referer = (char *)alloca (9 + strlen (hs->referer) + 3); - sprintf (referer, "Referer: %s\r\n", hs->referer); - } + req = request_new (); + { + const char *meth = "GET"; + if (*dt & HEAD_ONLY) + meth = "HEAD"; + else if (opt.post_file_name || opt.post_data) + meth = "POST"; + /* Use the full path, i.e. one that includes the leading slash and + the query string. E.g. if u->path is "foo/bar" and u->query is + "param=value", full_path will be "/foo/bar?param=value". */ + request_set_method (req, meth, + proxy ? 
xstrdup (u->url) : url_full_path (u)); + } + request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) - pragma_h = "Pragma: no-cache\r\n"; - else - pragma_h = ""; - + request_set_header (req, "Pragma", "no-cache", rel_none); if (hs->restval) - { - range = (char *)alloca (13 + numdigit (hs->restval) + 4); - /* Gag me! Some servers (e.g. WebSitePro) have been known to - respond to the following `Range' format by generating a - multipart/x-byte-ranges MIME document! This MIME type was - present in an old draft of the byteranges specification. - HTTP/1.1 specifies a multipart/byte-ranges MIME type, but - only if multiple non-overlapping ranges are requested -- - which Wget never does. */ - sprintf (range, "Range: bytes=%ld-\r\n", hs->restval); - } - else - range = NULL; - if (opt.useragent) - STRDUP_ALLOCA (useragent, opt.useragent); - else - { - useragent = (char *)alloca (10 + strlen (version_string)); - sprintf (useragent, "Wget/%s", version_string); - } - /* Construct the authentication, if userid is present. */ + request_set_header (req, "Range", + aprintf ("bytes=%s-", + number_to_static_string (hs->restval)), + rel_value); + SET_USER_AGENT (req); + request_set_header (req, "Accept", "*/*", rel_none); + + /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); - user = user ? user : opt.http_user; - passwd = passwd ? passwd : opt.http_passwd; + user = user ? user : (opt.http_user ? opt.http_user : opt.user); + passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); - wwwauth = NULL; if (user && passwd) { - if (!authenticate_h) - { - /* We have the username and the password, but haven't tried - any authorization yet. Let's see if the "Basic" method - works. If not, we'll come back here and construct a - proper authorization method with the right challenges. 
- - If we didn't employ this kind of logic, every URL that - requires authorization would have to be processed twice, - which is very suboptimal and generates a bunch of false - "unauthorized" errors in the server log. - - #### But this logic also has a serious problem when used - with stronger authentications: we *first* transmit the - username and the password in clear text, and *then* - attempt a stronger authentication scheme. That cannot be - right! We are only fortunate that almost everyone still - uses the `Basic' scheme anyway. - - There should be an option to prevent this from happening, - for those who use strong authentication schemes and value - their passwords. */ - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); - } - else - { - /* Use the full path, i.e. one that includes the leading - slash and the query string, but is independent of proxy - setting. */ - char *pth = url_full_path (u); - wwwauth = create_authorization_line (authenticate_h, user, passwd, - command, pth); - xfree (pth); - } + /* We have the username and the password, but haven't tried + any authorization yet. Let's see if the "Basic" method + works. If not, we'll come back here and construct a + proper authorization method with the right challenges. + + If we didn't employ this kind of logic, every URL that + requires authorization would have to be processed twice, + which is very suboptimal and generates a bunch of false + "unauthorized" errors in the server log. + + #### But this logic also has a serious problem when used + with stronger authentications: we *first* transmit the + username and the password in clear text, and *then* attempt a + stronger authentication scheme. That cannot be right! We + are only fortunate that almost everyone still uses the + `Basic' scheme anyway. + + There should be an option to prevent this from happening, for + those who use strong authentication schemes and value their + passwords. 
*/ + request_set_header (req, "Authorization", + basic_authentication_encode (user, passwd), + rel_value); } proxyauth = NULL; @@ -969,10 +1278,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { char *proxy_user, *proxy_passwd; /* For normal username and password, URL components override - command-line/wgetrc parameters. With proxy authentication, - it's the reverse, because proxy URLs are normally the - "permanent" ones, so command-line args should take - precedence. */ + command-line/wgetrc parameters. With proxy + authentication, it's the reverse, because proxy URLs are + normally the "permanent" ones, so command-line args + should take precedence. */ if (opt.proxy_user && opt.proxy_passwd) { proxy_user = opt.proxy_user; @@ -984,37 +1293,56 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) proxy_passwd = proxy->passwd; } /* #### This does not appear right. Can't the proxy request, - say, `Digest' authentication? */ + say, `Digest' authentication? */ if (proxy_user && proxy_passwd) - proxyauth = basic_authentication_encode (proxy_user, proxy_passwd, - "Proxy-Authorization"); - } + proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); - /* String of the form :PORT. Used only for non-standard ports. */ - port_maybe = NULL; - if (u->port != scheme_default_port (u->scheme)) - { - port_maybe = (char *)alloca (numdigit (u->port) + 2); - sprintf (port_maybe, ":%d", u->port); + /* If we're using a proxy, we will be connecting to the proxy + server. */ + conn = proxy; + + /* Proxy authorization over SSL is handled below. */ +#ifdef HAVE_SSL + if (u->scheme != SCHEME_HTTPS) +#endif + request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); } + { + /* Whether we need to print the host header with braces around + host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the + usual "Host: symbolic-name:1234". 
*/ + int squares = strchr (u->host, ':') != NULL; + if (u->port == scheme_default_port (u->scheme)) + request_set_header (req, "Host", + aprintf (squares ? "[%s]" : "%s", u->host), + rel_value); + else + request_set_header (req, "Host", + aprintf (squares ? "[%s]:%d" : "%s:%d", + u->host, u->port), + rel_value); + } + if (!inhibit_keep_alive) - request_keep_alive = "Connection: Keep-Alive\r\n"; - else - request_keep_alive = NULL; + request_set_header (req, "Connection", "Keep-Alive", rel_none); if (opt.cookies) - cookies = cookie_header (wget_cookie_jar, u->host, u->port, u->path, + request_set_header (req, "Cookie", + cookie_header (wget_cookie_jar, + u->host, u->port, u->path, #ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS + u->scheme == SCHEME_HTTPS #else - 0 + 0 #endif - ); + ), + rel_value); if (opt.post_data || opt.post_file_name) { - post_content_type = "Content-Type: application/x-www-form-urlencoded\r\n"; + request_set_header (req, "Content-Type", + "application/x-www-form-urlencoded", rel_none); if (opt.post_data) post_data_size = strlen (opt.post_data); else @@ -1027,110 +1355,202 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) post_data_size = 0; } } - post_content_length = xmalloc (16 + numdigit (post_data_size) + 2 + 1); - sprintf (post_content_length, - "Content-Length: %ld\r\n", post_data_size); + request_set_header (req, "Content-Length", + xstrdup (number_to_static_string (post_data_size)), + rel_value); } - if (proxy) - full_path = xstrdup (u->url); - else - /* Use the full path, i.e. one that includes the leading slash and - the query string. E.g. if u->path is "foo/bar" and u->query is - "param=value", full_path will be "/foo/bar?param=value". */ - full_path = url_full_path (u); - - if (strchr (u->host, ':')) - squares_around_host = 1; - - /* Allocate the memory for the request. */ - request = (char *)alloca (strlen (command) - + strlen (full_path) - + strlen (useragent) - + strlen (u->host) - + (port_maybe ? 
strlen (port_maybe) : 0) - + strlen (HTTP_ACCEPT) - + (request_keep_alive - ? strlen (request_keep_alive) : 0) - + (referer ? strlen (referer) : 0) - + (cookies ? strlen (cookies) : 0) - + (wwwauth ? strlen (wwwauth) : 0) - + (proxyauth ? strlen (proxyauth) : 0) - + (range ? strlen (range) : 0) - + strlen (pragma_h) - + (post_content_type - ? strlen (post_content_type) : 0) - + (post_content_length - ? strlen (post_content_length) : 0) - + (opt.user_header ? strlen (opt.user_header) : 0) - + 64); - /* Construct the request. */ - sprintf (request, "\ -%s %s HTTP/1.0\r\n\ -User-Agent: %s\r\n\ -Host: %s%s%s%s\r\n\ -Accept: %s\r\n\ -%s%s%s%s%s%s%s%s%s%s\r\n", - command, full_path, - useragent, - squares_around_host ? "[" : "", u->host, squares_around_host ? "]" : "", - port_maybe ? port_maybe : "", - HTTP_ACCEPT, - request_keep_alive ? request_keep_alive : "", - referer ? referer : "", - cookies ? cookies : "", - wwwauth ? wwwauth : "", - proxyauth ? proxyauth : "", - range ? range : "", - pragma_h, - post_content_type ? post_content_type : "", - post_content_length ? post_content_length : "", - opt.user_header ? opt.user_header : ""); - DEBUGP (("\n---request begin---\n%s", request)); - - /* Free the temporary memory. */ - xfree_null (wwwauth); - xfree_null (proxyauth); - xfree_null (cookies); - xfree (full_path); + /* Add the user headers. */ + if (opt.user_headers) + { + int i; + for (i = 0; opt.user_headers[i]; i++) + request_set_user_header (req, opt.user_headers[i]); + } + + retry_with_auth: + /* We need to come back here when the initial attempt to retrieve + without authorization header fails. (Expected to happen at least + for the Digest authorization scheme.) */ + + keep_alive = 0; + + /* Establish the connection. */ + + if (!inhibit_keep_alive) + { + /* Look for a persistent connection to target host, unless a + proxy is used. 
The exception is when SSL is in use, in which + case the proxy is nothing but a passthrough to the target + host, registered as a connection to the latter. */ + struct url *relevant = conn; +#ifdef HAVE_SSL + if (u->scheme == SCHEME_HTTPS) + relevant = u; +#endif + + if (persistent_available_p (relevant->host, relevant->port, +#ifdef HAVE_SSL + relevant->scheme == SCHEME_HTTPS, +#else + 0, +#endif + &host_lookup_failed)) + { + sock = pconn.socket; + using_ssl = pconn.ssl; + logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), + escnonprint (pconn.host), pconn.port); + DEBUGP (("Reusing fd %d.\n", sock)); + if (pconn.authorized) + /* If the connection is already authorized, the "Basic" + authorization added by code above is unnecessary and + only hurts us. */ + request_remove_header (req, "Authorization"); + } + } + + if (sock < 0) + { + /* In its current implementation, persistent_available_p will + look up conn->host in some cases. If that lookup failed, we + don't need to bother with connect_to_host. */ + if (host_lookup_failed) + { + request_free (req); + return HOSTERR; + } + + sock = connect_to_host (conn->host, conn->port); + if (sock == E_HOST) + { + request_free (req); + return HOSTERR; + } + else if (sock < 0) + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } + +#ifdef HAVE_SSL + if (proxy && u->scheme == SCHEME_HTTPS) + { + /* When requesting SSL URLs through proxies, use the + CONNECT method to request passthrough. */ + struct request *connreq = request_new (); + request_set_method (connreq, "CONNECT", + aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); + if (proxyauth) + { + request_set_header (connreq, "Proxy-Authorization", + proxyauth, rel_value); + /* Now that PROXYAUTH is part of the CONNECT request, + zero it out so we don't send proxy authorization with + the regular request below. 
*/ + proxyauth = NULL; + } + /* Examples in rfc2817 use the Host header in CONNECT + requests. I don't see how that gains anything, given + that the contents of Host would be exactly the same as + the contents of CONNECT. */ + + write_error = request_send (connreq, sock); + request_free (connreq); + if (write_error < 0) + { + logprintf (LOG_VERBOSE, _("Failed writing to proxy: %s.\n"), + strerror (errno)); + CLOSE_INVALIDATE (sock); + return WRITEFAILED; + } + + head = read_http_response_head (sock); + if (!head) + { + logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"), + strerror (errno)); + CLOSE_INVALIDATE (sock); + return HERR; + } + message = NULL; + if (!*head) + { + xfree (head); + goto failed_tunnel; + } + DEBUGP (("proxy responded with: [%s]\n", head)); + + resp = resp_new (head); + statcode = resp_status (resp, &message); + resp_free (resp); + xfree (head); + if (statcode != 200) + { + failed_tunnel: + logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), + message ? escnonprint (message) : "?"); + xfree_null (message); + return CONSSLERR; + } + xfree_null (message); + + /* SOCK is now *really* connected to u->host, so update CONN + to reflect this. That way register_persistent will + register SOCK as being connected to u->host:u->port. */ + conn = u; + } + + if (conn->scheme == SCHEME_HTTPS) + { + if (!ssl_connect (sock)) + { + fd_close (sock); + return CONSSLERR; + } + using_ssl = 1; + } +#endif /* HAVE_SSL */ + } /* Send the request to server. 
*/ - write_error = fd_write (sock, request, strlen (request), -1); + write_error = request_send (req, sock); if (write_error >= 0) { if (opt.post_data) { DEBUGP (("[POST data: %s]\n", opt.post_data)); - write_error = fd_write (sock, opt.post_data, post_data_size, -1); + write_error = fd_write (sock, opt.post_data, post_data_size, -1.0); } else if (opt.post_file_name && post_data_size != 0) write_error = post_file (sock, opt.post_file_name, post_data_size); } - DEBUGP (("---request end---\n")); if (write_error < 0) { logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); CLOSE_INVALIDATE (sock); + request_free (req); return WRITEFAILED; } logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "), proxy ? "Proxy" : "HTTP"); - contlen = contrange = -1; - type = NULL; - statcode = -1; + contlen = -1; + contrange = 0; *dt &= ~RETROKF; - head = fd_read_http_head (sock); + head = read_http_response_head (sock); if (!head) { - logputs (LOG_VERBOSE, "\n"); if (errno == 0) { logputs (LOG_NOTQUIET, _("No data received.\n")); CLOSE_INVALIDATE (sock); + request_free (req); return HEOF; } else @@ -1138,130 +1558,189 @@ Accept: %s\r\n\ logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"), strerror (errno)); CLOSE_INVALIDATE (sock); + request_free (req); return HERR; } } + DEBUGP (("\n---response begin---\n%s---response end---\n", head)); - DEBUGP (("\n---response begin---\n")); - DEBUGP (("%s", head)); - DEBUGP (("---response end---\n")); - - resp = response_new (head); + resp = resp_new (head); /* Check for status line. */ message = NULL; - statcode = response_status (resp, &message); + statcode = resp_status (resp, &message); if (!opt.server_response) - logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : ""); + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, + message ? 
escnonprint (message) : ""); else { logprintf (LOG_VERBOSE, "\n"); - print_server_response (resp); + print_server_response (resp, " "); } - hs->statcode = statcode; - if (statcode == -1) - hs->error = xstrdup (_("Malformed status line")); - else if (!*message) - hs->error = xstrdup (_("(no description)")); - else - hs->error = xstrdup (message); - - if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) - contlen = strtol (hdrval, NULL, 10); - type = response_header_strdup (resp, "Content-Type"); - if (type) - { - char *tmp = strchr (type, ';'); - if (tmp) - { - while (tmp > type && ISSPACE (tmp[-1])) - --tmp; - *tmp = '\0'; - } - } - hs->newloc = response_header_strdup (resp, "Location"); - hs->remote_time = response_header_strdup (resp, "Last-Modified"); - set_cookie = response_header_strdup (resp, "Set-Cookie"); - if (set_cookie) - { - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, - set_cookie); - xfree (set_cookie); - } - authenticate_h = response_header_strdup (resp, "WWW-Authenticate"); - if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) + if (!opt.ignore_length + && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) { - long first_byte_pos, last_byte_pos, entity_length; - if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, - &entity_length)) - contrange = first_byte_pos; + wgint parsed; + errno = 0; + parsed = str_to_wgint (hdrval, NULL, 10); + if (parsed == WGINT_MAX && errno == ERANGE) + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; + else + contlen = parsed; } /* Check for keep-alive related responses. 
*/ if (!inhibit_keep_alive && contlen != -1) { - if (response_header_copy (resp, "Keep-Alive", NULL, 0)) + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) keep_alive = 1; - else if (response_header_copy (resp, "Connection", hdrval, - sizeof (hdrval))) + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { if (0 == strcasecmp (hdrval, "Keep-Alive")) keep_alive = 1; } } - response_free (resp); - if (keep_alive) /* The server has promised that it will not close the connection when we're done. This means that we can register it. */ register_persistent (conn->host, conn->port, sock, using_ssl); - if ((statcode == HTTP_STATUS_UNAUTHORIZED) - && authenticate_h) + if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - xfree_null (type); - type = NULL; - free_hstat (hs); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ - if (auth_tried_already) + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + pconn.authorized = 0; + if (auth_finished || !(user && passwd)) { /* If we have tried it already, then there is not point retrying it. */ - failed: logputs (LOG_NOTQUIET, _("Authorization failed.\n")); - xfree (authenticate_h); - return AUTHFAILED; } - else if (!known_authentication_scheme_p (authenticate_h)) - { - xfree (authenticate_h); - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - return AUTHFAILED; - } - else if (BEGINS_WITH (authenticate_h, "Basic")) + else { - /* The authentication scheme is basic, the one we try by - default, and it failed. There's no sense in trying - again. */ - goto failed; + /* IIS sometimes sends two instances of WWW-Authenticate + header, one with the keyword "negotiate", and other with + useful data. Loop over all occurrences of this header + and use the one we recognize. 
*/ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + www_authenticate = strdupdelim (wabeg, waend); + break; + } + /* If the authentication header is missing or unrecognized, or + if the authentication scheme is "Basic" (which we send by + default), there's no sense in retrying. */ + if (!www_authenticate + || BEGINS_WITH (www_authenticate, "Basic")) + { + xfree_null (www_authenticate); + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else + { + char *pth; + pth = url_full_path (u); + request_set_header (req, "Authorization", + create_authorization_line (www_authenticate, + user, passwd, + request_method (req), + pth, + &auth_finished), + rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = 1; + xfree (pth); + xfree (www_authenticate); + goto retry_with_auth; + } } - else + request_free (req); + return AUTHFAILED; + } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = 1; + } + request_free (req); + + hs->statcode = statcode; + if (statcode == -1) + hs->error = xstrdup (_("Malformed status line")); + else if (!*message) + hs->error = xstrdup (_("(no description)")); + else + hs->error = xstrdup (message); + xfree (message); + + type = resp_header_strdup (resp, "Content-Type"); + if (type) + { + char *tmp = strchr (type, ';'); + if (tmp) { - auth_tried_already = 1; - goto again; + while (tmp > type && ISSPACE (tmp[-1])) + --tmp; + *tmp = '\0'; } } - /* We do not need this anymore. */ - if (authenticate_h) + hs->newloc = resp_header_strdup (resp, "Location"); + hs->remote_time = resp_header_strdup (resp, "Last-Modified"); + + /* Handle (possibly multiple instances of) the Set-Cookie header.
*/ + { + char *pth = NULL; + int scpos; + const char *scbeg, *scend; + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + for (scpos = 0; + (scpos = resp_header_locate (resp, "Set-Cookie", scpos, + &scbeg, &scend)) != -1; + ++scpos) + { + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + if (pth == NULL) + { + /* u->path doesn't begin with /, which cookies.c expects. */ + pth = (char *) alloca (1 + strlen (u->path) + 1); + pth[0] = '/'; + strcpy (pth + 1, u->path); + } + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth, + set_cookie); + } + } + + if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) { - xfree (authenticate_h); - authenticate_h = NULL; + wgint first_byte_pos, last_byte_pos, entity_length; + if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, + &entity_length)) + contrange = first_byte_pos; } + resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -1281,10 +1760,15 @@ Accept: %s\r\n\ { logprintf (LOG_VERBOSE, _("Location: %s%s\n"), - hs->newloc ? hs->newloc : _("unspecified"), + hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } xfree_null (type); return NEWLOCATION; } @@ -1321,87 +1805,37 @@ Accept: %s\r\n\ } } - if (contrange == -1) + if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE) { - /* We did not get a content-range header. This means that the - server did not honor our `Range' request. Normally, this - means we should reset hs->restval and continue normally. */ - - /* However, if `-c' is used, we need to be a bit more careful: - - 1. 
If `-c' is specified and the file already existed when - Wget was started, it would be a bad idea for us to start - downloading it from scratch, effectively truncating it. I - believe this cannot happen unless `-c' was specified. - - 2. If `-c' is used on a file that is already fully - downloaded, we're requesting bytes after the end of file, - which can result in server not honoring `Range'. If this is - the case, `Content-Length' will be equal to the length of the - file. */ - if (opt.always_rest) - { - /* Check for condition #2. */ - if (hs->restval > 0 /* restart was requested. */ - && contlen != -1 /* we got content-length. */ - && hs->restval >= contlen /* file fully downloaded - or has shrunk. */ - ) - { - logputs (LOG_VERBOSE, _("\ + /* If `-c' is in use and the file has been fully downloaded (or + the remote file has shrunk), Wget effectively requests bytes + after the end of file and the server responds with 416. */ + logputs (LOG_VERBOSE, _("\ \n The file is already fully retrieved; nothing to do.\n\n")); - /* In case the caller inspects. */ - hs->len = contlen; - hs->res = 0; - /* Mark as successfully retrieved. */ - *dt |= RETROKF; - xfree_null (type); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ - return RETRUNNEEDED; - } - - /* Check for condition #1. */ - if (hs->no_truncate) - { - logprintf (LOG_NOTQUIET, - _("\ -\n\ -Continued download failed on this file, which conflicts with `-c'.\n\ -Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); - xfree_null (type); - CLOSE_INVALIDATE (sock); - return CONTNOTSUPPORTED; - } - - /* Fallthrough */ - } - - hs->restval = 0; + /* In case the caller inspects. */ + hs->len = contlen; + hs->res = 0; + /* Mark as successfully retrieved. */ + *dt |= RETROKF; + xfree_null (type); + CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there + might be more bytes in the body.
*/ + return RETRUNNEEDED; } - else if (contrange != hs->restval || - (H_PARTIAL (statcode) && contrange == -1)) + if ((contrange != 0 && contrange != hs->restval) + || (H_PARTIAL (statcode) && !contrange)) { - /* This means the whole request was somehow misunderstood by the - server. Bail out. */ + /* The Range request was somehow misunderstood by the server. + Bail out. */ xfree_null (type); CLOSE_INVALIDATE (sock); return RANGEERR; } - - if (hs->restval) - { - if (contlen != -1) - contlen += contrange; - else - contrange = -1; /* If conent-length was not sent, - content-range will be ignored. */ - } - hs->contlen = contlen; + hs->contlen = contlen + contrange; if (opt.verbose) { - if ((*dt & RETROKF) && !opt.server_response) + if (*dt & RETROKF) { /* No need to print this output if the body won't be downloaded at all, or if the original server response is @@ -1409,16 +1843,26 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { - logputs (LOG_VERBOSE, legible (contlen)); - if (contrange != -1) - logprintf (LOG_VERBOSE, _(" (%s to go)"), - legible (contlen - contrange)); + logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); + if (contlen + contrange >= 1024) + logprintf (LOG_VERBOSE, " (%s)", + human_readable (contlen + contrange)); + if (contrange) + { + if (contlen >= 1024) + logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), + with_thousand_seps (contlen), + human_readable (contlen)); + else + logprintf (LOG_VERBOSE, _(", %s remaining"), + with_thousand_seps (contlen)); + } } else logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", type); + logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } @@ -1430,71 +1874,72 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); if (!(*dt & RETROKF) || (*dt & HEAD_ONLY)) { /* In case the caller cares to look... 
*/ - hs->len = 0L; + hs->len = 0; hs->res = 0; xfree_null (type); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the + servers not to send body in response to a HEAD request. If + you encounter such a server (more likely a broken CGI), use + `--no-http-keep-alive'. */ + CLOSE_FINISH (sock); return RETRFINISHED; } /* Open the local file. */ - if (!opt.dfp) + if (!output_stream) { mkalldirs (*hs->local_file); if (opt.backups) rotate_backups (*hs->local_file); - fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb"); + if (hs->restval) + fp = fopen (*hs->local_file, "ab"); + else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct + || opt.output_document) + fp = fopen (*hs->local_file, "wb"); + else + { + fp = fopen_excl (*hs->local_file, 1); + if (!fp && errno == EEXIST) + { + /* We cannot just invent a new name and use it (which is + what functions like unique_create typically do) + because we told the user we'd use this name. + Instead, return and retry the download. */ + logprintf (LOG_NOTQUIET, + _("%s has sprung into existence.\n"), + *hs->local_file); + CLOSE_INVALIDATE (sock); + return FOPEN_EXCL_ERR; + } + } if (!fp) { logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno)); - CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there - might be more bytes in the body. */ + CLOSE_INVALIDATE (sock); return FOPENERR; } } - else /* opt.dfp */ - { - extern int global_download_count; - fp = opt.dfp; - /* To ensure that repeated "from scratch" downloads work for -O - files, we rewind the file pointer, unless restval is - non-zero. (This works only when -O is used on regular files, - but it's still a valuable feature.) - - However, this loses when more than one URL is specified on - the command line the second rewinds eradicates the contents - of the first download. 
Thus we disable the above trick for - all the downloads except the very first one. - - #### A possible solution to this would be to remember the - file position in the output document and to seek to that - position, instead of rewinding. - - We don't truncate stdout, since that breaks - "wget -O - [...] >> foo". - */ - if (!hs->restval && global_download_count == 0 && opt.dfp != stdout) - { - /* This will silently fail for streams that don't correspond - to regular files, but that's OK. */ - rewind (fp); - /* ftruncate is needed because opt.dfp is opened in append - mode if opt.always_rest is set. */ - ftruncate (fileno (fp), 0); - clearerr (fp); - } - } + else + fp = output_stream; - /* #### This confuses the code that checks for file size. There - should be some overhead information. */ + /* #### This confuses the timestamping code that checks for file + size. Maybe we should save some additional information? */ if (opt.save_headers) fwrite (head, 1, strlen (head), fp); - /* Get the contents of the document. */ - hs->res = fd_read_body (sock, fp, &hs->len, hs->restval, - (contlen != -1 ? contlen : 0), - keep_alive, &hs->dltime); + /* Download the request body. */ + flags = 0; + if (keep_alive) + flags |= rb_read_exactly; + if (hs->restval > 0 && contrange == 0) + /* If the server ignored our range request, instruct fd_read_body + to skip the first RESTVAL bytes of body. */ + flags |= rb_skip_startpos; + hs->len = hs->restval; + hs->rd_size = 0; + hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, + hs->restval, &hs->rd_size, &hs->len, &hs->dltime, + flags); if (hs->res >= 0) CLOSE_FINISH (sock); @@ -1506,7 +1951,7 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); error here. Checking the result of fwrite() is not enough -- errors could go unnoticed! 
*/ int flush_res; - if (!opt.dfp) + if (!output_stream) flush_res = fclose (fp); else flush_res = fflush (fp); @@ -1531,10 +1976,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, char *tms, *locf, *tmrate; uerr_t err; time_t tml = -1, tmr = -1; /* local and remote time-stamps */ - long local_size = 0; /* the size of the local file */ + wgint local_size = 0; /* the size of the local file */ size_t filename_len; struct http_stat hstat; /* HTTP status */ - struct stat st; + struct_stat st; char *dummy = NULL; /* This used to be done in main(), but it's a better idea to do it @@ -1559,10 +2004,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, if (strchr (u->url, '*')) logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n")); + xzero (hstat); + /* Determine the local filename. */ if (local_file && *local_file) hstat.local_file = local_file; - else if (local_file) + else if (local_file && !opt.output_document) { *local_file = url_file_name (u); hstat.local_file = local_file; @@ -1571,6 +2018,9 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, { dummy = url_file_name (u); hstat.local_file = &dummy; + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); } if (!opt.output_document) @@ -1622,7 +2072,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); point I profiled Wget, and found that a measurable and non-negligible amount of time was lost calling sprintf() in url.c. Replacing sprintf with inline calls to - strcpy() and long_to_string() made a difference. + strcpy() and number_to_string() made a difference. 
--hniksic */ memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len); memcpy (filename_plus_orig_suffix + filename_len, @@ -1659,7 +2109,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); } /* Reset the counter. */ count = 0; - *dt = 0 | ACCEPTRANGES; + *dt = 0; /* THE loop */ do { @@ -1672,14 +2122,14 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); if (opt.verbose) { char *hurl = url_string (u, 1); - char tmp[15]; + char tmp[256]; strcpy (tmp, " "); if (count > 1) sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n", tms, hurl, tmp, locf); #ifdef WINDOWS - ws_changetitle (hurl, 1); + ws_changetitle (hurl); #endif xfree (hurl); } @@ -1691,21 +2141,15 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; - /* Assume no restarting. */ - hstat.restval = 0L; + /* Decide whether or not to restart. */ - if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest) - /* #### this calls access() and then stat(); could be optimized. */ - && file_exists_p (locf)) - if (stat (locf, &st) == 0 && S_ISREG (st.st_mode)) - hstat.restval = st.st_size; - - /* In `-c' is used and the file is existing and non-empty, - refuse to truncate it if the server doesn't support continued - downloads. */ - hstat.no_truncate = 0; - if (opt.always_rest && hstat.restval) - hstat.no_truncate = 1; + hstat.restval = 0; + if (count > 1) + hstat.restval = hstat.len; /* continue where we left off */ + else if (opt.always_rest + && stat (locf, &st) == 0 + && S_ISREG (st.st_mode)) + hstat.restval = st.st_size; /* Decide whether to send the no-cache directive. We send it in two cases: @@ -1729,8 +2173,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *hstat.local_file to tack on ".html". */ if (!opt.output_document) locf = *hstat.local_file; - else - locf = opt.output_document; /* Time? 
*/ tms = time_str (NULL); @@ -1741,21 +2183,42 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: case CONERROR: case READERR: case WRITEFAILED: - case RANGEERR: + case RANGEERR: case FOPEN_EXCL_ERR: /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ free_hstat (&hstat); printwhat (count, opt.ntry); + if (err == FOPEN_EXCL_ERR) + { + /* Re-determine the file name. */ + if (local_file && *local_file) + { + xfree (*local_file); + *local_file = url_file_name (u); + hstat.local_file = local_file; + } + else + { + xfree (dummy); + dummy = url_file_name (u); + hstat.local_file = &dummy; + } + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); + if (!opt.output_document) + locf = *hstat.local_file; + else + locf = opt.output_document; + } continue; - break; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLERRCTXCREATE: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; - break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1764,7 +2227,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1772,7 +2234,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case NEWLOCATION: /* Return the new location to the caller. 
*/ if (!hstat.newloc) @@ -1787,13 +2248,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return NEWLOCATION; - break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); xfree_null (dummy); return RETROK; - break; case RETRFINISHED: /* Deal with you later. */ break; @@ -1811,7 +2270,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, hstat.error); + tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); xfree_null (dummy); @@ -1863,7 +2322,8 @@ Server file no newer than local file `%s' -- not retrieving.\n\n"), } else if (tml >= tmr) logprintf (LOG_VERBOSE, _("\ -The sizes do not match (local %ld) -- retrieving.\n"), local_size); +The sizes do not match (local %s) -- retrieving.\n"), + number_to_static_string (local_size)); else logputs (LOG_VERBOSE, _("Remote file is newer, retrieving.\n")); @@ -1883,7 +2343,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); const char *fl = NULL; if (opt.output_document) { - if (opt.od_known_regular) + if (output_stream_regular) fl = opt.output_document; } else @@ -1895,23 +2355,29 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (opt.spider) { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); + logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, + escnonprint (hstat.error)); xfree_null (dummy); return RETROK; } - tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0); + tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0); if (hstat.len == hstat.contlen) { if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld]\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + 
number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1934,11 +2400,13 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld]\n\n"), - tms, tmrate, locf, hstat.len); + _("%s (%s) - `%s' saved [%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, locf, count); + "%s URL:%s [%s] -> \"%s\" [%d]\n", + tms, u->url, number_to_static_string (hstat.len), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1957,8 +2425,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); connection too soon */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld. "), - tms, tmrate, hstat.len); + _("%s (%s) - Connection closed at byte %s. 
"), + tms, tmrate, number_to_static_string (hstat.len)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -1966,11 +2434,16 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else if (!opt.kill_longer) /* meaning we got more than expected */ { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld])\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s])\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -1987,8 +2460,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* the same, but not accepted */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld/%ld. "), - tms, tmrate, hstat.len, hstat.contlen); + _("%s (%s) - Connection closed at byte %s/%s. "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -1999,8 +2474,9 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (hstat.contlen == -1) { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld (%s)."), - tms, tmrate, hstat.len, strerror (errno)); + _("%s (%s) - Read error at byte %s (%s)."), + tms, tmrate, number_to_static_string (hstat.len), + strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -2008,8 +2484,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* hstat.res == -1 and contlen is given */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld/%ld (%s). 
"), - tms, tmrate, hstat.len, hstat.contlen, + _("%s (%s) - Read error at byte %s/%s (%s). "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); @@ -2017,7 +2495,6 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); } } /* not reached */ - break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC; @@ -2185,74 +2662,37 @@ http_atotm (const char *time_string) return -1; } -/* Authorization support: We support two authorization schemes: +/* Authorization support: We support three authorization schemes: * `Basic' scheme, consisting of base64-ing USER:PASSWORD string; * `Digest' scheme, added by Junio Hamano , consisting of answering to the server's challenge with the proper - MD5 digests. */ - -/* How many bytes it will take to store LEN bytes in base64. */ -#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3)) - -/* Encode the string S of length LENGTH to base64 format and place it - to STORE. STORE will be 0-terminated, and must point to a writable - buffer of at least 1+BASE64_LENGTH(length) bytes. */ -static void -base64_encode (const char *s, char *store, int length) -{ - /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' - }; - int i; - unsigned char *p = (unsigned char *)store; + MD5 digests. - /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) - { - *p++ = tbl[s[0] >> 2]; - *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; - *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; - *p++ = tbl[s[2] & 0x3f]; - s += 3; - } - /* Pad the result if necessary... 
*/ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - /* ...and zero-terminate it. */ - *p = '\0'; -} + * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel + Stenberg for libcurl. Like digest, NTLM is based on a + challenge-response mechanism, but unlike digest, it is non-standard + (authenticates TCP connections rather than requests), undocumented + and Microsoft-specific. */ /* Create the authentication header contents for the `Basic' scheme. This is done by encoding the string `USER:PASS' in base64 and prepending `HEADER: Basic ' to it. */ + static char * -basic_authentication_encode (const char *user, const char *passwd, - const char *header) +basic_authentication_encode (const char *user, const char *passwd) { - char *t1, *t2, *res; + char *t1, *t2; int len1 = strlen (user) + 1 + strlen (passwd); - int len2 = BASE64_LENGTH (len1); t1 = (char *)alloca (len1 + 1); sprintf (t1, "%s:%s", user, passwd); - t2 = (char *)alloca (1 + len2); - base64_encode (t1, t2, len1); - res = (char *)xmalloc (len2 + 11 + strlen (header)); - sprintf (res, "%s: Basic %s\r\n", header, t2); - return res; + t2 = (char *)alloca (BASE64_LENGTH (len1) + 1); + base64_encode (t1, len1, t2); + + return concat_strings ("Basic ", t2, (char *) 0); } #define SKIP_WS(x) do { \ @@ -2260,7 +2700,7 @@ basic_authentication_encode (const char *user, const char *passwd, ++(x); \ } while (0) -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. 
If the field is the one specified by ATTR_NAME ("realm", "opaque", and "nonce" are used by the current @@ -2270,9 +2710,8 @@ basic_authentication_encode (const char *user, const char *passwd, static int extract_header_attr (const char *au, const char *attr_name, char **ret) { - const char *cp, *ep; - - ep = cp = au; + const char *ep; + const char *cp = au; if (strncmp (cp, attr_name, strlen (attr_name)) == 0) { @@ -2434,7 +2873,7 @@ digest_authentication_encode (const char *au, const char *user, + 2 * MD5_HASHLEN /*strlen (response_digest)*/ + (opaque ? strlen (opaque) : 0) + 128); - sprintf (res, "Authorization: Digest \ + sprintf (res, "Digest \ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", user, realm, nonce, path, response_digest); if (opaque) @@ -2444,27 +2883,38 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", strcat (p, opaque); strcat (p, "\""); } - strcat (res, "\r\n"); } return res; } -#endif /* USE_DIGEST */ +#endif /* ENABLE_DIGEST */ +/* Computing the size of a string literal must take into account that + value returned by sizeof includes the terminating \0. */ +#define STRSIZE(literal) (sizeof (literal) - 1) -#define BEGINS_WITH(line, string_constant) \ - (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ - || !line[sizeof (string_constant) - 1])) +/* Whether chars in [b, e) begin with the literal string provided as + first argument and are followed by whitespace or terminating \0. + The comparison is case-insensitive. 
*/ +#define STARTS(literal, b, e) \ + ((e) - (b) >= STRSIZE (literal) \ + && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ + && ((e) - (b) == STRSIZE (literal) \ + || ISSPACE (b[STRSIZE (literal)]))) static int -known_authentication_scheme_p (const char *au) +known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) { - return BEGINS_WITH (au, "Basic") - || BEGINS_WITH (au, "Digest") - || BEGINS_WITH (au, "NTLM"); + return STARTS ("Basic", hdrbeg, hdrend) +#ifdef ENABLE_DIGEST + || STARTS ("Digest", hdrbeg, hdrend) +#endif +#ifdef ENABLE_NTLM + || STARTS ("NTLM", hdrbeg, hdrend) +#endif + ; } -#undef BEGINS_WITH +#undef STARTS /* Create the HTTP authorization request header. When the `WWW-Authenticate' response header is seen, according to the @@ -2474,22 +2924,47 @@ known_authentication_scheme_p (const char *au) static char * create_authorization_line (const char *au, const char *user, const char *passwd, const char *method, - const char *path) + const char *path, int *finished) { - char *wwwauth = NULL; - - if (!strncasecmp (au, "Basic", 5)) - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); - if (!strncasecmp (au, "NTLM", 4)) - wwwauth = basic_authentication_encode (user, passwd, "Authorization"); -#ifdef USE_DIGEST - else if (!strncasecmp (au, "Digest", 6)) - wwwauth = digest_authentication_encode (au, user, passwd, method, path); -#endif /* USE_DIGEST */ - return wwwauth; + /* We are called only with known schemes, so we can dispatch on the + first letter. 
*/ + switch (TOUPPER (*au)) + { + case 'B': /* Basic */ + *finished = 1; + return basic_authentication_encode (user, passwd); +#ifdef ENABLE_DIGEST + case 'D': /* Digest */ + *finished = 1; + return digest_authentication_encode (au, user, passwd, method, path); +#endif +#ifdef ENABLE_NTLM + case 'N': /* NTLM */ + if (!ntlm_input (&pconn.ntlm, au)) + { + *finished = 1; + return NULL; + } + return ntlm_output (&pconn.ntlm, user, passwd, finished); +#endif + default: + /* We shouldn't get here -- this function should be only called + with values approved by known_authentication_scheme_p. */ + abort (); + } } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); }