X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=97a773a1e2d720abe45ac5041c3245e0477d49a7;hb=0967c21094580317353f0742c4836c5bbea34059;hp=6bc4f3be5be0a41994c7bbf57bf837b2ed8d9540;hpb=50d143f3fefbcb343d4d1968d4f9d0d59178ce3f;p=wget diff --git a/src/http.c b/src/http.c index 6bc4f3be..97a773a1 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* HTTP support. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -64,10 +64,13 @@ extern int errno; #include "connect.h" #include "netrc.h" #ifdef HAVE_SSL -# include "gen_sslfunc.h" -#endif /* HAVE_SSL */ +# include "ssl.h" +#endif +#ifdef ENABLE_NTLM +# include "http-ntlm.h" +#endif #include "cookies.h" -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST # include "gen-md5.h" #endif #include "convert.h" @@ -84,7 +87,7 @@ extern int output_stream_regular; static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" @@ -199,7 +202,7 @@ release_header (struct request_header *hdr) /* Set the request named NAME to VALUE. Specifically, this means that a "NAME: VALUE\r\n" header line will be used in the request. If a header with the same name previously existed in the request, its - value will be replaced by this one. + value will be replaced by this one. A NULL value means do nothing. RELEASE_POLICY determines whether NAME and VALUE should be released (freed) with request_free. Allowed values are: @@ -230,8 +233,16 @@ request_set_header (struct request *req, char *name, char *value, { struct request_header *hdr; int i; + if (!value) - return; + { + /* A NULL value is a no-op; if freeing the name is requested, + free it now to avoid leaks. 
*/ + if (release_policy == rel_name || release_policy == rel_both) + xfree (name); + return; + } + for (i = 0; i < req->hcount; i++) { hdr = &req->headers[i]; @@ -248,11 +259,10 @@ request_set_header (struct request *req, char *name, char *value, /* Install new header. */ - if (req->hcount >= req->hcount) + if (req->hcount >= req->hcapacity) { req->hcapacity <<= 1; - req->headers = xrealloc (req->headers, - req->hcapacity * sizeof (struct request_header)); + req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr)); } hdr = &req->headers[req->hcount++]; hdr->name = name; @@ -279,6 +289,29 @@ request_set_user_header (struct request *req, const char *header) request_set_header (req, xstrdup (name), (char *) p, rel_name); } +/* Remove the header with specified name from REQ. Returns 1 if the + header was actually removed, 0 otherwise. */ + +static int +request_remove_header (struct request *req, char *name) +{ + int i; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + release_header (hdr); + /* Move the remaining headers by one. */ + if (i < req->hcount - 1) + memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr)); + --req->hcount; + return 1; + } + } + return 0; +} + #define APPEND(p, str) do { \ int A_len = strlen (str); \ memcpy (p, str, A_len); \ @@ -335,7 +368,7 @@ request_send (const struct request *req, int fd) /* Send the request to the server. */ - write_error = fd_write (fd, request_string, size - 1, -1); + write_error = fd_write (fd, request_string, size - 1, -1.0); if (write_error < 0) logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); @@ -355,7 +388,7 @@ request_free (struct request *req) xfree (req); } -/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly +/* Send the contents of FILE_NAME to SOCK. 
Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ @@ -379,7 +412,7 @@ post_file (int sock, const char *file_name, wgint promised_size) if (length == 0) break; towrite = MIN (promised_size - written, length); - write_error = fd_write (sock, chunk, towrite, -1); + write_error = fd_write (sock, chunk, towrite, -1.0); if (write_error < 0) { fclose (fp); @@ -403,7 +436,7 @@ post_file (int sock, const char *file_name, wgint promised_size) } static const char * -head_terminator (const char *hunk, int oldlen, int peeklen) +response_head_terminator (const char *hunk, int oldlen, int peeklen) { const char *start, *end; @@ -432,6 +465,13 @@ head_terminator (const char *hunk, int oldlen, int peeklen) return NULL; } +/* The maximum size of a single HTTP response we care to read. This + is not meant to impose an arbitrary limit, but to protect the user + from Wget slurping up available memory upon encountering malicious + or buggy server output. Define it to 0 to remove the limit. */ + +#define HTTP_RESPONSE_MAX_SIZE 65536 + /* Read the HTTP request head from FD and return it. The error conditions are the same as with fd_read_hunk. @@ -441,9 +481,10 @@ head_terminator (const char *hunk, int oldlen, int peeklen) data can be treated as body. */ static char * -fd_read_http_head (int fd) +read_http_response_head (int fd) { - return fd_read_hunk (fd, head_terminator, 512); + return fd_read_hunk (fd, response_head_terminator, 512, + HTTP_RESPONSE_MAX_SIZE); } struct response { @@ -474,10 +515,10 @@ struct response { /* Create a new response object from the text of the HTTP response, available in HEAD. That text is automatically split into constituent header lines for fast retrieval using - response_header_*. */ + resp_header_*. 
*/ static struct response * -response_new (const char *head) +resp_new (const char *head) { const char *hdr; int count, size; @@ -493,7 +534,7 @@ response_new (const char *head) return resp; } - /* Split HEAD into header lines, so that response_header_* functions + /* Split HEAD into header lines, so that resp_header_* functions don't need to do this over and over again. */ size = count = 0; @@ -519,32 +560,41 @@ response_new (const char *head) while (*hdr == ' ' || *hdr == '\t'); } DO_REALLOC (resp->headers, size, count + 1, const char *); - resp->headers[count++] = NULL; + resp->headers[count] = NULL; return resp; } -/* Locate the header named NAME in the request data. If found, set - *BEGPTR to its starting, and *ENDPTR to its ending position, and - return 1. Otherwise return 0. +/* Locate the header named NAME in the response data, starting with + position START. This allows the code to loop through the response + data, filtering for all headers of a given name. Returns the + found position, or -1 for failure. The code that uses this + function typically looks like this: + + for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++) + ... do something with header ... - This function is used as a building block for response_header_copy - and response_header_strdup. */ + If you only care about one header, use resp_header_get instead of + this function. 
*/ static int -response_header_bounds (const struct response *resp, const char *name, - const char **begptr, const char **endptr) +resp_header_locate (const struct response *resp, const char *name, int start, + const char **begptr, const char **endptr) { int i; const char **headers = resp->headers; int name_len; if (!headers || !headers[1]) - return 0; + return -1; name_len = strlen (name); + if (start > 0) + i = start; + else + i = 1; - for (i = 1; headers[i + 1]; i++) + for (; headers[i + 1]; i++) { const char *b = headers[i]; const char *e = headers[i + 1]; @@ -559,26 +609,41 @@ response_header_bounds (const struct response *resp, const char *name, --e; *begptr = b; *endptr = e; - return 1; + return i; } } - return 0; + return -1; +} + +/* Find and retrieve the header named NAME in the request data. If + found, set *BEGPTR to its starting, and *ENDPTR to its ending + position, and return 1. Otherwise return 0. + + This function is used as a building block for resp_header_copy + and resp_header_strdup. */ + +static int +resp_header_get (const struct response *resp, const char *name, + const char **begptr, const char **endptr) +{ + int pos = resp_header_locate (resp, name, 0, begptr, endptr); + return pos != -1; } /* Copy the response header named NAME to buffer BUF, no longer than BUFSIZE (BUFSIZE includes the terminating 0). If the header exists, 1 is returned, otherwise 0. If there should be no limit on - the size of the header, use response_header_strdup instead. + the size of the header, use resp_header_strdup instead. If BUFSIZE is 0, no data is copied, but the boolean indication of whether the header is present is still returned. 
*/ static int -response_header_copy (const struct response *resp, const char *name, - char *buf, int bufsize) +resp_header_copy (const struct response *resp, const char *name, + char *buf, int bufsize) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return 0; if (bufsize) { @@ -593,10 +658,10 @@ response_header_copy (const struct response *resp, const char *name, malloc. If such a header does not exist in RESP, return NULL. */ static char * -response_header_strdup (const struct response *resp, const char *name) +resp_header_strdup (const struct response *resp, const char *name) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return NULL; return strdupdelim (b, e); } @@ -610,7 +675,7 @@ response_header_strdup (const struct response *resp, const char *name) returned in *MESSAGE. */ static int -response_status (const struct response *resp, char **message) +resp_status (const struct response *resp, char **message) { int status; const char *p, *end; @@ -670,28 +735,14 @@ response_status (const struct response *resp, char **message) /* Release the resources used by RESP. */ static void -response_free (struct response *resp) +resp_free (struct response *resp) { xfree_null (resp->headers); xfree (resp); } -/* Print [b, e) to the log, omitting the trailing CRLF. */ - -static void -print_server_response_1 (const char *prefix, const char *b, const char *e) -{ - char *ln; - if (b < e && e[-1] == '\n') - --e; - if (b < e && e[-1] == '\r') - --e; - BOUNDED_TO_ALLOCA (b, e, ln); - logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln); -} - -/* Print the server response, line by line, omitting the trailing CR - characters, prefixed with PREFIX. */ +/* Print the server response, line by line, omitting the trailing CRLF + from individual header lines, and prefixed with PREFIX. 
*/ static void print_server_response (const struct response *resp, const char *prefix) @@ -700,7 +751,18 @@ print_server_response (const struct response *resp, const char *prefix) if (!resp->headers) return; for (i = 0; resp->headers[i + 1]; i++) - print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]); + { + const char *b = resp->headers[i]; + const char *e = resp->headers[i + 1]; + /* Skip CRLF */ + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + /* This is safe even on printfs with broken handling of "%.s" + because resp->headers ends with \0. */ + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + } } /* Parse the `Content-Range' header and extract the information it @@ -747,30 +809,54 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, } /* Read the body of the request, but don't store it anywhere and don't - display a progress gauge. This is useful for reading the error - responses whose bodies don't need to be displayed or logged, but - which need to be read anyway. */ + display a progress gauge. This is useful for reading the bodies of + administrative responses to which we will soon issue another + request. The response is not useful to the user, but reading it + allows us to continue using the same connection to the server. -static void + If reading fails, 0 is returned, non-zero otherwise. In debug + mode, the body is displayed for debugging purposes. */ + +static int skip_short_body (int fd, wgint contlen) { - /* Skipping the body doesn't make sense if the content length is - unknown because, in that case, persistent connections cannot be - used. (#### This is not the case with HTTP/1.1 where they can - still be used with the magic of the "chunked" transfer!) */ - if (contlen == -1) - return; - DEBUGP (("Skipping %s bytes of body data... 
", number_to_static_string (contlen))); + enum { + SKIP_SIZE = 512, /* size of the download buffer */ + SKIP_THRESHOLD = 4096 /* the largest size we read */ + }; + char dlbuf[SKIP_SIZE + 1]; + dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ + + /* We shouldn't get here with unknown contlen. (This will change + with HTTP/1.1, which supports "chunked" transfer.) */ + assert (contlen != -1); + + /* If the body is too large, it makes more sense to simply close the + connection than to try to read the body. */ + if (contlen > SKIP_THRESHOLD) + return 0; + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); while (contlen > 0) { - char dlbuf[512]; - int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1); + int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0); if (ret <= 0) - return; + { + /* Don't normally report the error since this is an + optimization that should be invisible to the user. */ + DEBUGP (("] aborting (%s).\n", + ret < 0 ? strerror (errno) : "EOF received")); + return 0; + } contlen -= ret; + /* Safe even if %.*s bogusly expects terminating \0 because + we've zero-terminated dlbuf above. */ + DEBUGP (("%.*s", ret, dlbuf)); } - DEBUGP (("done.\n")); + + DEBUGP (("] done.\n")); + return 1; } /* Persistent connections. Currently, we cache the most recently used @@ -792,6 +878,17 @@ static struct { /* Whether a ssl handshake has occoured on this connection. */ int ssl; + + /* Whether the connection was authorized. This is only done by + NTLM, which authorizes *connections* rather than individual + requests. (That practice is peculiar for HTTP, but it is a + useful optimization.) */ + int authorized; + +#ifdef ENABLE_NTLM + /* NTLM data of the current connection. 
*/ + struct ntlmdata ntlm; +#endif } pconn; /* Mark the persistent connection as invalid and free the resources it @@ -842,6 +939,7 @@ register_persistent (const char *host, int port, int fd, int ssl) pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; + pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd)); } @@ -1004,9 +1102,9 @@ free_hstat (struct http_stat *hs) static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, - const char *)); + const char *, int *)); static char *basic_authentication_encode PARAMS ((const char *, const char *)); -static int known_authentication_scheme_p PARAMS ((const char *)); +static int known_authentication_scheme_p PARAMS ((const char *, const char *)); time_t http_atotm PARAMS ((const char *)); @@ -1015,6 +1113,14 @@ time_t http_atotm PARAMS ((const char *)); && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#define SET_USER_AGENT(req) \ + if (opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + else \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s", version_string), rel_value); + + /* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it @@ -1042,8 +1148,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) int sock = -1; int flags; - /* Whether authorization has been already tried. */ - int auth_tried_already = 0; + /* Set to 1 when the authorization has failed permanently and should + not be tried again. */ + int auth_finished = 0; + + /* Whether NTLM authentication is used for this request. */ + int ntlm_seen = 0; /* Whether our connection to the remote host is through SSL. 
*/ int using_ssl = 0; @@ -1057,8 +1167,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) is done. */ int keep_alive; - /* Whether keep-alive should be inhibited. */ - int inhibit_keep_alive = !opt.http_keep_alive || opt.ignore_length; + /* Whether keep-alive should be inhibited. + + RFC 2068 requests that 1.0 clients not send keep-alive requests + to proxies. This is because many 1.0 proxies do not interpret + the Connection header and transfer it to the remote server, + causing it to not close the connection and leave both the proxy + and the client hanging. */ + int inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ wgint post_data_size = 0; @@ -1070,29 +1187,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { /* Initialize the SSL context. After this has once been done, it becomes a no-op. */ - switch (ssl_init ()) + if (!ssl_init ()) { - case SSLERRCTXCREATE: - /* this is fatal */ - logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n")); - return SSLERRCTXCREATE; - case SSLERRCERTFILE: - /* try without certfile */ - logprintf (LOG_NOTQUIET, - _("Failed to load certificates from %s\n"), - opt.sslcertfile); + scheme_disable (SCHEME_HTTPS); logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - case SSLERRCERTKEY: - logprintf (LOG_NOTQUIET, - _("Failed to get certificate key from %s\n"), - opt.sslcertkey); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - default: - break; + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; } } #endif /* HAVE_SSL */ @@ -1102,10 +1202,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - auth_tried_already = 0; - /* Initialize certain elements of struct http_stat. 
*/ - hs->len = 0L; + hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; @@ -1138,19 +1236,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) aprintf ("bytes=%s-", number_to_static_string (hs->restval)), rel_value); - if (opt.useragent) - request_set_header (req, "User-Agent", opt.useragent, rel_none); - else - request_set_header (req, "User-Agent", - aprintf ("Wget/%s", version_string), rel_value); + SET_USER_AGENT (req); request_set_header (req, "Accept", "*/*", rel_none); /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); - user = user ? user : opt.http_user; - passwd = passwd ? passwd : opt.http_passwd; + user = user ? user : (opt.http_user ? opt.http_user : opt.user); + passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); if (user && passwd) { @@ -1306,8 +1400,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) sock = pconn.socket; using_ssl = pconn.ssl; logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - pconn.host, pconn.port); + escnonprint (pconn.host), pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); + if (pconn.authorized) + /* If the connection is already authorized, the "Basic" + authorization added by code above is unnecessary and + only hurts us. */ + request_remove_header (req, "Authorization"); } } @@ -1317,14 +1416,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) look up conn->host in some cases. If that lookup failed, we don't need to bother with connect_to_host. */ if (host_lookup_failed) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? 
CONERROR : CONIMPOSSIBLE); + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } #ifdef HAVE_SSL if (proxy && u->scheme == SCHEME_HTTPS) @@ -1334,6 +1442,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) struct request *connreq = request_new (); request_set_method (connreq, "CONNECT", aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); if (proxyauth) { request_set_header (connreq, "Proxy-Authorization", @@ -1343,6 +1452,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) the regular request below. */ proxyauth = NULL; } + /* Examples in rfc2817 use the Host header in CONNECT + requests. I don't see how that gains anything, given + that the contents of Host would be exactly the same as + the contents of CONNECT. */ write_error = request_send (connreq, sock); request_free (connreq); @@ -1354,7 +1467,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) return WRITEFAILED; } - head = fd_read_http_head (sock); + head = read_http_response_head (sock); if (!head) { logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"), @@ -1370,18 +1483,19 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } DEBUGP (("proxy responded with: [%s]\n", head)); - resp = response_new (head); - statcode = response_status (resp, &message); - response_free (resp); + resp = resp_new (head); + statcode = resp_status (resp, &message); + resp_free (resp); + xfree (head); if (statcode != 200) { failed_tunnel: logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), - message ? message : "?"); + message ? escnonprint (message) : "?"); xfree_null (message); return CONSSLERR; } - xfree (message); + xfree_null (message); /* SOCK is now *really* connected to u->host, so update CONN to reflect this. 
That way register_persistent will @@ -1409,7 +1523,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (opt.post_data) { DEBUGP (("[POST data: %s]\n", opt.post_data)); - write_error = fd_write (sock, opt.post_data, post_data_size, -1); + write_error = fd_write (sock, opt.post_data, post_data_size, -1.0); } else if (opt.post_file_name && post_data_size != 0) write_error = post_file (sock, opt.post_file_name, post_data_size); @@ -1427,11 +1541,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) proxy ? "Proxy" : "HTTP"); contlen = -1; contrange = 0; - type = NULL; - statcode = -1; *dt &= ~RETROKF; - head = fd_read_http_head (sock); + head = read_http_response_head (sock); if (!head) { if (errno == 0) @@ -1452,13 +1564,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } DEBUGP (("\n---response begin---\n%s---response end---\n", head)); - resp = response_new (head); + resp = resp_new (head); /* Check for status line. */ message = NULL; - statcode = response_status (resp, &message); + statcode = resp_status (resp, &message); if (!opt.server_response) - logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : ""); + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, + message ? escnonprint (message) : ""); else { logprintf (LOG_VERBOSE, "\n"); @@ -1466,7 +1579,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } if (!opt.ignore_length - && response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) + && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) { wgint parsed; errno = 0; @@ -1485,10 +1598,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Check for keep-alive related responses. 
*/ if (!inhibit_keep_alive && contlen != -1) { - if (response_header_copy (resp, "Keep-Alive", NULL, 0)) + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) keep_alive = 1; - else if (response_header_copy (resp, "Connection", hdrval, - sizeof (hdrval))) + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { if (0 == strcasecmp (hdrval, "Keep-Alive")) keep_alive = 1; @@ -1502,9 +1614,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); - if (auth_tried_already || !(user && passwd)) + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + pconn.authorized = 0; + if (auth_finished || !(user && passwd)) { /* If we have tried it already, then there is not point retrying it. */ @@ -1512,13 +1630,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } else { - char *www_authenticate = response_header_strdup (resp, - "WWW-Authenticate"); - /* If the authentication scheme is unknown or if it's the - "Basic" authentication (which we try by default), there's - no sense in retrying. */ + /* IIS sometimes sends two instances of WWW-Authenticate + header, one with the keyword "negotiate", and other with + useful data. Loop over all occurrences of this header + and use the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + www_authenticate = strdupdelim (wabeg, waend); + break; + } + /* If the authentication header is missing or unrecognized, or + if the authentication scheme is "Basic" (which we send by + default), there's no sense in retrying. 
*/ if (!www_authenticate - || !known_authentication_scheme_p (www_authenticate) || BEGINS_WITH (www_authenticate, "Basic")) { xfree_null (www_authenticate); @@ -1527,14 +1658,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) else { char *pth; - auth_tried_already = 1; pth = url_full_path (u); request_set_header (req, "Authorization", create_authorization_line (www_authenticate, user, passwd, request_method (req), - pth), + pth, + &auth_finished), rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = 1; xfree (pth); xfree (www_authenticate); goto retry_with_auth; @@ -1543,6 +1676,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_free (req); return AUTHFAILED; } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = 1; + } request_free (req); hs->statcode = statcode; @@ -1552,8 +1691,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->error = xstrdup (_("(no description)")); else hs->error = xstrdup (message); + xfree (message); - type = response_header_strdup (resp, "Content-Type"); + type = resp_header_strdup (resp, "Content-Type"); if (type) { char *tmp = strchr (type, ';'); @@ -1564,27 +1704,43 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) *tmp = '\0'; } } - hs->newloc = response_header_strdup (resp, "Location"); - hs->remote_time = response_header_strdup (resp, "Last-Modified"); + hs->newloc = resp_header_strdup (resp, "Location"); + hs->remote_time = resp_header_strdup (resp, "Last-Modified"); + + /* Handle (possibly multiple instances of) the Set-Cookie header. */ { - char *set_cookie = response_header_strdup (resp, "Set-Cookie"); - if (set_cookie) + char *pth = NULL; + int scpos; + const char *scbeg, *scend; + /* The jar should have been created by now. 
*/ + assert (wget_cookie_jar != NULL); + for (scpos = 0; + (scpos = resp_header_locate (resp, "Set-Cookie", scpos, + &scbeg, &scend)) != -1; + ++scpos) { - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + if (pth == NULL) + { + /* u->path doesn't begin with /, which cookies.c expects. */ + pth = (char *) alloca (1 + strlen (u->path) + 1); + pth[0] = '/'; + strcpy (pth + 1, u->path); + } + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth, set_cookie); - xfree (set_cookie); } } - if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) + + if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) { wgint first_byte_pos, last_byte_pos, entity_length; if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, &entity_length)) contrange = first_byte_pos; } - response_free (resp); + resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -1604,11 +1760,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { logprintf (LOG_VERBOSE, _("Location: %s%s\n"), - hs->newloc ? hs->newloc : _("unspecified"), + hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? 
_(" [following]") : ""); if (keep_alive) - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } xfree_null (type); return NEWLOCATION; } @@ -1683,15 +1843,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { - logputs (LOG_VERBOSE, legible (contlen + contrange)); + logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); + if (contlen + contrange >= 1024) + logprintf (LOG_VERBOSE, " (%s)", + human_readable (contlen + contrange)); if (contrange) - logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen)); + { + if (contlen >= 1024) + logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), + with_thousand_seps (contlen), + human_readable (contlen)); + else + logprintf (LOG_VERBOSE, _(", %s remaining"), + with_thousand_seps (contlen)); + } } else logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", type); + logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } @@ -1703,7 +1874,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (!(*dt & RETROKF) || (*dt & HEAD_ONLY)) { /* In case the caller cares to look... */ - hs->len = 0L; + hs->len = 0; hs->res = 0; xfree_null (type); /* Pre-1.10 Wget used CLOSE_INVALIDATE here. 
Now we trust the @@ -1727,7 +1898,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) fp = fopen (*hs->local_file, "wb"); else { - fp = fopen_excl (*hs->local_file, 0); + fp = fopen_excl (*hs->local_file, 1); if (!fp && errno == EEXIST) { /* We cannot just invent a new name and use it (which is @@ -2042,14 +2213,12 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); locf = opt.output_document; } continue; - break; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLERRCTXCREATE: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; - break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -2058,7 +2227,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -2066,7 +2234,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case NEWLOCATION: /* Return the new location to the caller. */ if (!hstat.newloc) @@ -2081,13 +2248,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return NEWLOCATION; - break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); xfree_null (dummy); return RETROK; - break; case RETRFINISHED: /* Deal with you later. 
*/ break; @@ -2105,7 +2270,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, hstat.error); + tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); xfree_null (dummy); @@ -2190,7 +2355,8 @@ The sizes do not match (local %s) -- retrieving.\n"), if (opt.spider) { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); + logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, + escnonprint (hstat.error)); xfree_null (dummy); return RETROK; } @@ -2329,7 +2495,6 @@ The sizes do not match (local %s) -- retrieving.\n"), } } /* not reached */ - break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC; @@ -2497,75 +2662,37 @@ http_atotm (const char *time_string) return -1; } -/* Authorization support: We support two authorization schemes: +/* Authorization support: We support three authorization schemes: * `Basic' scheme, consisting of base64-ing USER:PASSWORD string; * `Digest' scheme, added by Junio Hamano , consisting of answering to the server's challenge with the proper - MD5 digests. */ - -/* How many bytes it will take to store LEN bytes in base64. */ -#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3)) + MD5 digests. -/* Encode the string S of length LENGTH to base64 format and place it - to STORE. STORE will be 0-terminated, and must point to a writable - buffer of at least 1+BASE64_LENGTH(length) bytes. */ -static void -base64_encode (const char *s, char *store, int length) -{ - /* Conversion table. 
*/ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' - }; - int i; - unsigned char *p = (unsigned char *)store; - - /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) - { - *p++ = tbl[s[0] >> 2]; - *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; - *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; - *p++ = tbl[s[2] & 0x3f]; - s += 3; - } - /* Pad the result if necessary... */ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - /* ...and zero-terminate it. */ - *p = '\0'; -} + * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel + Stenberg for libcurl. Like digest, NTLM is based on a + challenge-response mechanism, but unlike digest, it is non-standard + (authenticates TCP connections rather than requests), undocumented + and Microsoft-specific. */ /* Create the authentication header contents for the `Basic' scheme. This is done by encoding the string `USER:PASS' in base64 and prepending `HEADER: Basic ' to it. 
*/ + static char * basic_authentication_encode (const char *user, const char *passwd) { - char *t1, *t2, *res; + char *t1, *t2; int len1 = strlen (user) + 1 + strlen (passwd); - int len2 = BASE64_LENGTH (len1); t1 = (char *)alloca (len1 + 1); sprintf (t1, "%s:%s", user, passwd); - t2 = (char *)alloca (len2 + 1); - base64_encode (t1, t2, len1); - - res = (char *)xmalloc (6 + len2 + 1); - sprintf (res, "Basic %s", t2); + t2 = (char *)alloca (BASE64_LENGTH (len1) + 1); + base64_encode (t1, len1, t2); - return res; + return concat_strings ("Basic ", t2, (char *) 0); } #define SKIP_WS(x) do { \ @@ -2573,7 +2700,7 @@ basic_authentication_encode (const char *user, const char *passwd) ++(x); \ } while (0) -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. If the field is the one specified by ATTR_NAME ("realm", "opaque", and "nonce" are used by the current @@ -2583,9 +2710,8 @@ basic_authentication_encode (const char *user, const char *passwd) static int extract_header_attr (const char *au, const char *attr_name, char **ret) { - const char *cp, *ep; - - ep = cp = au; + const char *ep; + const char *cp = au; if (strncmp (cp, attr_name, strlen (attr_name)) == 0) { @@ -2760,23 +2886,35 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", } return res; } -#endif /* USE_DIGEST */ +#endif /* ENABLE_DIGEST */ +/* Computing the size of a string literal must take into account that + value returned by sizeof includes the terminating \0. */ +#define STRSIZE(literal) (sizeof (literal) - 1) -#define BEGINS_WITH(line, string_constant) \ - (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ - || !line[sizeof (string_constant) - 1])) +/* Whether chars in [b, e) begin with the literal string provided as + first argument and are followed by whitespace or terminating \0. + The comparison is case-insensitive. 
*/ +#define STARTS(literal, b, e) \ + ((e) - (b) >= STRSIZE (literal) \ + && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ + && ((e) - (b) == STRSIZE (literal) \ + || ISSPACE (b[STRSIZE (literal)]))) static int -known_authentication_scheme_p (const char *au) +known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) { - return BEGINS_WITH (au, "Basic") - || BEGINS_WITH (au, "Digest") - || BEGINS_WITH (au, "NTLM"); + return STARTS ("Basic", hdrbeg, hdrend) +#ifdef ENABLE_DIGEST + || STARTS ("Digest", hdrbeg, hdrend) +#endif +#ifdef ENABLE_NTLM + || STARTS ("NTLM", hdrbeg, hdrend) +#endif + ; } -#undef BEGINS_WITH +#undef STARTS /* Create the HTTP authorization request header. When the `WWW-Authenticate' response header is seen, according to the @@ -2786,18 +2924,47 @@ known_authentication_scheme_p (const char *au) static char * create_authorization_line (const char *au, const char *user, const char *passwd, const char *method, - const char *path) + const char *path, int *finished) { - if (0 == strncasecmp (au, "Basic", 5)) - return basic_authentication_encode (user, passwd); -#ifdef USE_DIGEST - if (0 == strncasecmp (au, "Digest", 6)) - return digest_authentication_encode (au, user, passwd, method, path); -#endif /* USE_DIGEST */ - return NULL; + /* We are called only with known schemes, so we can dispatch on the + first letter. */ + switch (TOUPPER (*au)) + { + case 'B': /* Basic */ + *finished = 1; + return basic_authentication_encode (user, passwd); +#ifdef ENABLE_DIGEST + case 'D': /* Digest */ + *finished = 1; + return digest_authentication_encode (au, user, passwd, method, path); +#endif +#ifdef ENABLE_NTLM + case 'N': /* NTLM */ + if (!ntlm_input (&pconn.ntlm, au)) + { + *finished = 1; + return NULL; + } + return ntlm_output (&pconn.ntlm, user, passwd, finished); +#endif + default: + /* We shouldn't get here -- this function should be only called + with values approved by known_authentication_scheme_p. 
*/ + abort (); + } } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); }