X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=97a773a1e2d720abe45ac5041c3245e0477d49a7;hb=0967c21094580317353f0742c4836c5bbea34059;hp=ea61911fc925032415161e353852ef11b5770834;hpb=976c54d0e6361637a363cf88dcfb856428eb88ea;p=wget diff --git a/src/http.c b/src/http.c index ea61911f..97a773a1 100644 --- a/src/http.c +++ b/src/http.c @@ -64,10 +64,13 @@ extern int errno; #include "connect.h" #include "netrc.h" #ifdef HAVE_SSL -# include "gen_sslfunc.h" -#endif /* HAVE_SSL */ +# include "ssl.h" +#endif +#ifdef ENABLE_NTLM +# include "http-ntlm.h" +#endif #include "cookies.h" -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST # include "gen-md5.h" #endif #include "convert.h" @@ -84,7 +87,7 @@ extern int output_stream_regular; static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" @@ -199,7 +202,7 @@ release_header (struct request_header *hdr) /* Set the request named NAME to VALUE. Specifically, this means that a "NAME: VALUE\r\n" header line will be used in the request. If a header with the same name previously existed in the request, its - value will be replaced by this one. + value will be replaced by this one. A NULL value means do nothing. RELEASE_POLICY determines whether NAME and VALUE should be released (freed) with request_free. Allowed values are: @@ -230,8 +233,16 @@ request_set_header (struct request *req, char *name, char *value, { struct request_header *hdr; int i; + if (!value) - return; + { + /* A NULL value is a no-op; if freeing the name is requested, + free it now to avoid leaks. */ + if (release_policy == rel_name || release_policy == rel_both) + xfree (name); + return; + } + for (i = 0; i < req->hcount; i++) { hdr = &req->headers[i]; @@ -248,11 +259,10 @@ request_set_header (struct request *req, char *name, char *value, /* Install new header. */ - if (req->hcount >= req->hcount) + if (req->hcount >= req->hcapacity) { req->hcapacity <<= 1; - req->headers = xrealloc (req->headers, - req->hcapacity * sizeof (struct request_header)); + req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr)); } hdr = &req->headers[req->hcount++]; hdr->name = name; @@ -279,6 +289,29 @@ request_set_user_header (struct request *req, const char *header) request_set_header (req, xstrdup (name), (char *) p, rel_name); } +/* Remove the header with specified name from REQ. Returns 1 if the + header was actually removed, 0 otherwise. */ + +static int +request_remove_header (struct request *req, char *name) +{ + int i; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + release_header (hdr); + /* Move the remaining headers by one. */ + if (i < req->hcount - 1) + memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr)); + --req->hcount; + return 1; + } + } + return 0; +} + #define APPEND(p, str) do { \ int A_len = strlen (str); \ memcpy (p, str, A_len); \ @@ -335,7 +368,7 @@ request_send (const struct request *req, int fd) /* Send the request to the server. */ - write_error = fd_write (fd, request_string, size - 1, -1); + write_error = fd_write (fd, request_string, size - 1, -1.0); if (write_error < 0) logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); @@ -355,7 +388,7 @@ request_free (struct request *req) xfree (req); } -/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly +/* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ @@ -379,7 +412,7 @@ post_file (int sock, const char *file_name, wgint promised_size) if (length == 0) break; towrite = MIN (promised_size - written, length); - write_error = fd_write (sock, chunk, towrite, -1); + write_error = fd_write (sock, chunk, towrite, -1.0); if (write_error < 0) { fclose (fp); @@ -432,6 +465,13 @@ response_head_terminator (const char *hunk, int oldlen, int peeklen) return NULL; } +/* The maximum size of a single HTTP response we care to read. This + is not meant to impose an arbitrary limit, but to protect the user + from Wget slurping up available memory upon encountering malicious + or buggy server output. Define it to 0 to remove the limit. */ + +#define HTTP_RESPONSE_MAX_SIZE 65536 + /* Read the HTTP request head from FD and return it. The error conditions are the same as with fd_read_hunk. @@ -443,7 +483,8 @@ response_head_terminator (const char *hunk, int oldlen, int peeklen) static char * read_http_response_head (int fd) { - return fd_read_hunk (fd, response_head_terminator, 512); + return fd_read_hunk (fd, response_head_terminator, 512, + HTTP_RESPONSE_MAX_SIZE); } struct response { @@ -700,22 +741,8 @@ resp_free (struct response *resp) xfree (resp); } -/* Print [b, e) to the log, omitting the trailing CRLF. */ - -static void -print_server_response_1 (const char *prefix, const char *b, const char *e) -{ - char *ln; - if (b < e && e[-1] == '\n') - --e; - if (b < e && e[-1] == '\r') - --e; - BOUNDED_TO_ALLOCA (b, e, ln); - logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint (ln)); -} - -/* Print the server response, line by line, omitting the trailing CR - characters, prefixed with PREFIX. */ +/* Print the server response, line by line, omitting the trailing CRLF + from individual header lines, and prefixed with PREFIX. */ static void print_server_response (const struct response *resp, const char *prefix) @@ -724,7 +751,18 @@ print_server_response (const struct response *resp, const char *prefix) if (!resp->headers) return; for (i = 0; resp->headers[i + 1]; i++) - print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]); + { + const char *b = resp->headers[i]; + const char *e = resp->headers[i + 1]; + /* Skip CRLF */ + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + /* This is safe even on printfs with broken handling of "%.s" + because resp->headers ends with \0. */ + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + } } /* Parse the `Content-Range' header and extract the information it @@ -771,30 +809,54 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr, } /* Read the body of the request, but don't store it anywhere and don't - display a progress gauge. This is useful for reading the error - responses whose bodies don't need to be displayed or logged, but - which need to be read anyway. */ + display a progress gauge. This is useful for reading the bodies of + administrative responses to which we will soon issue another + request. The response is not useful to the user, but reading it + allows us to continue using the same connection to the server. -static void + If reading fails, 0 is returned, non-zero otherwise. In debug + mode, the body is displayed for debugging purposes. */ + +static int skip_short_body (int fd, wgint contlen) { - /* Skipping the body doesn't make sense if the content length is - unknown because, in that case, persistent connections cannot be - used. (#### This is not the case with HTTP/1.1 where they can - still be used with the magic of the "chunked" transfer!) */ - if (contlen == -1) - return; - DEBUGP (("Skipping %s bytes of body data... ", number_to_static_string (contlen))); + enum { + SKIP_SIZE = 512, /* size of the download buffer */ + SKIP_THRESHOLD = 4096 /* the largest size we read */ + }; + char dlbuf[SKIP_SIZE + 1]; + dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ + + /* We shouldn't get here with unknown contlen. (This will change + with HTTP/1.1, which supports "chunked" transfer.) */ + assert (contlen != -1); + + /* If the body is too large, it makes more sense to simply close the + connection than to try to read the body. */ + if (contlen > SKIP_THRESHOLD) + return 0; + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); while (contlen > 0) { - char dlbuf[512]; - int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1); + int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0); if (ret <= 0) - return; + { + /* Don't normally report the error since this is an + optimization that should be invisible to the user. */ + DEBUGP (("] aborting (%s).\n", + ret < 0 ? strerror (errno) : "EOF received")); + return 0; + } contlen -= ret; + /* Safe even if %.*s bogusly expects terminating \0 because + we've zero-terminated dlbuf above. */ + DEBUGP (("%.*s", ret, dlbuf)); } - DEBUGP (("done.\n")); + + DEBUGP (("] done.\n")); + return 1; } /* Persistent connections. Currently, we cache the most recently used @@ -816,6 +878,17 @@ static struct { /* Whether a ssl handshake has occoured on this connection. */ int ssl; + + /* Whether the connection was authorized. This is only done by + NTLM, which authorizes *connections* rather than individual + requests. (That practice is peculiar for HTTP, but it is a + useful optimization.) */ + int authorized; + +#ifdef ENABLE_NTLM + /* NTLM data of the current connection. */ + struct ntlmdata ntlm; +#endif } pconn; /* Mark the persistent connection as invalid and free the resources it @@ -866,6 +939,7 @@ register_persistent (const char *host, int port, int fd, int ssl) pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; + pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd)); } @@ -1028,9 +1102,9 @@ free_hstat (struct http_stat *hs) static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, - const char *)); + const char *, int *)); static char *basic_authentication_encode PARAMS ((const char *, const char *)); -static int known_authentication_scheme_p PARAMS ((const char *)); +static int known_authentication_scheme_p PARAMS ((const char *, const char *)); time_t http_atotm PARAMS ((const char *)); @@ -1039,6 +1113,14 @@ time_t http_atotm PARAMS ((const char *)); && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#define SET_USER_AGENT(req) \ + if (opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + else \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s", version_string), rel_value); + + /* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it @@ -1066,8 +1148,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) int sock = -1; int flags; - /* Whether authorization has been already tried. */ - int auth_tried_already; + /* Set to 1 when the authorization has failed permanently and should + not be tried again. */ + int auth_finished = 0; + + /* Whether NTLM authentication is used for this request. */ + int ntlm_seen = 0; /* Whether our connection to the remote host is through SSL. */ int using_ssl = 0; @@ -1081,8 +1167,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) is done. */ int keep_alive; - /* Whether keep-alive should be inhibited. */ - int inhibit_keep_alive = !opt.http_keep_alive || opt.ignore_length; + /* Whether keep-alive should be inhibited. + + RFC 2068 requests that 1.0 clients not send keep-alive requests + to proxies. This is because many 1.0 proxies do not interpret + the Connection header and transfer it to the remote server, + causing it to not close the connection and leave both the proxy + and the client hanging. */ + int inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ wgint post_data_size = 0; @@ -1094,29 +1187,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { /* Initialize the SSL context. After this has once been done, it becomes a no-op. */ - switch (ssl_init ()) + if (!ssl_init ()) { - case SSLERRCTXCREATE: - /* this is fatal */ - logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n")); - return SSLERRCTXCREATE; - case SSLERRCERTFILE: - /* try without certfile */ - logprintf (LOG_NOTQUIET, - _("Failed to load certificates from %s\n"), - opt.sslcertfile); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - case SSLERRCERTKEY: + scheme_disable (SCHEME_HTTPS); logprintf (LOG_NOTQUIET, - _("Failed to get certificate key from %s\n"), - opt.sslcertkey); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - default: - break; + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; } } #endif /* HAVE_SSL */ @@ -1126,10 +1202,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - auth_tried_already = 0; - /* Initialize certain elements of struct http_stat. */ - hs->len = 0L; + hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; @@ -1162,19 +1236,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) aprintf ("bytes=%s-", number_to_static_string (hs->restval)), rel_value); - if (opt.useragent) - request_set_header (req, "User-Agent", opt.useragent, rel_none); - else - request_set_header (req, "User-Agent", - aprintf ("Wget/%s", version_string), rel_value); + SET_USER_AGENT (req); request_set_header (req, "Accept", "*/*", rel_none); /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); - user = user ? user : opt.http_user; - passwd = passwd ? passwd : opt.http_passwd; + user = user ? user : (opt.http_user ? opt.http_user : opt.user); + passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); if (user && passwd) { @@ -1332,6 +1402,11 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), escnonprint (pconn.host), pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); + if (pconn.authorized) + /* If the connection is already authorized, the "Basic" + authorization added by code above is unnecessary and + only hurts us. */ + request_remove_header (req, "Authorization"); } } @@ -1341,14 +1416,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) look up conn->host in some cases. If that lookup failed, we don't need to bother with connect_to_host. */ if (host_lookup_failed) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } #ifdef HAVE_SSL if (proxy && u->scheme == SCHEME_HTTPS) @@ -1358,6 +1442,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) struct request *connreq = request_new (); request_set_method (connreq, "CONNECT", aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); if (proxyauth) { request_set_header (connreq, "Proxy-Authorization", @@ -1367,6 +1452,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) the regular request below. */ proxyauth = NULL; } + /* Examples in rfc2817 use the Host header in CONNECT + requests. I don't see how that gains anything, given + that the contents of Host would be exactly the same as + the contents of CONNECT. */ write_error = request_send (connreq, sock); request_free (connreq); @@ -1397,6 +1486,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) resp = resp_new (head); statcode = resp_status (resp, &message); resp_free (resp); + xfree (head); if (statcode != 200) { failed_tunnel: @@ -1433,7 +1523,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (opt.post_data) { DEBUGP (("[POST data: %s]\n", opt.post_data)); - write_error = fd_write (sock, opt.post_data, post_data_size, -1); + write_error = fd_write (sock, opt.post_data, post_data_size, -1.0); } else if (opt.post_file_name && post_data_size != 0) write_error = post_file (sock, opt.post_file_name, post_data_size); @@ -1524,9 +1614,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); - if (auth_tried_already || !(user && passwd)) + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + pconn.authorized = 0; + if (auth_finished || !(user && passwd)) { /* If we have tried it already, then there is not point retrying it. */ @@ -1534,13 +1630,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } else { - char *www_authenticate = resp_header_strdup (resp, - "WWW-Authenticate"); - /* If the authentication scheme is unknown or if it's the - "Basic" authentication (which we try by default), there's - no sense in retrying. */ + /* IIS sometimes sends two instances of WWW-Authenticate + header, one with the keyword "negotiate", and other with + useful data. Loop over all occurrences of this header + and use the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + www_authenticate = strdupdelim (wabeg, waend); + break; + } + /* If the authentication header is missing or recognized, or + if the authentication scheme is "Basic" (which we send by + default), there's no sense in retrying. */ if (!www_authenticate - || !known_authentication_scheme_p (www_authenticate) || BEGINS_WITH (www_authenticate, "Basic")) { xfree_null (www_authenticate); @@ -1549,14 +1658,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) else { char *pth; - auth_tried_already = 1; pth = url_full_path (u); request_set_header (req, "Authorization", create_authorization_line (www_authenticate, user, passwd, request_method (req), - pth), + pth, + &auth_finished), rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = 1; xfree (pth); xfree (www_authenticate); goto retry_with_auth; @@ -1565,6 +1676,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_free (req); return AUTHFAILED; } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = 1; + } request_free (req); hs->statcode = statcode; @@ -1574,6 +1691,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->error = xstrdup (_("(no description)")); else hs->error = xstrdup (message); + xfree (message); type = resp_header_strdup (resp, "Content-Type"); if (type) @@ -1591,6 +1709,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) /* Handle (possibly multiple instances of) the Set-Cookie header. */ { + char *pth = NULL; int scpos; const char *scbeg, *scend; /* The jar should have been created by now. */ @@ -1600,10 +1719,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) &scbeg, &scend)) != -1; ++scpos) { - char *set_cookie = strdupdelim (scbeg, scend); - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + if (pth == NULL) + { + /* u->path doesn't begin with /, which cookies.c expects. */ + pth = (char *) alloca (1 + strlen (u->path) + 1); + pth[0] = '/'; + strcpy (pth + 1, u->path); + } + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth, set_cookie); - xfree (set_cookie); } } @@ -1615,6 +1740,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) contrange = first_byte_pos; } resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -1637,8 +1763,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); if (keep_alive) - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } xfree_null (type); return NEWLOCATION; } @@ -1713,9 +1843,20 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { - logputs (LOG_VERBOSE, legible (contlen + contrange)); + logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); + if (contlen + contrange >= 1024) + logprintf (LOG_VERBOSE, " (%s)", + human_readable (contlen + contrange)); if (contrange) - logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen)); + { + if (contlen >= 1024) + logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), + with_thousand_seps (contlen), + human_readable (contlen)); + else + logprintf (LOG_VERBOSE, _(", %s remaining"), + with_thousand_seps (contlen)); + } } else logputs (LOG_VERBOSE, @@ -1733,7 +1874,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (!(*dt & RETROKF) || (*dt & HEAD_ONLY)) { /* In case the caller cares to look... */ - hs->len = 0L; + hs->len = 0; hs->res = 0; xfree_null (type); /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the @@ -1757,7 +1898,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) fp = fopen (*hs->local_file, "wb"); else { - fp = fopen_excl (*hs->local_file, 0); + fp = fopen_excl (*hs->local_file, 1); if (!fp && errno == EEXIST) { /* We cannot just invent a new name and use it (which is @@ -2072,14 +2213,12 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); locf = opt.output_document; } continue; - break; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLERRCTXCREATE: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; - break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -2088,7 +2227,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -2096,7 +2234,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case NEWLOCATION: /* Return the new location to the caller. */ if (!hstat.newloc) @@ -2111,13 +2248,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return NEWLOCATION; - break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); xfree_null (dummy); return RETROK; - break; case RETRFINISHED: /* Deal with you later. */ break; @@ -2360,7 +2495,6 @@ The sizes do not match (local %s) -- retrieving.\n"), } } /* not reached */ - break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC; @@ -2528,75 +2662,37 @@ http_atotm (const char *time_string) return -1; } -/* Authorization support: We support two authorization schemes: +/* Authorization support: We support three authorization schemes: * `Basic' scheme, consisting of base64-ing USER:PASSWORD string; * `Digest' scheme, added by Junio Hamano , consisting of answering to the server's challenge with the proper - MD5 digests. */ - -/* How many bytes it will take to store LEN bytes in base64. */ -#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3)) + MD5 digests. -/* Encode the string S of length LENGTH to base64 format and place it - to STORE. STORE will be 0-terminated, and must point to a writable - buffer of at least 1+BASE64_LENGTH(length) bytes. */ -static void -base64_encode (const char *s, char *store, int length) -{ - /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' - }; - int i; - unsigned char *p = (unsigned char *)store; - - /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) - { - *p++ = tbl[s[0] >> 2]; - *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; - *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; - *p++ = tbl[s[2] & 0x3f]; - s += 3; - } - /* Pad the result if necessary... */ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - /* ...and zero-terminate it. */ - *p = '\0'; -} + * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel + Stenberg for libcurl. Like digest, NTLM is based on a + challenge-response mechanism, but unlike digest, it is non-standard + (authenticates TCP connections rather than requests), undocumented + and Microsoft-specific. */ /* Create the authentication header contents for the `Basic' scheme. This is done by encoding the string `USER:PASS' in base64 and prepending `HEADER: Basic ' to it. */ + static char * basic_authentication_encode (const char *user, const char *passwd) { - char *t1, *t2, *res; + char *t1, *t2; int len1 = strlen (user) + 1 + strlen (passwd); - int len2 = BASE64_LENGTH (len1); t1 = (char *)alloca (len1 + 1); sprintf (t1, "%s:%s", user, passwd); - t2 = (char *)alloca (len2 + 1); - base64_encode (t1, t2, len1); - - res = (char *)xmalloc (6 + len2 + 1); - sprintf (res, "Basic %s", t2); + t2 = (char *)alloca (BASE64_LENGTH (len1) + 1); + base64_encode (t1, len1, t2); - return res; + return concat_strings ("Basic ", t2, (char *) 0); } #define SKIP_WS(x) do { \ @@ -2604,7 +2700,7 @@ basic_authentication_encode (const char *user, const char *passwd) ++(x); \ } while (0) -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. If the field is the one specified by ATTR_NAME ("realm", "opaque", and "nonce" are used by the current @@ -2790,23 +2886,35 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", } return res; } -#endif /* USE_DIGEST */ +#endif /* ENABLE_DIGEST */ +/* Computing the size of a string literal must take into account that + value returned by sizeof includes the terminating \0. */ +#define STRSIZE(literal) (sizeof (literal) - 1) -#define BEGINS_WITH(line, string_constant) \ - (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ - || !line[sizeof (string_constant) - 1])) +/* Whether chars in [b, e) begin with the literal string provided as + first argument and are followed by whitespace or terminating \0. + The comparison is case-insensitive. */ +#define STARTS(literal, b, e) \ + ((e) - (b) >= STRSIZE (literal) \ + && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ + && ((e) - (b) == STRSIZE (literal) \ + || ISSPACE (b[STRSIZE (literal)]))) static int -known_authentication_scheme_p (const char *au) +known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) { - return BEGINS_WITH (au, "Basic") - || BEGINS_WITH (au, "Digest") - || BEGINS_WITH (au, "NTLM"); + return STARTS ("Basic", hdrbeg, hdrend) +#ifdef ENABLE_DIGEST + || STARTS ("Digest", hdrbeg, hdrend) +#endif +#ifdef ENABLE_NTLM + || STARTS ("NTLM", hdrbeg, hdrend) +#endif + ; } -#undef BEGINS_WITH +#undef STARTS /* Create the HTTP authorization request header. When the `WWW-Authenticate' response header is seen, according to the @@ -2816,18 +2924,47 @@ known_authentication_scheme_p (const char *au) static char * create_authorization_line (const char *au, const char *user, const char *passwd, const char *method, - const char *path) + const char *path, int *finished) { - if (0 == strncasecmp (au, "Basic", 5)) - return basic_authentication_encode (user, passwd); -#ifdef USE_DIGEST - if (0 == strncasecmp (au, "Digest", 6)) - return digest_authentication_encode (au, user, passwd, method, path); -#endif /* USE_DIGEST */ - return NULL; + /* We are called only with known schemes, so we can dispatch on the + first letter. */ + switch (TOUPPER (*au)) + { + case 'B': /* Basic */ + *finished = 1; + return basic_authentication_encode (user, passwd); +#ifdef ENABLE_DIGEST + case 'D': /* Digest */ + *finished = 1; + return digest_authentication_encode (au, user, passwd, method, path); +#endif +#ifdef ENABLE_NTLM + case 'N': /* NTLM */ + if (!ntlm_input (&pconn.ntlm, au)) + { + *finished = 1; + return NULL; + } + return ntlm_output (&pconn.ntlm, user, passwd, finished); +#endif + default: + /* We shouldn't get here -- this function should be only called + with values approved by known_authentication_scheme_p. */ + abort (); + } } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); }