X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=97a773a1e2d720abe45ac5041c3245e0477d49a7;hb=0967c21094580317353f0742c4836c5bbea34059;hp=e3889bb017a45904a74618780f4d5cce0a5a1f96;hpb=a8155e7bccfef9df2344c7aa575773efcb155ba9;p=wget diff --git a/src/http.c b/src/http.c index e3889bb0..97a773a1 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* HTTP support. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -64,10 +64,13 @@ extern int errno; #include "connect.h" #include "netrc.h" #ifdef HAVE_SSL -# include "gen_sslfunc.h" -#endif /* HAVE_SSL */ +# include "ssl.h" +#endif +#ifdef ENABLE_NTLM +# include "http-ntlm.h" +#endif #include "cookies.h" -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST # include "gen-md5.h" #endif #include "convert.h" @@ -84,7 +87,7 @@ extern int output_stream_regular; static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" @@ -199,7 +202,7 @@ release_header (struct request_header *hdr) /* Set the request named NAME to VALUE. Specifically, this means that a "NAME: VALUE\r\n" header line will be used in the request. If a header with the same name previously existed in the request, its - value will be replaced by this one. + value will be replaced by this one. A NULL value means do nothing. RELEASE_POLICY determines whether NAME and VALUE should be released (freed) with request_free. Allowed values are: @@ -219,7 +222,8 @@ release_header (struct request_header *hdr) request_set_header (req, "Referer", opt.referer, rel_none); // Value freshly allocated, free it when done. - request_set_header (req, "Range", aprintf ("bytes=%ld-", hs->restval), + request_set_header (req, "Range", + aprintf ("bytes=%s-", number_to_static_string (hs->restval)), rel_value); */ @@ -229,8 +233,16 @@ request_set_header (struct request *req, char *name, char *value, { struct request_header *hdr; int i; + if (!value) - return; + { + /* A NULL value is a no-op; if freeing the name is requested, + free it now to avoid leaks. */ + if (release_policy == rel_name || release_policy == rel_both) + xfree (name); + return; + } + for (i = 0; i < req->hcount; i++) { hdr = &req->headers[i]; @@ -247,11 +259,10 @@ request_set_header (struct request *req, char *name, char *value, /* Install new header. */ - if (req->hcount >= req->hcount) + if (req->hcount >= req->hcapacity) { req->hcapacity <<= 1; - req->headers = xrealloc (req->headers, - req->hcapacity * sizeof (struct request_header)); + req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr)); } hdr = &req->headers[req->hcount++]; hdr->name = name; @@ -278,6 +289,29 @@ request_set_user_header (struct request *req, const char *header) request_set_header (req, xstrdup (name), (char *) p, rel_name); } +/* Remove the header with specified name from REQ. Returns 1 if the + header was actually removed, 0 otherwise. */ + +static int +request_remove_header (struct request *req, char *name) +{ + int i; + for (i = 0; i < req->hcount; i++) + { + struct request_header *hdr = &req->headers[i]; + if (0 == strcasecmp (name, hdr->name)) + { + release_header (hdr); + /* Move the remaining headers by one. */ + if (i < req->hcount - 1) + memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr)); + --req->hcount; + return 1; + } + } + return 0; +} + #define APPEND(p, str) do { \ int A_len = strlen (str); \ memcpy (p, str, A_len); \ @@ -334,7 +368,7 @@ request_send (const struct request *req, int fd) /* Send the request to the server. */ - write_error = fd_write (fd, request_string, size - 1, -1); + write_error = fd_write (fd, request_string, size - 1, -1.0); if (write_error < 0) logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"), strerror (errno)); @@ -354,15 +388,15 @@ request_free (struct request *req) xfree (req); } -/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly +/* Send the contents of FILE_NAME to SOCK. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ static int -post_file (int sock, const char *file_name, long promised_size) +post_file (int sock, const char *file_name, wgint promised_size) { static char chunk[8192]; - long written = 0; + wgint written = 0; int write_error; FILE *fp; @@ -378,7 +412,7 @@ post_file (int sock, const char *file_name, long promised_size) if (length == 0) break; towrite = MIN (promised_size - written, length); - write_error = fd_write (sock, chunk, towrite, -1); + write_error = fd_write (sock, chunk, towrite, -1.0); if (write_error < 0) { fclose (fp); @@ -402,7 +436,7 @@ post_file (int sock, const char *file_name, long promised_size) } static const char * -head_terminator (const char *hunk, int oldlen, int peeklen) +response_head_terminator (const char *hunk, int oldlen, int peeklen) { const char *start, *end; @@ -431,6 +465,13 @@ head_terminator (const char *hunk, int oldlen, int peeklen) return NULL; } +/* The maximum size of a single HTTP response we care to read. This + is not meant to impose an arbitrary limit, but to protect the user + from Wget slurping up available memory upon encountering malicious + or buggy server output. Define it to 0 to remove the limit. */ + +#define HTTP_RESPONSE_MAX_SIZE 65536 + /* Read the HTTP request head from FD and return it. The error conditions are the same as with fd_read_hunk. @@ -440,9 +481,10 @@ head_terminator (const char *hunk, int oldlen, int peeklen) data can be treated as body. */ static char * -fd_read_http_head (int fd) +read_http_response_head (int fd) { - return fd_read_hunk (fd, head_terminator, 512); + return fd_read_hunk (fd, response_head_terminator, 512, + HTTP_RESPONSE_MAX_SIZE); } struct response { @@ -473,10 +515,10 @@ struct response { /* Create a new response object from the text of the HTTP response, available in HEAD. That text is automatically split into constituent header lines for fast retrieval using - response_header_*. */ + resp_header_*. */ static struct response * -response_new (const char *head) +resp_new (const char *head) { const char *hdr; int count, size; @@ -492,7 +534,7 @@ response_new (const char *head) return resp; } - /* Split HEAD into header lines, so that response_header_* functions + /* Split HEAD into header lines, so that resp_header_* functions don't need to do this over and over again. */ size = count = 0; @@ -518,32 +560,41 @@ response_new (const char *head) while (*hdr == ' ' || *hdr == '\t'); } DO_REALLOC (resp->headers, size, count + 1, const char *); - resp->headers[count++] = NULL; + resp->headers[count] = NULL; return resp; } -/* Locate the header named NAME in the request data. If found, set - *BEGPTR to its starting, and *ENDPTR to its ending position, and - return 1. Otherwise return 0. +/* Locate the header named NAME in the request data, starting with + position START. This allows the code to loop through the request + data, filtering for all requests of a given name. Returns the + found position, or -1 for failure. The code that uses this + function typically looks like this: + + for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++) + ... do something with header ... - This function is used as a building block for response_header_copy - and response_header_strdup. */ + If you only care about one header, use resp_header_get instead of + this function. */ static int -response_header_bounds (const struct response *resp, const char *name, - const char **begptr, const char **endptr) +resp_header_locate (const struct response *resp, const char *name, int start, + const char **begptr, const char **endptr) { int i; const char **headers = resp->headers; int name_len; if (!headers || !headers[1]) - return 0; + return -1; name_len = strlen (name); + if (start > 0) + i = start; + else + i = 1; - for (i = 1; headers[i + 1]; i++) + for (; headers[i + 1]; i++) { const char *b = headers[i]; const char *e = headers[i + 1]; @@ -558,31 +609,46 @@ response_header_bounds (const struct response *resp, const char *name, --e; *begptr = b; *endptr = e; - return 1; + return i; } } - return 0; + return -1; +} + +/* Find and retrieve the header named NAME in the request data. If + found, set *BEGPTR to its starting, and *ENDPTR to its ending + position, and return 1. Otherwise return 0. + + This function is used as a building block for resp_header_copy + and resp_header_strdup. */ + +static int +resp_header_get (const struct response *resp, const char *name, + const char **begptr, const char **endptr) +{ + int pos = resp_header_locate (resp, name, 0, begptr, endptr); + return pos != -1; } /* Copy the response header named NAME to buffer BUF, no longer than BUFSIZE (BUFSIZE includes the terminating 0). If the header exists, 1 is returned, otherwise 0. If there should be no limit on - the size of the header, use response_header_strdup instead. + the size of the header, use resp_header_strdup instead. If BUFSIZE is 0, no data is copied, but the boolean indication of whether the header is present is still returned. */ static int -response_header_copy (const struct response *resp, const char *name, - char *buf, int bufsize) +resp_header_copy (const struct response *resp, const char *name, + char *buf, int bufsize) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return 0; if (bufsize) { - int len = MIN (e - b, bufsize); - strncpy (buf, b, len); + int len = MIN (e - b, bufsize - 1); + memcpy (buf, b, len); buf[len] = '\0'; } return 1; @@ -592,10 +658,10 @@ response_header_copy (const struct response *resp, const char *name, malloc. If such a header does not exist in RESP, return NULL. */ static char * -response_header_strdup (const struct response *resp, const char *name) +resp_header_strdup (const struct response *resp, const char *name) { const char *b, *e; - if (!response_header_bounds (resp, name, &b, &e)) + if (!resp_header_get (resp, name, &b, &e)) return NULL; return strdupdelim (b, e); } @@ -609,7 +675,7 @@ response_header_strdup (const struct response *resp, const char *name) returned in *MESSAGE. */ static int -response_status (const struct response *resp, char **message) +resp_status (const struct response *resp, char **message) { int status; const char *p, *end; @@ -669,28 +735,14 @@ response_status (const struct response *resp, char **message) /* Release the resources used by RESP. */ static void -response_free (struct response *resp) +resp_free (struct response *resp) { xfree_null (resp->headers); xfree (resp); } -/* Print [b, e) to the log, omitting the trailing CRLF. */ - -static void -print_server_response_1 (const char *prefix, const char *b, const char *e) -{ - char *ln; - if (b < e && e[-1] == '\n') - --e; - if (b < e && e[-1] == '\r') - --e; - BOUNDED_TO_ALLOCA (b, e, ln); - logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln); -} - -/* Print the server response, line by line, omitting the trailing CR - characters, prefixed with PREFIX. */ +/* Print the server response, line by line, omitting the trailing CRLF + from individual header lines, and prefixed with PREFIX. */ static void print_server_response (const struct response *resp, const char *prefix) @@ -699,16 +751,27 @@ print_server_response (const struct response *resp, const char *prefix) if (!resp->headers) return; for (i = 0; resp->headers[i + 1]; i++) - print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]); + { + const char *b = resp->headers[i]; + const char *e = resp->headers[i + 1]; + /* Skip CRLF */ + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + /* This is safe even on printfs with broken handling of "%.s" + because resp->headers ends with \0. */ + logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b); + } } /* Parse the `Content-Range' header and extract the information it contains. Returns 1 if successful, -1 otherwise. */ static int -parse_content_range (const char *hdr, long *first_byte_ptr, - long *last_byte_ptr, long *entity_length_ptr) +parse_content_range (const char *hdr, wgint *first_byte_ptr, + wgint *last_byte_ptr, wgint *entity_length_ptr) { - long num; + wgint num; /* Ancient versions of Netscape proxy server, presumably predating rfc2068, sent out `Content-Range' without the "bytes" @@ -746,30 +809,54 @@ parse_content_range (const char *hdr, long *first_byte_ptr, } /* Read the body of the request, but don't store it anywhere and don't - display a progress gauge. This is useful for reading the error - responses whose bodies don't need to be displayed or logged, but - which need to be read anyway. */ + display a progress gauge. This is useful for reading the bodies of + administrative responses to which we will soon issue another + request. The response is not useful to the user, but reading it + allows us to continue using the same connection to the server. -static void -skip_short_body (int fd, long contlen) + If reading fails, 0 is returned, non-zero otherwise. In debug + mode, the body is displayed for debugging purposes. */ + +static int +skip_short_body (int fd, wgint contlen) { - /* Skipping the body doesn't make sense if the content length is - unknown because, in that case, persistent connections cannot be - used. (#### This is not the case with HTTP/1.1 where they can - still be used with the magic of the "chunked" transfer!) */ - if (contlen == -1) - return; - DEBUGP (("Skipping %ld bytes of body data... ", contlen)); + enum { + SKIP_SIZE = 512, /* size of the download buffer */ + SKIP_THRESHOLD = 4096 /* the largest size we read */ + }; + char dlbuf[SKIP_SIZE + 1]; + dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */ + + /* We shouldn't get here with unknown contlen. (This will change + with HTTP/1.1, which supports "chunked" transfer.) */ + assert (contlen != -1); + + /* If the body is too large, it makes more sense to simply close the + connection than to try to read the body. */ + if (contlen > SKIP_THRESHOLD) + return 0; + + DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen))); while (contlen > 0) { - char dlbuf[512]; - int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1); + int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0); if (ret <= 0) - return; + { + /* Don't normally report the error since this is an + optimization that should be invisible to the user. */ + DEBUGP (("] aborting (%s).\n", + ret < 0 ? strerror (errno) : "EOF received")); + return 0; + } contlen -= ret; + /* Safe even if %.*s bogusly expects terminating \0 because + we've zero-terminated dlbuf above. */ + DEBUGP (("%.*s", ret, dlbuf)); } - DEBUGP (("done.\n")); + + DEBUGP (("] done.\n")); + return 1; } /* Persistent connections. Currently, we cache the most recently used @@ -791,6 +878,17 @@ static struct { /* Whether a ssl handshake has occoured on this connection. */ int ssl; + + /* Whether the connection was authorized. This is only done by + NTLM, which authorizes *connections* rather than individual + requests. (That practice is peculiar for HTTP, but it is a + useful optimization.) */ + int authorized; + +#ifdef ENABLE_NTLM + /* NTLM data of the current connection. */ + struct ntlmdata ntlm; +#endif } pconn; /* Mark the persistent connection as invalid and free the resources it @@ -841,6 +939,7 @@ register_persistent (const char *host, int port, int fd, int ssl) pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; + pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd)); } @@ -974,15 +1073,15 @@ persistent_available_p (const char *host, int port, int ssl, struct http_stat { - long len; /* received length */ - long contlen; /* expected length */ - long restval; /* the restart value */ + wgint len; /* received length */ + wgint contlen; /* expected length */ + wgint restval; /* the restart value */ int res; /* the result of last read */ char *newloc; /* new location (redirection) */ char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ - long rd_size; /* amount of data read from socket */ + wgint rd_size; /* amount of data read from socket */ double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ char **local_file; /* local file. */ @@ -1003,9 +1102,9 @@ free_hstat (struct http_stat *hs) static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, - const char *)); + const char *, int *)); static char *basic_authentication_encode PARAMS ((const char *, const char *)); -static int known_authentication_scheme_p PARAMS ((const char *)); +static int known_authentication_scheme_p PARAMS ((const char *, const char *)); time_t http_atotm PARAMS ((const char *)); @@ -1014,6 +1113,14 @@ time_t http_atotm PARAMS ((const char *)); && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1])) +#define SET_USER_AGENT(req) \ + if (opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + else \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s", version_string), rel_value); + + /* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it @@ -1034,15 +1141,19 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) char *proxyauth; int statcode; int write_error; - long contlen, contrange; + wgint contlen, contrange; struct url *conn; FILE *fp; int sock = -1; int flags; - /* Whether authorization has been already tried. */ - int auth_tried_already = 0; + /* Set to 1 when the authorization has failed permanently and should + not be tried again. */ + int auth_finished = 0; + + /* Whether NTLM authentication is used for this request. */ + int ntlm_seen = 0; /* Whether our connection to the remote host is through SSL. */ int using_ssl = 0; @@ -1056,11 +1167,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) is done. */ int keep_alive; - /* Whether keep-alive should be inhibited. */ - int inhibit_keep_alive = !opt.http_keep_alive; + /* Whether keep-alive should be inhibited. + + RFC 2068 requests that 1.0 clients not send keep-alive requests + to proxies. This is because many 1.0 proxies do not interpret + the Connection header and transfer it to the remote server, + causing it to not close the connection and leave both the proxy + and the client hanging. */ + int inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ - long post_data_size = 0; + wgint post_data_size = 0; int host_lookup_failed = 0; @@ -1069,29 +1187,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { /* Initialize the SSL context. After this has once been done, it becomes a no-op. */ - switch (ssl_init ()) + if (!ssl_init ()) { - case SSLERRCTXCREATE: - /* this is fatal */ - logprintf (LOG_NOTQUIET, _("Failed to set up an SSL context\n")); - return SSLERRCTXCREATE; - case SSLERRCERTFILE: - /* try without certfile */ - logprintf (LOG_NOTQUIET, - _("Failed to load certificates from %s\n"), - opt.sslcertfile); + scheme_disable (SCHEME_HTTPS); logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - case SSLERRCERTKEY: - logprintf (LOG_NOTQUIET, - _("Failed to get certificate key from %s\n"), - opt.sslcertkey); - logprintf (LOG_NOTQUIET, - _("Trying without the specified certificate\n")); - break; - default: - break; + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; } } #endif /* HAVE_SSL */ @@ -1101,10 +1202,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - auth_tried_already = 0; - /* Initialize certain elements of struct http_stat. */ - hs->len = 0L; + hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; @@ -1113,35 +1212,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) conn = u; - proxyauth = NULL; - if (proxy) - { - char *proxy_user, *proxy_passwd; - /* For normal username and password, URL components override - command-line/wgetrc parameters. With proxy - authentication, it's the reverse, because proxy URLs are - normally the "permanent" ones, so command-line args - should take precedence. */ - if (opt.proxy_user && opt.proxy_passwd) - { - proxy_user = opt.proxy_user; - proxy_passwd = opt.proxy_passwd; - } - else - { - proxy_user = proxy->user; - proxy_passwd = proxy->passwd; - } - /* #### This does not appear right. Can't the proxy request, - say, `Digest' authentication? */ - if (proxy_user && proxy_passwd) - proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); - - /* If we're using a proxy, we will be connecting to the proxy - server. */ - conn = proxy; - } - /* Prepare the request to send. */ req = request_new (); @@ -1163,20 +1233,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_set_header (req, "Pragma", "no-cache", rel_none); if (hs->restval) request_set_header (req, "Range", - aprintf ("bytes=%ld-", hs->restval), rel_value); - if (opt.useragent) - request_set_header (req, "User-Agent", opt.useragent, rel_none); - else - request_set_header (req, "User-Agent", - aprintf ("Wget/%s", version_string), rel_value); + aprintf ("bytes=%s-", + number_to_static_string (hs->restval)), + rel_value); + SET_USER_AGENT (req); request_set_header (req, "Accept", "*/*", rel_none); /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); - user = user ? user : opt.http_user; - passwd = passwd ? passwd : opt.http_passwd; + user = user ? user : (opt.http_user ? opt.http_user : opt.user); + passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); if (user && passwd) { @@ -1205,6 +1273,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) rel_value); } + proxyauth = NULL; + if (proxy) + { + char *proxy_user, *proxy_passwd; + /* For normal username and password, URL components override + command-line/wgetrc parameters. With proxy + authentication, it's the reverse, because proxy URLs are + normally the "permanent" ones, so command-line args + should take precedence. */ + if (opt.proxy_user && opt.proxy_passwd) + { + proxy_user = opt.proxy_user; + proxy_passwd = opt.proxy_passwd; + } + else + { + proxy_user = proxy->user; + proxy_passwd = proxy->passwd; + } + /* #### This does not appear right. Can't the proxy request, + say, `Digest' authentication? */ + if (proxy_user && proxy_passwd) + proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); + + /* If we're using a proxy, we will be connecting to the proxy + server. */ + conn = proxy; + + /* Proxy authorization over SSL is handled below. */ +#ifdef HAVE_SSL + if (u->scheme != SCHEME_HTTPS) +#endif + request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); + } + { /* Whether we need to print the host header with braces around host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the @@ -1253,7 +1356,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } } request_set_header (req, "Content-Length", - aprintf ("Content-Length: %ld", post_data_size), + xstrdup (number_to_static_string (post_data_size)), rel_value); } @@ -1297,8 +1400,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) sock = pconn.socket; using_ssl = pconn.ssl; logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - pconn.host, pconn.port); + escnonprint (pconn.host), pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); + if (pconn.authorized) + /* If the connection is already authorized, the "Basic" + authorization added by code above is unnecessary and + only hurts us. */ + request_remove_header (req, "Authorization"); } } @@ -1308,14 +1416,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) look up conn->host in some cases. If that lookup failed, we don't need to bother with connect_to_host. */ if (host_lookup_failed) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } #ifdef HAVE_SSL if (proxy && u->scheme == SCHEME_HTTPS) @@ -1325,6 +1442,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) struct request *connreq = request_new (); request_set_method (connreq, "CONNECT", aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); if (proxyauth) { request_set_header (connreq, "Proxy-Authorization", @@ -1334,6 +1452,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) the regular request below. */ proxyauth = NULL; } + /* Examples in rfc2817 use the Host header in CONNECT + requests. I don't see how that gains anything, given + that the contents of Host would be exactly the same as + the contents of CONNECT. */ write_error = request_send (connreq, sock); request_free (connreq); @@ -1345,7 +1467,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) return WRITEFAILED; } - head = fd_read_http_head (sock); + head = read_http_response_head (sock); if (!head) { logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"), @@ -1361,18 +1483,19 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } DEBUGP (("proxy responded with: [%s]\n", head)); - resp = response_new (head); - statcode = response_status (resp, &message); - response_free (resp); + resp = resp_new (head); + statcode = resp_status (resp, &message); + resp_free (resp); + xfree (head); if (statcode != 200) { failed_tunnel: logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), - message ? message : "?"); + message ? escnonprint (message) : "?"); xfree_null (message); return CONSSLERR; } - xfree (message); + xfree_null (message); /* SOCK is now *really* connected to u->host, so update CONN to reflect this. That way register_persistent will @@ -1400,7 +1523,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (opt.post_data) { DEBUGP (("[POST data: %s]\n", opt.post_data)); - write_error = fd_write (sock, opt.post_data, post_data_size, -1); + write_error = fd_write (sock, opt.post_data, post_data_size, -1.0); } else if (opt.post_file_name && post_data_size != 0) write_error = post_file (sock, opt.post_file_name, post_data_size); @@ -1418,11 +1541,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) proxy ? "Proxy" : "HTTP"); contlen = -1; contrange = 0; - type = NULL; - statcode = -1; *dt &= ~RETROKF; - head = fd_read_http_head (sock); + head = read_http_response_head (sock); if (!head) { if (errno == 0) @@ -1443,29 +1564,43 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } DEBUGP (("\n---response begin---\n%s---response end---\n", head)); - resp = response_new (head); + resp = resp_new (head); /* Check for status line. */ message = NULL; - statcode = response_status (resp, &message); + statcode = resp_status (resp, &message); if (!opt.server_response) - logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : ""); + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, + message ? escnonprint (message) : ""); else { logprintf (LOG_VERBOSE, "\n"); print_server_response (resp, " "); } - if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) - contlen = strtol (hdrval, NULL, 10); + if (!opt.ignore_length + && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) + { + wgint parsed; + errno = 0; + parsed = str_to_wgint (hdrval, NULL, 10); + if (parsed == WGINT_MAX && errno == ERANGE) + /* Out of range. + #### If Content-Length is out of range, it most likely + means that the file is larger than 2G and that we're + compiled without LFS. In that case we should probably + refuse to even attempt to download the file. */ + contlen = -1; + else + contlen = parsed; + } /* Check for keep-alive related responses. */ if (!inhibit_keep_alive && contlen != -1) { - if (response_header_copy (resp, "Keep-Alive", NULL, 0)) + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) keep_alive = 1; - else if (response_header_copy (resp, "Connection", hdrval, - sizeof (hdrval))) + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) { if (0 == strcasecmp (hdrval, "Keep-Alive")) keep_alive = 1; @@ -1479,9 +1614,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (statcode == HTTP_STATUS_UNAUTHORIZED) { /* Authorization is required. */ - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); - if (auth_tried_already || !(user && passwd)) + if (keep_alive) + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } + pconn.authorized = 0; + if (auth_finished || !(user && passwd)) { /* If we have tried it already, then there is not point retrying it. */ @@ -1489,13 +1630,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) } else { - char *www_authenticate = response_header_strdup (resp, - "WWW-Authenticate"); - /* If the authentication scheme is unknown or if it's the - "Basic" authentication (which we try by default), there's - no sense in retrying. */ + /* IIS sometimes sends two instances of WWW-Authenticate + header, one with the keyword "negotiate", and other with + useful data. Loop over all occurrences of this header + and use the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + www_authenticate = strdupdelim (wabeg, waend); + break; + } + /* If the authentication header is missing or recognized, or + if the authentication scheme is "Basic" (which we send by + default), there's no sense in retrying. */ if (!www_authenticate - || !known_authentication_scheme_p (www_authenticate) || BEGINS_WITH (www_authenticate, "Basic")) { xfree_null (www_authenticate); @@ -1504,14 +1658,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) else { char *pth; - auth_tried_already = 1; pth = url_full_path (u); request_set_header (req, "Authorization", create_authorization_line (www_authenticate, user, passwd, request_method (req), - pth), + pth, + &auth_finished), rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = 1; xfree (pth); xfree (www_authenticate); goto retry_with_auth; @@ -1520,6 +1676,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) request_free (req); return AUTHFAILED; } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ + if (ntlm_seen) + pconn.authorized = 1; + } request_free (req); hs->statcode = statcode; @@ -1529,8 +1691,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->error = xstrdup (_("(no description)")); else hs->error = xstrdup (message); + xfree (message); - type = response_header_strdup (resp, "Content-Type"); + type = resp_header_strdup (resp, "Content-Type"); if (type) { char *tmp = strchr (type, ';'); @@ -1541,27 +1704,43 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) *tmp = '\0'; } } - hs->newloc = response_header_strdup (resp, "Location"); - hs->remote_time = response_header_strdup (resp, "Last-Modified"); + hs->newloc = resp_header_strdup (resp, "Location"); + hs->remote_time = resp_header_strdup (resp, "Last-Modified"); + + /* Handle (possibly multiple instances of) the Set-Cookie header. */ { - char *set_cookie = response_header_strdup (resp, "Set-Cookie"); - if (set_cookie) + char *pth = NULL; + int scpos; + const char *scbeg, *scend; + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + for (scpos = 0; + (scpos = resp_header_locate (resp, "Set-Cookie", scpos, + &scbeg, &scend)) != -1; + ++scpos) { - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, + char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie); + if (pth == NULL) + { + /* u->path doesn't begin with /, which cookies.c expects. */ + pth = (char *) alloca (1 + strlen (u->path) + 1); + pth[0] = '/'; + strcpy (pth + 1, u->path); + } + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, pth, set_cookie); - xfree (set_cookie); } } - if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) + + if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) { - long first_byte_pos, last_byte_pos, entity_length; + wgint first_byte_pos, last_byte_pos, entity_length; if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, &entity_length)) contrange = first_byte_pos; } - response_free (resp); + resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -1581,11 +1760,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { logprintf (LOG_VERBOSE, _("Location: %s%s\n"), - hs->newloc ? hs->newloc : _("unspecified"), + hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); if (keep_alive) - skip_short_body (sock, contlen); - CLOSE_FINISH (sock); + { + if (skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + } xfree_null (type); return NEWLOCATION; } @@ -1660,15 +1843,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { - logputs (LOG_VERBOSE, legible (contlen + contrange)); + logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); + if (contlen + contrange >= 1024) + logprintf (LOG_VERBOSE, " (%s)", + human_readable (contlen + contrange)); if (contrange) - logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen)); + { + if (contlen >= 1024) + logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), + with_thousand_seps (contlen), + human_readable (contlen)); + else + logprintf (LOG_VERBOSE, _(", %s remaining"), + with_thousand_seps (contlen)); + } } else logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", type); + logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } @@ -1680,7 +1874,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (!(*dt & RETROKF) || (*dt & HEAD_ONLY)) { /* In case the caller cares to look... */ - hs->len = 0L; + hs->len = 0; hs->res = 0; xfree_null (type); /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the @@ -1697,7 +1891,27 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) mkalldirs (*hs->local_file); if (opt.backups) rotate_backups (*hs->local_file); - fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb"); + if (hs->restval) + fp = fopen (*hs->local_file, "ab"); + else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct + || opt.output_document) + fp = fopen (*hs->local_file, "wb"); + else + { + fp = fopen_excl (*hs->local_file, 1); + if (!fp && errno == EEXIST) + { + /* We cannot just invent a new name and use it (which is + what functions like unique_create typically do) + because we told the user we'd use this name. + Instead, return and retry the download. */ + logprintf (LOG_NOTQUIET, + _("%s has sprung into existence.\n"), + *hs->local_file); + CLOSE_INVALIDATE (sock); + return FOPEN_EXCL_ERR; + } + } if (!fp) { logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno)); @@ -1718,6 +1932,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) if (keep_alive) flags |= rb_read_exactly; if (hs->restval > 0 && contrange == 0) + /* If the server ignored our range request, instruct fd_read_body + to skip the first RESTVAL bytes of body. */ flags |= rb_skip_startpos; hs->len = hs->restval; hs->rd_size = 0; @@ -1760,10 +1976,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, char *tms, *locf, *tmrate; uerr_t err; time_t tml = -1, tmr = -1; /* local and remote time-stamps */ - long local_size = 0; /* the size of the local file */ + wgint local_size = 0; /* the size of the local file */ size_t filename_len; struct http_stat hstat; /* HTTP status */ - struct stat st; + struct_stat st; char *dummy = NULL; /* This used to be done in main(), but it's a better idea to do it @@ -1793,7 +2009,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, /* Determine the local filename. */ if (local_file && *local_file) hstat.local_file = local_file; - else if (local_file) + else if (local_file && !opt.output_document) { *local_file = url_file_name (u); hstat.local_file = local_file; @@ -1802,6 +2018,9 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, { dummy = url_file_name (u); hstat.local_file = &dummy; + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); } if (!opt.output_document) @@ -1853,7 +2072,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); point I profiled Wget, and found that a measurable and non-negligible amount of time was lost calling sprintf() in url.c. Replacing sprintf with inline calls to - strcpy() and long_to_string() made a difference. + strcpy() and number_to_string() made a difference. --hniksic */ memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len); memcpy (filename_plus_orig_suffix + filename_len, @@ -1903,14 +2122,14 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); if (opt.verbose) { char *hurl = url_string (u, 1); - char tmp[15]; + char tmp[256]; strcpy (tmp, " "); if (count > 1) sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n", tms, hurl, tmp, locf); #ifdef WINDOWS - ws_changetitle (hurl, 1); + ws_changetitle (hurl); #endif xfree (hurl); } @@ -1954,8 +2173,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); *hstat.local_file to tack on ".html". */ if (!opt.output_document) locf = *hstat.local_file; - else - locf = opt.output_document; /* Time? */ tms = time_str (NULL); @@ -1966,21 +2183,42 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: case CONERROR: case READERR: case WRITEFAILED: - case RANGEERR: + case RANGEERR: case FOPEN_EXCL_ERR: /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ free_hstat (&hstat); printwhat (count, opt.ntry); + if (err == FOPEN_EXCL_ERR) + { + /* Re-determine the file name. */ + if (local_file && *local_file) + { + xfree (*local_file); + *local_file = url_file_name (u); + hstat.local_file = local_file; + } + else + { + xfree (dummy); + dummy = url_file_name (u); + hstat.local_file = &dummy; + } + /* be honest about where we will save the file */ + if (local_file && opt.output_document) + *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); + if (!opt.output_document) + locf = *hstat.local_file; + else + locf = opt.output_document; + } continue; - break; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLERRCTXCREATE: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; - break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1989,7 +2227,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); @@ -1997,7 +2234,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return err; - break; case NEWLOCATION: /* Return the new location to the caller. */ if (!hstat.newloc) @@ -2012,13 +2248,11 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); free_hstat (&hstat); xfree_null (dummy); return NEWLOCATION; - break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); xfree_null (dummy); return RETROK; - break; case RETRFINISHED: /* Deal with you later. */ break; @@ -2036,7 +2270,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, hstat.error); + tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); xfree_null (dummy); @@ -2088,7 +2322,8 @@ Server file no newer than local file `%s' -- not retrieving.\n\n"), } else if (tml >= tmr) logprintf (LOG_VERBOSE, _("\ -The sizes do not match (local %ld) -- retrieving.\n"), local_size); +The sizes do not match (local %s) -- retrieving.\n"), + number_to_static_string (local_size)); else logputs (LOG_VERBOSE, _("Remote file is newer, retrieving.\n")); @@ -2120,7 +2355,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (opt.spider) { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); + logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, + escnonprint (hstat.error)); xfree_null (dummy); return RETROK; } @@ -2132,11 +2368,16 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld]\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -2159,11 +2400,13 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (*dt & RETROKF) { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld]\n\n"), - tms, tmrate, locf, hstat.len); + _("%s (%s) - `%s' saved [%s]\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, locf, count); + "%s URL:%s [%s] -> \"%s\" [%d]\n", + tms, u->url, number_to_static_string (hstat.len), + locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -2182,8 +2425,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); connection too soon */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld. "), - tms, tmrate, hstat.len); + _("%s (%s) - Connection closed at byte %s. "), + tms, tmrate, number_to_static_string (hstat.len)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -2191,11 +2434,16 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else if (!opt.kill_longer) /* meaning we got more than expected */ { logprintf (LOG_VERBOSE, - _("%s (%s) - `%s' saved [%ld/%ld])\n\n"), - tms, tmrate, locf, hstat.len, hstat.contlen); + _("%s (%s) - `%s' saved [%s/%s])\n\n"), + tms, tmrate, locf, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, - "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", - tms, u->url, hstat.len, hstat.contlen, locf, count); + "%s URL:%s [%s/%s] -> \"%s\" [%d]\n", + tms, u->url, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), + locf, count); ++opt.numurls; total_downloaded_bytes += hstat.len; @@ -2212,8 +2460,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* the same, but not accepted */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Connection closed at byte %ld/%ld. "), - tms, tmrate, hstat.len, hstat.contlen); + _("%s (%s) - Connection closed at byte %s/%s. "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -2224,8 +2474,9 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); if (hstat.contlen == -1) { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld (%s)."), - tms, tmrate, hstat.len, strerror (errno)); + _("%s (%s) - Read error at byte %s (%s)."), + tms, tmrate, number_to_static_string (hstat.len), + strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; @@ -2233,8 +2484,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); else /* hstat.res == -1 and contlen is given */ { logprintf (LOG_VERBOSE, - _("%s (%s) - Read error at byte %ld/%ld (%s). "), - tms, tmrate, hstat.len, hstat.contlen, + _("%s (%s) - Read error at byte %s/%s (%s). "), + tms, tmrate, + number_to_static_string (hstat.len), + number_to_static_string (hstat.contlen), strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); @@ -2242,7 +2495,6 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size); } } /* not reached */ - break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC; @@ -2410,75 +2662,37 @@ http_atotm (const char *time_string) return -1; } -/* Authorization support: We support two authorization schemes: +/* Authorization support: We support three authorization schemes: * `Basic' scheme, consisting of base64-ing USER:PASSWORD string; * `Digest' scheme, added by Junio Hamano , consisting of answering to the server's challenge with the proper - MD5 digests. */ - -/* How many bytes it will take to store LEN bytes in base64. */ -#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3)) - -/* Encode the string S of length LENGTH to base64 format and place it - to STORE. STORE will be 0-terminated, and must point to a writable - buffer of at least 1+BASE64_LENGTH(length) bytes. */ -static void -base64_encode (const char *s, char *store, int length) -{ - /* Conversion table. */ - static char tbl[64] = { - 'A','B','C','D','E','F','G','H', - 'I','J','K','L','M','N','O','P', - 'Q','R','S','T','U','V','W','X', - 'Y','Z','a','b','c','d','e','f', - 'g','h','i','j','k','l','m','n', - 'o','p','q','r','s','t','u','v', - 'w','x','y','z','0','1','2','3', - '4','5','6','7','8','9','+','/' - }; - int i; - unsigned char *p = (unsigned char *)store; + MD5 digests. - /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ - for (i = 0; i < length; i += 3) - { - *p++ = tbl[s[0] >> 2]; - *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; - *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; - *p++ = tbl[s[2] & 0x3f]; - s += 3; - } - /* Pad the result if necessary... */ - if (i == length + 1) - *(p - 1) = '='; - else if (i == length + 2) - *(p - 1) = *(p - 2) = '='; - /* ...and zero-terminate it. */ - *p = '\0'; -} + * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel + Stenberg for libcurl. Like digest, NTLM is based on a + challenge-response mechanism, but unlike digest, it is non-standard + (authenticates TCP connections rather than requests), undocumented + and Microsoft-specific. */ /* Create the authentication header contents for the `Basic' scheme. This is done by encoding the string `USER:PASS' in base64 and prepending `HEADER: Basic ' to it. */ + static char * basic_authentication_encode (const char *user, const char *passwd) { - char *t1, *t2, *res; + char *t1, *t2; int len1 = strlen (user) + 1 + strlen (passwd); - int len2 = BASE64_LENGTH (len1); t1 = (char *)alloca (len1 + 1); sprintf (t1, "%s:%s", user, passwd); - t2 = (char *)alloca (len2 + 1); - base64_encode (t1, t2, len1); + t2 = (char *)alloca (BASE64_LENGTH (len1) + 1); + base64_encode (t1, len1, t2); - res = (char *)xmalloc (6 + len2 + 1); - sprintf (res, "Basic %s", t2); - - return res; + return concat_strings ("Basic ", t2, (char *) 0); } #define SKIP_WS(x) do { \ @@ -2486,7 +2700,7 @@ basic_authentication_encode (const char *user, const char *passwd) ++(x); \ } while (0) -#ifdef USE_DIGEST +#ifdef ENABLE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. If the field is the one specified by ATTR_NAME ("realm", "opaque", and "nonce" are used by the current @@ -2496,9 +2710,8 @@ basic_authentication_encode (const char *user, const char *passwd) static int extract_header_attr (const char *au, const char *attr_name, char **ret) { - const char *cp, *ep; - - ep = cp = au; + const char *ep; + const char *cp = au; if (strncmp (cp, attr_name, strlen (attr_name)) == 0) { @@ -2673,23 +2886,35 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"", } return res; } -#endif /* USE_DIGEST */ +#endif /* ENABLE_DIGEST */ +/* Computing the size of a string literal must take into account that + value returned by sizeof includes the terminating \0. */ +#define STRSIZE(literal) (sizeof (literal) - 1) -#define BEGINS_WITH(line, string_constant) \ - (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ - && (ISSPACE (line[sizeof (string_constant) - 1]) \ - || !line[sizeof (string_constant) - 1])) +/* Whether chars in [b, e) begin with the literal string provided as + first argument and are followed by whitespace or terminating \0. + The comparison is case-insensitive. */ +#define STARTS(literal, b, e) \ + ((e) - (b) >= STRSIZE (literal) \ + && 0 == strncasecmp (b, literal, STRSIZE (literal)) \ + && ((e) - (b) == STRSIZE (literal) \ + || ISSPACE (b[STRSIZE (literal)]))) static int -known_authentication_scheme_p (const char *au) +known_authentication_scheme_p (const char *hdrbeg, const char *hdrend) { - return BEGINS_WITH (au, "Basic") - || BEGINS_WITH (au, "Digest") - || BEGINS_WITH (au, "NTLM"); + return STARTS ("Basic", hdrbeg, hdrend) +#ifdef ENABLE_DIGEST + || STARTS ("Digest", hdrbeg, hdrend) +#endif +#ifdef ENABLE_NTLM + || STARTS ("NTLM", hdrbeg, hdrend) +#endif + ; } -#undef BEGINS_WITH +#undef STARTS /* Create the HTTP authorization request header. When the `WWW-Authenticate' response header is seen, according to the @@ -2699,18 +2924,47 @@ known_authentication_scheme_p (const char *au) static char * create_authorization_line (const char *au, const char *user, const char *passwd, const char *method, - const char *path) + const char *path, int *finished) { - if (0 == strncasecmp (au, "Basic", 5)) - return basic_authentication_encode (user, passwd); -#ifdef USE_DIGEST - if (0 == strncasecmp (au, "Digest", 6)) - return digest_authentication_encode (au, user, passwd, method, path); -#endif /* USE_DIGEST */ - return NULL; + /* We are called only with known schemes, so we can dispatch on the + first letter. */ + switch (TOUPPER (*au)) + { + case 'B': /* Basic */ + *finished = 1; + return basic_authentication_encode (user, passwd); +#ifdef ENABLE_DIGEST + case 'D': /* Digest */ + *finished = 1; + return digest_authentication_encode (au, user, passwd, method, path); +#endif +#ifdef ENABLE_NTLM + case 'N': /* NTLM */ + if (!ntlm_input (&pconn.ntlm, au)) + { + *finished = 1; + return NULL; + } + return ntlm_output (&pconn.ntlm, user, passwd, finished); +#endif + default: + /* We shouldn't get here -- this function should be only called + with values approved by known_authentication_scheme_p. */ + abort (); + } } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); }