#include "netrc.h"
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
-#endif /* HAVE_SSL */
+#endif
+#ifdef ENABLE_NTLM
+# include "http-ntlm.h"
+#endif
#include "cookies.h"
-#ifdef USE_DIGEST
+#ifdef ENABLE_DIGEST
# include "gen-md5.h"
#endif
#include "convert.h"
\f
static int cookies_loaded_p;
-struct cookie_jar *wget_cookie_jar;
+static struct cookie_jar *wget_cookie_jar;
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
struct request_header *hdr;
int i;
if (!value)
- return;
+ {
+ /* A NULL value is a no-op; if freeing the name is requested,
+ free it now to avoid leaks. */
+ if (release_policy == rel_name || release_policy == rel_both)
+ xfree (name);
+ return;
+ }
for (i = 0; i < req->hcount; i++)
{
hdr = &req->headers[i];
xfree (req);
}
-/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
+/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
longer, read only that much; if the file is shorter, report an error. */
return NULL;
}
+/* The maximum size of a single HTTP response we care to read. This
+ is not meant to impose an arbitrary limit, but to protect the user
+ from Wget slurping up available memory upon encountering malicious
+ or buggy server output. Define it to 0 to remove the limit. */
+
+#define HTTP_RESPONSE_MAX_SIZE 65536
+
/* Read the HTTP response head from FD and return it. The error
conditions are the same as with fd_read_hunk.
static char *
read_http_response_head (int fd)
{
- return fd_read_hunk (fd, response_head_terminator, 512);
+ return fd_read_hunk (fd, response_head_terminator, 512,
+ HTTP_RESPONSE_MAX_SIZE);
}
struct response {
xfree (resp);
}
-/* Print [b, e) to the log, omitting the trailing CRLF. */
-
-static void
-print_server_response_1 (const char *prefix, const char *b, const char *e)
-{
- char *ln;
- if (b < e && e[-1] == '\n')
- --e;
- if (b < e && e[-1] == '\r')
- --e;
- BOUNDED_TO_ALLOCA (b, e, ln);
- logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint (ln));
-}
-
-/* Print the server response, line by line, omitting the trailing CR
- characters, prefixed with PREFIX. */
+/* Print the server response, line by line, omitting the trailing CRLF
+ from individual header lines, and prefixed with PREFIX.
+
+ Header line I spans from resp->headers[i] up to resp->headers[i + 1];
+ the loop stops at the first entry whose successor is NULL. Nothing
+ is printed when resp->headers is NULL. */
static void
print_server_response (const struct response *resp, const char *prefix)
if (!resp->headers)
return;
for (i = 0; resp->headers[i + 1]; i++)
- print_server_response_1 (prefix, resp->headers[i], resp->headers[i + 1]);
+ {
+ const char *b = resp->headers[i];
+ const char *e = resp->headers[i + 1];
+ /* Skip CRLF */
+ if (b < e && e[-1] == '\n')
+ --e;
+ if (b < e && e[-1] == '\r')
+ --e;
+ /* This is safe even on printfs with broken handling of "%.<n>s"
+ because resp->headers ends with \0. The precision consumed by
+ `*' must be an int; E - B is a ptrdiff_t, so convert it
+ explicitly instead of passing a wider type through varargs. */
+ logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
+ }
}
/* Parse the `Content-Range' header and extract the information it
}
/* Read the body of the response, but don't store it anywhere and don't
- display a progress gauge. This is useful for reading the error
- responses whose bodies don't need to be displayed or logged, but
- which need to be read anyway. */
+ display a progress gauge. This is useful for reading the bodies of
+ administrative responses to which we will soon issue another
+ request. The response is not useful to the user, but reading it
+ allows us to continue using the same connection to the server.
-static void
+ FD is the descriptor to read from and CONTLEN the number of body
+ bytes expected, or -1 if unknown.
+
+ Returns 0 if the body was not (fully) read -- because CONTLEN is
+ unknown, because it exceeds SKIP_THRESHOLD, or because reading
+ failed -- and non-zero once exactly CONTLEN bytes have been
+ consumed. In debug mode, the body is displayed for debugging
+ purposes. */
+
+static int
skip_short_body (int fd, wgint contlen)
{
- /* Skipping the body doesn't make sense if the content length is
- unknown because, in that case, persistent connections cannot be
- used. (#### This is not the case with HTTP/1.1 where they can
- still be used with the magic of the "chunked" transfer!) */
- if (contlen == -1)
- return;
- DEBUGP (("Skipping %s bytes of body data... ", number_to_static_string (contlen)));
+ enum {
+ SKIP_SIZE = 512, /* size of the download buffer */
+ SKIP_THRESHOLD = 4096 /* the largest size we read */
+ };
+ char dlbuf[SKIP_SIZE + 1];
+ dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
+
+ /* With unknown contlen we cannot tell where the body ends, so
+ there is nothing we can skip. (This will change with HTTP/1.1,
+ which supports "chunked" transfer.) Report failure rather than
+ asserting: callers reach this point with whatever the server
+ sent, and a 0 return makes them give up on the connection. */
+ if (contlen == -1)
+ return 0;
+
+ /* If the body is too large, it makes more sense to simply close the
+ connection than to try to read the body. */
+ if (contlen > SKIP_THRESHOLD)
+ return 0;
+
+ DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
while (contlen > 0)
{
- char dlbuf[512];
- int ret = fd_read (fd, dlbuf, MIN (contlen, sizeof (dlbuf)), -1);
+ int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
if (ret <= 0)
- return;
+ {
+ /* Don't normally report the error since this is an
+ optimization that should be invisible to the user. */
+ DEBUGP (("] aborting (%s).\n",
+ ret < 0 ? strerror (errno) : "EOF received"));
+ return 0;
+ }
contlen -= ret;
+ /* Safe even if %.*s bogusly expects terminating \0 because
+ we've zero-terminated dlbuf above. */
+ DEBUGP (("%.*s", ret, dlbuf));
}
- DEBUGP (("done.\n"));
+
+ DEBUGP (("] done.\n"));
+ return 1;
}
\f
/* Persistent connections. Currently, we cache the most recently used
/* Whether an SSL handshake has occurred on this connection. */
int ssl;
+
+#ifdef ENABLE_NTLM
+ /* NTLM data of the current connection. */
+ struct ntlmdata ntlm;
+#endif
} pconn;
/* Mark the persistent connection as invalid and free the resources it
is done. */
int keep_alive;
- /* Whether keep-alive should be inhibited. */
- int inhibit_keep_alive = !opt.http_keep_alive || opt.ignore_length;
+ /* Whether keep-alive should be inhibited.
+
+ RFC 2068 requests that 1.0 clients not send keep-alive requests
+ to proxies. This is because many 1.0 proxies do not interpret
+ the Connection header and transfer it to the remote server,
+ causing it to not close the connection and leave both the proxy
+ and the client hanging. */
+ int inhibit_keep_alive =
+ !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
/* Headers sent when using POST. */
wgint post_data_size = 0;
auth_tried_already = 0;
/* Initialize certain elements of struct http_stat. */
- hs->len = 0L;
+ hs->len = 0;
hs->contlen = -1;
hs->res = -1;
hs->newloc = NULL;
look up conn->host in some cases. If that lookup failed, we
don't need to bother with connect_to_host. */
if (host_lookup_failed)
- return HOSTERR;
+ {
+ request_free (req);
+ return HOSTERR;
+ }
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
- return HOSTERR;
+ {
+ request_free (req);
+ return HOSTERR;
+ }
else if (sock < 0)
- return (retryable_socket_connect_error (errno)
- ? CONERROR : CONIMPOSSIBLE);
+ {
+ request_free (req);
+ return (retryable_socket_connect_error (errno)
+ ? CONERROR : CONIMPOSSIBLE);
+ }
#ifdef HAVE_SSL
if (proxy && u->scheme == SCHEME_HTTPS)
resp = resp_new (head);
statcode = resp_status (resp, &message);
resp_free (resp);
+ xfree (head);
if (statcode != 200)
{
failed_tunnel:
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- skip_short_body (sock, contlen);
- CLOSE_FINISH (sock);
+ if (skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
if (auth_tried_already || !(user && passwd))
{
/* If we have tried it already, then there is no point
hs->error = xstrdup (_("(no description)"));
else
hs->error = xstrdup (message);
+ xfree (message);
type = resp_header_strdup (resp, "Content-Type");
if (type)
contrange = first_byte_pos;
}
resp_free (resp);
+ xfree (head);
/* 20x responses are counted among successful by default. */
if (H_20X (statcode))
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- if (keep_alive)
- skip_short_body (sock, contlen);
- CLOSE_FINISH (sock);
+ /* Release SOCK on every path before returning NEWLOCATION. On a
+ keep-alive connection whose body could not be skipped, the
+ connection is invalidated; otherwise (body skipped, or the
+ connection is not keep-alive at all) it is finished normally,
+ as was done unconditionally before. Closing only inside a
+ `keep_alive' branch would leave SOCK open on non-keep-alive
+ redirects. */
+ if (keep_alive && !skip_short_body (sock, contlen))
+ CLOSE_INVALIDATE (sock);
+ else
+ CLOSE_FINISH (sock);
xfree_null (type);
return NEWLOCATION;
}
logputs (LOG_VERBOSE, _("Length: "));
if (contlen != -1)
{
- logputs (LOG_VERBOSE, legible (contlen + contrange));
+ logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
+ if (contlen + contrange >= 1024)
+ logprintf (LOG_VERBOSE, " (%s)",
+ human_readable (contlen + contrange));
if (contrange)
- logprintf (LOG_VERBOSE, _(" (%s to go)"), legible (contlen));
+ {
+ if (contlen >= 1024)
+ logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
+ with_thousand_seps (contlen),
+ human_readable (contlen));
+ else
+ logprintf (LOG_VERBOSE, _(", %s remaining"),
+ with_thousand_seps (contlen));
+ }
}
else
logputs (LOG_VERBOSE,
if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
{
/* In case the caller cares to look... */
- hs->len = 0L;
+ hs->len = 0;
hs->res = 0;
xfree_null (type);
/* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
fp = fopen (*hs->local_file, "wb");
else
{
- fp = fopen_excl (*hs->local_file, 0);
+ fp = fopen_excl (*hs->local_file, 1);
if (!fp && errno == EEXIST)
{
/* We cannot just invent a new name and use it (which is
return -1;
}
\f
-/* Authorization support: We support two authorization schemes:
+/* Authorization support: We support three authorization schemes:
* `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
* `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
consisting of answering to the server's challenge with the proper
- MD5 digests. */
-
-/* How many bytes it will take to store LEN bytes in base64. */
-#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
-
-/* Encode the string S of length LENGTH to base64 format and place it
- to STORE. STORE will be 0-terminated, and must point to a writable
- buffer of at least 1+BASE64_LENGTH(length) bytes. */
-static void
-base64_encode (const char *s, char *store, int length)
-{
- /* Conversion table. */
- static char tbl[64] = {
- 'A','B','C','D','E','F','G','H',
- 'I','J','K','L','M','N','O','P',
- 'Q','R','S','T','U','V','W','X',
- 'Y','Z','a','b','c','d','e','f',
- 'g','h','i','j','k','l','m','n',
- 'o','p','q','r','s','t','u','v',
- 'w','x','y','z','0','1','2','3',
- '4','5','6','7','8','9','+','/'
- };
- int i;
- unsigned char *p = (unsigned char *)store;
+ MD5 digests.
- /* Transform the 3x8 bits to 4x6 bits, as required by base64. */
- for (i = 0; i < length; i += 3)
- {
- *p++ = tbl[s[0] >> 2];
- *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
- *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
- *p++ = tbl[s[2] & 0x3f];
- s += 3;
- }
- /* Pad the result if necessary... */
- if (i == length + 1)
- *(p - 1) = '=';
- else if (i == length + 2)
- *(p - 1) = *(p - 2) = '=';
- /* ...and zero-terminate it. */
- *p = '\0';
-}
+ * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
+ Stenberg for libcurl. Like digest, NTLM is based on a
+ challenge-response mechanism, but unlike digest, it is non-standard
+ (authenticates TCP connections rather than requests), undocumented
+ and Microsoft-specific. */
/* Create the authentication header contents for the `Basic' scheme.
This is done by encoding the string `USER:PASSWD' in base64 and
prepending `Basic ' to it; no header name is added here. The
result comes from concat_strings (presumably freshly allocated and
owned by the caller -- confirm against concat_strings); the two
intermediate buffers are alloca'd scratch space. */
+
static char *
basic_authentication_encode (const char *user, const char *passwd)
{
char *t1, *t2;
int len1 = strlen (user) + 1 + strlen (passwd); /* +1 for the ':' separator */
- int len2 = BASE64_LENGTH (len1);
t1 = (char *)alloca (len1 + 1); /* +1 for the terminating \0 */
sprintf (t1, "%s:%s", user, passwd);
- t2 = (char *)alloca (len2 + 1);
- base64_encode (t1, t2, len1);
+ t2 = (char *)alloca (BASE64_LENGTH (len1) + 1);
+ base64_encode (t1, len1, t2);
return concat_strings ("Basic ", t2, (char *) 0);
}
++(x); \
} while (0)
-#ifdef USE_DIGEST
+#ifdef ENABLE_DIGEST
/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
of a field in such a header. If the field is the one specified by
ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
}
return res;
}
-#endif /* USE_DIGEST */
+#endif /* ENABLE_DIGEST */
#define BEGINS_WITH(line, string_constant) \
known_authentication_scheme_p (const char *au)
{
return BEGINS_WITH (au, "Basic")
+#ifdef ENABLE_DIGEST
|| BEGINS_WITH (au, "Digest")
- || BEGINS_WITH (au, "NTLM");
+#endif
+#ifdef ENABLE_NTLM
+ || BEGINS_WITH (au, "NTLM")
+#endif
+ ;
}
#undef BEGINS_WITH
{
if (0 == strncasecmp (au, "Basic", 5))
return basic_authentication_encode (user, passwd);
-#ifdef USE_DIGEST
+#ifdef ENABLE_DIGEST
if (0 == strncasecmp (au, "Digest", 6))
return digest_authentication_encode (au, user, passwd, method, path);
-#endif /* USE_DIGEST */
+#endif
+#ifdef ENABLE_NTLM
+ if (0 == strncasecmp (au, "NTLM", 4))
+ {
+ int ok = ntlm_input (&pconn.ntlm, au);
+ if (!ok)
+ return NULL;
+ /* #### we shouldn't ignore the OK that ntlm_output returns. */
+ return ntlm_output (&pconn.ntlm, user, passwd, &ok);
+ }
+#endif
return NULL;
}
\f
+/* Pass the module's cookie jar and opt.cookies_output to
+ cookie_jar_save. Does nothing when no cookie jar exists. */
+void
+save_cookies (void)
+{
+ if (!wget_cookie_jar)
+ return;
+ cookie_jar_save (wget_cookie_jar, opt.cookies_output);
+}
+
+/* Release the resources held in this file's globals: the host string
+ of the persistent connection and the cookie jar, if any. Both
+ pointers are reset to NULL afterwards so that a later call to
+ save_cookies or a repeated cleanup sees "nothing there" instead of
+ touching freed memory. */
void
http_cleanup (void)
{
+ xfree_null (pconn.host);
+ pconn.host = NULL;
+ if (wget_cookie_jar)
+ {
+ cookie_jar_delete (wget_cookie_jar);
+ wget_cookie_jar = NULL;
+ }
}