/* HTTP support.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of GNU Wget.
# include "http-ntlm.h"
#endif
#include "cookies.h"
-#ifdef ENABLE_DIGEST
-# include "gen-md5.h"
-#endif
+#include "md5.h"
#include "convert.h"
#include "spider.h"
#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
+#define H_10X(x) (((x) >= 100) && ((x) < 200))
#define H_20X(x) (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
APPEND (p, req->method); *p++ = ' ';
APPEND (p, req->arg); *p++ = ' ';
- memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
+ memcpy (p, "HTTP/1.1\r\n", 10); p += 10;
for (i = 0; i < req->hcount; i++)
{
if (opt.auth_without_challenge)
{
- DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+ DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
do_challenge = true;
}
else if (basic_authed_hosts
&& hash_table_contains(basic_authed_hosts, hostname))
{
- DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+ DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
do_challenge = true;
}
else
{
- DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ DEBUGP (("Host %s has not issued a general basic challenge.\n",
quote (hostname)));
}
if (do_challenge)
if (!hash_table_contains(basic_authed_hosts, hostname))
{
hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
- DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
+ DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
}
}
mode, the body is displayed for debugging purposes. */
static bool
-skip_short_body (int fd, wgint contlen)
+skip_short_body (int fd, wgint contlen, bool chunked)
{
enum {
SKIP_SIZE = 512, /* size of the download buffer */
SKIP_THRESHOLD = 4096 /* the largest size we read */
};
+ wgint remaining_chunk_size = 0;
char dlbuf[SKIP_SIZE + 1];
dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
- /* We shouldn't get here with unknown contlen. (This will change
- with HTTP/1.1, which supports "chunked" transfer.) */
- assert (contlen != -1);
+ assert (contlen != -1 || contlen);
/* If the body is too large, it makes more sense to simply close the
connection than to try to read the body. */
if (contlen > SKIP_THRESHOLD)
return false;
- DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
-
- while (contlen > 0)
+ while (contlen > 0 || chunked)
{
- int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
+ int ret;
+ if (chunked)
+ {
+ if (remaining_chunk_size == 0)
+ {
+ char *line = fd_read_line (fd);
+ char *endl;
+ if (line == NULL)
+ {
+ ret = -1;
+ break;
+ }
+
+ remaining_chunk_size = strtol (line, &endl, 16);
+ if (remaining_chunk_size == 0)
+ {
+ ret = 0;
+ if (fd_read_line (fd) == NULL)
+ ret = -1;
+ break;
+ }
+ }
+
+ contlen = MIN (remaining_chunk_size, SKIP_SIZE);
+ }
+
+ DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
+
+ ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
if (ret <= 0)
{
/* Don't normally report the error since this is an
return false;
}
contlen -= ret;
+
+ if (chunked)
+ {
+ remaining_chunk_size -= ret;
+ if (remaining_chunk_size == 0)
+ if (fd_read_line (fd) == NULL)
+ return false;
+ }
+
/* Safe even if %.*s bogusly expects terminating \0 because
we've zero-terminated dlbuf above. */
DEBUGP (("%.*s", ret, dlbuf));
return true;
}
+#define NOT_RFC2231 0
+#define RFC2231_NOENCODING 1
+#define RFC2231_ENCODING 2
+
+/* extract_param extracts the parameter name into NAME.
+ However, if the parameter name is in RFC2231 format then
+ this function adjusts NAME by stripping of the trailing
+ characters that are not part of the name but are present to
+ indicate the presence of encoding information in the value
+ or a fragment of a long parameter value
+*/
+static int
+modify_param_name(param_token *name)
+{
+ const char *delim1 = memchr (name->b, '*', name->e - name->b);
+ const char *delim2 = memrchr (name->b, '*', name->e - name->b);
+
+ int result;
+
+ if(delim1 == NULL)
+ {
+ result = NOT_RFC2231;
+ }
+ else if(delim1 == delim2)
+ {
+ if ((name->e - 1) == delim1)
+ {
+ result = RFC2231_ENCODING;
+ }
+ else
+ {
+ result = RFC2231_NOENCODING;
+ }
+ name->e = delim1;
+ }
+ else
+ {
+ name->e = delim1;
+ result = RFC2231_ENCODING;
+ }
+ return result;
+}
+
+/* extract_param extract the paramater value into VALUE.
+ Like modify_param_name this function modifies VALUE by
+ stripping off the encoding information from the actual value
+*/
+static void
+modify_param_value (param_token *value, int encoding_type )
+{
+ if (RFC2231_ENCODING == encoding_type)
+ {
+ const char *delim = memrchr (value->b, '\'', value->e - value->b);
+ if ( delim != NULL )
+ {
+ value->b = (delim+1);
+ }
+ }
+}
+
/* Extract a parameter from the string (typically an HTTP header) at
**SOURCE and advance SOURCE to the next parameter. Return false
when there are no more parameters to extract. The name of the
if (*p == separator) ++p;
}
*source = p;
+
+ int param_type = modify_param_name(name);
+ if (NOT_RFC2231 != param_type)
+ {
+ modify_param_value(value, param_type);
+ }
return true;
}
+#undef NOT_RFC2231
+#undef RFC2231_NOENCODING
+#undef RFC2231_ENCODING
+
+/* Appends the string represented by VALUE to FILENAME */
+
+static void
+append_value_to_filename (char **filename, param_token const * const value)
+{
+ int original_length = strlen(*filename);
+ int new_length = strlen(*filename) + (value->e - value->b);
+ *filename = xrealloc (*filename, new_length+1);
+ memcpy (*filename + original_length, value->b, (value->e - value->b));
+ (*filename)[new_length] = '\0';
+}
+
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
The file name is stripped of directory components and must not be
empty. */
-
static bool
parse_content_disposition (const char *hdr, char **filename)
{
+ *filename = NULL;
param_token name, value;
while (extract_param (&hdr, &name, &value, ';'))
- if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
- {
- /* Make the file name begin at the last slash or backslash. */
- const char *last_slash = memrchr (value.b, '/', value.e - value.b);
- const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
- if (last_slash && last_bs)
- value.b = 1 + MAX (last_slash, last_bs);
- else if (last_slash || last_bs)
- value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- /* Start with the directory prefix, if specified. */
- if (opt.dir_prefix)
- {
- int prefix_length = strlen (opt.dir_prefix);
- bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
- int total_length;
-
- if (add_slash)
- ++prefix_length;
- total_length = prefix_length + (value.e - value.b);
- *filename = xmalloc (total_length + 1);
- strcpy (*filename, opt.dir_prefix);
- if (add_slash)
- (*filename)[prefix_length - 1] = '/';
- memcpy (*filename + prefix_length, value.b, (value.e - value.b));
- (*filename)[total_length] = '\0';
- }
- else
- *filename = strdupdelim (value.b, value.e);
- return true;
- }
- return false;
+ {
+ int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
+ if ( isFilename && value.b != NULL)
+ {
+ /* Make the file name begin at the last slash or backslash. */
+ const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+ const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+ if (last_slash && last_bs)
+ value.b = 1 + MAX (last_slash, last_bs);
+ else if (last_slash || last_bs)
+ value.b = 1 + (last_slash ? last_slash : last_bs);
+ if (value.b == value.e)
+ continue;
+ /* Start with the directory prefix, if specified. */
+ if (opt.dir_prefix)
+ {
+ if (!(*filename))
+ {
+ int prefix_length = strlen (opt.dir_prefix);
+ bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
+ int total_length;
+
+ if (add_slash)
+ ++prefix_length;
+ total_length = prefix_length + (value.e - value.b);
+ *filename = xmalloc (total_length + 1);
+ strcpy (*filename, opt.dir_prefix);
+ if (add_slash)
+ (*filename)[prefix_length - 1] = '/';
+ memcpy (*filename + prefix_length, value.b, (value.e - value.b));
+ (*filename)[total_length] = '\0';
+ }
+ else
+ {
+ append_value_to_filename (filename, &value);
+ }
+ }
+ else
+ {
+ if (*filename)
+ {
+ append_value_to_filename (filename, &value);
+ }
+ else
+ {
+ *filename = strdupdelim (value.b, value.e);
+ }
+ }
+ }
+ }
+ if (*filename)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
}
+
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
is done. */
bool keep_alive;
+ /* Is the server using the chunked transfer encoding? */
+ bool chunked_transfer_encoding = false;
+
/* Whether keep-alive should be inhibited.
RFC 2068 requests that 1.0 clients not send keep-alive requests
if (!inhibit_keep_alive)
request_set_header (req, "Connection", "Keep-Alive", rel_none);
- if (opt.cookies)
- request_set_header (req, "Cookie",
- cookie_header (wget_cookie_jar,
- u->host, u->port, u->path,
-#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
-#else
- 0
-#endif
- ),
- rel_value);
-
if (opt.post_data || opt.post_file_name)
{
request_set_header (req, "Content-Type",
rel_value);
}
+ retry_with_auth:
+ /* We need to come back here when the initial attempt to retrieve
+ without authorization header fails. (Expected to happen at least
+ for the Digest authorization scheme.) */
+
+ if (opt.cookies)
+ request_set_header (req, "Cookie",
+ cookie_header (wget_cookie_jar,
+ u->host, u->port, u->path,
+#ifdef HAVE_SSL
+ u->scheme == SCHEME_HTTPS
+#else
+ 0
+#endif
+ ),
+ rel_value);
+
/* Add the user headers. */
if (opt.user_headers)
{
request_set_user_header (req, opt.user_headers[i]);
}
- retry_with_auth:
- /* We need to come back here when the initial attempt to retrieve
- without authorization header fails. (Expected to happen at least
- for the Digest authorization scheme.) */
-
proxyauth = NULL;
if (proxy)
{
request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
}
- keep_alive = false;
+ keep_alive = true;
/* Establish the connection. */
- if (!inhibit_keep_alive)
+ if (inhibit_keep_alive)
+ keep_alive = false;
+ else
{
/* Look for a persistent connection to target host, unless a
proxy is used. The exception is when SSL is in use, in which
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ if (statcode < 0)
+ {
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ xfree (head);
+ return HERR;
+ }
hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
contrange = 0;
*dt &= ~RETROKF;
+read_header:
head = read_http_response_head (sock);
if (!head)
{
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ if (statcode < 0)
+ {
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return HERR;
+ }
+
+ if (H_10X (statcode))
+ {
+ DEBUGP (("Ignoring response\n"));
+ goto read_header;
+ }
+
hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
/* Check for keep-alive related responses. */
if (!inhibit_keep_alive && contlen != -1)
{
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
{
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
+ if (0 == strcasecmp (hdrval, "Close"))
+ keep_alive = false;
}
}
+ resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval));
+ if (0 == strcasecmp (hdrval, "chunked"))
+ chunked_transfer_encoding = true;
+
/* Handle (possibly multiple instances of) the Set-Cookie header. */
if (opt.cookies)
{
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
If not, they can be worked around using
`--no-http-keep-alive'. */
CLOSE_FINISH (sock);
- else if (keep_alive && skip_short_body (sock, contlen))
+ else if (keep_alive
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
/* Successfully skipped the body; also keep using the socket. */
CLOSE_FINISH (sock);
else
/* If the server ignored our range request, instruct fd_read_body
to skip the first RESTVAL bytes of body. */
flags |= rb_skip_startpos;
+
+ if (chunked_transfer_encoding)
+ flags |= rb_chunked_transfer_encoding;
+
hs->len = hs->restval;
hs->rd_size = 0;
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
here so that we don't go through the hoops if we're just using
FTP or whatever. */
if (opt.cookies)
- load_cookies();
+ load_cookies ();
/* Warn on (likely bogus) wildcard usage in HTTP. */
if (opt.ftp_glob && has_wildcards_p (u->path))
} /* send_head_first */
} /* !got_head */
- if ((tmr != (time_t) (-1))
+ if (opt.useservertimestamps
+ && (tmr != (time_t) (-1))
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
oldlocale = setlocale (LC_TIME, NULL);
if (oldlocale)
{
- size_t l = strlen (oldlocale);
+ size_t l = strlen (oldlocale) + 1;
if (l >= sizeof savedlocale)
savedlocale[0] = '\0';
else
{
int i;
- for (i = 0; i < MD5_HASHLEN; i++, hash++)
+ for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++)
{
*buf++ = XNUM_TO_digit (*hash >> 4);
*buf++ = XNUM_TO_digit (*hash & 0xf);
/* Calculate the digest value. */
{
- ALLOCA_MD5_CONTEXT (ctx);
- unsigned char hash[MD5_HASHLEN];
- char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
- char response_digest[MD5_HASHLEN * 2 + 1];
+ struct md5_ctx ctx;
+ unsigned char hash[MD5_DIGEST_SIZE];
+ char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
+ char response_digest[MD5_DIGEST_SIZE * 2 + 1];
/* A1BUF = H(user ":" realm ":" password) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)user, strlen (user), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a1buf, hash);
/* A2BUF = H(method ":" path) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)method, strlen (method), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)path, strlen (path), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (response_digest, hash);
res = xmalloc (strlen (user)
+ strlen (realm)
+ strlen (nonce)
+ strlen (path)
- + 2 * MD5_HASHLEN /*strlen (response_digest)*/
+ + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ (opaque ? strlen (opaque) : 0)
+ 128);
sprintf (res, "Digest \
{ "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
{ "attachment", NULL, NULL, false },
{ "attachment", "somedir", NULL, false },
+ { "attachement; filename*=UTF-8'en-US'hello.txt", NULL, "hello.txt", true },
+ { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", NULL, "helloworld.txt", true },
};
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)