/* HTTP support.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of GNU Wget.
# include "http-ntlm.h"
#endif
#include "cookies.h"
-#ifdef ENABLE_DIGEST
-# include "gen-md5.h"
-#endif
+#include "md5.h"
#include "convert.h"
#include "spider.h"
#include "test.h"
#endif
+#ifdef __VMS
+# include "vms.h"
+#endif /* def __VMS */
+
extern char *version_string;
/* Forward decls. */
+struct http_stat;
static char *create_authorization_line (const char *, const char *,
const char *, const char *,
const char *, bool *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
+static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
#ifndef MIN
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
+#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
+#define H_10X(x) (((x) >= 100) && ((x) < 200))
#define H_20X(x) (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \
int hcount, hcapacity;
};
+extern int numurls;
+
/* Create a new, empty request. At least request_set_method must be
called before the request can be used. */
APPEND (p, req->method); *p++ = ' ';
APPEND (p, req->arg); *p++ = ' ';
- memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
+ memcpy (p, "HTTP/1.1\r\n", 10); p += 10;
for (i = 0; i < req->hcount; i++)
{
if (opt.auth_without_challenge)
{
- DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+ DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
do_challenge = true;
}
else if (basic_authed_hosts
&& hash_table_contains(basic_authed_hosts, hostname))
{
- DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+ DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
do_challenge = true;
}
else
{
- DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ DEBUGP (("Host %s has not issued a general basic challenge.\n",
quote (hostname)));
}
if (do_challenge)
if (!hash_table_contains(basic_authed_hosts, hostname))
{
hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
- DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
+ DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
}
}
while (p < end && c_isdigit (*p))
++p;
if (p < end && *p == '.')
- ++p;
+ ++p;
while (p < end && c_isdigit (*p))
++p;
}
{
char *copy;
BOUNDED_TO_ALLOCA(b, e, copy);
- logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
quotearg_style (escape_quoting_style, copy));
}
mode, the body is displayed for debugging purposes. */
static bool
-skip_short_body (int fd, wgint contlen)
+skip_short_body (int fd, wgint contlen, bool chunked)
{
enum {
SKIP_SIZE = 512, /* size of the download buffer */
SKIP_THRESHOLD = 4096 /* the largest size we read */
};
+ wgint remaining_chunk_size = 0;
char dlbuf[SKIP_SIZE + 1];
dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
- /* We shouldn't get here with unknown contlen. (This will change
- with HTTP/1.1, which supports "chunked" transfer.) */
- assert (contlen != -1);
+ assert (contlen != -1 || contlen);
/* If the body is too large, it makes more sense to simply close the
connection than to try to read the body. */
if (contlen > SKIP_THRESHOLD)
return false;
- DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
-
- while (contlen > 0)
+ while (contlen > 0 || chunked)
{
- int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
+ int ret;
+ if (chunked)
+ {
+ if (remaining_chunk_size == 0)
+ {
+ char *line = fd_read_line (fd);
+ char *endl;
+ if (line == NULL)
+ break;
+
+ remaining_chunk_size = strtol (line, &endl, 16);
+ if (remaining_chunk_size == 0)
+ {
+ fd_read_line (fd);
+ break;
+ }
+ }
+
+ contlen = MIN (remaining_chunk_size, SKIP_SIZE);
+ }
+
+ DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
+
+ ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
if (ret <= 0)
{
/* Don't normally report the error since this is an
return false;
}
contlen -= ret;
+
+ if (chunked)
+ {
+ remaining_chunk_size -= ret;
+ if (remaining_chunk_size == 0)
+ if (fd_read_line (fd) == NULL)
+ return false;
+ }
+
/* Safe even if %.*s bogusly expects terminating \0 because
we've zero-terminated dlbuf above. */
DEBUGP (("%.*s", ret, dlbuf));
return true;
}
+#define NOT_RFC2231 0
+#define RFC2231_NOENCODING 1
+#define RFC2231_ENCODING 2
+
+/* extract_param extracts the parameter name into NAME.
+ However, if the parameter name is in RFC2231 format then
+ this function adjusts NAME by stripping of the trailing
+ characters that are not part of the name but are present to
+ indicate the presence of encoding information in the value
+ or a fragment of a long parameter value
+*/
+static int
+modify_param_name(param_token *name)
+{
+ const char *delim1 = memchr (name->b, '*', name->e - name->b);
+ const char *delim2 = memrchr (name->b, '*', name->e - name->b);
+
+ int result;
+
+ if(delim1 == NULL)
+ {
+ result = NOT_RFC2231;
+ }
+ else if(delim1 == delim2)
+ {
+ if ((name->e - 1) == delim1)
+ {
+ result = RFC2231_ENCODING;
+ }
+ else
+ {
+ result = RFC2231_NOENCODING;
+ }
+ name->e = delim1;
+ }
+ else
+ {
+ name->e = delim1;
+ result = RFC2231_ENCODING;
+ }
+ return result;
+}
+
+/* extract_param extract the paramater value into VALUE.
+ Like modify_param_name this function modifies VALUE by
+ stripping off the encoding information from the actual value
+*/
+static void
+modify_param_value (param_token *value, int encoding_type )
+{
+ if (RFC2231_ENCODING == encoding_type)
+ {
+ const char *delim = memrchr (value->b, '\'', value->e - value->b);
+ if ( delim != NULL )
+ {
+ value->b = (delim+1);
+ }
+ }
+}
+
/* Extract a parameter from the string (typically an HTTP header) at
**SOURCE and advance SOURCE to the next parameter. Return false
when there are no more parameters to extract. The name of the
if (*p == separator) ++p;
}
*source = p;
+
+ int param_type = modify_param_name(name);
+ if (NOT_RFC2231 != param_type)
+ {
+ modify_param_value(value, param_type);
+ }
return true;
}
+#undef NOT_RFC2231
+#undef RFC2231_NOENCODING
+#undef RFC2231_ENCODING
+
+/* Appends the string represented by VALUE to FILENAME */
+
+static void
+append_value_to_filename (char **filename, param_token const * const value)
+{
+ int original_length = strlen(*filename);
+ int new_length = strlen(*filename) + (value->e - value->b);
+ *filename = xrealloc (*filename, new_length+1);
+ memcpy (*filename + original_length, value->b, (value->e - value->b));
+ (*filename)[new_length] = '\0';
+}
+
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
false.
The file name is stripped of directory components and must not be
- empty. */
+ empty.
+ Historically, this function returned filename prefixed with opt.dir_prefix,
+ now that logic is handled by the caller, new code should pay attention,
+ changed by crq, Sep 2010.
+
+*/
static bool
parse_content_disposition (const char *hdr, char **filename)
{
param_token name, value;
+ *filename = NULL;
while (extract_param (&hdr, &name, &value, ';'))
- if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
- {
- /* Make the file name begin at the last slash or backslash. */
- const char *last_slash = memrchr (value.b, '/', value.e - value.b);
- const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
- if (last_slash && last_bs)
- value.b = 1 + MAX (last_slash, last_bs);
- else if (last_slash || last_bs)
- value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- /* Start with the directory prefix, if specified. */
- if (opt.dir_prefix)
- {
- int prefix_length = strlen (opt.dir_prefix);
- bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
- int total_length;
-
- if (add_slash)
- ++prefix_length;
- total_length = prefix_length + (value.e - value.b);
- *filename = xmalloc (total_length + 1);
- strcpy (*filename, opt.dir_prefix);
- if (add_slash)
- (*filename)[prefix_length - 1] = '/';
- memcpy (*filename + prefix_length, value.b, (value.e - value.b));
- (*filename)[total_length] = '\0';
- }
- else
- *filename = strdupdelim (value.b, value.e);
- return true;
- }
- return false;
+ {
+ int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
+ if ( isFilename && value.b != NULL)
+ {
+ /* Make the file name begin at the last slash or backslash. */
+ const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+ const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+ if (last_slash && last_bs)
+ value.b = 1 + MAX (last_slash, last_bs);
+ else if (last_slash || last_bs)
+ value.b = 1 + (last_slash ? last_slash : last_bs);
+ if (value.b == value.e)
+ continue;
+
+ if (*filename)
+ append_value_to_filename (filename, &value);
+ else
+ *filename = strdupdelim (value.b, value.e);
+ }
+ }
+
+ if (*filename)
+ return true;
+ else
+ return false;
}
+
\f
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
+ char *message; /* status message */
wgint rd_size; /* amount of data read from socket */
double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
existence after having begun to download
(needed in gethttp for when connection is
interrupted/restarted. */
- bool timestamp_checked; /* true if pre-download time-stamping checks
+ bool timestamp_checked; /* true if pre-download time-stamping checks
* have already been performed */
char *orig_file_name; /* name of file to compare for time-stamping
* (might be != local_file if -K is set) */
wgint orig_file_size; /* size of file to compare for time-stamping */
- time_t orig_file_tstamp; /* time-stamp of file to compare for
+ time_t orig_file_tstamp; /* time-stamp of file to compare for
* time-stamping */
};
xfree_null (hs->rderrmsg);
xfree_null (hs->local_file);
xfree_null (hs->orig_file_name);
+ xfree_null (hs->message);
/* Guard against being called twice. */
hs->newloc = NULL;
&& (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
+#ifdef __VMS
+#define SET_USER_AGENT(req) do { \
+ if (!opt.useragent) \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (VMS %s %s)", \
+ version_string, vms_arch(), vms_vers()), \
+ rel_value); \
+ else if (*opt.useragent) \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+} while (0)
+#else /* def __VMS */
#define SET_USER_AGENT(req) do { \
if (!opt.useragent) \
request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s", version_string), rel_value); \
+ aprintf ("Wget/%s (%s)", \
+ version_string, OS_TYPE), \
+ rel_value); \
else if (*opt.useragent) \
request_set_header (req, "User-Agent", opt.useragent, rel_none); \
} while (0)
+#endif /* def __VMS [else] */
/* The flags that allow clobbering the file (opening with "wb").
Defined here to avoid repetition later. #### This will require
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri, int count)
{
struct request *req;
is done. */
bool keep_alive;
- /* Whether keep-alive should be inhibited.
+ /* Is the server using the chunked transfer encoding? */
+ bool chunked_transfer_encoding = false;
- RFC 2068 requests that 1.0 clients not send keep-alive requests
- to proxies. This is because many 1.0 proxies do not interpret
- the Connection header and transfer it to the remote server,
- causing it to not close the connection and leave both the proxy
- and the client hanging. */
+ /* Whether keep-alive should be inhibited. */
bool inhibit_keep_alive =
- !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
+ !opt.http_keep_alive || opt.ignore_length;
/* Headers sent when using POST. */
wgint post_data_size = 0;
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
+ hs->message = NULL;
conn = u;
request_set_header (req, "Referer", (char *) hs->referer, rel_none);
if (*dt & SEND_NOCACHE)
request_set_header (req, "Pragma", "no-cache", rel_none);
- if (hs->restval)
+ if (hs->restval && !opt.timestamping)
request_set_header (req, "Range",
aprintf ("bytes=%s-",
number_to_static_string (hs->restval)),
user = user ? user : (opt.http_user ? opt.http_user : opt.user);
passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
- if (user && passwd
- && !u->user) /* We only do "site-wide" authentication with "global"
- user/password values; URL user/password info overrides. */
+ /* We only do "site-wide" authentication with "global" user/password
+ * values unless --auth-no-challange has been requested; URL user/password
+ * info overrides. */
+ if (user && passwd && (!u->user || opt.auth_without_challenge))
{
/* If this is a host for which we've already received a Basic
* challenge, we'll go ahead and send Basic authentication creds. */
rel_value);
}
- if (!inhibit_keep_alive)
- request_set_header (req, "Connection", "Keep-Alive", rel_none);
-
- if (opt.cookies)
- request_set_header (req, "Cookie",
- cookie_header (wget_cookie_jar,
- u->host, u->port, u->path,
-#ifdef HAVE_SSL
- u->scheme == SCHEME_HTTPS
-#else
- 0
-#endif
- ),
- rel_value);
+ if (inhibit_keep_alive)
+ request_set_header (req, "Connection", "Close", rel_none);
+ else
+ {
+ if (proxy == NULL)
+ request_set_header (req, "Connection", "Keep-Alive", rel_none);
+ else
+ {
+ request_set_header (req, "Connection", "Close", rel_none);
+ request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
+ }
+ }
if (opt.post_data || opt.post_file_name)
{
rel_value);
}
+ retry_with_auth:
+ /* We need to come back here when the initial attempt to retrieve
+ without authorization header fails. (Expected to happen at least
+ for the Digest authorization scheme.) */
+
+ if (opt.cookies)
+ request_set_header (req, "Cookie",
+ cookie_header (wget_cookie_jar,
+ u->host, u->port, u->path,
+#ifdef HAVE_SSL
+ u->scheme == SCHEME_HTTPS
+#else
+ 0
+#endif
+ ),
+ rel_value);
+
/* Add the user headers. */
if (opt.user_headers)
{
request_set_user_header (req, opt.user_headers[i]);
}
- retry_with_auth:
- /* We need to come back here when the initial attempt to retrieve
- without authorization header fails. (Expected to happen at least
- for the Digest authorization scheme.) */
-
proxyauth = NULL;
if (proxy)
{
request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
}
- keep_alive = false;
+ keep_alive = true;
/* Establish the connection. */
- if (!inhibit_keep_alive)
+ if (inhibit_keep_alive)
+ keep_alive = false;
+ else
{
/* Look for a persistent connection to target host, unless a
proxy is used. The exception is when SSL is in use, in which
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- quotearg_style (escape_quoting_style, pconn.host),
+ quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ if (statcode < 0)
+ {
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ xfree (head);
+ return HERR;
+ }
+ hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (statcode != 200)
if (conn->scheme == SCHEME_HTTPS)
{
- if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
+ if (!ssl_connect_wget (sock))
{
fd_close (sock);
return CONSSLERR;
}
+ else if (!ssl_check_certificate (sock, u->host))
+ {
+ fd_close (sock);
+ return VERIFCERTERR;
+ }
using_ssl = true;
}
#endif /* HAVE_SSL */
contrange = 0;
*dt &= ~RETROKF;
+read_header:
head = read_http_response_head (sock);
if (!head)
{
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
- if (!opt.server_response)
- logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
- message ? quotearg_style (escape_quoting_style, message) : "");
- else
+ if (statcode < 0)
{
- logprintf (LOG_VERBOSE, "\n");
- print_server_response (resp, " ");
+ char *tms = datetime_str (time (NULL));
+ logprintf (LOG_VERBOSE, "%d\n", statcode);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
+ quotearg_style (escape_quoting_style,
+ _("Malformed status line")));
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return HERR;
}
- /* Determine the local filename if needed. Notice that if -O is used
- * hstat.local_file is set by http_loop to the argument of -O. */
- if (!hs->local_file)
- {
- /* Honor Content-Disposition whether possible. */
- if (!opt.content_disposition
- || !resp_header_copy (resp, "Content-Disposition",
- hdrval, sizeof (hdrval))
- || !parse_content_disposition (hdrval, &hs->local_file))
- {
- /* The Content-Disposition header is missing or broken.
- * Choose unique file name according to given URL. */
- hs->local_file = url_file_name (u);
- }
- }
-
- /* TODO: perform this check only once. */
- if (!hs->existence_checked && file_exists_p (hs->local_file))
+ if (H_10X (statcode))
{
- if (opt.noclobber && !opt.output_document)
- {
- /* If opt.noclobber is turned on and file already exists, do not
- retrieve the file. But if the output_document was given, then this
- test was already done and the file didn't exist. Hence the !opt.output_document */
- logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), quote (hs->local_file));
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
-
- return RETRUNNEEDED;
- }
- else if (!ALLOW_CLOBBER)
- {
- char *unique = unique_name (hs->local_file, true);
- if (unique != hs->local_file)
- xfree (hs->local_file);
- hs->local_file = unique;
- }
+ DEBUGP (("Ignoring response\n"));
+ goto read_header;
}
- hs->existence_checked = true;
- /* Support timestamping */
- /* TODO: move this code out of gethttp. */
- if (opt.timestamping && !hs->timestamp_checked)
+ hs->message = xstrdup (message);
+ if (!opt.server_response)
+ logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
+ message ? quotearg_style (escape_quoting_style, message) : "");
+ else
{
- size_t filename_len = strlen (hs->local_file);
- char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
- bool local_dot_orig_file_exists = false;
- char *local_filename = NULL;
- struct_stat st;
-
- if (opt.backup_converted)
- /* If -K is specified, we'll act on the assumption that it was specified
- last time these files were downloaded as well, and instead of just
- comparing local file X against server file X, we'll compare local
- file X.orig (if extant, else X) against server file X. If -K
- _wasn't_ specified last time, or the server contains files called
- *.orig, -N will be back to not operating correctly with -k. */
- {
- /* Would a single s[n]printf() call be faster? --dan
-
- Definitely not. sprintf() is horribly slow. It's a
- different question whether the difference between the two
- affects a program. Usually I'd say "no", but at one
- point I profiled Wget, and found that a measurable and
- non-negligible amount of time was lost calling sprintf()
- in url.c. Replacing sprintf with inline calls to
- strcpy() and number_to_string() made a difference.
- --hniksic */
- memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
- memcpy (filename_plus_orig_suffix + filename_len,
- ".orig", sizeof (".orig"));
-
- /* Try to stat() the .orig file. */
- if (stat (filename_plus_orig_suffix, &st) == 0)
- {
- local_dot_orig_file_exists = true;
- local_filename = filename_plus_orig_suffix;
- }
- }
-
- if (!local_dot_orig_file_exists)
- /* Couldn't stat() <file>.orig, so try to stat() <file>. */
- if (stat (hs->local_file, &st) == 0)
- local_filename = hs->local_file;
-
- if (local_filename != NULL)
- /* There was a local file, so we'll check later to see if the version
- the server has is the same version we already have, allowing us to
- skip a download. */
- {
- hs->orig_file_name = xstrdup (local_filename);
- hs->orig_file_size = st.st_size;
- hs->orig_file_tstamp = st.st_mtime;
-#ifdef WINDOWS
- /* Modification time granularity is 2 seconds for Windows, so
- increase local time by 1 second for later comparison. */
- ++hs->orig_file_tstamp;
-#endif
- }
+ logprintf (LOG_VERBOSE, "\n");
+ print_server_response (resp, " ");
}
if (!opt.ignore_length
/* Check for keep-alive related responses. */
if (!inhibit_keep_alive && contlen != -1)
{
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
{
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
+ if (0 == strcasecmp (hdrval, "Close"))
+ keep_alive = false;
}
}
+
+ resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval));
+ if (0 == strcasecmp (hdrval, "chunked"))
+ chunked_transfer_encoding = true;
+
+ /* Handle (possibly multiple instances of) the Set-Cookie header. */
+ if (opt.cookies)
+ {
+ int scpos;
+ const char *scbeg, *scend;
+ /* The jar should have been created by now. */
+ assert (wget_cookie_jar != NULL);
+ for (scpos = 0;
+ (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
+ &scbeg, &scend)) != -1;
+ ++scpos)
+ {
+ char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
+ cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
+ u->path, set_cookie);
+ }
+ }
+
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
register_basic_auth_host (u->host);
}
xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
goto retry_with_auth;
}
else
}
logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
return AUTHFAILED;
}
else /* statcode != HTTP_STATUS_UNAUTHORIZED */
if (ntlm_seen)
pconn.authorized = true;
}
+
+ /* Determine the local filename if needed. Notice that if -O is used
+ * hstat.local_file is set by http_loop to the argument of -O. */
+ if (!hs->local_file)
+ {
+ char *local_file = NULL;
+
+ /* Honor Content-Disposition whether possible. */
+ if (!opt.content_disposition
+ || !resp_header_copy (resp, "Content-Disposition",
+ hdrval, sizeof (hdrval))
+ || !parse_content_disposition (hdrval, &local_file))
+ {
+ /* The Content-Disposition header is missing or broken.
+ * Choose unique file name according to given URL. */
+ hs->local_file = url_file_name (u, NULL);
+ }
+ else
+ {
+ DEBUGP (("Parsed filename from Content-Disposition: %s\n",
+ local_file));
+ hs->local_file = url_file_name (u, local_file);
+ }
+ }
+
+ /* TODO: perform this check only once. */
+ if (!hs->existence_checked && file_exists_p (hs->local_file))
+ {
+ if (opt.noclobber && !opt.output_document)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
+ logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"), quote (hs->local_file));
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hs->local_file))
+ *dt |= TEXTHTML;
+
+ xfree (head);
+ xfree_null (message);
+ return RETRUNNEEDED;
+ }
+ else if (!ALLOW_CLOBBER)
+ {
+ char *unique = unique_name (hs->local_file, true);
+ if (unique != hs->local_file)
+ xfree (hs->local_file);
+ hs->local_file = unique;
+ }
+ }
+ hs->existence_checked = true;
+
+ /* Support timestamping */
+ /* TODO: move this code out of gethttp. */
+ if (opt.timestamping && !hs->timestamp_checked)
+ {
+ size_t filename_len = strlen (hs->local_file);
+ char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
+ bool local_dot_orig_file_exists = false;
+ char *local_filename = NULL;
+ struct_stat st;
+
+ if (opt.backup_converted)
+ /* If -K is specified, we'll act on the assumption that it was specified
+ last time these files were downloaded as well, and instead of just
+ comparing local file X against server file X, we'll compare local
+ file X.orig (if extant, else X) against server file X. If -K
+ _wasn't_ specified last time, or the server contains files called
+ *.orig, -N will be back to not operating correctly with -k. */
+ {
+ /* Would a single s[n]printf() call be faster? --dan
+
+ Definitely not. sprintf() is horribly slow. It's a
+ different question whether the difference between the two
+ affects a program. Usually I'd say "no", but at one
+ point I profiled Wget, and found that a measurable and
+ non-negligible amount of time was lost calling sprintf()
+ in url.c. Replacing sprintf with inline calls to
+ strcpy() and number_to_string() made a difference.
+ --hniksic */
+ memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
+ memcpy (filename_plus_orig_suffix + filename_len,
+ ORIG_SFX, sizeof (ORIG_SFX));
+
+ /* Try to stat() the .orig file. */
+ if (stat (filename_plus_orig_suffix, &st) == 0)
+ {
+ local_dot_orig_file_exists = true;
+ local_filename = filename_plus_orig_suffix;
+ }
+ }
+
+ if (!local_dot_orig_file_exists)
+ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+ if (stat (hs->local_file, &st) == 0)
+ local_filename = hs->local_file;
+
+ if (local_filename != NULL)
+ /* There was a local file, so we'll check later to see if the version
+ the server has is the same version we already have, allowing us to
+ skip a download. */
+ {
+ hs->orig_file_name = xstrdup (local_filename);
+ hs->orig_file_size = st.st_size;
+ hs->orig_file_tstamp = st.st_mtime;
+#ifdef WINDOWS
+ /* Modification time granularity is 2 seconds for Windows, so
+ increase local time by 1 second for later comparison. */
+ ++hs->orig_file_tstamp;
+#endif
+ }
+ }
+
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
hs->remote_time = resp_header_strdup (resp, "Last-Modified");
- /* Handle (possibly multiple instances of) the Set-Cookie header. */
- if (opt.cookies)
- {
- int scpos;
- const char *scbeg, *scend;
- /* The jar should have been created by now. */
- assert (wget_cookie_jar != NULL);
- for (scpos = 0;
- (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
- &scbeg, &scend)) != -1;
- ++scpos)
- {
- char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
- cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
- u->path, set_cookie);
- }
- }
-
if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
{
wgint first_byte_pos, last_byte_pos, entity_length;
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
xfree_null (type);
+ xfree (head);
return NEWLOCATION;
}
}
content-type. */
if (!type ||
0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
- if (opt.html_extension && (*dt & TEXTHTML))
- /* -E / --html-extension / html_extension = on was specified, and this is a
- text/html file. If some case-insensitive variation on ".htm[l]" isn't
- already the file's suffix, tack on ".html". */
- {
- char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ if (type &&
+ 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ *dt |= TEXTCSS;
+ else
+ *dt &= ~TEXTCSS;
- if (last_period_in_local_filename == NULL
- || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
- || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+ if (opt.adjust_extension)
+ {
+ if (*dt & TEXTHTML)
+ /* -E / --adjust-extension / adjust_extension = on was specified,
+ and this is a text/html file. If some case-insensitive
+ variation on ".htm[l]" isn't already the file's suffix,
+ tack on ".html". */
{
- int local_filename_len = strlen (hs->local_file);
- /* Resize the local file, allowing for ".html" preceded by
- optional ".NUMBER". */
- hs->local_file = xrealloc (hs->local_file,
- local_filename_len + 24 + sizeof (".html"));
- strcpy(hs->local_file + local_filename_len, ".html");
- /* If clobbering is not allowed and the file, as named,
- exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
- {
- int ext_num = 1;
- do
- sprintf (hs->local_file + local_filename_len,
- ".%d.html", ext_num++);
- while (file_exists_p (hs->local_file));
- }
- *dt |= ADDED_HTML_EXTENSION;
+ ensure_extension (hs, ".html", dt);
+ }
+ else if (*dt & TEXTCSS)
+ {
+ ensure_extension (hs, ".css", dt);
}
}
- if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
+ || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
+ && contrange == 0 && contlen >= 0 && hs->restval >= contlen))
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
- after the end of file and the server response with 416. */
+ after the end of file and the server response with 416
+ (or 200 with a <= Content-Length. */
logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n"));
/* In case the caller inspects. */
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
If not, they can be worked around using
`--no-http-keep-alive'. */
CLOSE_FINISH (sock);
- else if (keep_alive && skip_short_body (sock, contlen))
+ else if (keep_alive
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
/* Successfully skipped the body; also keep using the socket. */
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RETRFINISHED;
}
+/* 2005-06-17 SMS.
+ For VMS, define common fopen() optional arguments.
+*/
+#ifdef __VMS
+# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+# define FOPEN_BIN_FLAG 3
+#else /* def __VMS */
+# define FOPEN_BIN_FLAG true
+#endif /* def __VMS [else] */
+
/* Open the local file. */
if (!output_stream)
{
if (opt.backups)
rotate_backups (hs->local_file);
if (hs->restval)
- fp = fopen (hs->local_file, "ab");
- else if (ALLOW_CLOBBER)
- fp = fopen (hs->local_file, "wb");
+ {
+#ifdef __VMS
+ int open_id;
+
+ open_id = 21;
+ fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "ab");
+#endif /* def __VMS [else] */
+ }
+ else if (ALLOW_CLOBBER || count > 0)
+ {
+ if (opt.unlink && file_exists_p (hs->local_file))
+ {
+ int res = unlink (hs->local_file);
+ if (res < 0)
+ {
+ logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+ strerror (errno));
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ return UNLINKERR;
+ }
+ }
+
+#ifdef __VMS
+ int open_id;
+
+ open_id = 22;
+ fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
+#else /* def __VMS */
+ fp = fopen (hs->local_file, "wb");
+#endif /* def __VMS [else] */
+ }
else
{
- fp = fopen_excl (hs->local_file, true);
+ fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG);
if (!fp && errno == EEXIST)
{
/* We cannot just invent a new name and use it (which is
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPENERR;
}
}
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
-
+
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
/* If the server ignored our range request, instruct fd_read_body
to skip the first RESTVAL bytes of body. */
flags |= rb_skip_startpos;
+
+ if (chunked_transfer_encoding)
+ flags |= rb_chunked_transfer_encoding;
+
hs->len = hs->restval;
hs->rd_size = 0;
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
/* The genuine HTTP loop! This is the part where the retrieval is
retried, and retried, and retried, and... */
uerr_t
-http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+http_loop (struct url *u, struct url *original_url, char **newloc,
+ char **local_file, const char *referer, int *dt, struct url *proxy,
+ struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
+ bool force_full_retrieve = false;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
here so that we don't go through the hoops if we're just using
FTP or whatever. */
if (opt.cookies)
- load_cookies();
+ load_cookies ();
/* Warn on (likely bogus) wildcard usage in HTTP. */
if (opt.ftp_glob && has_wildcards_p (u->path))
}
else if (!opt.content_disposition)
{
- hstat.local_file = url_file_name (u);
+ hstat.local_file =
+ url_file_name (opt.trustservernames ? u : original_url, NULL);
got_name = true;
}
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"),
+File %s already there; not retrieving.\n\n"),
quote (hstat.local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
- /* Skip preliminary HEAD request if we're not in spider mode AND
- * if -O was given or HTTP Content-Disposition support is disabled. */
- if (!opt.spider
- && (got_name || !opt.content_disposition))
+
+ /* Skip preliminary HEAD request if we're not in spider mode. */
+ if (!opt.spider)
send_head_first = false;
- /* Send preliminary HEAD request if -N is given and we have an existing
+ /* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
- && !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
+ if (opt.timestamping && (file_exists_p (file_name)
+ || opt.content_disposition))
send_head_first = true;
-
+ xfree (file_name);
+
/* THE loop */
do
{
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
/* Decide whether or not to restart. */
- if (opt.always_rest
+ if (force_full_retrieve)
+ hstat.restval = hstat.len;
+ else if (opt.always_rest
&& got_name
&& stat (hstat.local_file, &st) == 0
&& S_ISREG (st.st_mode))
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri, count);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
quote (hstat.local_file), strerror (errno));
- case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
- case SSLINITFAILED: case CONTNOTSUPPORTED:
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
+ case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
/* Fatal errors just return from the function. */
ret = err;
goto exit;
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
ret = err;
goto exit;
+ case UNLINKERR:
+ /* Another fatal error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
+ ret = err;
+ goto exit;
case NEWLOCATION:
/* Return the new location to the caller. */
if (!*newloc)
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
ret = NEWLOCATION;
}
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log error if it was UTF-8 encoded because we will try
+ * once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode,
+ tms, hstat.statcode,
quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
if (*dt & HEAD_ONLY)
time_came_from_head = true;
}
-
+
if (send_head_first)
{
/* The time-stamping section. */
we're supposed to
download already exists. */
{
- if (hstat.remote_time &&
+ if (hstat.remote_time &&
tmr != (time_t) (-1))
{
/* Now time-stamping can be used validly.
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1
+ if (hstat.contlen == -1
|| hstat.orig_file_size == hstat.contlen)
{
logprintf (LOG_VERBOSE, _("\
}
}
else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
+ {
+ force_full_retrieve = true;
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
+ }
logputs (LOG_VERBOSE, "\n");
}
}
-
+
/* free_hstat (&hstat); */
hstat.timestamp_checked = true;
}
-
+
if (opt.spider)
{
+ bool finished = true;
if (opt.recursive)
{
if (*dt & TEXTHTML)
{
logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ finished = false;
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
ret = RETROK; /* RETRUNNEEDED is not for caller. */
- goto exit;
}
}
else
Remote file exists and could contain further links,\n\
but recursion is disabled -- not retrieving.\n\n"));
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists.\n\n"));
}
ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
+
+ if (finished)
+ {
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL: %s %2d %s\n"),
+ tms, u->url, hstat.statcode,
+ hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
goto exit;
}
}
continue;
} /* send_head_first */
} /* !got_head */
-
- if ((tmr != (time_t) (-1))
+
+ if (opt.useservertimestamps
+ && (tmr != (time_t) (-1))
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
&& hstat.remote_time && hstat.remote_time[0])
{
newtmr = http_atotm (hstat.remote_time);
- if (newtmr != -1)
+ if (newtmr != (time_t)-1)
tmr = newtmr;
}
touch (fl, tmr);
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s/%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
number_to_static_string (hstat.contlen),
hstat.local_file, count);
}
- ++opt.numurls;
- total_downloaded_bytes += hstat.len;
+ ++numurls;
+ total_downloaded_bytes += hstat.rd_size;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
ret = RETROK;
goto exit;
else if (hstat.res == 0) /* No read error */
{
if (hstat.contlen == -1) /* We don't know how much we were supposed
- to get, so assume we succeeded. */
+ to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
tms, u->url, number_to_static_string (hstat.len),
hstat.local_file, count);
}
- ++opt.numurls;
- total_downloaded_bytes += hstat.len;
+ ++numurls;
+ total_downloaded_bytes += hstat.rd_size;
/* Remember that we downloaded the file for later ".orig" code. */
if (*dt & ADDED_HTML_EXTENSION)
- downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
+ downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
else
- downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
-
+ downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
+
ret = RETROK;
goto exit;
}
printwhat (count, opt.ntry);
continue;
}
- else
+ else if (hstat.len != hstat.restval)
/* Getting here would mean reading more data than
requested with content-length, which we never do. */
abort ();
+ else
+ {
+ /* Getting here probably means that the content-length was
+ * _less_ than the original, local size. We should probably
+ * truncate or re-read, or something. FIXME */
+ ret = RETROK;
+ goto exit;
+ }
}
else /* from now on hstat.res can only be -1 */
{
while (!opt.ntry || (count < opt.ntry));
exit:
- if (ret == RETROK)
+ if (ret == RETROK && local_file)
*local_file = xstrdup (hstat.local_file);
free_hstat (&hstat);
-
+
return ret;
}
\f
Netscape cookie specification.) */
};
const char *oldlocale;
+ char savedlocale[256];
size_t i;
time_t ret = (time_t) -1;
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale) + 1;
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
{
int i;
- for (i = 0; i < MD5_HASHLEN; i++, hash++)
+ for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++)
{
*buf++ = XNUM_TO_digit (*hash >> 4);
*buf++ = XNUM_TO_digit (*hash & 0xf);
/* Calculate the digest value. */
{
- ALLOCA_MD5_CONTEXT (ctx);
- unsigned char hash[MD5_HASHLEN];
- char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
- char response_digest[MD5_HASHLEN * 2 + 1];
+ struct md5_ctx ctx;
+ unsigned char hash[MD5_DIGEST_SIZE];
+ char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
+ char response_digest[MD5_DIGEST_SIZE * 2 + 1];
/* A1BUF = H(user ":" realm ":" password) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)user, strlen (user), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a1buf, hash);
/* A2BUF = H(method ":" path) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)method, strlen (method), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)path, strlen (path), ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
- gen_md5_init (ctx);
- gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
- gen_md5_update ((unsigned char *)":", 1, ctx);
- gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
- gen_md5_finish (ctx, hash);
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_finish_ctx (&ctx, hash);
dump_hash (response_digest, hash);
res = xmalloc (strlen (user)
+ strlen (realm)
+ strlen (nonce)
+ strlen (path)
- + 2 * MD5_HASHLEN /*strlen (response_digest)*/
+ + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ (opaque ? strlen (opaque) : 0)
+ 128);
sprintf (res, "Digest \
cookie_jar_delete (wget_cookie_jar);
}
+void
+ensure_extension (struct http_stat *hs, const char *ext, int *dt)
+{
+ char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ char shortext[8];
+ int len = strlen (ext);
+ if (len == 5)
+ {
+ strncpy (shortext, ext, len - 1);
+ shortext[len - 2] = '\0';
+ }
+
+ if (last_period_in_local_filename == NULL
+ || !(0 == strcasecmp (last_period_in_local_filename, shortext)
+ || 0 == strcasecmp (last_period_in_local_filename, ext)))
+ {
+ int local_filename_len = strlen (hs->local_file);
+ /* Resize the local file, allowing for ".html" preceded by
+ optional ".NUMBER". */
+ hs->local_file = xrealloc (hs->local_file,
+ local_filename_len + 24 + len);
+ strcpy (hs->local_file + local_filename_len, ext);
+ /* If clobbering is not allowed and the file, as named,
+ exists, tack on ".NUMBER.html" instead. */
+ if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+ {
+ int ext_num = 1;
+ do
+ sprintf (hs->local_file + local_filename_len,
+ ".%d%s", ext_num++, ext);
+ while (file_exists_p (hs->local_file));
+ }
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+}
+
#ifdef TESTING
{
int i;
struct {
- char *hdrval;
- char *opt_dir_prefix;
+ char *hdrval;
char *filename;
bool result;
} test_array[] = {
- { "filename=\"file.ext\"", NULL, "file.ext", true },
- { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
- { "attachment", NULL, NULL, false },
- { "attachment", "somedir", NULL, false },
+ { "filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"", "file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+ { "attachment", NULL, false },
+ { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
+ { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
};
-
- for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
+
+ for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
char *filename;
bool res;
- opt.dir_prefix = test_array[i].opt_dir_prefix;
res = parse_content_disposition (test_array[i].hdrval, &filename);
- mu_assert ("test_parse_content_disposition: wrong result",
+ mu_assert ("test_parse_content_disposition: wrong result",
res == test_array[i].result
- && (res == false
+ && (res == false
|| 0 == strcmp (test_array[i].filename, filename)));
}