X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=c3adbf40755150d2ab158f674941302b8dd521cb;hb=766df9d4e9392045a4e5c730ed81e599b509557a;hp=a882c2d1acd7a8dd1630e8a721c00a486efe66a9;hpb=823228830e57766ebabe529b75765816cb2507dc;p=wget
diff --git a/src/http.c b/src/http.c
index a882c2d1..c3adbf40 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,11 +1,12 @@
/* HTTP support.
- Copyright (C) 1996-2005 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
@@ -14,20 +15,20 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
-#include <config.h>
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
@@ -40,7 +41,7 @@ so, delete this exception statement from your version. */
#include <time.h>
#include <locale.h>
-#include "wget.h"
+#include "hash.h"
#include "http.h"
#include "utils.h"
#include "url.h"
@@ -59,9 +60,24 @@ so, delete this exception statement from your version. */
# include "gen-md5.h"
#endif
#include "convert.h"
+#include "spider.h"
+
+#ifdef TESTING
+#include "test.h"
+#endif
extern char *version_string;
+/* Forward decls. */
+struct http_stat;
+static char *create_authorization_line (const char *, const char *,
+ const char *, const char *,
+ const char *, bool *);
+static char *basic_authentication_encode (const char *, const char *);
+static bool known_authentication_scheme_p (const char *, const char *);
+static void ensure_extension (struct http_stat *, const char *, int *);
+static void load_cookies (void);
+
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
@@ -72,6 +88,7 @@ static struct cookie_jar *wget_cookie_jar;
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
+#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
@@ -125,6 +142,8 @@ struct request {
int hcount, hcapacity;
};
+extern int numurls;
+
/* Create a new, empty request. At least request_set_method must be
called before the request can be used. */
@@ -265,7 +284,7 @@ request_set_user_header (struct request *req, const char *header)
return;
BOUNDED_TO_ALLOCA (header, p, name);
++p;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
request_set_header (req, xstrdup (name), (char *) p, rel_name);
}
@@ -369,6 +388,58 @@ request_free (struct request *req)
xfree (req);
}
+static struct hash_table *basic_authed_hosts;
+
+/* Find out if this host has issued a Basic challenge yet; if so, give
+ * it the username, password. A temporary measure until we can get
+ * proper authentication in place. */
+
+static bool
+maybe_send_basic_creds (const char *hostname, const char *user,
+ const char *passwd, struct request *req)
+{
+ bool do_challenge = false;
+
+ if (opt.auth_without_challenge)
+ {
+ DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+ do_challenge = true;
+ }
+ else if (basic_authed_hosts
+ && hash_table_contains(basic_authed_hosts, hostname))
+ {
+ DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+ do_challenge = true;
+ }
+ else
+ {
+ DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ quote (hostname)));
+ }
+ if (do_challenge)
+ {
+ request_set_header (req, "Authorization",
+ basic_authentication_encode (user, passwd),
+ rel_value);
+ }
+ return do_challenge;
+}
+
+static void
+register_basic_auth_host (const char *hostname)
+{
+ if (!basic_authed_hosts)
+ {
+ basic_authed_hosts = make_nocase_string_hash_table (1);
+ }
+ if (!hash_table_contains(basic_authed_hosts, hostname))
+ {
+ hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
+ DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
+ }
+}
+
+
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
longer, read only that much; if the file is shorter, report an error. */
@@ -595,9 +666,9 @@ resp_header_locate (const struct response *resp, const char *name, int start,
&& 0 == strncasecmp (b, name, name_len))
{
b += name_len + 1;
- while (b < e && ISSPACE (*b))
+ while (b < e && c_isspace (*b))
++b;
- while (b < e && ISSPACE (e[-1]))
+ while (b < e && c_isspace (e[-1]))
--e;
*begptr = b;
*endptr = e;
@@ -696,17 +767,17 @@ resp_status (const struct response *resp, char **message)
if (p < end && *p == '/')
{
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
if (p < end && *p == '.')
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
}
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
+ if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
return -1;
status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
@@ -714,9 +785,9 @@ resp_status (const struct response *resp, char **message)
if (message)
{
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- while (p < end && ISSPACE (end[-1]))
+ while (p < end && c_isspace (end[-1]))
--end;
*message = strdupdelim (p, end);
}
@@ -733,6 +804,21 @@ resp_free (struct response *resp)
xfree (resp);
}
+/* Print a single line of response, the characters [b, e). We tried
+ getting away with
+ logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
+ but that failed to escape the non-printable characters and, in fact,
+ caused crashes in UTF-8 locales. */
+
+static void
+print_response_line(const char *prefix, const char *b, const char *e)
+{
+ char *copy;
+ BOUNDED_TO_ALLOCA(b, e, copy);
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ quotearg_style (escape_quoting_style, copy));
+}
+
/* Print the server response, line by line, omitting the trailing CRLF
from individual header lines, and prefixed with PREFIX. */
@@ -751,9 +837,7 @@ print_server_response (const struct response *resp, const char *prefix)
--e;
if (b < e && e[-1] == '\r')
--e;
- /* This is safe even on printfs with broken handling of "%.s"
- because resp->headers ends with \0. */
- logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
+ print_response_line(prefix, b, e);
}
}
@@ -775,27 +859,30 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr,
HTTP spec. */
if (*hdr == ':')
++hdr;
- while (ISSPACE (*hdr))
+ while (c_isspace (*hdr))
++hdr;
if (!*hdr)
return false;
}
- if (!ISDIGIT (*hdr))
+ if (!c_isdigit (*hdr))
return false;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
return false;
*first_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '/' || !c_isdigit (*(hdr + 1)))
return false;
*last_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
- num = 10 * num + (*hdr - '0');
+ if (*hdr == '*')
+ num = -1;
+ else
+ for (num = 0; c_isdigit (*hdr); hdr++)
+ num = 10 * num + (*hdr - '0');
*entity_length_ptr = num;
return true;
}
@@ -850,6 +937,140 @@ skip_short_body (int fd, wgint contlen)
DEBUGP (("] done.\n"));
return true;
}
+
+/* Extract a parameter from the string (typically an HTTP header) at
+ **SOURCE and advance SOURCE to the next parameter. Return false
+ when there are no more parameters to extract. The name of the
+ parameter is returned in NAME, and the value in VALUE. If the
+ parameter has no value, the token's value is zeroed out.
+
+ For example, if *SOURCE points to the string "attachment;
+ filename=\"foo bar\"", the first call to this function will return
+ the token named "attachment" and no value, and the second call will
+ return the token named "filename" and value "foo bar". The third
+ call will return false, indicating no more valid tokens. */
+
+bool
+extract_param (const char **source, param_token *name, param_token *value,
+ char separator)
+{
+ const char *p = *source;
+
+ while (c_isspace (*p)) ++p;
+ if (!*p)
+ {
+ *source = p;
+ return false; /* no error; nothing more to extract */
+ }
+
+ /* Extract name. */
+ name->b = p;
+ while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
+ name->e = p;
+ if (name->b == name->e)
+ return false; /* empty name: error */
+ while (c_isspace (*p)) ++p;
+ if (*p == separator || !*p) /* no value */
+ {
+ xzero (*value);
+ if (*p == separator) ++p;
+ *source = p;
+ return true;
+ }
+ if (*p != '=')
+ return false; /* error */
+
+ /* *p is '=', extract value */
+ ++p;
+ while (c_isspace (*p)) ++p;
+ if (*p == '"') /* quoted */
+ {
+ value->b = ++p;
+ while (*p && *p != '"') ++p;
+ if (!*p)
+ return false;
+ value->e = p++;
+ /* Currently at closing quote; find the end of param. */
+ while (c_isspace (*p)) ++p;
+ while (*p && *p != separator) ++p;
+ if (*p == separator)
+ ++p;
+ else if (*p)
+ /* garbage after closed quote, e.g. foo="bar"baz */
+ return false;
+ }
+ else /* unquoted */
+ {
+ value->b = p;
+ while (*p && *p != separator) ++p;
+ value->e = p;
+ while (value->e != value->b && c_isspace (value->e[-1]))
+ --value->e;
+ if (*p == separator) ++p;
+ }
+ *source = p;
+ return true;
+}
+
+#undef MAX
+#define MAX(p, q) ((p) > (q) ? (p) : (q))
+
+/* Parse the contents of the `Content-Disposition' header, extracting
+ the information useful to Wget. Content-Disposition is a header
+ borrowed from MIME; when used in HTTP, it typically serves for
+ specifying the desired file name of the resource. For example:
+
+ Content-Disposition: attachment; filename="flora.jpg"
+
+ Wget will skip the tokens it doesn't care about, such as
+ "attachment" in the previous example; it will also skip other
+ unrecognized params. If the header is syntactically correct and
+ contains a file name, a copy of the file name is stored in
+ *filename and true is returned. Otherwise, the function returns
+ false.
+
+ The file name is stripped of directory components and must not be
+ empty. */
+
+static bool
+parse_content_disposition (const char *hdr, char **filename)
+{
+ param_token name, value;
+ while (extract_param (&hdr, &name, &value, ';'))
+ if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
+ {
+ /* Make the file name begin at the last slash or backslash. */
+ const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+ const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+ if (last_slash && last_bs)
+ value.b = 1 + MAX (last_slash, last_bs);
+ else if (last_slash || last_bs)
+ value.b = 1 + (last_slash ? last_slash : last_bs);
+ if (value.b == value.e)
+ continue;
+ /* Start with the directory prefix, if specified. */
+ if (opt.dir_prefix)
+ {
+ int prefix_length = strlen (opt.dir_prefix);
+ bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
+ int total_length;
+
+ if (add_slash)
+ ++prefix_length;
+ total_length = prefix_length + (value.e - value.b);
+ *filename = xmalloc (total_length + 1);
+ strcpy (*filename, opt.dir_prefix);
+ if (add_slash)
+ (*filename)[prefix_length - 1] = '/';
+ memcpy (*filename + prefix_length, value.b, (value.e - value.b));
+ (*filename)[total_length] = '\0';
+ }
+ else
+ *filename = strdupdelim (value.b, value.e);
+ return true;
+ }
+ return false;
+}
/* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to
@@ -1080,10 +1301,15 @@ struct http_stat
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
+ char *message; /* status message */
wgint rd_size; /* amount of data read from socket */
double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
char *local_file; /* local file name. */
+ bool existence_checked; /* true if we already checked for a file's
+ existence after having begun to download
+ (needed in gethttp for when connection is
+ interrupted/restarted). */
bool timestamp_checked; /* true if pre-download time-stamping checks
* have already been performed */
char *orig_file_name; /* name of file to compare for time-stamping
@@ -1102,6 +1328,7 @@ free_hstat (struct http_stat *hs)
xfree_null (hs->rderrmsg);
xfree_null (hs->local_file);
xfree_null (hs->orig_file_name);
+ xfree_null (hs->message);
/* Guard against being called twice. */
hs->newloc = NULL;
@@ -1109,16 +1336,9 @@ free_hstat (struct http_stat *hs)
hs->error = NULL;
}
-static char *create_authorization_line (const char *, const char *,
- const char *, const char *,
- const char *, bool *);
-static char *basic_authentication_encode (const char *, const char *);
-static bool known_authentication_scheme_p (const char *, const char *);
-static void load_cookies (void);
-
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
- && (ISSPACE (line[sizeof (string_constant) - 1]) \
+ && (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
#define SET_USER_AGENT(req) do { \
@@ -1162,10 +1382,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
int sock = -1;
int flags;
- /* Set to 1 when the authorization has failed permanently and should
+ /* Set to 1 when the authorization has already been sent and should
not be tried again. */
bool auth_finished = false;
+ /* Set to 1 when just globally-set Basic authorization has been sent;
+ * should prevent further Basic negotiations, but not other
+ * mechanisms. */
+ bool basic_auth_finished = false;
+
/* Whether NTLM authentication is used for this request. */
bool ntlm_seen = false;
@@ -1200,8 +1425,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
bool host_lookup_failed = false;
- DEBUGP(("in gethttp 1\n"));
-
#ifdef HAVE_SSL
if (u->scheme == SCHEME_HTTPS)
{
@@ -1217,9 +1440,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
}
#endif /* HAVE_SSL */
- DEBUGP(("in gethttp 2\n"));
- DEBUGP(("in gethttp 3\n"));
-
/* Initialize certain elements of struct http_stat. */
hs->len = 0;
hs->contlen = -1;
@@ -1228,6 +1448,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
+ hs->message = NULL;
conn = u;
@@ -1276,66 +1497,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
user = user ? user : (opt.http_user ? opt.http_user : opt.user);
passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
- if (user && passwd)
+ /* We only do "site-wide" authentication with "global" user/password
+ * values unless --auth-no-challenge has been requested; URL user/password
+ * info overrides. */
+ if (user && passwd && (!u->user || opt.auth_without_challenge))
{
- /* We have the username and the password, but haven't tried
- any authorization yet. Let's see if the "Basic" method
- works. If not, we'll come back here and construct a
- proper authorization method with the right challenges.
-
- If we didn't employ this kind of logic, every URL that
- requires authorization would have to be processed twice,
- which is very suboptimal and generates a bunch of false
- "unauthorized" errors in the server log.
-
- #### But this logic also has a serious problem when used
- with stronger authentications: we *first* transmit the
- username and the password in clear text, and *then* attempt a
- stronger authentication scheme. That cannot be right! We
- are only fortunate that almost everyone still uses the
- `Basic' scheme anyway.
-
- There should be an option to prevent this from happening, for
- those who use strong authentication schemes and value their
- passwords. */
- request_set_header (req, "Authorization",
- basic_authentication_encode (user, passwd),
- rel_value);
- }
-
- proxyauth = NULL;
- if (proxy)
- {
- char *proxy_user, *proxy_passwd;
- /* For normal username and password, URL components override
- command-line/wgetrc parameters. With proxy
- authentication, it's the reverse, because proxy URLs are
- normally the "permanent" ones, so command-line args
- should take precedence. */
- if (opt.proxy_user && opt.proxy_passwd)
- {
- proxy_user = opt.proxy_user;
- proxy_passwd = opt.proxy_passwd;
- }
- else
- {
- proxy_user = proxy->user;
- proxy_passwd = proxy->passwd;
- }
- /* #### This does not appear right. Can't the proxy request,
- say, `Digest' authentication? */
- if (proxy_user && proxy_passwd)
- proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
-
- /* If we're using a proxy, we will be connecting to the proxy
- server. */
- conn = proxy;
-
- /* Proxy authorization over SSL is handled below. */
-#ifdef HAVE_SSL
- if (u->scheme != SCHEME_HTTPS)
-#endif
- request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+ /* If this is a host for which we've already received a Basic
+ * challenge, we'll go ahead and send Basic authentication creds. */
+ basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
}
/* Generate the Host header, HOST:PORT. Take into account that:
@@ -1385,8 +1554,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
post_data_size = file_size (opt.post_file_name);
if (post_data_size == -1)
{
- logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
- opt.post_file_name, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("POST data file %s missing: %s\n"),
+ quote (opt.post_file_name), strerror (errno));
post_data_size = 0;
}
}
@@ -1408,6 +1577,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
without authorization header fails. (Expected to happen at least
for the Digest authorization scheme.) */
+ proxyauth = NULL;
+ if (proxy)
+ {
+ char *proxy_user, *proxy_passwd;
+ /* For normal username and password, URL components override
+ command-line/wgetrc parameters. With proxy
+ authentication, it's the reverse, because proxy URLs are
+ normally the "permanent" ones, so command-line args
+ should take precedence. */
+ if (opt.proxy_user && opt.proxy_passwd)
+ {
+ proxy_user = opt.proxy_user;
+ proxy_passwd = opt.proxy_passwd;
+ }
+ else
+ {
+ proxy_user = proxy->user;
+ proxy_passwd = proxy->passwd;
+ }
+ /* #### This does not appear right. Can't the proxy request,
+ say, `Digest' authentication? */
+ if (proxy_user && proxy_passwd)
+ proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
+
+ /* If we're using a proxy, we will be connecting to the proxy
+ server. */
+ conn = proxy;
+
+ /* Proxy authorization over SSL is handled below. */
+#ifdef HAVE_SSL
+ if (u->scheme != SCHEME_HTTPS)
+#endif
+ request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+ }
+
keep_alive = false;
/* Establish the connection. */
@@ -1435,7 +1639,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- escnonprint (pconn.host), pconn.port);
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
@@ -1443,19 +1648,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
only hurts us. */
request_remove_header (req, "Authorization");
}
- }
-
- if (sock < 0)
- {
- /* In its current implementation, persistent_available_p will
- look up conn->host in some cases. If that lookup failed, we
- don't need to bother with connect_to_host. */
- if (host_lookup_failed)
+ else if (host_lookup_failed)
{
request_free (req);
+ logprintf(LOG_NOTQUIET,
+ _("%s: unable to resolve host address %s\n"),
+ exec_name, quote (relevant->host));
return HOSTERR;
}
+ }
+ if (sock < 0)
+ {
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
{
@@ -1518,13 +1722,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (statcode != 200)
{
failed_tunnel:
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
- message ? escnonprint (message) : "?");
+ message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
return CONSSLERR;
}
@@ -1600,52 +1805,150 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
- message ? escnonprint (message) : "");
+ message ? quotearg_style (escape_quoting_style, message) : "");
else
{
logprintf (LOG_VERBOSE, "\n");
print_server_response (resp, " ");
}
- DEBUGP(("in gethttp 4\n"));
-
- /* Determine the local filename if needed. Notice that if -O is used
- * hstat.local_file is set by http_loop to the argument of -O. */
- if (!hs->local_file)
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
{
- if (resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)))
- /* Honor Content-Disposition. */
+ if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = true;
+ else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
{
- hs->local_file = xstrdup (hdrval);
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = true;
}
+ }
+
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
+ if (statcode == HTTP_STATUS_UNAUTHORIZED)
+ {
+ /* Authorization is required. */
+ if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
else
- /* Choose filename according to URL name. */
+ CLOSE_INVALIDATE (sock);
+ pconn.authorized = false;
+ if (!auth_finished && (user && passwd))
{
+ /* IIS sends multiple copies of WWW-Authenticate, one with
+ the value "negotiate", and other(s) with data. Loop over
+ all the occurrences and pick the one we recognize. */
+ int wapos;
+ const char *wabeg, *waend;
+ char *www_authenticate = NULL;
+ for (wapos = 0;
+ (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
+ &wabeg, &waend)) != -1;
+ ++wapos)
+ if (known_authentication_scheme_p (wabeg, waend))
+ {
+ BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
+ break;
+ }
+
+ if (!www_authenticate)
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ char *pth;
+ pth = url_full_path (u);
+ request_set_header (req, "Authorization",
+ create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished),
+ rel_value);
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+ xfree (pth);
+ goto retry_with_auth;
+ }
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
+ }
+ logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
+ request_free (req);
+ return AUTHFAILED;
+ }
+ else /* statcode != HTTP_STATUS_UNAUTHORIZED */
+ {
+ /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
+ if (ntlm_seen)
+ pconn.authorized = true;
+ }
+
+ /* Determine the local filename if needed. Notice that if -O is used
+ * hstat.local_file is set by http_loop to the argument of -O. */
+ if (!hs->local_file)
+ {
+ /* Honor Content-Disposition whenever possible. */
+ if (!opt.content_disposition
+ || !resp_header_copy (resp, "Content-Disposition",
+ hdrval, sizeof (hdrval))
+ || !parse_content_disposition (hdrval, &hs->local_file))
+ {
+ /* The Content-Disposition header is missing or broken.
+ * Choose unique file name according to given URL. */
hs->local_file = url_file_name (u);
}
}
- DEBUGP(("in gethttp 5\n"));
-
/* TODO: perform this check only once. */
- if (opt.noclobber && file_exists_p (hs->local_file))
+ if (!hs->existence_checked && file_exists_p (hs->local_file))
{
- /* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
- logprintf (LOG_VERBOSE, _("\
-File `%s' already there; not retrieving.\n\n"), hs->local_file);
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
-
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
-
- return RETROK;
+ if (opt.noclobber && !opt.output_document)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
+ logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"), quote (hs->local_file));
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hs->local_file))
+ *dt |= TEXTHTML;
+
+ return RETRUNNEEDED;
+ }
+ else if (!ALLOW_CLOBBER)
+ {
+ char *unique = unique_name (hs->local_file, true);
+ if (unique != hs->local_file)
+ xfree (hs->local_file);
+ hs->local_file = unique;
+ }
}
+ hs->existence_checked = true;
/* Support timestamping */
/* TODO: move this code out of gethttp. */
@@ -1682,7 +1985,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
{
- local_dot_orig_file_exists = 1;
+ local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
}
@@ -1715,95 +2018,24 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
errno = 0;
parsed = str_to_wgint (hdrval, NULL, 10);
if (parsed == WGINT_MAX && errno == ERANGE)
- /* Out of range.
- #### If Content-Length is out of range, it most likely
- means that the file is larger than 2G and that we're
- compiled without LFS. In that case we should probably
- refuse to even attempt to download the file. */
- contlen = -1;
- else
- contlen = parsed;
- }
-
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
{
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
+ /* Out of range.
+ #### If Content-Length is out of range, it most likely
+ means that the file is larger than 2G and that we're
+ compiled without LFS. In that case we should probably
+ refuse to even attempt to download the file. */
+ contlen = -1;
}
- }
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
- if (statcode == HTTP_STATUS_UNAUTHORIZED)
- {
- /* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
- else
- CLOSE_INVALIDATE (sock);
- pconn.authorized = false;
- if (!auth_finished && (user && passwd))
+ else if (parsed < 0)
{
- /* IIS sends multiple copies of WWW-Authenticate, one with
- the value "negotiate", and other(s) with data. Loop over
- all the occurrences and pick the one we recognize. */
- int wapos;
- const char *wabeg, *waend;
- char *www_authenticate = NULL;
- for (wapos = 0;
- (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
- &wabeg, &waend)) != -1;
- ++wapos)
- if (known_authentication_scheme_p (wabeg, waend))
- {
- BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
- break;
- }
-
- if (!www_authenticate)
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- else if (BEGINS_WITH (www_authenticate, "Basic"))
- /* If the authentication scheme is "Basic", which we send
- by default, there's no sense in retrying either. (This
- should be changed when we stop sending "Basic" data by
- default.) */
- ;
- else
- {
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- xfree (pth);
- goto retry_with_auth;
- }
+ /* Negative Content-Length; nonsensical, so we can't
+ assume any information about the content to receive. */
+ contlen = -1;
}
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- request_free (req);
- return AUTHFAILED;
- }
- else /* statcode != HTTP_STATUS_UNAUTHORIZED */
- {
- /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
- if (ntlm_seen)
- pconn.authorized = true;
+ else
+ contlen = parsed;
}
+
request_free (req);
hs->statcode = statcode;
@@ -1821,7 +2053,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
char *tmp = strchr (type, ';');
if (tmp)
{
- while (tmp > type && ISSPACE (tmp[-1]))
+ while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
}
@@ -1852,7 +2084,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
wgint first_byte_pos, last_byte_pos, entity_length;
if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
&entity_length))
- contrange = first_byte_pos;
+ {
+ contrange = first_byte_pos;
+ contlen = last_byte_pos - first_byte_pos + 1;
+ }
}
resp_free (resp);
@@ -1890,47 +2125,42 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
content-type. */
if (!type ||
0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
- if (opt.html_extension && (*dt & TEXTHTML))
- /* -E / --html-extension / html_extension = on was specified, and this is a
- text/html file. If some case-insensitive variation on ".htm[l]" isn't
- already the file's suffix, tack on ".html". */
- {
- char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ if (type &&
+ 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ *dt |= TEXTCSS;
+ else
+ *dt &= ~TEXTCSS;
- if (last_period_in_local_filename == NULL
- || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
- || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+ if (opt.html_extension)
+ {
+ if (*dt & TEXTHTML)
+ /* -E / --html-extension / html_extension = on was specified,
+ and this is a text/html file. If some case-insensitive
+ variation on ".htm[l]" isn't already the file's suffix,
+ tack on ".html". */
{
- int local_filename_len = strlen (hs->local_file);
- /* Resize the local file, allowing for ".html" preceded by
- optional ".NUMBER". */
- hs->local_file = xrealloc (hs->local_file,
- local_filename_len + 24 + sizeof (".html"));
- strcpy(hs->local_file + local_filename_len, ".html");
- /* If clobbering is not allowed and the file, as named,
- exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER)
- {
- int ext_num = 1;
- do
- sprintf (hs->local_file + local_filename_len,
- ".%d.html", ext_num++);
- while (file_exists_p (hs->local_file));
- }
- *dt |= ADDED_HTML_EXTENSION;
+ ensure_extension (hs, ".html", dt);
+ }
+ else if (*dt & TEXTCSS)
+ {
+ ensure_extension (hs, ".css", dt);
}
}
- if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
+ || (hs->restval > 0 && statcode == HTTP_STATUS_OK
+ && contrange == 0 && hs->restval >= contlen)
+ )
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
- after the end of file and the server response with 416. */
+ after the end of file and the server responds with 416
+ (or 200 with a <= Content-Length). */
logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n"));
/* In case the caller inspects. */
@@ -1952,7 +2182,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
- hs->contlen = contlen + contrange;
+ if (contlen == -1)
+ hs->contlen = -1;
+ else
+ hs->contlen = contlen + contrange;
if (opt.verbose)
{
@@ -1983,7 +2216,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
logputs (LOG_VERBOSE,
opt.ignore_length ? _("ignored") : _("unspecified"));
if (type)
- logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
+ logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
else
logputs (LOG_VERBOSE, "\n");
}
@@ -2049,6 +2282,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
else
fp = output_stream;
+ /* Print fetch message, if opt.verbose. */
+ if (opt.verbose)
+ {
+ logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
+ }
+
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
@@ -2097,16 +2337,16 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
int *dt, struct url *proxy)
{
int count;
- bool got_head = false; /* used for time-stamping */
+ bool got_head = false; /* used for time-stamping and filename detection */
+ bool time_came_from_head = false;
+ bool got_name = false;
char *tms;
const char *tmrate;
- uerr_t err;
+ uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
- wgint local_size = 0; /* the size of the local file */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
-
- DEBUGP(("in http_loop\n"));
+ struct_stat st;
+ bool send_head_first = true;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
@@ -2133,7 +2373,37 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
hstat.referer = referer;
if (opt.output_document)
- hstat.local_file = xstrdup (opt.output_document);
+ {
+ hstat.local_file = xstrdup (opt.output_document);
+ got_name = true;
+ }
+ else if (!opt.content_disposition)
+ {
+ hstat.local_file = url_file_name (u);
+ got_name = true;
+ }
+
+ /* TODO: Ick! This code is now in both gethttp and http_loop, and is
+ * screaming for some refactoring. */
+ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
+ logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"),
+ quote (hstat.local_file));
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hstat.local_file))
+ *dt |= TEXTHTML;
+
+ ret = RETROK;
+ goto exit;
+ }
/* Reset the counter. */
count = 0;
@@ -2141,44 +2411,49 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Reset the document type. */
*dt = 0;
+ /* Skip preliminary HEAD request if we're not in spider mode AND
+ * if -O was given or HTTP Content-Disposition support is disabled. */
+ if (!opt.spider
+ && (got_name || !opt.content_disposition))
+ send_head_first = false;
+
+ /* Send preliminary HEAD request if -N is given and we have an existing
+ * destination file. */
+ if (opt.timestamping
+ && !opt.content_disposition
+ && file_exists_p (url_file_name (u)))
+ send_head_first = true;
+
/* THE loop */
do
{
- DEBUGP(("in http_loop LOOP\n"));
-
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
/* Get the current time string. */
- tms = time_str (NULL);
+ tms = datetime_str (time (NULL));
+ if (opt.spider && !got_head)
+ logprintf (LOG_VERBOSE, _("\
+Spider mode enabled. Check if remote file exists.\n"));
+
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = url_string (u, true);
- logprintf (LOG_VERBOSE, "--%s-- %s\n",
- tms, hurl);
+ char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
- logprintf (LOG_VERBOSE, " %s", tmp);
+ logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
+ tms, tmp, hurl);
}
else
{
- logprintf (LOG_VERBOSE, " ");
- }
-
- if (hstat.local_file)
- {
- logprintf (LOG_VERBOSE, " => `%s'\n",
- HYPHENP (hstat.local_file) ? "STDOUT" : hstat.local_file);
- }
- else
- {
- logprintf (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, "--%s-- %s\n",
+ tms, hurl);
}
#ifdef WINDOWS
@@ -2190,13 +2465,14 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (opt.spider || (opt.timestamping && !got_head))
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
/* Decide whether or not to restart. */
if (opt.always_rest
+ && got_name
&& stat (hstat.local_file, &st) == 0
&& S_ISREG (st.st_mode))
/* When -c is used, continue from on-disk size. (Can't use
@@ -2216,8 +2492,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
we require a fresh get.
b) caching is explicitly inhibited. */
if ((proxy && count > 1) /* a */
- || !opt.allow_cache /* b */
- )
+ || !opt.allow_cache) /* b */
*dt |= SEND_NOCACHE;
else
*dt &= ~SEND_NOCACHE;
@@ -2226,12 +2501,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
err = gethttp (u, &hstat, dt, proxy);
/* Time? */
- tms = time_str (NULL);
+ tms = datetime_str (time (NULL));
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
-
+
switch (err)
{
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
@@ -2240,26 +2515,23 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Non-fatal errors continue executing the loop, which will
bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */
- /* free_hstat (&hstat); */
printwhat (count, opt.ntry);
continue;
- case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
- case SSLINITFAILED: case CONTNOTSUPPORTED:
- /* Fatal errors just return from the function. */
- free_hstat (&hstat);
- return err;
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
- hstat.local_file, strerror (errno));
- free_hstat (&hstat);
- return err;
+ logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
+ case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
+ case SSLINITFAILED: case CONTNOTSUPPORTED:
+ /* Fatal errors just return from the function. */
+ ret = err;
+ goto exit;
case CONSSLERR:
/* Another fatal error. */
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
- free_hstat (&hstat);
- return err;
+ ret = err;
+ goto exit;
case NEWLOCATION:
/* Return the new location to the caller. */
if (!*newloc)
@@ -2267,15 +2539,17 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
- free_hstat (&hstat);
- return WRONGCODE;
+ ret = WRONGCODE;
}
- free_hstat (&hstat);
- return NEWLOCATION;
+ else
+ {
+ ret = NEWLOCATION;
+ }
+ goto exit;
case RETRUNNEEDED:
/* The file was already fully retrieved. */
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
case RETRFINISHED:
/* Deal with you later. */
break;
@@ -2286,23 +2560,49 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
if (!(*dt & RETROKF))
{
+ char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- char *hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
- xfree (hurl);
}
- logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
+
+ /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
+ if (*dt & HEAD_ONLY
+ && (hstat.statcode == 500 || hstat.statcode == 501))
+ {
+ got_head = true;
+ continue;
+ }
+ /* Maybe we should always keep track of broken links, not just in
+ * spider mode. */
+ else if (opt.spider)
+ {
+ /* #### Again: ugly ugly ugly! */
+ if (!hurl)
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
+ nonexisting_url (hurl);
+ logprintf (LOG_NOTQUIET, _("\
+Remote file does not exist -- broken link!!!\n"));
+ }
+ else
+ {
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
+ tms, hstat.statcode,
+ quotearg_style (escape_quoting_style, hstat.error));
+ }
logputs (LOG_VERBOSE, "\n");
- free_hstat (&hstat);
- return WRONGCODE;
+ ret = WRONGCODE;
+ xfree_null (hurl);
+ goto exit;
}
/* Did we get the time-stamp? */
if (!got_head)
{
+ got_head = true; /* no more time-stamping */
+
if (opt.timestamping && !hstat.remote_time)
{
logputs (LOG_NOTQUIET, _("\
@@ -2315,77 +2615,132 @@ Last-modified header missing -- time-stamps turned off.\n"));
if (tmr == (time_t) (-1))
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
+ if (*dt & HEAD_ONLY)
+ time_came_from_head = true;
}
- }
-
- /* The time-stamping section. */
- if (opt.timestamping && !got_head)
- {
- got_head = true; /* no more time-stamping */
- *dt &= ~HEAD_ONLY;
- count = 0; /* the retrieve count for HEAD is reset */
-
- if (hstat.remote_time && tmr != (time_t) (-1))
+
+ if (send_head_first)
{
- /* Now time-stamping can be used validly. Time-stamping
- means that if the sizes of the local and remote file
- match, and local file is newer than the remote file,
- it will not be retrieved. Otherwise, the normal
- download procedure is resumed. */
- if (hstat.orig_file_tstamp >= tmr)
+ /* The time-stamping section. */
+ if (opt.timestamping)
+ {
+ if (hstat.orig_file_name) /* Perform the following
+ checks only if the file
+ we're supposed to
+ download already exists. */
+ {
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
+ {
+ /* Now time-stamping can be used validly.
+ Time-stamping means that if the sizes of
+ the local and remote file match, and local
+ file is newer than the remote file, it will
+ not be retrieved. Otherwise, the normal
+ download procedure is resumed. */
+ if (hstat.orig_file_tstamp >= tmr)
+ {
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
+ {
+ logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file %s -- not retrieving.\n\n"),
+ quote (hstat.orig_file_name));
+ ret = RETROK;
+ goto exit;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+The sizes do not match (local %s) -- retrieving.\n"),
+ number_to_static_string (hstat.orig_file_size));
+ }
+ }
+ else
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
+
+ logputs (LOG_VERBOSE, "\n");
+ }
+ }
+
+ /* free_hstat (&hstat); */
+ hstat.timestamp_checked = true;
+ }
+
+ if (opt.spider)
{
- if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen)
+ bool finished = true;
+ if (opt.recursive)
{
- logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
- free_hstat (&hstat);
- return RETROK;
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
+Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ finished = false;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
}
else
{
- logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+ if (*dt & TEXTHTML)
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists and could contain further links,\n\
+but recursion is disabled -- not retrieving.\n\n"));
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists.\n\n"));
+ }
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
+
+ if (finished)
+ {
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL:%s %2d %s\n"),
+ tms, u->url, hstat.statcode,
+ hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
+ goto exit;
}
}
- else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
- }
+
+ got_name = true;
+ *dt &= ~HEAD_ONLY;
+ count = 0; /* the retrieve count for HEAD is reset */
+ continue;
+ } /* send_head_first */
+ } /* !got_head */
- /* free_hstat (&hstat); */
- hstat.timestamp_checked = true;
- continue;
- }
-
if ((tmr != (time_t) (-1))
- && !opt.spider
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
+ set_local_file (&fl, hstat.local_file);
+ if (fl)
{
- if (output_stream_regular)
- fl = opt.output_document;
+ time_t newtmr = -1;
+ /* Reparse time header, in case it's changed. */
+ if (time_came_from_head
+ && hstat.remote_time && hstat.remote_time[0])
+ {
+ newtmr = http_atotm (hstat.remote_time);
+ if (newtmr != -1)
+ tmr = newtmr;
+ }
+ touch (fl, tmr);
}
- else
- fl = hstat.local_file;
- if (fl)
- touch (fl, tmr);
}
/* End of time-stamping section. */
- if (opt.spider)
- {
- logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
- escnonprint (hstat.error));
- return RETROK;
- }
-
tmrate = retr_rate (hstat.rd_size, hstat.dltime);
total_download_time += hstat.dltime;
@@ -2393,9 +2748,14 @@ The sizes do not match (local %s) -- retrieving.\n"),
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s/%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
@@ -2405,7 +2765,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
number_to_static_string (hstat.contlen),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
@@ -2414,8 +2774,8 @@ The sizes do not match (local %s) -- retrieving.\n"),
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
else if (hstat.res == 0) /* No read error */
{
@@ -2424,16 +2784,21 @@ The sizes do not match (local %s) -- retrieving.\n"),
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
tms, u->url, number_to_static_string (hstat.len),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
@@ -2442,8 +2807,8 @@ The sizes do not match (local %s) -- retrieving.\n"),
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
- free_hstat (&hstat);
- return RETROK;
+ ret = RETROK;
+ goto exit;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
connection too soon */
@@ -2452,13 +2817,20 @@ The sizes do not match (local %s) -- retrieving.\n"),
_("%s (%s) - Connection closed at byte %s. "),
tms, tmrate, number_to_static_string (hstat.len));
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
- else
+ else if (hstat.len != hstat.restval)
/* Getting here would mean reading more data than
requested with content-length, which we never do. */
abort ();
+ else
+ {
+ /* Getting here probably means that the content-length was
+ * _less_ than the original, local size. We should probably
+ * truncate or re-read, or something. FIXME */
+ ret = RETROK;
+ goto exit;
+ }
}
else /* from now on hstat.res can only be -1 */
{
@@ -2469,7 +2841,6 @@ The sizes do not match (local %s) -- retrieving.\n"),
tms, tmrate, number_to_static_string (hstat.len),
hstat.rderrmsg);
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
else /* hstat.res == -1 and contlen is given */
@@ -2481,15 +2852,19 @@ The sizes do not match (local %s) -- retrieving.\n"),
number_to_static_string (hstat.contlen),
hstat.rderrmsg);
printwhat (count, opt.ntry);
- /* free_hstat (&hstat); */
continue;
}
}
/* not reached */
}
while (!opt.ntry || (count < opt.ntry));
+
+exit:
+ if (ret == RETROK)
+ *local_file = xstrdup (hstat.local_file);
+ free_hstat (&hstat);
- return TRYLIMEXC;
+ return ret;
}
/* Check whether the result of strptime() indicates success.
@@ -2505,11 +2880,11 @@ check_end (const char *p)
{
if (!p)
return false;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
if (!*p
|| (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
- || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
+ || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1])))
return true;
else
return false;
@@ -2560,7 +2935,7 @@ http_atotm (const char *time_string)
Netscape cookie specification.) */
};
const char *oldlocale;
- int i;
+ size_t i;
time_t ret = (time_t) -1;
/* Solaris strptime fails to recognize English month names in
@@ -2625,50 +3000,11 @@ basic_authentication_encode (const char *user, const char *passwd)
}
#define SKIP_WS(x) do { \
- while (ISSPACE (*(x))) \
+ while (c_isspace (*(x))) \
++(x); \
} while (0)
#ifdef ENABLE_DIGEST
-/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
- of a field in such a header. If the field is the one specified by
- ATTR_NAME ("realm", "opaque", and "nonce" are used by the current
- digest authorization code), extract its value in the (char*)
- variable pointed by RET. Returns negative on a malformed header,
- or number of bytes that have been parsed by this call. */
-static int
-extract_header_attr (const char *au, const char *attr_name, char **ret)
-{
- const char *ep;
- const char *cp = au;
-
- if (strncmp (cp, attr_name, strlen (attr_name)) == 0)
- {
- cp += strlen (attr_name);
- if (!*cp)
- return -1;
- SKIP_WS (cp);
- if (*cp != '=')
- return -1;
- if (!*++cp)
- return -1;
- SKIP_WS (cp);
- if (*cp != '\"')
- return -1;
- if (!*++cp)
- return -1;
- for (ep = cp; *ep && *ep != '\"'; ep++)
- ;
- if (!*ep)
- return -1;
- xfree_null (*ret);
- *ret = strdupdelim (cp, ep);
- return ep - au + 1;
- }
- else
- return 0;
-}
-
/* Dump the hexadecimal representation of HASH to BUF. HASH should be
an array of 16 bytes containing the hash keys, and BUF should be a
buffer of 33 writable characters (32 for hex digits plus one for
@@ -2703,53 +3039,23 @@ digest_authentication_encode (const char *au, const char *user,
{ "nonce", &nonce }
};
char *res;
+ param_token name, value;
realm = opaque = nonce = NULL;
au += 6; /* skip over `Digest' */
- while (*au)
+ while (extract_param (&au, &name, &value, ','))
{
- int i;
-
- SKIP_WS (au);
+ size_t i;
+ size_t namelen = name.e - name.b;
for (i = 0; i < countof (options); i++)
- {
- int skip = extract_header_attr (au, options[i].name,
- options[i].variable);
- if (skip < 0)
- {
- xfree_null (realm);
- xfree_null (opaque);
- xfree_null (nonce);
- return NULL;
- }
- else if (skip)
- {
- au += skip;
- break;
- }
- }
- if (i == countof (options))
- {
- while (*au && *au != '=')
- au++;
- if (*au && *++au)
- {
- SKIP_WS (au);
- if (*au == '\"')
- {
- au++;
- while (*au && *au != '\"')
- au++;
- if (*au)
- au++;
- }
- }
- }
- while (*au && *au != ',')
- au++;
- if (*au)
- au++;
+ if (namelen == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name,
+ namelen))
+ {
+ *options[i].variable = strdupdelim (value.b, value.e);
+ break;
+ }
}
if (!realm || !nonce || !user || !passwd || !path || !method)
{
@@ -2825,10 +3131,11 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
first argument and are followed by whitespace or terminating \0.
The comparison is case-insensitive. */
#define STARTS(literal, b, e) \
- ((e) - (b) >= STRSIZE (literal) \
+ ((e > b) \
+ && ((size_t) ((e) - (b))) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
- && ((e) - (b) == STRSIZE (literal) \
- || ISSPACE (b[STRSIZE (literal)])))
+ && ((size_t) ((e) - (b)) == STRSIZE (literal) \
+ || c_isspace (b[STRSIZE (literal)])))
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
@@ -2857,7 +3164,7 @@ create_authorization_line (const char *au, const char *user,
{
/* We are called only with known schemes, so we can dispatch on the
first letter. */
- switch (TOUPPER (*au))
+ switch (c_toupper (*au))
{
case 'B': /* Basic */
*finished = true;
@@ -2910,7 +3217,85 @@ http_cleanup (void)
cookie_jar_delete (wget_cookie_jar);
}
+/* Make sure HS->local_file ends in the extension EXT (for example
+   ".html" or ".css"), appending it when it is missing.
+
+   HS->local_file is left alone when the suffix starting at its last
+   '.' already equals EXT, compared case-insensitively.  A
+   five-character EXT such as ".html" additionally has a short variant
+   made of its first four characters (".htm") that is accepted the
+   same way.
+
+   Otherwise HS->local_file is resized with xrealloc and EXT is
+   appended.  When ALLOW_CLOBBER is false and the resulting name
+   already exists, ".1EXT", ".2EXT", ... are tried until a name is
+   found for which file_exists_p fails.  ADDED_HTML_EXTENSION is set
+   in *DT whenever the name was changed, regardless of which EXT was
+   added.  */
+void
+ensure_extension (struct http_stat *hs, const char *ext, int *dt)
+{
+  char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+  char shortext[8];
+  int len = strlen (ext);
+
+  /* Start with an empty short variant so that the comparison below
+     never reads an uninitialized buffer.  An empty string cannot
+     match a suffix, because every suffix begins with '.'.  */
+  shortext[0] = '\0';
+  if (len == 5)
+    {
+      /* Keep the first four characters of EXT: ".html" -> ".htm".
+         The terminator belongs right after them, at index len - 1.  */
+      memcpy (shortext, ext, len - 1);
+      shortext[len - 1] = '\0';
+    }
+
+  if (last_period_in_local_filename == NULL
+      || !(0 == strcasecmp (last_period_in_local_filename, shortext)
+           || 0 == strcasecmp (last_period_in_local_filename, ext)))
+    {
+      int local_filename_len = strlen (hs->local_file);
+      /* Resize the local file name, allowing for EXT preceded by an
+         optional ".NUMBER" and followed by the terminating NUL.  */
+      hs->local_file = xrealloc (hs->local_file,
+                                 local_filename_len + 24 + len);
+      strcpy (hs->local_file + local_filename_len, ext);
+      /* If clobbering is not allowed and the file, as named,
+         exists, tack on ".NUMBER" followed by EXT instead.  */
+      if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+        {
+          int ext_num = 1;
+          do
+            sprintf (hs->local_file + local_filename_len,
+                     ".%d%s", ext_num++, ext);
+          while (file_exists_p (hs->local_file));
+        }
+      *dt |= ADDED_HTML_EXTENSION;
+    }
+}
+
+
+#ifdef TESTING
+
+/* Unit test for parse_content_disposition.
+
+   Each table entry holds a Content-Disposition header value, the
+   value to store in opt.dir_prefix before parsing, the file name
+   expected back, and the expected boolean result.  The file name is
+   only compared when the parse reported success.
+
+   Returns NULL after every entry has been checked.
+   NOTE(review): mu_assert presumably returns its message from this
+   function when the condition is false -- confirm against the test
+   harness.
+
+   Side effect: opt.dir_prefix is overwritten and not restored.
+   NOTE(review): FILENAME is not released here; confirm whether
+   parse_content_disposition hands back allocated storage.  */
+const char *
+test_parse_content_disposition (void)
+{
+  /* Unsigned index, matching the type of the sizeof-based bound.  */
+  size_t i;
+  struct {
+    char *hdrval;          /* header value handed to the parser */
+    char *opt_dir_prefix;  /* value stored in opt.dir_prefix */
+    char *filename;        /* expected file name; NULL on failure */
+    bool result;           /* expected return value of the parser */
+  } test_array[] = {
+    { "filename=\"file.ext\"", NULL, "file.ext", true },
+    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
+    { "attachment", NULL, NULL, false },
+    { "attachment", "somedir", NULL, false },
+  };
+
+  for (i = 0; i < sizeof (test_array) / sizeof (test_array[0]); ++i)
+    {
+      /* Initialized so a failed parse never leaves it indeterminate.  */
+      char *filename = NULL;
+      bool res;
+
+      opt.dir_prefix = test_array[i].opt_dir_prefix;
+      res = parse_content_disposition (test_array[i].hdrval, &filename);
+
+      mu_assert ("test_parse_content_disposition: wrong result",
+                 res == test_array[i].result
+                 && (res == false
+                     || 0 == strcmp (test_array[i].filename, filename)));
+    }
+
+  return NULL;
+}
+
+#endif /* TESTING */
+
/*
- * vim: et ts=2 sw=2
+ * vim: et sts=2 sw=2 cino+={s
*/