X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=113e096a6705f7036fd41c2cc78ea57485f683c7;hb=6db8909f87012ee8f65e496846962d98cd8bafea;hp=e6f48aa2b2b8c1832d902461ae3069f567d0dd66;hpb=da99855784988c6bf125799f47c57d888bbc25f1;p=wget
diff --git a/src/http.c b/src/http.c
index e6f48aa2..113e096a 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,6 +1,6 @@
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -17,17 +17,18 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see .
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
-#include
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#include "wget.h"
#include
#include
@@ -40,7 +41,6 @@ so, delete this exception statement from your version. */
#include
#include
-#include "wget.h"
#include "hash.h"
#include "http.h"
#include "utils.h"
@@ -389,27 +389,35 @@ static struct hash_table *basic_authed_hosts;
* it the username, password. A temporary measure until we can get
* proper authentication in place. */
-static int
+static bool
maybe_send_basic_creds (const char *hostname, const char *user,
const char *passwd, struct request *req)
{
- int did_challenge = 0;
+ bool do_challenge = false;
- if (basic_authed_hosts
+ if (opt.auth_without_challenge)
+ {
+ DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
+ do_challenge = true;
+ }
+ else if (basic_authed_hosts
&& hash_table_contains(basic_authed_hosts, hostname))
{
- DEBUGP(("Found `%s' in basic_authed_hosts.\n", hostname));
- request_set_header (req, "Authorization",
- basic_authentication_encode (user, passwd),
- rel_value);
- did_challenge = 1;
+ DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
+ do_challenge = true;
}
else
{
- DEBUGP(("Host `%s' has not issued a general basic challenge.\n",
- hostname));
+ DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ quote (hostname)));
+ }
+ if (do_challenge)
+ {
+ request_set_header (req, "Authorization",
+ basic_authentication_encode (user, passwd),
+ rel_value);
}
- return did_challenge;
+ return do_challenge;
}
static void
@@ -422,7 +430,7 @@ register_basic_auth_host (const char *hostname)
if (!hash_table_contains(basic_authed_hosts, hostname))
{
hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
- DEBUGP(("Inserted `%s' into basic_authed_hosts\n", hostname));
+ DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
}
}
@@ -802,7 +810,8 @@ print_response_line(const char *prefix, const char *b, const char *e)
{
char *copy;
BOUNDED_TO_ALLOCA(b, e, copy);
- logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint(copy));
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ quotearg_style (escape_quoting_style, copy));
}
/* Print the server response, line by line, omitting the trailing CRLF
@@ -864,8 +873,11 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr,
return false;
*last_byte_ptr = num;
++hdr;
- for (num = 0; c_isdigit (*hdr); hdr++)
- num = 10 * num + (*hdr - '0');
+ if (*hdr == '*')
+ num = -1;
+ else
+ for (num = 0; c_isdigit (*hdr); hdr++)
+ num = 10 * num + (*hdr - '0');
*entity_length_ptr = num;
return true;
}
@@ -1284,10 +1296,15 @@ struct http_stat
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
+ char *message; /* status message */
wgint rd_size; /* amount of data read from socket */
double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
char *local_file; /* local file name. */
+ bool existence_checked; /* true if we already checked for a file's
+ existence after having begun to download
+ (needed in gethttp for when connection is
+ interrupted/restarted. */
bool timestamp_checked; /* true if pre-download time-stamping checks
* have already been performed */
char *orig_file_name; /* name of file to compare for time-stamping
@@ -1306,6 +1323,7 @@ free_hstat (struct http_stat *hs)
xfree_null (hs->rderrmsg);
xfree_null (hs->local_file);
xfree_null (hs->orig_file_name);
+ xfree_null (hs->message);
/* Guard against being called twice. */
hs->newloc = NULL;
@@ -1425,6 +1443,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
+ hs->message = NULL;
conn = u;
@@ -1482,41 +1501,6 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
}
- proxyauth = NULL;
- if (proxy)
- {
- char *proxy_user, *proxy_passwd;
- /* For normal username and password, URL components override
- command-line/wgetrc parameters. With proxy
- authentication, it's the reverse, because proxy URLs are
- normally the "permanent" ones, so command-line args
- should take precedence. */
- if (opt.proxy_user && opt.proxy_passwd)
- {
- proxy_user = opt.proxy_user;
- proxy_passwd = opt.proxy_passwd;
- }
- else
- {
- proxy_user = proxy->user;
- proxy_passwd = proxy->passwd;
- }
- /* #### This does not appear right. Can't the proxy request,
- say, `Digest' authentication? */
- if (proxy_user && proxy_passwd)
- proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
-
- /* If we're using a proxy, we will be connecting to the proxy
- server. */
- conn = proxy;
-
- /* Proxy authorization over SSL is handled below. */
-#ifdef HAVE_SSL
- if (u->scheme != SCHEME_HTTPS)
-#endif
- request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
- }
-
/* Generate the Host header, HOST:PORT. Take into account that:
- Broken server-side software often doesn't recognize the PORT
@@ -1564,8 +1548,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
post_data_size = file_size (opt.post_file_name);
if (post_data_size == -1)
{
- logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
- opt.post_file_name, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("POST data file %s missing: %s\n"),
+ quote (opt.post_file_name), strerror (errno));
post_data_size = 0;
}
}
@@ -1587,6 +1571,41 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
without authorization header fails. (Expected to happen at least
for the Digest authorization scheme.) */
+ proxyauth = NULL;
+ if (proxy)
+ {
+ char *proxy_user, *proxy_passwd;
+ /* For normal username and password, URL components override
+ command-line/wgetrc parameters. With proxy
+ authentication, it's the reverse, because proxy URLs are
+ normally the "permanent" ones, so command-line args
+ should take precedence. */
+ if (opt.proxy_user && opt.proxy_passwd)
+ {
+ proxy_user = opt.proxy_user;
+ proxy_passwd = opt.proxy_passwd;
+ }
+ else
+ {
+ proxy_user = proxy->user;
+ proxy_passwd = proxy->passwd;
+ }
+ /* #### This does not appear right. Can't the proxy request,
+ say, `Digest' authentication? */
+ if (proxy_user && proxy_passwd)
+ proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
+
+ /* If we're using a proxy, we will be connecting to the proxy
+ server. */
+ conn = proxy;
+
+ /* Proxy authorization over SSL is handled below. */
+#ifdef HAVE_SSL
+ if (u->scheme != SCHEME_HTTPS)
+#endif
+ request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+ }
+
keep_alive = false;
/* Establish the connection. */
@@ -1614,7 +1633,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- escnonprint (pconn.host), pconn.port);
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
@@ -1626,8 +1646,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
{
request_free (req);
logprintf(LOG_NOTQUIET,
- _("%s: unable to resolve host address `%s'\n"),
- exec_name, relevant->host);
+ _("%s: unable to resolve host address %s\n"),
+ exec_name, quote (relevant->host));
return HOSTERR;
}
}
@@ -1696,13 +1716,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (statcode != 200)
{
failed_tunnel:
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
- message ? escnonprint (message) : "?");
+ message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
return CONSSLERR;
}
@@ -1778,9 +1799,10 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
- message ? escnonprint (message) : "");
+ message ? quotearg_style (escape_quoting_style, message) : "");
else
{
logprintf (LOG_VERBOSE, "\n");
@@ -1804,14 +1826,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
}
/* TODO: perform this check only once. */
- if (file_exists_p (hs->local_file))
+ if (!hs->existence_checked && file_exists_p (hs->local_file))
{
- if (opt.noclobber)
+ if (opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
-File `%s' already there; not retrieving.\n\n"), hs->local_file);
+File %s already there; not retrieving.\n\n"), quote (hs->local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
@@ -1830,6 +1853,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
hs->local_file = unique;
}
}
+ hs->existence_checked = true;
/* Support timestamping */
/* TODO: move this code out of gethttp. */
@@ -2052,7 +2076,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
wgint first_byte_pos, last_byte_pos, entity_length;
if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
&entity_length))
- contrange = first_byte_pos;
+ {
+ contrange = first_byte_pos;
+ contlen = last_byte_pos - first_byte_pos + 1;
+ }
}
resp_free (resp);
@@ -2152,7 +2179,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
- hs->contlen = contlen + contrange;
+ if (contlen == -1)
+ hs->contlen = -1;
+ else
+ hs->contlen = contlen + contrange;
if (opt.verbose)
{
@@ -2183,7 +2213,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
logputs (LOG_VERBOSE,
opt.ignore_length ? _("ignored") : _("unspecified"));
if (type)
- logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
+ logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
else
logputs (LOG_VERBOSE, "\n");
}
@@ -2252,8 +2282,8 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
- HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
+ logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
/* This confuses the timestamping code that checks for file size.
@@ -2350,6 +2380,28 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
got_name = true;
}
+ /* TODO: Ick! This code is now in both gethttp and http_loop, and is
+ * screaming for some refactoring. */
+ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
+ logprintf (LOG_VERBOSE, _("\
+File %s already there; not retrieving.\n\n"),
+ quote (hstat.local_file));
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hstat.local_file))
+ *dt |= TEXTHTML;
+
+ ret = RETROK;
+ goto exit;
+ }
+
/* Reset the counter. */
count = 0;
@@ -2465,8 +2517,8 @@ Spider mode enabled. Check if remote file exists.\n"));
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
- hstat.local_file, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
case SSLINITFAILED: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
@@ -2534,7 +2586,8 @@ Remote file does not exist -- broken link!!!\n"));
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
+ tms, hstat.statcode,
+ quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
@@ -2588,8 +2641,8 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|| hstat.orig_file_size == hstat.contlen)
{
logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
+Server file no newer than local file %s -- not retrieving.\n\n"),
+ quote (hstat.orig_file_name));
ret = RETROK;
goto exit;
}
@@ -2614,26 +2667,44 @@ The sizes do not match (local %s) -- retrieving.\n"),
if (opt.spider)
{
+ bool finished = true;
if (opt.recursive)
{
if (*dt & TEXTHTML)
{
logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ finished = false;
}
else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
ret = RETROK; /* RETRUNNEEDED is not for caller. */
- goto exit;
}
}
else
{
- logprintf (LOG_VERBOSE, _("\
-Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ if (*dt & TEXTHTML)
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists and could contain further links,\n\
+but recursion is disabled -- not retrieving.\n\n"));
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists.\n\n"));
+ }
ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
+
+ if (finished)
+ {
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL:%s %2d %s\n"),
+ tms, u->url, hstat.statcode,
+ hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
goto exit;
}
}
@@ -2683,8 +2754,8 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
if (*dt & RETROKF)
{
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s/%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate, quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
@@ -2714,8 +2785,8 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
if (*dt & RETROKF)
{
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate, quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
@@ -2743,10 +2814,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
printwhat (count, opt.ntry);
continue;
}
- else
+ else if (hstat.len != hstat.restval)
/* Getting here would mean reading more data than
requested with content-length, which we never do. */
abort ();
+ else
+ {
+ /* Getting here probably means that the content-length was
+ * _less_ than the original, local size. We should probably
+ * truncate or re-read, or something. FIXME */
+ ret = RETROK;
+ goto exit;
+ }
}
else /* from now on hstat.res can only be -1 */
{
@@ -2851,7 +2930,7 @@ http_atotm (const char *time_string)
Netscape cookie specification.) */
};
const char *oldlocale;
- int i;
+ size_t i;
time_t ret = (time_t) -1;
/* Solaris strptime fails to recognize English month names in
@@ -2962,10 +3041,12 @@ digest_authentication_encode (const char *au, const char *user,
au += 6; /* skip over `Digest' */
while (extract_param (&au, &name, &value, ','))
{
- int i;
+ size_t i;
+ size_t namelen = name.e - name.b;
for (i = 0; i < countof (options); i++)
- if (name.e - name.b == strlen (options[i].name)
- && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+ if (namelen == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name,
+ namelen))
{
*options[i].variable = strdupdelim (value.b, value.e);
break;
@@ -3045,9 +3126,10 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
first argument and are followed by whitespace or terminating \0.
The comparison is case-insensitive. */
#define STARTS(literal, b, e) \
- ((e) - (b) >= STRSIZE (literal) \
+ ((e > b) \
+ && ((size_t) ((e) - (b))) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
- && ((e) - (b) == STRSIZE (literal) \
+ && ((size_t) ((e) - (b)) == STRSIZE (literal) \
|| c_isspace (b[STRSIZE (literal)])))
static bool