X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=11af939a170fc20d073795988b13ef717d0a1b5b;hp=0ff820961716c90a319a45b91444dcc2fb5f0b86;hb=1b28d66fcb583791fb1f92199a29e1063cdd6ed8;hpb=dfeb089f3c3c8f895258058bfcf49ac9b0dee23f
diff --git a/src/http.c b/src/http.c
index 0ff82096..11af939a 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,11 +1,12 @@
/* HTTP support.
- Copyright (C) 1996-2006 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
@@ -14,8 +15,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
@@ -41,6 +41,7 @@ so, delete this exception statement from your version. */
#include
#include "wget.h"
+#include "hash.h"
#include "http.h"
#include "utils.h"
#include "url.h"
@@ -67,6 +68,14 @@ so, delete this exception statement from your version. */
extern char *version_string;
+/* Forward decls. */
+static char *create_authorization_line (const char *, const char *,
+ const char *, const char *,
+ const char *, bool *);
+static char *basic_authentication_encode (const char *, const char *);
+static bool known_authentication_scheme_p (const char *, const char *);
+static void load_cookies (void);
+
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
@@ -374,6 +383,50 @@ request_free (struct request *req)
xfree (req);
}
+/* Host names added by register_basic_auth_host and looked up by
+   maybe_send_basic_creds.  Stays NULL until the first host is
+   registered, so readers must check for NULL before querying it. */
+static struct hash_table *basic_authed_hosts;
+
+/* Send Basic credentials up front to a host that is already listed in
+   basic_authed_hosts (i.e. one that has issued a Basic challenge
+   before).
+
+   When HOSTNAME is found, an "Authorization" header built from USER
+   and PASSWD is set on REQ and 1 is returned.  Otherwise (no table
+   yet, or host not listed) REQ is left untouched and 0 is returned.
+
+   This is a temporary measure until proper authentication is in
+   place. */
+
+static int
+maybe_send_basic_creds (const char *hostname, const char *user,
+                        const char *passwd, struct request *req)
+{
+  /* Nothing registered at all, or not this host: send no credentials. */
+  if (!basic_authed_hosts
+      || !hash_table_contains (basic_authed_hosts, hostname))
+    {
+      DEBUGP (("Host `%s' has not issued a general basic challenge.\n",
+               hostname));
+      return 0;
+    }
+
+  DEBUGP (("Found `%s' in basic_authed_hosts.\n", hostname));
+  /* NOTE(review): rel_value presumably tells the request to release
+     the freshly encoded header value itself -- confirm against
+     request_set_header. */
+  request_set_header (req, "Authorization",
+                      basic_authentication_encode (user, passwd),
+                      rel_value);
+  return 1;
+}
+
+/* Remember HOSTNAME in basic_authed_hosts.  The table is created on
+   first use; the key stored is a private copy of HOSTNAME with a NULL
+   value.  A host that is already present is left as it is. */
+
+static void
+register_basic_auth_host (const char *hostname)
+{
+  if (basic_authed_hosts == NULL)
+    basic_authed_hosts = make_nocase_string_hash_table (1);
+
+  /* Already known: avoid inserting a second copy of the name. */
+  if (hash_table_contains (basic_authed_hosts, hostname))
+    return;
+
+  hash_table_put (basic_authed_hosts, xstrdup (hostname), NULL);
+  DEBUGP (("Inserted `%s' into basic_authed_hosts\n", hostname));
+}
+
+
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
longer, read only that much; if the file is shorter, report an error. */
@@ -1260,13 +1313,6 @@ free_hstat (struct http_stat *hs)
hs->error = NULL;
}
-static char *create_authorization_line (const char *, const char *,
- const char *, const char *,
- const char *, bool *);
-static char *basic_authentication_encode (const char *, const char *);
-static bool known_authentication_scheme_p (const char *, const char *);
-static void load_cookies (void);
-
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (ISSPACE (line[sizeof (string_constant) - 1]) \
@@ -1313,10 +1359,15 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
int sock = -1;
int flags;
- /* Set to 1 when the authorization has failed permanently and should
+ /* Set to 1 when the authorization has already been sent and should
not be tried again. */
bool auth_finished = false;
+ /* Set to 1 when just globally-set Basic authorization has been sent;
+ * should prevent further Basic negotiations, but not other
+ * mechanisms. */
+ bool basic_auth_finished = false;
+
/* Whether NTLM authentication is used for this request. */
bool ntlm_seen = false;
@@ -1422,31 +1473,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
user = user ? user : (opt.http_user ? opt.http_user : opt.user);
passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
- if (user && passwd)
+ if (user && passwd
+ && !u->user) /* We only do "site-wide" authentication with "global"
+ user/password values; URL user/password info overrides. */
{
- /* We have the username and the password, but haven't tried
- any authorization yet. Let's see if the "Basic" method
- works. If not, we'll come back here and construct a
- proper authorization method with the right challenges.
-
- If we didn't employ this kind of logic, every URL that
- requires authorization would have to be processed twice,
- which is very suboptimal and generates a bunch of false
- "unauthorized" errors in the server log.
-
- #### But this logic also has a serious problem when used
- with stronger authentications: we *first* transmit the
- username and the password in clear text, and *then* attempt a
- stronger authentication scheme. That cannot be right! We
- are only fortunate that almost everyone still uses the
- `Basic' scheme anyway.
-
- There should be an option to prevent this from happening, for
- those who use strong authentication schemes and value their
- passwords. */
- request_set_header (req, "Authorization",
- basic_authentication_encode (user, passwd),
- rel_value);
+ /* If this is a host for which we've already received a Basic
+ * challenge, we'll go ahead and send Basic authentication creds. */
+ basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
}
proxyauth = NULL;
@@ -1589,19 +1622,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
only hurts us. */
request_remove_header (req, "Authorization");
}
- }
-
- if (sock < 0)
- {
- /* In its current implementation, persistent_available_p will
- look up conn->host in some cases. If that lookup failed, we
- don't need to bother with connect_to_host. */
- if (host_lookup_failed)
+ else if (host_lookup_failed)
{
request_free (req);
+ logprintf(LOG_NOTQUIET,
+ _("%s: unable to resolve host address `%s'\n"),
+ exec_name, relevant->host);
return HOSTERR;
}
+ }
+ if (sock < 0)
+ {
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
{
@@ -1788,7 +1820,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
- return RETROK;
+ return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
{
@@ -1867,12 +1899,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
errno = 0;
parsed = str_to_wgint (hdrval, NULL, 10);
if (parsed == WGINT_MAX && errno == ERANGE)
- /* Out of range.
- #### If Content-Length is out of range, it most likely
- means that the file is larger than 2G and that we're
- compiled without LFS. In that case we should probably
- refuse to even attempt to download the file. */
- contlen = -1;
+ {
+ /* Out of range.
+ #### If Content-Length is out of range, it most likely
+ means that the file is larger than 2G and that we're
+ compiled without LFS. In that case we should probably
+ refuse to even attempt to download the file. */
+ contlen = -1;
+ }
+ else if (parsed < 0)
+ {
+ /* Negative Content-Length; nonsensical, so we can't
+ assume any information about the content to receive. */
+ contlen = -1;
+ }
else
contlen = parsed;
}
@@ -1920,16 +1960,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
}
if (!www_authenticate)
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- else if (BEGINS_WITH (www_authenticate, "Basic"))
- /* If the authentication scheme is "Basic", which we send
- by default, there's no sense in retrying either. (This
- should be changed when we stop sending "Basic" data by
- default.) */
- ;
- else
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
{
char *pth;
pth = url_full_path (u);
@@ -1942,9 +1979,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
rel_value);
if (BEGINS_WITH (www_authenticate, "NTLM"))
ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
xfree (pth);
goto retry_with_auth;
}
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
}
logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
request_free (req);
@@ -2257,14 +2305,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
+ bool time_came_from_head = false;
bool got_name = false;
char *tms;
const char *tmrate;
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
- wgint local_size = 0; /* the size of the local file */
struct http_stat hstat; /* HTTP status */
struct_stat st;
+ bool send_head_first = true;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
@@ -2302,6 +2351,19 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Reset the document type. */
*dt = 0;
+ /* Skip preliminary HEAD request if we're not in spider mode AND
+ * if -O was given or HTTP Content-Disposition support is disabled. */
+ if (!opt.spider
+ && (got_name || !opt.content_disposition))
+ send_head_first = false;
+
+ /* Send preliminary HEAD request if -N is given and we have an existing
+ * destination file. */
+ if (opt.timestamping
+ && !opt.content_disposition
+ && file_exists_p (url_file_name (u)))
+ send_head_first = true;
+
/* THE loop */
do
{
@@ -2310,7 +2372,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
sleep_between_retrievals (count);
/* Get the current time string. */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
@@ -2319,7 +2381,7 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = url_string (u, true);
+ char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
if (count > 1)
{
@@ -2343,7 +2405,7 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (((opt.spider || opt.timestamping) && !got_head) || !got_name)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
@@ -2379,12 +2441,12 @@ Spider mode enabled. Check if remote file exists.\n"));
err = gethttp (u, &hstat, dt, proxy);
/* Time? */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
-
+
switch (err)
{
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
@@ -2435,23 +2497,31 @@ Spider mode enabled. Check if remote file exists.\n"));
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
}
+
+ /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
+ if (*dt & HEAD_ONLY
+ && (hstat.statcode == 500 || hstat.statcode == 501))
+ {
+ got_head = true;
+ continue;
+ }
/* Maybe we should always keep track of broken links, not just in
* spider mode. */
- if (opt.spider)
+ else if (opt.spider)
{
/* #### Again: ugly ugly ugly! */
if (!hurl)
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
Remote file does not exist -- broken link!!!\n"));
@@ -2468,7 +2538,7 @@ Remote file does not exist -- broken link!!!\n"));
}
/* Did we get the time-stamp? */
- if (!got_head)
+ if (send_head_first && !got_head)
{
bool restart_loop = false;
@@ -2484,6 +2554,8 @@ Last-modified header missing -- time-stamps turned off.\n"));
if (tmr == (time_t) (-1))
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
+ if (*dt & HEAD_ONLY)
+ time_came_from_head = true;
}
/* The time-stamping section. */
@@ -2516,7 +2588,7 @@ Server file no newer than local file `%s' -- not retrieving.\n\n"),
{
logprintf (LOG_VERBOSE, _("\
The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+ number_to_static_string (hstat.orig_file_size));
}
}
else
@@ -2532,12 +2604,6 @@ The sizes do not match (local %s) -- retrieving.\n"),
restart_loop = true;
}
- if (opt.always_rest)
- {
- got_name = true;
- restart_loop = true;
- }
-
if (opt.spider)
{
if (opt.recursive)
@@ -2552,7 +2618,7 @@ Remote file exists and could contain links to other resources -- retrieving.\n\n
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
goto exit;
}
}
@@ -2560,11 +2626,17 @@ Remote file exists but does not contain any link -- not retrieving.\n\n"));
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but recursion is disabled -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
goto exit;
}
}
+ if (send_head_first)
+ {
+ got_name = true;
+ restart_loop = true;
+ }
+
got_head = true; /* no more time-stamping */
*dt &= ~HEAD_ONLY;
count = 0; /* the retrieve count for HEAD is reset */
@@ -2588,7 +2660,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
else
fl = hstat.local_file;
if (fl)
- touch (fl, tmr);
+ {
+ time_t newtmr = -1;
+ /* Reparse time header, in case it's changed. */
+ if (time_came_from_head
+ && hstat.remote_time && hstat.remote_time[0])
+ {
+ newtmr = http_atotm (hstat.remote_time);
+ if (newtmr != -1)
+ tmr = newtmr;
+ }
+ touch (fl, tmr);
+ }
}
/* End of time-stamping section. */
@@ -3090,6 +3173,6 @@ test_parse_content_disposition()
#endif /* TESTING */
/*
- * vim: et ts=2 sw=2
+ * vim: et sts=2 sw=2 cino+={s
*/