X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhttp.c;h=02645724b136bc0f5554b9e98489e38aacfbdc68;hp=af024bb92a51d83429b4e5a73c1e388a08a376d9;hb=c1b7382ec4c25c23c81a0e0964d94fff72c6a633;hpb=e4600575bb4e2d2a6b9e2c543d0920b969d98e55
diff --git a/src/http.c b/src/http.c
index af024bb9..02645724 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,5 +1,6 @@
/* HTTP support.
- Copyright (C) 1996-2006 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -16,17 +17,18 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>.
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
-#include
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#include "wget.h"
#include
#include
@@ -39,7 +41,6 @@ so, delete this exception statement from your version. */
#include
#include
-#include "wget.h"
#include "hash.h"
#include "http.h"
#include "utils.h"
@@ -278,7 +279,7 @@ request_set_user_header (struct request *req, const char *header)
return;
BOUNDED_TO_ALLOCA (header, p, name);
++p;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
request_set_header (req, xstrdup (name), (char *) p, rel_name);
}
@@ -652,9 +653,9 @@ resp_header_locate (const struct response *resp, const char *name, int start,
&& 0 == strncasecmp (b, name, name_len))
{
b += name_len + 1;
- while (b < e && ISSPACE (*b))
+ while (b < e && c_isspace (*b))
++b;
- while (b < e && ISSPACE (e[-1]))
+ while (b < e && c_isspace (e[-1]))
--e;
*begptr = b;
*endptr = e;
@@ -753,17 +754,17 @@ resp_status (const struct response *resp, char **message)
if (p < end && *p == '/')
{
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
if (p < end && *p == '.')
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
}
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
+ if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
return -1;
status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
@@ -771,9 +772,9 @@ resp_status (const struct response *resp, char **message)
if (message)
{
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- while (p < end && ISSPACE (end[-1]))
+ while (p < end && c_isspace (end[-1]))
--end;
*message = strdupdelim (p, end);
}
@@ -844,27 +845,30 @@ parse_content_range (const char *hdr, wgint *first_byte_ptr,
HTTP spec. */
if (*hdr == ':')
++hdr;
- while (ISSPACE (*hdr))
+ while (c_isspace (*hdr))
++hdr;
if (!*hdr)
return false;
}
- if (!ISDIGIT (*hdr))
+ if (!c_isdigit (*hdr))
return false;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
return false;
*first_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '/' || !c_isdigit (*(hdr + 1)))
return false;
*last_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
- num = 10 * num + (*hdr - '0');
+ if (*hdr == '*')
+ num = -1;
+ else
+ for (num = 0; c_isdigit (*hdr); hdr++)
+ num = 10 * num + (*hdr - '0');
*entity_length_ptr = num;
return true;
}
@@ -938,7 +942,7 @@ extract_param (const char **source, param_token *name, param_token *value,
{
const char *p = *source;
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (!*p)
{
*source = p;
@@ -947,11 +951,11 @@ extract_param (const char **source, param_token *name, param_token *value,
/* Extract name. */
name->b = p;
- while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
+ while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
name->e = p;
if (name->b == name->e)
return false; /* empty name: error */
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (*p == separator || !*p) /* no value */
{
xzero (*value);
@@ -964,7 +968,7 @@ extract_param (const char **source, param_token *name, param_token *value,
/* *p is '=', extract value */
++p;
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (*p == '"') /* quoted */
{
value->b = ++p;
@@ -973,7 +977,7 @@ extract_param (const char **source, param_token *name, param_token *value,
return false;
value->e = p++;
/* Currently at closing quote; find the end of param. */
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
while (*p && *p != separator) ++p;
if (*p == separator)
++p;
@@ -986,7 +990,7 @@ extract_param (const char **source, param_token *name, param_token *value,
value->b = p;
while (*p && *p != separator) ++p;
value->e = p;
- while (value->e != value->b && ISSPACE (value->e[-1]))
+ while (value->e != value->b && c_isspace (value->e[-1]))
--value->e;
if (*p == separator) ++p;
}
@@ -1314,7 +1318,7 @@ free_hstat (struct http_stat *hs)
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
- && (ISSPACE (line[sizeof (string_constant) - 1]) \
+ && (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
#define SET_USER_AGENT(req) do { \
@@ -1621,19 +1625,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
only hurts us. */
request_remove_header (req, "Authorization");
}
- }
-
- if (sock < 0)
- {
- /* In its current implementation, persistent_available_p will
- look up conn->host in some cases. If that lookup failed, we
- don't need to bother with connect_to_host. */
- if (host_lookup_failed)
+ else if (host_lookup_failed)
{
request_free (req);
+ logprintf(LOG_NOTQUIET,
+ _("%s: unable to resolve host address `%s'\n"),
+ exec_name, relevant->host);
return HOSTERR;
}
+ }
+ if (sock < 0)
+ {
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
{
@@ -1820,7 +1823,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
- return RETROK;
+ return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
{
@@ -1899,12 +1902,20 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
errno = 0;
parsed = str_to_wgint (hdrval, NULL, 10);
if (parsed == WGINT_MAX && errno == ERANGE)
- /* Out of range.
- #### If Content-Length is out of range, it most likely
- means that the file is larger than 2G and that we're
- compiled without LFS. In that case we should probably
- refuse to even attempt to download the file. */
- contlen = -1;
+ {
+ /* Out of range.
+ #### If Content-Length is out of range, it most likely
+ means that the file is larger than 2G and that we're
+ compiled without LFS. In that case we should probably
+ refuse to even attempt to download the file. */
+ contlen = -1;
+ }
+ else if (parsed < 0)
+ {
+ /* Negative Content-Length; nonsensical, so we can't
+ assume any information about the content to receive. */
+ contlen = -1;
+ }
else
contlen = parsed;
}
@@ -2013,7 +2024,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
char *tmp = strchr (type, ';');
if (tmp)
{
- while (tmp > type && ISSPACE (tmp[-1]))
+ while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
}
@@ -2044,7 +2055,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
wgint first_byte_pos, last_byte_pos, entity_length;
if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
&entity_length))
- contrange = first_byte_pos;
+ {
+ contrange = first_byte_pos;
+ contlen = last_byte_pos - first_byte_pos + 1;
+ }
}
resp_free (resp);
@@ -2144,7 +2158,10 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
CLOSE_INVALIDATE (sock);
return RANGEERR;
}
- hs->contlen = contlen + contrange;
+ if (contlen == -1)
+ hs->contlen = -1;
+ else
+ hs->contlen = contlen + contrange;
if (opt.verbose)
{
@@ -2297,14 +2314,15 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
+ bool time_came_from_head = false;
bool got_name = false;
char *tms;
const char *tmrate;
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
- wgint local_size = 0; /* the size of the local file */
struct http_stat hstat; /* HTTP status */
struct_stat st;
+ bool send_head_first = true;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
@@ -2335,6 +2353,31 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
hstat.local_file = xstrdup (opt.output_document);
got_name = true;
}
+ else if (!opt.content_disposition)
+ {
+ hstat.local_file = url_file_name (u);
+ got_name = true;
+ }
+
+ /* TODO: Ick! This code is now in both gethttp and http_loop, and is
+ * screaming for some refactoring. */
+ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file */
+ logprintf (LOG_VERBOSE, _("\
+File `%s' already there; not retrieving.\n\n"),
+ hstat.local_file);
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
+
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hstat.local_file))
+ *dt |= TEXTHTML;
+
+ return RETRUNNEEDED;
+ }
/* Reset the counter. */
count = 0;
@@ -2342,6 +2385,19 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Reset the document type. */
*dt = 0;
+ /* Skip preliminary HEAD request if we're not in spider mode AND
+ * if -O was given or HTTP Content-Disposition support is disabled. */
+ if (!opt.spider
+ && (got_name || !opt.content_disposition))
+ send_head_first = false;
+
+ /* Send preliminary HEAD request if -N is given and we have an existing
+ * destination file. */
+ if (opt.timestamping
+ && !opt.content_disposition
+ && file_exists_p (url_file_name (u)))
+ send_head_first = true;
+
/* THE loop */
do
{
@@ -2350,7 +2406,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
sleep_between_retrievals (count);
/* Get the current time string. */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
@@ -2359,7 +2415,7 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = url_string (u, true);
+ char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
if (count > 1)
{
@@ -2383,8 +2439,7 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (((opt.spider || opt.timestamping) && !got_head)
- || (opt.always_rest && !got_name))
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
@@ -2420,12 +2475,12 @@ Spider mode enabled. Check if remote file exists.\n"));
err = gethttp (u, &hstat, dt, proxy);
/* Time? */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
-
+
switch (err)
{
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
@@ -2476,23 +2531,31 @@ Spider mode enabled. Check if remote file exists.\n"));
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
}
+
+ /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
+ if (*dt & HEAD_ONLY
+ && (hstat.statcode == 500 || hstat.statcode == 501))
+ {
+ got_head = true;
+ continue;
+ }
/* Maybe we should always keep track of broken links, not just in
* spider mode. */
- if (opt.spider)
+ else if (opt.spider)
{
/* #### Again: ugly ugly ugly! */
if (!hurl)
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
Remote file does not exist -- broken link!!!\n"));
@@ -2511,7 +2574,7 @@ Remote file does not exist -- broken link!!!\n"));
/* Did we get the time-stamp? */
if (!got_head)
{
- bool restart_loop = false;
+ got_head = true; /* no more time-stamping */
if (opt.timestamping && !hstat.remote_time)
{
@@ -2525,94 +2588,91 @@ Last-modified header missing -- time-stamps turned off.\n"));
if (tmr == (time_t) (-1))
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
+ if (*dt & HEAD_ONLY)
+ time_came_from_head = true;
}
- /* The time-stamping section. */
- if (opt.timestamping)
+ if (send_head_first)
{
- if (hstat.orig_file_name) /* Perform the following checks only
- if the file we're supposed to
- download already exists. */
+ /* The time-stamping section. */
+ if (opt.timestamping)
{
- if (hstat.remote_time &&
- tmr != (time_t) (-1))
+ if (hstat.orig_file_name) /* Perform the following
+ checks only if the file
+ we're supposed to
+ download already exists. */
{
- /* Now time-stamping can be used validly. Time-stamping
- means that if the sizes of the local and remote file
- match, and local file is newer than the remote file,
- it will not be retrieved. Otherwise, the normal
- download procedure is resumed. */
- if (hstat.orig_file_tstamp >= tmr)
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
{
- if (hstat.contlen == -1
- || hstat.orig_file_size == hstat.contlen)
+ /* Now time-stamping can be used validly.
+ Time-stamping means that if the sizes of
+ the local and remote file match, and local
+ file is newer than the remote file, it will
+ not be retrieved. Otherwise, the normal
+ download procedure is resumed. */
+ if (hstat.orig_file_tstamp >= tmr)
{
- logprintf (LOG_VERBOSE, _("\
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
+ {
+ logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
- ret = RETROK;
- goto exit;
- }
- else
- {
- logprintf (LOG_VERBOSE, _("\
+ hstat.orig_file_name);
+ ret = RETROK;
+ goto exit;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+ number_to_static_string (hstat.orig_file_size));
+ }
}
- }
- else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
+ else
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
- logputs (LOG_VERBOSE, "\n");
+ logputs (LOG_VERBOSE, "\n");
+ }
}
+
+ /* free_hstat (&hstat); */
+ hstat.timestamp_checked = true;
}
- /* free_hstat (&hstat); */
- hstat.timestamp_checked = true;
- restart_loop = true;
- }
-
- if (opt.always_rest)
- {
- got_name = true;
- restart_loop = true;
- }
-
- if (opt.spider)
- {
- if (opt.recursive)
+ if (opt.spider)
{
- if (*dt & TEXTHTML)
+ if (opt.recursive)
{
- logputs (LOG_VERBOSE, _("\
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
- restart_loop = true;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ goto exit;
+ }
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
-Remote file exists but does not contain any link -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
+Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
goto exit;
}
}
- else
- {
- logprintf (LOG_VERBOSE, _("\
-Remote file exists but recursion is disabled -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
- goto exit;
- }
- }
- got_head = true; /* no more time-stamping */
- *dt &= ~HEAD_ONLY;
- count = 0; /* the retrieve count for HEAD is reset */
-
- if (restart_loop)
- continue;
- }
+ got_name = true;
+ *dt &= ~HEAD_ONLY;
+ count = 0; /* the retrieve count for HEAD is reset */
+ continue;
+ } /* send_head_first */
+ } /* !got_head */
if ((tmr != (time_t) (-1))
&& ((hstat.len == hstat.contlen) ||
@@ -2629,7 +2689,18 @@ Remote file exists but recursion is disabled -- not retrieving.\n\n"));
else
fl = hstat.local_file;
if (fl)
- touch (fl, tmr);
+ {
+ time_t newtmr = -1;
+ /* Reparse time header, in case it's changed. */
+ if (time_came_from_head
+ && hstat.remote_time && hstat.remote_time[0])
+ {
+ newtmr = http_atotm (hstat.remote_time);
+ if (newtmr != -1)
+ tmr = newtmr;
+ }
+ touch (fl, tmr);
+ }
}
/* End of time-stamping section. */
@@ -2754,11 +2825,11 @@ check_end (const char *p)
{
if (!p)
return false;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
if (!*p
|| (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
- || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
+ || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1])))
return true;
else
return false;
@@ -2874,7 +2945,7 @@ basic_authentication_encode (const char *user, const char *passwd)
}
#define SKIP_WS(x) do { \
- while (ISSPACE (*(x))) \
+ while (c_isspace (*(x))) \
++(x); \
} while (0)
@@ -3006,7 +3077,7 @@ username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
((e) - (b) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
&& ((e) - (b) == STRSIZE (literal) \
- || ISSPACE (b[STRSIZE (literal)])))
+ || c_isspace (b[STRSIZE (literal)])))
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
@@ -3035,7 +3106,7 @@ create_authorization_line (const char *au, const char *user,
{
/* We are called only with known schemes, so we can dispatch on the
first letter. */
- switch (TOUPPER (*au))
+ switch (c_toupper (*au))
{
case 'B': /* Basic */
*finished = true;