/* HTTP support.
- Copyright (C) 1996-2006 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of GNU Wget.
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
-#include <config.h>
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
-#include "wget.h"
#include "hash.h"
#include "http.h"
#include "utils.h"
return;
BOUNDED_TO_ALLOCA (header, p, name);
++p;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
request_set_header (req, xstrdup (name), (char *) p, rel_name);
}
&& 0 == strncasecmp (b, name, name_len))
{
b += name_len + 1;
- while (b < e && ISSPACE (*b))
+ while (b < e && c_isspace (*b))
++b;
- while (b < e && ISSPACE (e[-1]))
+ while (b < e && c_isspace (e[-1]))
--e;
*begptr = b;
*endptr = e;
if (p < end && *p == '/')
{
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
if (p < end && *p == '.')
++p;
- while (p < end && ISDIGIT (*p))
+ while (p < end && c_isdigit (*p))
++p;
}
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
+ if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
return -1;
status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
if (message)
{
- while (p < end && ISSPACE (*p))
+ while (p < end && c_isspace (*p))
++p;
- while (p < end && ISSPACE (end[-1]))
+ while (p < end && c_isspace (end[-1]))
--end;
*message = strdupdelim (p, end);
}
HTTP spec. */
if (*hdr == ':')
++hdr;
- while (ISSPACE (*hdr))
+ while (c_isspace (*hdr))
++hdr;
if (!*hdr)
return false;
}
- if (!ISDIGIT (*hdr))
+ if (!c_isdigit (*hdr))
return false;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
return false;
*first_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
- if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
+ if (*hdr != '/' || !c_isdigit (*(hdr + 1)))
return false;
*last_byte_ptr = num;
++hdr;
- for (num = 0; ISDIGIT (*hdr); hdr++)
+ for (num = 0; c_isdigit (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
*entity_length_ptr = num;
return true;
{
const char *p = *source;
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (!*p)
{
*source = p;
/* Extract name. */
name->b = p;
- while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
+ while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
name->e = p;
if (name->b == name->e)
return false; /* empty name: error */
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (*p == separator || !*p) /* no value */
{
xzero (*value);
/* *p is '=', extract value */
++p;
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
if (*p == '"') /* quoted */
{
value->b = ++p;
return false;
value->e = p++;
/* Currently at closing quote; find the end of param. */
- while (ISSPACE (*p)) ++p;
+ while (c_isspace (*p)) ++p;
while (*p && *p != separator) ++p;
if (*p == separator)
++p;
value->b = p;
while (*p && *p != separator) ++p;
value->e = p;
- while (value->e != value->b && ISSPACE (value->e[-1]))
+ while (value->e != value->b && c_isspace (value->e[-1]))
--value->e;
if (*p == separator) ++p;
}
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
- && (ISSPACE (line[sizeof (string_constant) - 1]) \
+ && (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
#define SET_USER_AGENT(req) do { \
only hurts us. */
request_remove_header (req, "Authorization");
}
- }
-
- if (sock < 0)
- {
- /* In its current implementation, persistent_available_p will
- look up conn->host in some cases. If that lookup failed, we
- don't need to bother with connect_to_host. */
- if (host_lookup_failed)
+ else if (host_lookup_failed)
{
request_free (req);
+ logprintf(LOG_NOTQUIET,
+ _("%s: unable to resolve host address `%s'\n"),
+ exec_name, relevant->host);
return HOSTERR;
}
+ }
+ if (sock < 0)
+ {
sock = connect_to_host (conn->host, conn->port);
if (sock == E_HOST)
{
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
- return RETROK;
+ return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
{
errno = 0;
parsed = str_to_wgint (hdrval, NULL, 10);
if (parsed == WGINT_MAX && errno == ERANGE)
- /* Out of range.
- #### If Content-Length is out of range, it most likely
- means that the file is larger than 2G and that we're
- compiled without LFS. In that case we should probably
- refuse to even attempt to download the file. */
- contlen = -1;
+ {
+ /* Out of range.
+ #### If Content-Length is out of range, it most likely
+ means that the file is larger than 2G and that we're
+ compiled without LFS. In that case we should probably
+ refuse to even attempt to download the file. */
+ contlen = -1;
+ }
+ else if (parsed < 0)
+ {
+ /* Negative Content-Length; nonsensical, so we can't
+ assume any information about the content to receive. */
+ contlen = -1;
+ }
else
contlen = parsed;
}
char *tmp = strchr (type, ';');
if (tmp)
{
- while (tmp > type && ISSPACE (tmp[-1]))
+ while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
}
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
+ bool time_came_from_head = false;
bool got_name = false;
char *tms;
const char *tmrate;
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
- wgint local_size = 0; /* the size of the local file */
struct http_stat hstat; /* HTTP status */
struct_stat st;
+ bool send_head_first = true;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
hstat.local_file = xstrdup (opt.output_document);
got_name = true;
}
+ else if (!opt.content_disposition)
+ {
+ hstat.local_file = url_file_name (u);
+ got_name = true;
+ }
/* Reset the counter. */
count = 0;
/* Reset the document type. */
*dt = 0;
+ /* Skip preliminary HEAD request if we're not in spider mode AND
+ * if -O was given or HTTP Content-Disposition support is disabled. */
+ if (!opt.spider
+ && (got_name || !opt.content_disposition))
+ send_head_first = false;
+
+ /* Send preliminary HEAD request if -N is given and we have an existing
+ * destination file. */
+ if (opt.timestamping
+ && !opt.content_disposition
+ && file_exists_p (url_file_name (u)))
+ send_head_first = true;
+
/* THE loop */
do
{
sleep_between_retrievals (count);
/* Get the current time string. */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- char *hurl = url_string (u, true);
+ char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
if (count > 1)
{
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (((opt.spider || opt.timestamping) && !got_head)
- || (opt.always_rest && !got_name))
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
err = gethttp (u, &hstat, dt, proxy);
/* Time? */
- tms = time_str (time (NULL));
+ tms = datetime_str (time (NULL));
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
-
+
switch (err)
{
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
}
+
+ /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
+ if (*dt & HEAD_ONLY
+ && (hstat.statcode == 500 || hstat.statcode == 501))
+ {
+ got_head = true;
+ continue;
+ }
/* Maybe we should always keep track of broken links, not just in
* spider mode. */
- if (opt.spider)
+ else if (opt.spider)
{
/* #### Again: ugly ugly ugly! */
if (!hurl)
- hurl = url_string (u, true);
+ hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
Remote file does not exist -- broken link!!!\n"));
/* Did we get the time-stamp? */
if (!got_head)
{
- bool restart_loop = false;
+ got_head = true; /* no more time-stamping */
if (opt.timestamping && !hstat.remote_time)
{
if (tmr == (time_t) (-1))
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
+ if (*dt & HEAD_ONLY)
+ time_came_from_head = true;
}
- /* The time-stamping section. */
- if (opt.timestamping)
+ if (send_head_first)
{
- if (hstat.orig_file_name) /* Perform the following checks only
- if the file we're supposed to
- download already exists. */
+ /* The time-stamping section. */
+ if (opt.timestamping)
{
- if (hstat.remote_time &&
- tmr != (time_t) (-1))
+ if (hstat.orig_file_name) /* Perform the following
+ checks only if the file
+ we're supposed to
+ download already exists. */
{
- /* Now time-stamping can be used validly. Time-stamping
- means that if the sizes of the local and remote file
- match, and local file is newer than the remote file,
- it will not be retrieved. Otherwise, the normal
- download procedure is resumed. */
- if (hstat.orig_file_tstamp >= tmr)
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
{
- if (hstat.contlen == -1
- || hstat.orig_file_size == hstat.contlen)
+ /* Now time-stamping can be used validly.
+ Time-stamping means that if the sizes of
+ the local and remote file match, and local
+ file is newer than the remote file, it will
+ not be retrieved. Otherwise, the normal
+ download procedure is resumed. */
+ if (hstat.orig_file_tstamp >= tmr)
{
- logprintf (LOG_VERBOSE, _("\
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
+ {
+ logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
- ret = RETROK;
- goto exit;
- }
- else
- {
- logprintf (LOG_VERBOSE, _("\
+ hstat.orig_file_name);
+ ret = RETROK;
+ goto exit;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+ number_to_static_string (hstat.orig_file_size));
+ }
}
- }
- else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
+ else
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
- logputs (LOG_VERBOSE, "\n");
+ logputs (LOG_VERBOSE, "\n");
+ }
}
+
+ /* free_hstat (&hstat); */
+ hstat.timestamp_checked = true;
}
- /* free_hstat (&hstat); */
- hstat.timestamp_checked = true;
- restart_loop = true;
- }
-
- if (opt.always_rest)
- {
- got_name = true;
- restart_loop = true;
- }
-
- if (opt.spider)
- {
- if (opt.recursive)
+ if (opt.spider)
{
- if (*dt & TEXTHTML)
+ if (opt.recursive)
{
- logputs (LOG_VERBOSE, _("\
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
- restart_loop = true;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ goto exit;
+ }
}
- else
+ else
{
logprintf (LOG_VERBOSE, _("\
-Remote file exists but does not contain any link -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
+Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ ret = RETROK; /* RETRUNNEEDED is not for caller. */
goto exit;
}
}
- else
- {
- logprintf (LOG_VERBOSE, _("\
-Remote file exists but recursion is disabled -- not retrieving.\n\n"));
- ret = RETRUNNEEDED;
- goto exit;
- }
- }
-
- got_head = true; /* no more time-stamping */
- *dt &= ~HEAD_ONLY;
- count = 0; /* the retrieve count for HEAD is reset */
- if (restart_loop)
- continue;
- }
+ got_name = true;
+ *dt &= ~HEAD_ONLY;
+ count = 0; /* the retrieve count for HEAD is reset */
+ continue;
+ } /* send_head_first */
+ } /* !got_head */
if ((tmr != (time_t) (-1))
&& ((hstat.len == hstat.contlen) ||
else
fl = hstat.local_file;
if (fl)
- touch (fl, tmr);
+ {
+ time_t newtmr = -1;
+ /* Reparse time header, in case it's changed. */
+ if (time_came_from_head
+ && hstat.remote_time && hstat.remote_time[0])
+ {
+ newtmr = http_atotm (hstat.remote_time);
+ if (newtmr != -1)
+ tmr = newtmr;
+ }
+ touch (fl, tmr);
+ }
}
/* End of time-stamping section. */
{
if (!p)
return false;
- while (ISSPACE (*p))
+ while (c_isspace (*p))
++p;
if (!*p
|| (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
- || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
+ || ((p[0] == '+' || p[0] == '-') && c_isdigit (p[1])))
return true;
else
return false;
}
#define SKIP_WS(x) do { \
- while (ISSPACE (*(x))) \
+ while (c_isspace (*(x))) \
++(x); \
} while (0)
((e) - (b) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
&& ((e) - (b) == STRSIZE (literal) \
- || ISSPACE (b[STRSIZE (literal)])))
+ || c_isspace (b[STRSIZE (literal)])))
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
/* We are called only with known schemes, so we can dispatch on the
first letter. */
- switch (TOUPPER (*au))
+ switch (c_toupper (*au))
{
case 'B': /* Basic */
*finished = true;