X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=faeb0e7fd345cbe94826445e27f9fe18bff819f4;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hp=70f6aa00e7373e43679503b82fa273c7a86fb7d1;hpb=a46aa44f5793b769c7048fb68a6951c8e67daf6e;p=wget
diff --git a/src/http.c b/src/http.c
index 70f6aa00..faeb0e7f 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,11 +1,11 @@
/* HTTP support.
- Copyright (C) 1996-2005 Free Software Foundation, Inc.
+ Copyright (C) 1996-2006 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
@@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
@@ -59,6 +58,7 @@ so, delete this exception statement from your version. */
# include "gen-md5.h"
#endif
#include "convert.h"
+#include "spider.h"
#ifdef TESTING
#include "test.h"
@@ -737,6 +737,20 @@ resp_free (struct response *resp)
xfree (resp);
}
+/* Log one response line, the characters [B, E), at LOG_VERBOSE,
+   preceded by PREFIX and followed by a newline.  The obvious
+     logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
+   is not used because it does not escape non-printable characters
+   and, in fact, caused crashes in UTF-8 locales.  */
+
+static void
+print_response_line (const char *prefix, const char *b, const char *e)
+{
+  char *line;
+  BOUNDED_TO_ALLOCA (b, e, line);
+  logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint (line));
+}
+
/* Print the server response, line by line, omitting the trailing CRLF
from individual header lines, and prefixed with PREFIX. */
@@ -755,9 +769,7 @@ print_server_response (const struct response *resp, const char *prefix)
--e;
if (b < e && e[-1] == '\r')
--e;
- /* This is safe even on printfs with broken handling of "%.s"
- because resp->headers ends with \0. */
- logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
+ print_response_line(prefix, b, e);
}
}
@@ -869,7 +881,7 @@ skip_short_body (int fd, wgint contlen)
bool
extract_param (const char **source, param_token *name, param_token *value,
- char separator)
+ char separator)
{
const char *p = *source;
@@ -877,7 +889,7 @@ extract_param (const char **source, param_token *name, param_token *value,
if (!*p)
{
*source = p;
- return false; /* no error; nothing more to extract */
+ return false; /* no error; nothing more to extract */
}
/* Extract name. */
@@ -885,9 +897,9 @@ extract_param (const char **source, param_token *name, param_token *value,
while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
name->e = p;
if (name->b == name->e)
- return false; /* empty name: error */
+ return false; /* empty name: error */
while (ISSPACE (*p)) ++p;
- if (*p == separator || !*p) /* no value */
+ if (*p == separator || !*p) /* no value */
{
xzero (*value);
if (*p == separator) ++p;
@@ -895,12 +907,12 @@ extract_param (const char **source, param_token *name, param_token *value,
return true;
}
if (*p != '=')
- return false; /* error */
+ return false; /* error */
/* *p is '=', extract value */
++p;
while (ISSPACE (*p)) ++p;
- if (*p == '"') /* quoted */
+ if (*p == '"') /* quoted */
{
value->b = ++p;
while (*p && *p != '"') ++p;
@@ -911,12 +923,12 @@ extract_param (const char **source, param_token *name, param_token *value,
while (ISSPACE (*p)) ++p;
while (*p && *p != separator) ++p;
if (*p == separator)
- ++p;
+ ++p;
else if (*p)
- /* garbage after closed quote, e.g. foo="bar"baz */
- return false;
+ /* garbage after closed quote, e.g. foo="bar"baz */
+ return false;
}
- else /* unquoted */
+ else /* unquoted */
{
value->b = p;
while (*p && *p != separator) ++p;
@@ -956,16 +968,34 @@ parse_content_disposition (const char *hdr, char **filename)
while (extract_param (&hdr, &name, &value, ';'))
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
{
- /* Make the file name begin at the last slash or backslash. */
+ /* Make the file name begin at the last slash or backslash. */
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
if (last_slash && last_bs)
value.b = 1 + MAX (last_slash, last_bs);
else if (last_slash || last_bs)
value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- *filename = strdupdelim (value.b, value.e);
+ if (value.b == value.e)
+ continue;
+        /* Prepend the directory prefix, if any; an empty prefix adds nothing. */
+        if (opt.dir_prefix)
+          {
+            int prefix_length = strlen (opt.dir_prefix);
+            bool add_slash = (prefix_length > 0
+                              && opt.dir_prefix[prefix_length - 1] != '/');
+            int total_length;
+            if (add_slash)
+              ++prefix_length;
+            total_length = prefix_length + (value.e - value.b);
+            *filename = xmalloc (total_length + 1);
+            strcpy (*filename, opt.dir_prefix);
+            if (add_slash)
+              (*filename)[prefix_length - 1] = '/';
+            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
+            (*filename)[total_length] = '\0';
+          }
+        else
+          *filename = strdupdelim (value.b, value.e);
return true;
}
return false;
@@ -1726,33 +1756,46 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
- if (!hs->local_file)
+ if (!hs->local_file)
{
/* Honor Content-Disposition whether possible. */
- if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
+ if (!opt.content_disposition
+ || !resp_header_copy (resp, "Content-Disposition",
+ hdrval, sizeof (hdrval))
|| !parse_content_disposition (hdrval, &hs->local_file))
{
- /* Choose filename according to URL name. */
+ /* The Content-Disposition header is missing or broken.
+ * Choose unique file name according to given URL. */
hs->local_file = url_file_name (u);
}
}
/* TODO: perform this check only once. */
- if (opt.noclobber && file_exists_p (hs->local_file))
+ if (file_exists_p (hs->local_file))
{
- /* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
- logprintf (LOG_VERBOSE, _("\
+ if (opt.noclobber)
+ {
+ /* If opt.noclobber is turned on and file already exists, do not
+ retrieve the file */
+ logprintf (LOG_VERBOSE, _("\
File `%s' already there; not retrieving.\n\n"), hs->local_file);
- /* If the file is there, we suppose it's retrieved OK. */
- *dt |= RETROKF;
+ /* If the file is there, we suppose it's retrieved OK. */
+ *dt |= RETROKF;
- /* #### Bogusness alert. */
- /* If its suffix is "html" or "htm" or similar, assume text/html. */
- if (has_html_suffix_p (hs->local_file))
- *dt |= TEXTHTML;
+ /* #### Bogusness alert. */
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (hs->local_file))
+ *dt |= TEXTHTML;
- return RETROK;
+ return RETROK;
+ }
+ else if (!ALLOW_CLOBBER)
+ {
+ char *unique = unique_name (hs->local_file, true);
+ if (unique != hs->local_file)
+ xfree (hs->local_file);
+ hs->local_file = unique;
+ }
}
/* Support timestamping */
@@ -1790,7 +1833,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
{
- local_dot_orig_file_exists = 1;
+ local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
}
@@ -1998,7 +2041,7 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
content-type. */
if (!type ||
0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
@@ -2121,13 +2164,6 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
return RETRFINISHED;
}
- /* Print fetch message, if opt.verbose. */
- if (opt.verbose)
- {
- logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
- HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
- }
-
/* Open the local file. */
if (!output_stream)
{
@@ -2164,6 +2200,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
else
fp = output_stream;
+ /* Print fetch message, if opt.verbose. */
+ if (opt.verbose)
+ {
+ logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
+ HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
+ }
+
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
@@ -2212,7 +2255,8 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
int *dt, struct url *proxy)
{
int count;
- bool got_head = false; /* used for time-stamping */
+ bool got_head = false; /* used for time-stamping and filename detection */
+ bool got_name = false;
char *tms;
const char *tmrate;
uerr_t err, ret = TRYLIMEXC;
@@ -2246,7 +2290,10 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
hstat.referer = referer;
if (opt.output_document)
- hstat.local_file = xstrdup (opt.output_document);
+ {
+ hstat.local_file = xstrdup (opt.output_document);
+ got_name = true;
+ }
/* Reset the counter. */
count = 0;
@@ -2262,8 +2309,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
sleep_between_retrievals (count);
/* Get the current time string. */
- tms = time_str (NULL);
+ tms = time_str (time (NULL));
+ if (opt.spider && !got_head)
+ logprintf (LOG_VERBOSE, _("\
+Spider mode enabled. Check if remote file exists.\n"));
+
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
@@ -2291,13 +2342,14 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (opt.spider || (opt.timestamping && !got_head))
+ if (((opt.spider || opt.timestamping) && !got_head) || !got_name)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
/* Decide whether or not to restart. */
if (opt.always_rest
+ && got_name
&& stat (hstat.local_file, &st) == 0
&& S_ISREG (st.st_mode))
/* When -c is used, continue from on-disk size. (Can't use
@@ -2326,7 +2378,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
err = gethttp (u, &hstat, dt, proxy);
/* Time? */
- tms = time_str (NULL);
+ tms = time_str (time (NULL));
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
@@ -2382,26 +2434,43 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
+ char *hurl = NULL;
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
- char *hurl = url_string (u, true);
+ hurl = url_string (u, true);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
- xfree (hurl);
}
- logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
+ /* Maybe we should always keep track of broken links, not just in
+ * spider mode. */
+ if (opt.spider)
+ {
+ /* #### Again: ugly ugly ugly! */
+ if (!hurl)
+ hurl = url_string (u, true);
+ nonexisting_url (hurl);
+ logprintf (LOG_NOTQUIET, _("\
+Remote file does not exist -- broken link!!!\n"));
+ }
+ else
+ {
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
+ tms, hstat.statcode, escnonprint (hstat.error));
+ }
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
+ xfree_null (hurl);
goto exit;
}
/* Did we get the time-stamp? */
if (!got_head)
{
+ bool restart_loop = false;
+
if (opt.timestamping && !hstat.remote_time)
{
logputs (LOG_NOTQUIET, _("\
@@ -2415,53 +2484,95 @@ Last-modified header missing -- time-stamps turned off.\n"));
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
}
- }
-
- /* The time-stamping section. */
- if (opt.timestamping && !got_head)
- {
- got_head = true; /* no more time-stamping */
- *dt &= ~HEAD_ONLY;
- count = 0; /* the retrieve count for HEAD is reset */
-
- if (hstat.remote_time && tmr != (time_t) (-1))
+
+ /* The time-stamping section. */
+ if (opt.timestamping)
{
- /* Now time-stamping can be used validly. Time-stamping
- means that if the sizes of the local and remote file
- match, and local file is newer than the remote file,
- it will not be retrieved. Otherwise, the normal
- download procedure is resumed. */
- if (hstat.orig_file_tstamp >= tmr)
+ if (hstat.orig_file_name) /* Perform the following checks only
+ if the file we're supposed to
+ download already exists. */
{
- if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen)
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
{
- logprintf (LOG_VERBOSE, _("\
+ /* Now time-stamping can be used validly. Time-stamping
+ means that if the sizes of the local and remote file
+ match, and local file is newer than the remote file,
+ it will not be retrieved. Otherwise, the normal
+ download procedure is resumed. */
+ if (hstat.orig_file_tstamp >= tmr)
+ {
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
+ {
+ logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
- ret = RETROK;
- goto exit;
+ hstat.orig_file_name);
+ ret = RETROK;
+ goto exit;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+The sizes do not match (local %s) -- retrieving.\n"),
+ number_to_static_string (local_size));
+ }
+ }
+ else
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
+
+ logputs (LOG_VERBOSE, "\n");
}
- else
+ }
+
+ /* free_hstat (&hstat); */
+ hstat.timestamp_checked = true;
+ restart_loop = true;
+ }
+
+ if (opt.always_rest)
+ {
+ got_name = true;
+ restart_loop = true;
+ }
+
+ if (opt.spider)
+ {
+ if (opt.recursive)
+ {
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
+Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ restart_loop = true;
+ }
+ else
{
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
}
}
else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
-
- logputs (LOG_VERBOSE, "\n");
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
+ }
}
-
- /* free_hstat (&hstat); */
- hstat.timestamp_checked = true;
- continue;
+
+ got_head = true; /* no more time-stamping */
+ *dt &= ~HEAD_ONLY;
+ count = 0; /* the retrieve count for HEAD is reset */
+
+ if (restart_loop)
+ continue;
}
-
+
if ((tmr != (time_t) (-1))
- && !opt.spider
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
@@ -2480,14 +2591,6 @@ The sizes do not match (local %s) -- retrieving.\n"),
}
/* End of time-stamping section. */
- if (opt.spider)
- {
- logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
- escnonprint (hstat.error));
- ret = RETROK;
- goto exit;
- }
-
tmrate = retr_rate (hstat.rd_size, hstat.dltime);
total_download_time += hstat.dltime;
@@ -2777,12 +2880,12 @@ digest_authentication_encode (const char *au, const char *user,
{
int i;
for (i = 0; i < countof (options); i++)
- if (name.e - name.b == strlen (options[i].name)
- && 0 == strncmp (name.b, options[i].name, name.e - name.b))
- {
- *options[i].variable = strdupdelim (value.b, value.e);
- break;
- }
+ if (name.e - name.b == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+ {
+ *options[i].variable = strdupdelim (value.b, value.e);
+ break;
+ }
}
if (!realm || !nonce || !user || !passwd || !path || !method)
{
@@ -2952,19 +3055,27 @@ test_parse_content_disposition()
int i;
struct {
char *hdrval;
+ char *opt_dir_prefix;
char *filename;
bool result;
} test_array[] = {
- { "filename=\"file.ext\"", "file.ext", true },
- { "attachment; filename=\"file.ext\"", "file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
- { "attachment", NULL, false },
+ { "filename=\"file.ext\"", NULL, "file.ext", true },
+ { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+ { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
+ { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
+ { "attachment", NULL, NULL, false },
+ { "attachment", "somedir", NULL, false },
};
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
char *filename;
- bool res = parse_content_disposition (test_array[i].hdrval, &filename);
+ bool res;
+
+ opt.dir_prefix = test_array[i].opt_dir_prefix;
+ res = parse_content_disposition (test_array[i].hdrval, &filename);
mu_assert ("test_parse_content_disposition: wrong result",
res == test_array[i].result