# include "gen-md5.h"
#endif
#include "convert.h"
+#include "spider.h"
#ifdef TESTING
#include "test.h"
bool
extract_param (const char **source, param_token *name, param_token *value,
- char separator)
+ char separator)
{
const char *p = *source;
if (!*p)
{
*source = p;
- return false; /* no error; nothing more to extract */
+ return false; /* no error; nothing more to extract */
}
/* Extract name. */
while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;
name->e = p;
if (name->b == name->e)
- return false; /* empty name: error */
+ return false; /* empty name: error */
while (ISSPACE (*p)) ++p;
- if (*p == separator || !*p) /* no value */
+ if (*p == separator || !*p) /* no value */
{
xzero (*value);
if (*p == separator) ++p;
return true;
}
if (*p != '=')
- return false; /* error */
+ return false; /* error */
/* *p is '=', extract value */
++p;
while (ISSPACE (*p)) ++p;
- if (*p == '"') /* quoted */
+ if (*p == '"') /* quoted */
{
value->b = ++p;
while (*p && *p != '"') ++p;
while (ISSPACE (*p)) ++p;
while (*p && *p != separator) ++p;
if (*p == separator)
- ++p;
+ ++p;
else if (*p)
- /* garbage after closed quote, e.g. foo="bar"baz */
- return false;
+ /* garbage after closed quote, e.g. foo="bar"baz */
+ return false;
}
- else /* unquoted */
+ else /* unquoted */
{
value->b = p;
while (*p && *p != separator) ++p;
while (extract_param (&hdr, &name, &value, ';'))
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
{
- /* Make the file name begin at the last slash or backslash. */
+ /* Make the file name begin at the last slash or backslash. */
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
if (last_slash && last_bs)
value.b = 1 + MAX (last_slash, last_bs);
else if (last_slash || last_bs)
value.b = 1 + (last_slash ? last_slash : last_bs);
- if (value.b == value.e)
- continue;
- *filename = strdupdelim (value.b, value.e);
+ if (value.b == value.e)
+ continue;
+ /* Start with the directory prefix, if specified. */
+ if (opt.dir_prefix)
+ {
+ int prefix_length = strlen (opt.dir_prefix);
+ bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
+ int total_length;
+
+ if (add_slash)
+ ++prefix_length;
+ total_length = prefix_length + (value.e - value.b);
+ *filename = xmalloc (total_length + 1);
+ strcpy (*filename, opt.dir_prefix);
+ if (add_slash)
+ (*filename)[prefix_length - 1] = '/';
+ memcpy (*filename + prefix_length, value.b, (value.e - value.b));
+ (*filename)[total_length] = '\0';
+ }
+ else
+ *filename = strdupdelim (value.b, value.e);
return true;
}
return false;
/* Get the current time string. */
tms = time_str (time (NULL));
+ if (opt.spider && !got_head)
+ logprintf (LOG_VERBOSE, _("\
+Spider mode enabled. Check if remote file exists.\n"));
+
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if ((opt.spider && !opt.recursive)
- || (opt.timestamping && !got_head)
+ if (((opt.spider || opt.timestamping) && !got_head)
|| (opt.always_rest && !got_name))
*dt |= HEAD_ONLY;
else
hurl = url_string (u, true);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
}
- if (opt.spider && opt.recursive)
+ /* Maybe we should always keep track of broken links, not just in
+ * spider mode. */
+ if (opt.spider)
+ {
+ /* #### Again: ugly ugly ugly! */
+ if (!hurl)
+ hurl = url_string (u, true);
+ nonexisting_url (hurl);
+ logprintf (LOG_NOTQUIET, _("\
+Remote file does not exist -- broken link!!!\n"));
+ }
+ else
{
- if (!hurl) hurl = url_string (u, true);
- nonexisting_url (hurl, referer);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
+ tms, hstat.statcode, escnonprint (hstat.error));
}
- logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
xfree_null (hurl);
/* The time-stamping section. */
if (opt.timestamping)
{
- if (hstat.orig_file_name) /* Perform this check only if the file we're
- supposed to download already exists. */
+ if (hstat.orig_file_name) /* Perform the following checks only
+ if the file we're supposed to
+ download already exists. */
{
- if (hstat.remote_time && tmr != (time_t) (-1))
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
{
/* Now time-stamping can be used validly. Time-stamping
means that if the sizes of the local and remote file
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen)
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
{
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
got_name = true;
restart_loop = true;
}
+
+ if (opt.spider)
+ {
+ if (opt.recursive)
+ {
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
+Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ restart_loop = true;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
+ }
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
+ }
+ }
got_head = true; /* no more time-stamping */
*dt &= ~HEAD_ONLY;
}
if ((tmr != (time_t) (-1))
- && (!opt.spider || opt.recursive)
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
}
/* End of time-stamping section. */
- if (opt.spider && !opt.recursive)
- {
- logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
- escnonprint (hstat.error));
- ret = RETROK;
- goto exit;
- }
-
tmrate = retr_rate (hstat.rd_size, hstat.dltime);
total_download_time += hstat.dltime;
{
int i;
for (i = 0; i < countof (options); i++)
- if (name.e - name.b == strlen (options[i].name)
- && 0 == strncmp (name.b, options[i].name, name.e - name.b))
- {
- *options[i].variable = strdupdelim (value.b, value.e);
- break;
- }
+ if (name.e - name.b == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+ {
+ *options[i].variable = strdupdelim (value.b, value.e);
+ break;
+ }
}
if (!realm || !nonce || !user || !passwd || !path || !method)
{
int i;
struct {
char *hdrval;
+ char *opt_dir_prefix;
char *filename;
bool result;
} test_array[] = {
- { "filename=\"file.ext\"", "file.ext", true },
- { "attachment; filename=\"file.ext\"", "file.ext", true },
- { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
- { "attachment", NULL, false },
+ { "filename=\"file.ext\"", NULL, "file.ext", true },
+ { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+ { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
+ { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
+ { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
+ { "attachment", NULL, NULL, false },
+ { "attachment", "somedir", NULL, false },
};
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
char *filename;
- bool res = parse_content_disposition (test_array[i].hdrval, &filename);
+ bool res;
+
+ opt.dir_prefix = test_array[i].opt_dir_prefix;
+ res = parse_content_disposition (test_array[i].hdrval, &filename);
mu_assert ("test_parse_content_disposition: wrong result",
res == test_array[i].result