# include "gen-md5.h"
#endif
#include "convert.h"
+#include "spider.h"
#ifdef TESTING
#include "test.h"
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
+#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
const char *, bool *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
+static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
#define BEGINS_WITH(line, string_constant) \
}
}
- DEBUGP (("hs->local_file is: %s %s\n", hs->local_file,
- file_exists_p (hs->local_file) ? "(existing)" : "(not existing)"));
-
/* TODO: perform this check only once. */
if (file_exists_p (hs->local_file))
{
return RETROK;
}
- else
+ else if (!ALLOW_CLOBBER)
{
char *unique = unique_name (hs->local_file, true);
if (unique != hs->local_file)
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
{
- local_dot_orig_file_exists = 1;
+ local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
}
else
*dt &= ~TEXTHTML;
- DEBUGP (("TEXTHTML is %s.\n", *dt | TEXTHTML ? "on": "off"));
+ if (type &&
+ 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ *dt |= TEXTCSS;
+ else
+ *dt &= ~TEXTCSS;
- if (opt.html_extension && (*dt & TEXTHTML))
- /* -E / --html-extension / html_extension = on was specified, and this is a
- text/html file. If some case-insensitive variation on ".htm[l]" isn't
- already the file's suffix, tack on ".html". */
+ if (opt.html_extension)
{
- char *last_period_in_local_filename = strrchr (hs->local_file, '.');
-
- if (last_period_in_local_filename == NULL
- || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
- || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+ if (*dt & TEXTHTML)
+ /* -E / --html-extension / html_extension = on was specified,
+ and this is a text/html file. If some case-insensitive
+ variation on ".htm[l]" isn't already the file's suffix,
+ tack on ".html". */
{
- int local_filename_len = strlen (hs->local_file);
- /* Resize the local file, allowing for ".html" preceded by
- optional ".NUMBER". */
- hs->local_file = xrealloc (hs->local_file,
- local_filename_len + 24 + sizeof (".html"));
- strcpy(hs->local_file + local_filename_len, ".html");
- /* If clobbering is not allowed and the file, as named,
- exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
- {
- int ext_num = 1;
- do
- sprintf (hs->local_file + local_filename_len,
- ".%d.html", ext_num++);
- while (file_exists_p (hs->local_file));
- }
- *dt |= ADDED_HTML_EXTENSION;
+ ensure_extension (hs, ".html", dt);
+ }
+ else if (*dt & TEXTCSS)
+ {
+ ensure_extension (hs, ".css", dt);
}
}
int *dt, struct url *proxy)
{
int count;
- bool got_head = false; /* used for time-stamping */
+ bool got_head = false; /* used for time-stamping and filename detection */
+ bool got_name = false;
char *tms;
const char *tmrate;
uerr_t err, ret = TRYLIMEXC;
hstat.referer = referer;
if (opt.output_document)
- hstat.local_file = xstrdup (opt.output_document);
+ {
+ hstat.local_file = xstrdup (opt.output_document);
+ got_name = true;
+ }
/* Reset the counter. */
count = 0;
/* Get the current time string. */
tms = time_str (time (NULL));
+ if (opt.spider && !got_head)
+ logprintf (LOG_VERBOSE, _("\
+Spider mode enabled. Check if remote file exists.\n"));
+
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if ((opt.spider && !opt.recursive) || (opt.timestamping && !got_head))
+ if (((opt.spider || opt.timestamping) && !got_head)
+ || (opt.always_rest && !got_name))
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
/* Decide whether or not to restart. */
if (opt.always_rest
+ && got_name
&& stat (hstat.local_file, &st) == 0
&& S_ISREG (st.st_mode))
/* When -c is used, continue from on-disk size. (Can't use
hurl = url_string (u, true);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
}
- if (opt.spider && opt.recursive)
+ /* Maybe we should always keep track of broken links, not just in
+ * spider mode. */
+ if (opt.spider)
+ {
+ /* #### Again: ugly ugly ugly! */
+ if (!hurl)
+ hurl = url_string (u, true);
+ nonexisting_url (hurl);
+ logprintf (LOG_NOTQUIET, _("\
+Remote file does not exist -- broken link!!!\n"));
+ }
+ else
{
- if (!hurl) hurl = url_string (u, true);
- nonexisting_url (hurl, referer);
+ logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
+ tms, hstat.statcode, escnonprint (hstat.error));
}
- logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
xfree_null (hurl);
/* Did we get the time-stamp? */
if (!got_head)
{
+ bool restart_loop = false;
+
if (opt.timestamping && !hstat.remote_time)
{
logputs (LOG_NOTQUIET, _("\
logputs (LOG_VERBOSE, _("\
Last-modified header invalid -- time-stamp ignored.\n"));
}
- }
-
- /* The time-stamping section. */
- if (opt.timestamping && !got_head)
- {
- got_head = true; /* no more time-stamping */
- *dt &= ~HEAD_ONLY;
- count = 0; /* the retrieve count for HEAD is reset */
-
- if (hstat.remote_time && tmr != (time_t) (-1))
+
+ /* The time-stamping section. */
+ if (opt.timestamping)
{
- /* Now time-stamping can be used validly. Time-stamping
- means that if the sizes of the local and remote file
- match, and local file is newer than the remote file,
- it will not be retrieved. Otherwise, the normal
- download procedure is resumed. */
- if (hstat.orig_file_tstamp >= tmr)
+ if (hstat.orig_file_name) /* Perform the following checks only
+ if the file we're supposed to
+ download already exists. */
{
- if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen)
+ if (hstat.remote_time &&
+ tmr != (time_t) (-1))
{
- logprintf (LOG_VERBOSE, _("\
+ /* Now time-stamping can be used validly. Time-stamping
+ means that if the sizes of the local and remote file
+ match, and local file is newer than the remote file,
+ it will not be retrieved. Otherwise, the normal
+ download procedure is resumed. */
+ if (hstat.orig_file_tstamp >= tmr)
+ {
+ if (hstat.contlen == -1
+ || hstat.orig_file_size == hstat.contlen)
+ {
+ logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
- ret = RETROK;
- goto exit;
+ hstat.orig_file_name);
+ ret = RETROK;
+ goto exit;
+ }
+ else
+ {
+ logprintf (LOG_VERBOSE, _("\
+The sizes do not match (local %s) -- retrieving.\n"),
+ number_to_static_string (local_size));
+ }
+ }
+ else
+ logputs (LOG_VERBOSE,
+ _("Remote file is newer, retrieving.\n"));
+
+ logputs (LOG_VERBOSE, "\n");
}
- else
+ }
+
+ /* free_hstat (&hstat); */
+ hstat.timestamp_checked = true;
+ restart_loop = true;
+ }
+
+ if (opt.always_rest)
+ {
+ got_name = true;
+ restart_loop = true;
+ }
+
+ if (opt.spider)
+ {
+ if (opt.recursive)
+ {
+ if (*dt & TEXTHTML)
+ {
+ logputs (LOG_VERBOSE, _("\
+Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ restart_loop = true;
+ }
+ else
{
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (local_size));
+Remote file exists but does not contain any link -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
}
}
else
- logputs (LOG_VERBOSE,
- _("Remote file is newer, retrieving.\n"));
-
- logputs (LOG_VERBOSE, "\n");
+ {
+ logprintf (LOG_VERBOSE, _("\
+Remote file exists but recursion is disabled -- not retrieving.\n\n"));
+ ret = RETRUNNEEDED;
+ goto exit;
+ }
}
-
- /* free_hstat (&hstat); */
- hstat.timestamp_checked = true;
- continue;
+
+ got_head = true; /* no more time-stamping */
+ *dt &= ~HEAD_ONLY;
+ count = 0; /* the retrieve count for HEAD is reset */
+
+ if (restart_loop)
+ continue;
}
-
+
if ((tmr != (time_t) (-1))
- && (!opt.spider || opt.recursive)
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
}
/* End of time-stamping section. */
- if (opt.spider && !opt.recursive)
- {
- logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
- escnonprint (hstat.error));
- ret = RETROK;
- goto exit;
- }
-
tmrate = retr_rate (hstat.rd_size, hstat.dltime);
total_download_time += hstat.dltime;
cookie_jar_delete (wget_cookie_jar);
}
+void
+ensure_extension (struct http_stat *hs, const char *ext, int *dt)
+{
+ char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ char shortext[8];
+ int len = strlen (ext);
+ if (len == 5)
+ {
+ strncpy (shortext, ext, len - 1);
+ shortext[len - 2] = '\0';
+ }
+
+ if (last_period_in_local_filename == NULL
+ || !(0 == strcasecmp (last_period_in_local_filename, shortext)
+ || 0 == strcasecmp (last_period_in_local_filename, ext)))
+ {
+ int local_filename_len = strlen (hs->local_file);
+ /* Resize the local file, allowing for ".html" preceded by
+ optional ".NUMBER". */
+ hs->local_file = xrealloc (hs->local_file,
+ local_filename_len + 24 + len);
+ strcpy (hs->local_file + local_filename_len, ext);
+ /* If clobbering is not allowed and the file, as named,
+ exists, tack on ".NUMBER.html" instead. */
+ if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+ {
+ int ext_num = 1;
+ do
+ sprintf (hs->local_file + local_filename_len,
+ ".%d%s", ext_num++, ext);
+ while (file_exists_p (hs->local_file));
+ }
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+}
+
#ifdef TESTING