/* Handling of recursive HTTP retrieving.
- Copyright (C) 1996-2005 Free Software Foundation, Inc.
+ Copyright (C) 1996-2006 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
#include "hash.h"
#include "res.h"
#include "convert.h"
+#include "spider.h"
\f
/* Functions for maintaining the URL queue. */
{
int dt = 0;
char *redirected = NULL;
- bool oldrec = opt.recursive;
- opt.recursive = false;
- status = retrieve_url (url, &file, &redirected, referer, &dt);
- opt.recursive = oldrec;
+ status = retrieve_url (url, &file, &redirected, referer, &dt, false);
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))
}
}
+ if (opt.spider)
+ {
+ visited_url (url, referer);
+ }
+
if (descend
&& depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
{
}
}
- if (opt.delete_after || (file && !acceptable (file)))
+ if (file
+ && (opt.delete_after
+ || opt.spider /* opt.recursive is implicitly true */
+ || !acceptable (file)))
{
/* Either --delete-after was specified, or we loaded this
- otherwise rejected (e.g. by -R) HTML file just so we
- could harvest its hyperlinks -- in either case, delete
- the local file. */
+ (otherwise unneeded because of --spider or rejected by -R)
+ HTML file just to harvest its hyperlinks -- in either case,
+ delete the local file. */
DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
opt.delete_after ? "--delete-after" :
- "recursive rejection criteria"));
+ (opt.spider ? "--spider" :
+ "recursive rejection criteria")));
logprintf (LOG_VERBOSE,
- (opt.delete_after
+ (opt.delete_after || opt.spider
? _("Removing %s.\n")
: _("Removing %s since it should be rejected.\n")),
file);
if (unlink (file))
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ logputs (LOG_VERBOSE, "\n");
register_delete_file (file);
}
if (string_set_contains (blacklist, url))
{
+ if (opt.spider)
+ {
+ char *referrer = url_string (parent, true);
+ DEBUGP (("download_child_p: parent->url is: `%s'\n", parent->url));
+ visited_url (url, referrer);
+ xfree (referrer);
+ }
DEBUGP (("Already on the black list.\n"));
goto out;
}
&& u->port == start_url_parsed->port
&& !(opt.page_requisites && upos->link_inline_p))
{
- if (!frontcmp (start_url_parsed->dir, u->dir))
+ if (!subdir_p (start_url_parsed->dir, u->dir))
{
DEBUGP (("Going to \"%s\" would escape \"%s\" with no_parent on.\n",
u->dir, start_url_parsed->dir));