X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fspider.c;h=ae2f392c2252abf0fefe3e9f7b7134240b7b5d8d;hp=184ac35b856399eeb913cb3f986cb6c655852d23;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=e9a98d3639db13258f02e2ad2e8bc110da5b356a

diff --git a/src/spider.c b/src/spider.c
index 184ac35b..ae2f392c 100644
--- a/src/spider.c
+++ b/src/spider.c
@@ -1,5 +1,6 @@
 /* Keep track of visited URLs in spider mode.
-   Copyright (C) 2006 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+   Foundation, Inc.

 This file is part of GNU Wget.

@@ -16,23 +17,23 @@ GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget.  If not, see <http://www.gnu.org/licenses/>.

-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables.  You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL".  If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so.  If you do not wish to do
-so, delete this exception statement from your version.  */
+Additional permission under GNU GPL version 3 section 7

-#include <config.h>
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work.  */
+
+#include "wget.h"

 #include <stdio.h>
 #include <errno.h>
 #include <assert.h>

-#include "wget.h"
 #include "spider.h"
 #include "url.h"
 #include "utils.h"
@@ -40,7 +41,6 @@ so, delete this exception statement from your version.  */
 #include "res.h"


-static struct hash_table *visited_urls_hash;
 static struct hash_table *nonexisting_urls_set;

 /* Cleanup the data structures associated with this file.  */
@@ -48,72 +48,10 @@ static struct hash_table *nonexisting_urls_set;
 void
 spider_cleanup (void)
 {
-  if (visited_urls_hash)
-    {
-      free_keys_and_values (visited_urls_hash);
-      hash_table_destroy (visited_urls_hash);
-      visited_urls_hash = NULL;
-    }
   if (nonexisting_urls_set)
     string_set_free (nonexisting_urls_set);
 }

-/* Remembers visited files.  */
-
-struct url_list
-{
-  char *url;
-  struct url_list *next;
-};
-
-static bool
-in_url_list_p (const struct url_list *list, const char *url)
-{
-  const struct url_list *ptr;
-
-  for (ptr = list; ptr; ptr = ptr->next)
-    {
-      /* str[case]cmp is inadequate for URL comparison */
-      if (are_urls_equal (url, ptr->url))
-        return true;
-    }
-
-  return false;
-}
-
-void
-visited_url (const char *url, const char *referrer)
-{
-  struct url_list *list;
-
-  /* Ignore robots.txt URLs */
-  if (is_robots_txt_url (url))
-    return;
-
-  if (!visited_urls_hash)
-    visited_urls_hash = make_string_hash_table (0);
-
-  list = hash_table_get (visited_urls_hash, url);
-  if (!list)
-    {
-      list = (struct url_list *) xnew0 (struct url_list);
-      list->url = referrer ? xstrdup (referrer) : NULL;
-      hash_table_put (visited_urls_hash, xstrdup (url), list);
-    }
-  else if (referrer && !in_url_list_p (list, referrer))
-    {
-      /* Append referrer at the end of the list */
-      struct url_list *newnode;
-
-      while (list->next)
-        list = list->next;
-
-      newnode = (struct url_list *) xnew0 (struct url_list);
-      newnode->url = xstrdup (referrer);
-      list->next = newnode;
-    }
-}
-
 /* Remembers broken links.  */
 void
 nonexisting_url (const char *url)
@@ -131,39 +69,27 @@ print_broken_links (void)
 {
   hash_table_iterator iter;
   int num_elems;
-
-  if (!nonexisting_urls_set)
+
+  if (!nonexisting_urls_set)
     {
       logprintf (LOG_NOTQUIET, _("Found no broken links.\n\n"));
       return;
     }
-
+
   num_elems = hash_table_count (nonexisting_urls_set);
   assert (num_elems > 0);
-
-  if (num_elems > 1)
-    {
-      logprintf (LOG_NOTQUIET, _("Found %d broken links.\n\n"),
-                 num_elems);
-    }
-  else
-    {
-      logprintf (LOG_NOTQUIET, _("Found 1 broken link.\n\n"));
-    }
-
+
+  logprintf (LOG_NOTQUIET, ngettext("Found %d broken link.\n\n",
+                                    "Found %d broken links.\n\n", num_elems),
+             num_elems);
+
   for (hash_table_iterate (nonexisting_urls_set, &iter);
        hash_table_iter_next (&iter); )
     {
-      struct url_list *list;
+      /* Struct url_list *list;  */
       const char *url = (const char *) iter.key;
-
-      logprintf (LOG_NOTQUIET, _("%s referred by:\n"), url);
-
-      for (list = (struct url_list *) hash_table_get (visited_urls_hash, url);
-           list; list = list->next)
-        {
-          logprintf (LOG_NOTQUIET, _("  %s\n"), list->url);
-        }
+
+      logprintf (LOG_NOTQUIET, _("%s\n"), url);
     }

   logputs (LOG_NOTQUIET, "\n");
 }
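
Note on the larger removal above: the patch drops the visited_urls_hash machinery entirely, so spider mode no longer records, for each broken URL, the list of pages that referred to it, and print_broken_links() now emits one bare URL per line from nonexisting_urls_set (a string set built on wget's hash table; string_set_free appears in the retained code). The following is a self-contained sketch of the simplified bookkeeping, using a plain linked list in place of wget's hash table so it compiles on its own; every name in it is illustrative, not wget's own API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal stand-in for wget's string set: a deduplicating list of
   broken URLs with no referrer information attached.  */
struct str_node { char *s; struct str_node *next; };
static struct str_node *broken_urls;

static void
remember_broken (const char *url)
{
  struct str_node *p;
  for (p = broken_urls; p; p = p->next)
    if (strcmp (p->s, url) == 0)
      return;                      /* already recorded */
  p = malloc (sizeof *p);
  p->s = strdup (url);
  p->next = broken_urls;
  broken_urls = p;
}

/* Mirror of the new print_broken_links(): one URL per line,
   no "referred by" sublists any more.  */
static void
print_broken (void)
{
  struct str_node *p;
  for (p = broken_urls; p; p = p->next)
    printf ("%s\n", p->s);
}

int
main (void)
{
  remember_broken ("http://example.com/missing");
  remember_broken ("http://example.com/missing");  /* deduplicated */
  remember_broken ("http://example.com/gone");
  print_broken ();
  return 0;                        /* cleanup omitted for brevity */
}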
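
Note on the pluralization change in print_broken_links(): the hand-rolled num_elems > 1 branch is replaced by a single ngettext() call, which lets translation catalogs supply the correct plural form for any count and locale rather than hard-coding the English singular/plural split. A minimal standalone sketch of the same pattern, assuming only gettext's libintl (no wget code); on glibc it links as-is, elsewhere add -lintl:

#include <stdio.h>
#include <libintl.h>

/* Print a count of broken links, letting ngettext() choose the
   singular or plural msgid.  With no message catalog loaded it
   falls back to the English rule: first string when n == 1,
   second string otherwise.  */
static void
report_broken (int num_elems)
{
  printf (ngettext ("Found %d broken link.\n\n",
                    "Found %d broken links.\n\n",
                    num_elems),
          num_elems);
}

int
main (void)
{
  report_broken (1);   /* Found 1 broken link.   */
  report_broken (3);   /* Found 3 broken links.  */
  return 0;
}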