1 /* Keep track of visited URLs in spider mode.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
/* Maps each visited URL to the head of a struct url_list holding the
   referrers recorded for it; created on first use in visited_url. */
44 static struct hash_table *visited_urls_hash;
/* Set of URLs passed to nonexisting_url (the broken links); created on
   first use there. */
45 static struct hash_table *nonexisting_urls_set;
47 /* Clean up the data structures associated with this file. */
/* Only tear down the visited-URL table if it was ever created. */
52 if (visited_urls_hash)
/* NOTE(review): the values stored in this table are url_list head nodes;
   the url strings they own and any nodes chained through ->next look like
   they are not released by this call -- confirm against
   free_keys_and_values. */
54 free_keys_and_values (visited_urls_hash);
55 hash_table_destroy (visited_urls_hash);
/* Reset so that a later visited_url call builds a fresh table. */
56 visited_urls_hash = NULL;
58 if (nonexisting_urls_set)
/* NOTE(review): unlike visited_urls_hash, this pointer is not reset to
   NULL after being freed in the visible code, so later checks of it would
   see a stale pointer -- confirm nothing uses it after cleanup. */
59 string_set_free (nonexisting_urls_set);
62 /* Remembers visited files. */
/* Link to the following node; list traversals in this file stop at a
   null link. */
67 struct url_list *next;
/* Walks LIST node by node and compares each node's url with URL using
   are_urls_equal.  Presumably reports whether a match was found -- TODO
   confirm against the return statements.
   VERBOSE is not referenced anywhere in the visible body.
   NOTE(review): visited_url can store a NULL url in a head node (when the
   first referrer is NULL); that NULL would be handed to are_urls_equal
   here -- confirm it tolerates a NULL argument. */
71 in_url_list_p (const struct url_list *list, const char *url, bool verbose)
73 const struct url_list *ptr;
75 for (ptr = list; ptr; ptr = ptr->next)
77 /* str[case]cmp is inadequate for URL comparison */
78 if (are_urls_equal (url, ptr->url))
/* Records URL in visited_urls_hash and remembers REFERRER in the list of
   referrers kept for it.  REFERRER may be NULL.  Both strings are copied
   with xstrdup before being stored. */
86 visited_url (const char *url, const char *referrer)
88 struct url_list *list;
90 /* Ignore robots.txt URLs */
91 if (is_robots_txt_url (url))
/* Create the table on first use. */
94 if (!visited_urls_hash)
95 visited_urls_hash = make_string_hash_table (0);
/* Fetch the referrer list already stored for this URL, if any. */
97 list = hash_table_get (visited_urls_hash, url);
/* New entry: the head node carries a copy of the referrer, or NULL when no
   referrer was given, and is stored under a private copy of URL. */
100 list = (struct url_list *) xnew0 (struct url_list);
101 list->url = referrer ? xstrdup (referrer) : NULL;
102 hash_table_put (visited_urls_hash, xstrdup (url), list);
/* Existing entry: only a non-NULL referrer that is not already in the list
   gets added. */
104 else if (referrer && !in_url_list_p (list, referrer, false))
106 /* Append referrer at the end of the list */
107 struct url_list *newnode;
112 newnode = (struct url_list *) xnew0 (struct url_list);
113 newnode->url = xstrdup (referrer);
/* NOTE(review): this is an append only if list already points at the last
   node at this point -- confirm a tail walk precedes it; otherwise nodes
   after the head would be dropped. */
114 list->next = newnode;
118 /* Remembers broken links. */
/* Adds URL to nonexisting_urls_set, creating the set on first use. */
120 nonexisting_url (const char *url)
122 /* Ignore robots.txt URLs */
123 if (is_robots_txt_url (url))
/* Create the set on first use. */
125 if (!nonexisting_urls_set)
126 nonexisting_urls_set = make_string_hash_table (0);
127 string_set_add (nonexisting_urls_set, url);
/* Logs, at LOG_NOTQUIET level, how many broken links were recorded and,
   for each broken URL, the referrers stored for it in visited_urls_hash. */
131 print_broken_links (void)
133 hash_table_iterator iter;
/* The set is only created when a broken link is recorded, so a null set
   means none were found. */
136 if (!nonexisting_urls_set)
138 logprintf (LOG_NOTQUIET, _("Found no broken links.\n\n"));
142 num_elems = hash_table_count (nonexisting_urls_set);
/* nonexisting_url creates the set immediately before adding to it, so an
   existing set is expected to be non-empty. */
143 assert (num_elems > 0);
/* Plural form of the summary line. */
147 logprintf (LOG_NOTQUIET, _("Found %d broken links.\n\n"),
/* Singular form of the summary line. */
152 logprintf (LOG_NOTQUIET, _("Found 1 broken link.\n\n"));
/* Visit every URL in the broken-link set. */
155 for (hash_table_iterate (nonexisting_urls_set, &iter);
156 hash_table_iter_next (&iter); )
158 struct url_list *list;
159 const char *url = (const char *) iter.key;
161 logprintf (LOG_NOTQUIET, _("%s referred by:\n"), url);
/* NOTE(review): visited_urls_hash is used here without a visible NULL
   check; it stays NULL until visited_url has stored something -- confirm
   hash_table_get is never reached with a NULL table. */
163 for (list = (struct url_list *) hash_table_get (visited_urls_hash, url);
164 list; list = list->next)
/* NOTE(review): list->url can be NULL for a head node created without a
   referrer, which would pass NULL to %s -- confirm logprintf copes. */
166 logprintf (LOG_NOTQUIET, _(" %s\n"), list->url);
169 logputs (LOG_NOTQUIET, "\n");