1 /* Keep track of visited URLs in spider mode.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget. If not, see <http://www.gnu.org/licenses/>.
19 In addition, as a special exception, the Free Software Foundation
20 gives permission to link the code of its release of Wget with the
21 OpenSSL project's "OpenSSL" library (or with modified versions of it
22 that use the same license as the "OpenSSL" library), and distribute
23 the linked executables. You must obey the GNU General Public License
24 in all respects for all of the code used other than "OpenSSL". If you
25 modify this file, you may extend this exception to your version of the
26 file, but you are not obligated to do so. If you do not wish to do
27 so, delete this exception statement from your version. */
/* Maps a visited URL (malloc'd string key) to a `struct url_list' chain
   of the referrers that linked to it.  Created lazily by visited_url.  */
static struct hash_table *visited_urls_hash;
/* String set of URLs found to be broken; keys added via string_set_add.
   Created lazily by nonexisting_url, reported by print_broken_links.  */
static struct hash_table *nonexisting_urls_set;
46 /* Cleanup the data structures associated with this file. */
51 if (visited_urls_hash)
53 free_keys_and_values (visited_urls_hash);
54 hash_table_destroy (visited_urls_hash);
55 visited_urls_hash = NULL;
57 if (nonexisting_urls_set)
58 string_set_free (nonexisting_urls_set);
/* Remembers visited files.  */

/* One node in the per-URL chain of referrers kept in visited_urls_hash.  */
struct url_list
{
  char *url;                    /* referrer URL (heap-allocated copy), or
                                   NULL when the URL had no referrer */
  struct url_list *next;        /* next referrer, or NULL at the tail */
};
70 in_url_list_p (const struct url_list *list, const char *url)
72 const struct url_list *ptr;
74 for (ptr = list; ptr; ptr = ptr->next)
76 /* str[case]cmp is inadequate for URL comparison */
77 if (ptr->url != NULL && are_urls_equal (url, ptr->url))
85 visited_url (const char *url, const char *referrer)
87 struct url_list *list;
89 /* Ignore robots.txt URLs */
90 if (is_robots_txt_url (url))
93 if (!visited_urls_hash)
94 visited_urls_hash = make_string_hash_table (0);
96 list = hash_table_get (visited_urls_hash, url);
99 list = (struct url_list *) xnew0 (struct url_list);
100 list->url = referrer ? xstrdup (referrer) : NULL;
101 hash_table_put (visited_urls_hash, xstrdup (url), list);
103 else if (referrer && !in_url_list_p (list, referrer))
105 /* Append referrer at the end of the list */
106 struct url_list *newnode;
111 newnode = (struct url_list *) xnew0 (struct url_list);
112 newnode->url = xstrdup (referrer);
113 list->next = newnode;
117 /* Remembers broken links. */
119 nonexisting_url (const char *url)
121 /* Ignore robots.txt URLs */
122 if (is_robots_txt_url (url))
124 if (!nonexisting_urls_set)
125 nonexisting_urls_set = make_string_hash_table (0);
126 string_set_add (nonexisting_urls_set, url);
130 print_broken_links (void)
132 hash_table_iterator iter;
135 if (!nonexisting_urls_set)
137 logprintf (LOG_NOTQUIET, _("Found no broken links.\n\n"));
141 num_elems = hash_table_count (nonexisting_urls_set);
142 assert (num_elems > 0);
144 logprintf (LOG_NOTQUIET, ngettext("Found %d broken link.\n\n",
145 "Found %d broken links.\n\n", num_elems),
148 for (hash_table_iterate (nonexisting_urls_set, &iter);
149 hash_table_iter_next (&iter); )
151 struct url_list *list;
152 const char *url = (const char *) iter.key;
154 logprintf (LOG_NOTQUIET, _("%s referred by:\n"), url);
156 for (list = (struct url_list *) hash_table_get (visited_urls_hash, url);
157 list; list = list->next)
159 logprintf (LOG_NOTQUIET, _(" %s\n"), list->url);
162 logputs (LOG_NOTQUIET, "\n");