/* Conversion of links to local files.
- Copyright (C) 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003-2005 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+along with Wget; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif /* HAVE_STRING_H */
+#include <string.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#include <assert.h>
-#include <sys/types.h>
#include "wget.h"
#include "convert.h"
conversion after Wget is done. */
struct hash_table *downloaded_html_set;
-static void convert_links PARAMS ((const char *, struct urlpos *));
+static struct hash_table *nonexisting_urls_hash;
+
+static void convert_links (const char *, struct urlpos *);
/* This function is called when the retrieval is done to convert the
links that have been downloaded. It has to be called at the end of
free_urlpos (urls);
}
- secs = ptimer_measure (timer) / 1000;
+ secs = ptimer_measure (timer);
ptimer_destroy (timer);
- logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"),
- file_count, secs < 10 ? 3 : 1, secs);
+ logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
+ file_count, print_decimal (secs));
}
-static void write_backup_file PARAMS ((const char *, downloaded_file_t));
-static const char *replace_attr PARAMS ((const char *, int, FILE *,
- const char *));
-static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
- const char *, int));
-static char *local_quote_string PARAMS ((const char *));
-static char *construct_relative PARAMS ((const char *, const char *));
+static void write_backup_file (const char *, downloaded_file_t);
+static const char *replace_attr (const char *, int, FILE *, const char *);
+static const char *replace_attr_refresh_hack (const char *, int, FILE *,
+ const char *, int);
+static char *local_quote_string (const char *);
+static char *construct_relative (const char *, const char *);
/* Change the links in one HTML file. LINKS is a list of links in the
document, along with their positions and the desired direction of
}
/* Construct LINK as explained above. */
- link = (char *)xmalloc (3 * basedirs + strlen (linkfile) + 1);
+ link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
for (i = 0; i < basedirs; i++)
memcpy (link + 3 * i, "../", 3);
strcpy (link + 3 * i, linkfile);
}
}
-static int find_fragment PARAMS ((const char *, int, const char **,
- const char **));
+static bool find_fragment (const char *, int, const char **, const char **);
/* Replace an attribute's original text with NEW_TEXT. */
static const char *
replace_attr (const char *p, int size, FILE *fp, const char *new_text)
{
- int quote_flag = 0;
+ bool quote_flag = false;
char quote_char = '\"'; /* use "..." for quoting, unless the
original value is quoted, in which
case reuse its quoting char. */
if (*p == '\"' || *p == '\'')
{
quote_char = *p;
- quote_flag = 1;
+ quote_flag = true;
++p;
size -= 2; /* disregard opening and closing quote */
}
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
preceded by '&'. If the character is not found, return zero. If
- the character is found, return 1 and set BP and EP to point to the
- beginning and end of the region.
+ the character is found, return true and set BP and EP to point to
+ the beginning and end of the region.
This is used for finding the fragment indentifiers in URLs. */
-static int
+static bool
find_fragment (const char *beg, int size, const char **bp, const char **ep)
{
const char *end = beg + size;
- int saw_amp = 0;
+ bool saw_amp = false; /* true only while the previous character was '&' */
for (; beg < end; beg++)
{
switch (*beg)
{
case '&':
- saw_amp = 1;
+ saw_amp = true; /* a '#' right after this '&' must not count as a fragment */
break;
case '#':
if (!saw_amp)
{
*bp = beg;
*ep = end;
- return 1;
+ return true; /* *BP points at the '#', *EP at the end of the scanned region */
}
/* fallthrough */
default:
- saw_amp = 0;
+ saw_amp = false; /* any character other than '&' (including a '#' after '&') clears the flag */
}
}
- return 0;
+ return false; /* no qualifying '#'; *BP and *EP are left unmodified */
}
/* Quote FILE for use as local reference to an HTML file.
dl_url_file_map = make_string_hash_table (0); \
} while (0)
-/* Return 1 if S1 and S2 are the same, except for "/index.html". The
- three cases in which it returns one are (substitute any substring
- for "foo"):
+/* Return true if S1 and S2 are the same, except for "/index.html".
+ The cases in which it returns true are (substitute any
+ substring for "foo"):
m("foo/index.html", "foo/") ==> 1
m("foo/", "foo/index.html") ==> 1
m("foo", "foo/" ==> 1
m("foo", "foo") ==> 1 */
-static int
+static bool
match_except_index (const char *s1, const char *s2)
{
int i;
/* Strings differ at the very beginning -- bail out. We need to
check this explicitly to avoid `lng - 1' reading outside the
array. */
- return 0;
+ return false;
if (!*s1 && !*s2)
/* Both strings hit EOF -- strings are equal. */
- return 1;
+ return true;
else if (*s1 && *s2)
/* Strings are randomly different, e.g. "/foo/bar" and "/foo/qux". */
- return 0;
+ return false;
else if (*s1)
/* S1 is the longer one. */
lng = s1;
if (*lng == '/' && *(lng + 1) == '\0')
/* foo */
/* foo/ */
- return 1;
+ return true;
return 0 == strcmp (lng, "/index.html");
}
static void
dissociate_urls_from_file (const char *file)
{
- hash_table_map (dl_url_file_map, dissociate_urls_from_file_mapper,
- (char *)file);
+ /* Can't use hash_table_iter_* because the table mutates while mapping. */
+ hash_table_for_each (dl_url_file_map, dissociate_urls_from_file_mapper,
+ (char *) file); /* the cast drops const; FILE is presumably forwarded unchanged to the mapper -- confirm in hash_table_for_each */
}
/* Register that URL has been successfully downloaded to FILE. This
string_set_add (downloaded_html_set, file);
}
-static void downloaded_files_free PARAMS ((void));
+static void downloaded_files_free (void);
+static void nonexisting_urls_free (void);
/* Cleanup the data structures associated with this file. */
if (downloaded_html_set)
string_set_free (downloaded_html_set);
downloaded_files_free ();
+ nonexisting_urls_free ();
if (converted_files)
string_set_free (converted_files);
}
return FILE_NOT_ALREADY_DOWNLOADED;
}
-static int
-df_free_mapper (void *key, void *value, void *ignored)
-{
- xfree (key);
- return 0;
-}
-
static void
downloaded_files_free (void)
{
if (downloaded_files_hash)
{
- hash_table_map (downloaded_files_hash, df_free_mapper, NULL);
+ hash_table_iterator iter;
+ for (hash_table_iterate (downloaded_files_hash, &iter);
+ hash_table_iter_next (&iter);
+ )
+ xfree (iter.key); /* release every key before the table itself is destroyed; values are not freed here */
hash_table_destroy (downloaded_files_hash);
downloaded_files_hash = NULL;
}
}
+\f
+/* Remembers broken links. */
+
+struct broken_urls_list
+{
+ char *url; /* a referrer of the broken URL (the broken URL itself is the hash key); NULL when nonexisting_url was given no referrer */
+ struct broken_urls_list *next; /* next referrer of the same broken URL, or NULL at the end of the list */
+};
+
+/* Return true if URL is already present in LIST, false otherwise.
+   URLs are compared case-insensitively.  Either URL or a list entry
+   may be NULL (nonexisting_url stores a NULL entry when it is given
+   no referrer); a NULL only matches another NULL and is never passed
+   to strcasecmp, which would be undefined behavior.  */
+static bool
+in_list (const struct broken_urls_list *list, const char *url)
+{
+  const struct broken_urls_list *ptr;
+
+  for (ptr = list; ptr; ptr = ptr->next)
+    {
+      if (!url || !ptr->url)
+        {
+          /* At least one side is NULL: equal only if both are.  */
+          if (url == ptr->url)
+            return true;
+          continue;
+        }
+
+      /* TODO: strcasecmp may not be appropriate to compare URLs */
+      if (strcasecmp (url, ptr->url) == 0)
+        return true;
+    }
+
+  return false;
+}
+
+/* Record that URL could not be retrieved and that REFERRER links to
+   it.  REFERRER may be NULL when the referring document is unknown.
+
+   The first report of URL creates its hash entry (keyed by a private
+   copy of URL) whose list head holds a copy of REFERRER, or NULL.
+   Later reports add REFERRER to the end of that list unless it is
+   already present.  The list head stored in the hash table never
+   changes, so appending does not require updating the table.  */
+void
+nonexisting_url (const char *url, const char *referrer)
+{
+  struct broken_urls_list *list;
+  struct broken_urls_list *newnode;
+
+  if (!nonexisting_urls_hash)
+    nonexisting_urls_hash = make_string_hash_table (0);
+
+  list = hash_table_get (nonexisting_urls_hash, url);
+  if (!list)
+    {
+      list = xnew0 (struct broken_urls_list);
+      list->url = referrer ? xstrdup (referrer) : NULL;
+      hash_table_put (nonexisting_urls_hash, xstrdup (url), list);
+      return;
+    }
+
+  /* URL is already known.  An unknown referrer adds no information,
+     and must not reach in_list or xstrdup, which do not accept NULL.  */
+  if (!referrer)
+    return;
+
+  /* The entry was created without a referrer: fill in the head instead
+     of appending, so that no node with a NULL url is ever compared.  */
+  if (!list->url)
+    {
+      list->url = xstrdup (referrer);
+      return;
+    }
+
+  if (in_list (list, referrer))
+    return;
+
+  /* Append referrer at the end of the list */
+  while (list->next)
+    list = list->next;
+
+  newnode = xnew0 (struct broken_urls_list);
+  newnode->url = xstrdup (referrer);
+  list->next = newnode;
+}
+
+/* Release everything recorded by nonexisting_url: for each broken
+   URL, the hash key and the complete list of referrers (every node
+   and the string it owns), then the hash table itself.  Does nothing
+   if no broken URL was ever recorded.  */
+static void
+nonexisting_urls_free (void)
+{
+  if (nonexisting_urls_hash)
+    {
+      hash_table_iterator iter;
+      for (hash_table_iterate (nonexisting_urls_hash, &iter);
+           hash_table_iter_next (&iter);
+           )
+        {
+          struct broken_urls_list *node = (struct broken_urls_list *) iter.value;
+
+          /* Free the whole chain, not just its head; each node owns
+             its url string.  */
+          while (node)
+            {
+              struct broken_urls_list *next = node->next;
+              /* url is NULL when the referrer was unknown.  */
+              if (node->url)
+                xfree (node->url);
+              xfree (node);
+              node = next;
+            }
+          xfree (iter.key);
+        }
+      hash_table_destroy (nonexisting_urls_hash);
+      nonexisting_urls_hash = NULL;
+    }
+}
+
+/* Log a report of the broken links recorded by nonexisting_url: a
+   line with the number of broken URLs, then each broken URL followed
+   by the documents that refer to it.  If nothing was recorded, log
+   that no broken links were found.  */
+void
+print_broken_links (void)
+{
+  hash_table_iterator iter;
+  int num_elems;
+
+  if (!nonexisting_urls_hash)
+    {
+      logprintf (LOG_NOTQUIET, _("Found no broken links.\n\n"));
+      return;
+    }
+
+  num_elems = hash_table_count (nonexisting_urls_hash);
+  assert (num_elems > 0);
+
+  if (num_elems > 1)
+    {
+      logprintf (LOG_NOTQUIET, _("Found %d broken links.\n\n"),
+                 num_elems);
+    }
+  else
+    {
+      logprintf (LOG_NOTQUIET, _("Found 1 broken link.\n\n"));
+    }
+
+  for (hash_table_iterate (nonexisting_urls_hash, &iter);
+       hash_table_iter_next (&iter);
+       )
+    {
+      struct broken_urls_list *list;
+
+      logprintf (LOG_NOTQUIET, _("%s referred by:\n"), (const char *)iter.key);
+
+      for (list = (struct broken_urls_list *) iter.value;
+           list;
+           list = list->next)
+        {
+          /* A node's url is NULL when the referrer was unknown;
+             passing NULL for %s is undefined, so substitute a
+             placeholder.  */
+          logprintf (LOG_NOTQUIET, _(" %s\n"),
+                     list->url ? list->url : "(null)");
+        }
+    }
+  logputs (LOG_NOTQUIET, "\n");
+}
+
\f
/* The function returns the pointer to the malloc-ed quoted version of
string s. It will recognize and quote numeric and special graphic
else if (*s == ' ')
i += 4; /* #32; */
}
- res = (char *)xmalloc (i + 1);
+ res = xmalloc (i + 1);
s = b;
for (p = res; *s; s++)
{