/* Conversion of links to local files.
- Copyright (C) 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif /* HAVE_STRING_H */
+#include <string.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#include <assert.h>
-#include <sys/types.h>
#include "wget.h"
#include "convert.h"
#include "recur.h"
#include "utils.h"
#include "hash.h"
+#include "ptimer.h"
static struct hash_table *dl_file_url_map;
struct hash_table *dl_url_file_map;
-/* List of HTML files downloaded in this Wget run, used for link
- conversion after Wget is done. The list and the set contain the
- same information, except the list maintains the order. Perhaps I
- should get rid of the list, it's there for historical reasons. */
-static slist *downloaded_html_list;
+/* Set of HTML files downloaded in this Wget run, used for link
+ conversion after Wget is done. */
struct hash_table *downloaded_html_set;
-static void convert_links PARAMS ((const char *, struct urlpos *));
+static void convert_links (const char *, struct urlpos *);
/* This function is called when the retrieval is done to convert the
links that have been downloaded. It has to be called at the end of
void
convert_all_links (void)
{
- slist *html;
- long msecs;
+ int i;
+ double secs;
int file_count = 0;
- struct wget_timer *timer = wtimer_new ();
+ struct ptimer *timer = ptimer_new ();
- /* Destructively reverse downloaded_html_files to get it in the right order.
- recursive_retrieve() used slist_prepend() consistently. */
- downloaded_html_list = slist_nreverse (downloaded_html_list);
+ int cnt;
+ char **file_array;
- for (html = downloaded_html_list; html; html = html->next)
+ cnt = 0;
+ if (downloaded_html_set)
+ cnt = hash_table_count (downloaded_html_set);
+ if (cnt == 0)
+ return;
+ file_array = alloca_array (char *, cnt);
+ string_set_to_array (downloaded_html_set, file_array);
+
+ for (i = 0; i < cnt; i++)
{
struct urlpos *urls, *cur_url;
char *url;
- char *file = html->string;
+ char *file = file_array[i];
/* Determine the URL of the HTML file. get_urls_html will need
it. */
free_urlpos (urls);
}
- wtimer_update (timer);
- msecs = wtimer_read (timer);
- wtimer_delete (timer);
- logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
- file_count, (double)msecs / 1000);
+ secs = ptimer_measure (timer) / 1000;
+ ptimer_destroy (timer);
+ logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"),
+ file_count, secs < 10 ? 3 : 1, secs);
}
-static void write_backup_file PARAMS ((const char *, downloaded_file_t));
-static const char *replace_attr PARAMS ((const char *, int, FILE *,
- const char *));
-static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
- const char *, int));
-static char *local_quote_string PARAMS ((const char *));
-static char *construct_relative PARAMS ((const char *, const char *));
+static void write_backup_file (const char *, downloaded_file_t);
+static const char *replace_attr (const char *, int, FILE *, const char *);
+static const char *replace_attr_refresh_hack (const char *, int, FILE *,
+ const char *, int);
+static char *local_quote_string (const char *);
+static char *construct_relative (const char *, const char *);
/* Change the links in one HTML file. LINKS is a list of links in the
document, along with their positions and the desired direction of
}
/* Construct LINK as explained above. */
- link = (char *)xmalloc (3 * basedirs + strlen (linkfile) + 1);
+ link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
for (i = 0; i < basedirs; i++)
memcpy (link + 3 * i, "../", 3);
strcpy (link + 3 * i, linkfile);
return link;
}
+/* Used by write_backup_file to remember which files have been
+ written. */
+static struct hash_table *converted_files;
+
static void
write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
{
clobber .orig files sitting around from previous invocations. */
/* Construct the backup filename as the original name plus ".orig". */
- size_t filename_len = strlen(file);
+ size_t filename_len = strlen (file);
char* filename_plus_orig_suffix;
- int already_wrote_backup_file = 0;
- slist* converted_file_ptr;
- static slist* converted_files = NULL;
if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
{
".html", so we need to compare vs. the original URL plus
".orig", not the original URL plus ".html.orig". */
filename_plus_orig_suffix = alloca (filename_len + 1);
- strcpy(filename_plus_orig_suffix, file);
- strcpy((filename_plus_orig_suffix + filename_len) - 4, "orig");
+ strcpy (filename_plus_orig_suffix, file);
+ strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig");
}
else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */
{
/* Append ".orig" to the name. */
- filename_plus_orig_suffix = alloca (filename_len + sizeof(".orig"));
- strcpy(filename_plus_orig_suffix, file);
- strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+ filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
+ strcpy (filename_plus_orig_suffix, file);
+ strcpy (filename_plus_orig_suffix + filename_len, ".orig");
}
+ if (!converted_files)
+ converted_files = make_string_hash_table (0);
+
/* We can get called twice on the same URL thanks to the
convert_all_links() call in main(). If we write the .orig file
each time in such a case, it'll end up containing the first-pass
conversion, not the original file. So, see if we've already been
called on this file. */
- converted_file_ptr = converted_files;
- while (converted_file_ptr != NULL)
- if (strcmp(converted_file_ptr->string, file) == 0)
- {
- already_wrote_backup_file = 1;
- break;
- }
- else
- converted_file_ptr = converted_file_ptr->next;
-
- if (!already_wrote_backup_file)
+ if (!string_set_contains (converted_files, file))
{
/* Rename <file> to <file>.orig before former gets written over. */
- if (rename(file, filename_plus_orig_suffix) != 0)
+ if (rename (file, filename_plus_orig_suffix) != 0)
logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),
file, filename_plus_orig_suffix, strerror (errno));
list.
-- Hrvoje Niksic <hniksic@xemacs.org>
*/
- converted_file_ptr = xmalloc (sizeof (*converted_file_ptr));
- converted_file_ptr->string = xstrdup (file);
- converted_file_ptr->next = converted_files;
- converted_files = converted_file_ptr;
+ string_set_add (converted_files, file);
}
}
-static int find_fragment PARAMS ((const char *, int, const char **,
- const char **));
+static int find_fragment (const char *, int, const char **, const char **);
/* Replace an attribute's original text with NEW_TEXT. */
{
if (!downloaded_html_set)
downloaded_html_set = make_string_hash_table (0);
- else if (hash_table_contains (downloaded_html_set, file))
- return;
-
- /* The set and the list should use the same copy of FILE, but the
- slist interface insists on strduping the string it gets. Oh
- well. */
string_set_add (downloaded_html_set, file);
- downloaded_html_list = slist_prepend (downloaded_html_list, file);
}
-/* Cleanup the data structures associated with recursive retrieving
- (the variables above). */
+static void downloaded_files_free (void);
+
+/* Cleanup the data structures associated with this file. */
+
void
convert_cleanup (void)
{
}
if (downloaded_html_set)
string_set_free (downloaded_html_set);
- slist_free (downloaded_html_list);
- downloaded_html_list = NULL;
+ downloaded_files_free ();
+ if (converted_files)
+ string_set_free (converted_files);
}
\f
/* Book-keeping code for downloaded files that enables extension
return 0;
}
-void
+static void
downloaded_files_free (void)
{
if (downloaded_files_hash)
downloaded_files_hash = NULL;
}
}
+\f
+/* The function returns the pointer to the malloc-ed quoted version of
+ string s. It will recognize and quote numeric and special graphic
+ entities, as per RFC1866:
+
+ `&' -> `&amp;'
+ `<' -> `&lt;'
+ `>' -> `&gt;'
+ `"' -> `&quot;'
+ SP -> `&#32;'
+
+ No other entities are recognized or replaced. */
+char *
+html_quote_string (const char *s)
+{ /* Works in two passes over S: the first computes the length of the
+     quoted result, the second writes it into a buffer obtained from
+     xmalloc and returns that buffer.  NOTE(review): the caller
+     presumably owns and frees the result -- confirm against xmalloc. */
+ const char *b = s; /* Start of S, saved because the counting loop advances S. */
+ char *p, *res; /* RES is the result buffer; P is the write cursor into it. */
+ int i; /* Length of the quoted output, not counting the terminating NUL. */
+
+ /* Pass through the string, and count the new size.  Every input
+    character contributes one via the loop's own i++; a character that
+    is replaced by an entity additionally contributes the characters
+    that follow the leading `&' of that entity. */
+ for (i = 0; *s; s++, i++)
+ {
+ if (*s == '&')
+ i += 4; /* `amp;' */
+ else if (*s == '<' || *s == '>')
+ i += 3; /* `lt;' and `gt;' */
+ else if (*s == '\"')
+ i += 5; /* `quot;' */
+ else if (*s == ' ')
+ i += 4; /* #32; */
+ }
+ res = xmalloc (i + 1); /* One extra byte for the terminating NUL. */
+ s = b; /* Rewind to the start of the input for the writing pass. */
+ for (p = res; *s; s++)
+ {
+ switch (*s)
+ {
+ case '&': /* Written as `&amp;'. */
+ *p++ = '&';
+ *p++ = 'a';
+ *p++ = 'm';
+ *p++ = 'p';
+ *p++ = ';';
+ break;
+ case '<': case '>': /* Written as `&lt;' or `&gt;' respectively. */
+ *p++ = '&';
+ *p++ = (*s == '<' ? 'l' : 'g');
+ *p++ = 't';
+ *p++ = ';';
+ break;
+ case '\"': /* Written as `&quot;'. */
+ *p++ = '&';
+ *p++ = 'q';
+ *p++ = 'u';
+ *p++ = 'o';
+ *p++ = 't';
+ *p++ = ';';
+ break;
+ case ' ': /* Space is written as the numeric reference `&#32;'. */
+ *p++ = '&';
+ *p++ = '#';
+ *p++ = '3';
+ *p++ = '2';
+ *p++ = ';';
+ break;
+ default: /* Every other character is copied through unchanged. */
+ *p++ = *s;
+ }
+ }
+ *p = '\0'; /* Terminate the result; P now sits at RES + I. */
+ return res;
+}