From 30e4a33756b846d83c93db7182a3b8a490d07dcb Mon Sep 17 00:00:00 2001 From: hniksic Date: Wed, 30 Mar 2005 11:27:34 -0800 Subject: [PATCH] [svn] Use sets/hash-tables instead of "slists". Remove slist implementation from utils.c. --- src/ChangeLog | 8 ++++ src/convert.c | 83 +++++++++++++++++------------------------ src/convert.h | 3 ++ src/utils.c | 101 ++++++++------------------------------------------ src/utils.h | 15 +------- 5 files changed, 62 insertions(+), 148 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 8825284e..8f1cd161 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +2005-03-30 Hrvoje Niksic + + * utils.c (string_set_to_array): New function. + + * convert.c: Replace the use of "slists" with sets/hash-tables, + which in fact suit the intended purpose much better. + downloaded_html_list is removed altogether. + 2005-03-29 Hrvoje Niksic * ftp.h (enum): Rename GLOBALL, GETALL, and GETONE to diff --git a/src/convert.c b/src/convert.c index 0a3f6bd0..ed567124 100644 --- a/src/convert.c +++ b/src/convert.c @@ -1,5 +1,5 @@ /* Conversion of links to local files. - Copyright (C) 1996, 1997, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -53,11 +53,8 @@ so, delete this exception statement from your version. */ static struct hash_table *dl_file_url_map; struct hash_table *dl_url_file_map; -/* List of HTML files downloaded in this Wget run, used for link - conversion after Wget is done. The list and the set contain the - same information, except the list maintains the order. Perhaps I - should get rid of the list, it's there for historical reasons. */ -static slist *downloaded_html_list; +/* Set of HTML files downloaded in this Wget run, used for link + conversion after Wget is done. */ struct hash_table *downloaded_html_set; static void convert_links PARAMS ((const char *, struct urlpos *)); @@ -80,21 +77,28 @@ static void convert_links PARAMS ((const char *, struct urlpos *)); void convert_all_links (void) { - slist *html; - long msecs; + int i; + double secs; int file_count = 0; struct wget_timer *timer = wtimer_new (); - /* Destructively reverse downloaded_html_files to get it in the right order. - recursive_retrieve() used slist_prepend() consistently. */ - downloaded_html_list = slist_nreverse (downloaded_html_list); + int cnt; + char **file_array; + + cnt = 0; + if (downloaded_html_set) + cnt = hash_table_count (downloaded_html_set); + if (cnt == 0) + return; + file_array = alloca_array (char *, cnt); + string_set_to_array (downloaded_html_set, file_array); - for (html = downloaded_html_list; html; html = html->next) + for (i = 0; i < cnt; i++) { struct urlpos *urls, *cur_url; char *url; - char *file = html->string; + char *file = file_array[i]; /* Determine the URL of the HTML file. get_urls_html will need it. */ @@ -167,10 +171,10 @@ convert_all_links (void) } wtimer_update (timer); - msecs = wtimer_read (timer); + secs = wtimer_read (timer) / 1000; wtimer_delete (timer); - logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"), - file_count, (double)msecs / 1000); + logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"), + file_count, secs < 10 ? 3 : 1, secs); } static void write_backup_file PARAMS ((const char *, downloaded_file_t)); @@ -400,11 +404,9 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return) clobber .orig files sitting around from previous invocations. */ /* Construct the backup filename as the original name plus ".orig". */ - size_t filename_len = strlen(file); + size_t filename_len = strlen (file); char* filename_plus_orig_suffix; - int already_wrote_backup_file = 0; - slist* converted_file_ptr; - static slist* converted_files = NULL; + static struct hash_table *converted_files; if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED) { @@ -416,36 +418,29 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return) ".html", so we need to compare vs. the original URL plus ".orig", not the original URL plus ".html.orig". */ filename_plus_orig_suffix = alloca (filename_len + 1); - strcpy(filename_plus_orig_suffix, file); - strcpy((filename_plus_orig_suffix + filename_len) - 4, "orig"); + strcpy (filename_plus_orig_suffix, file); + strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig"); } else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */ { /* Append ".orig" to the name. */ - filename_plus_orig_suffix = alloca (filename_len + sizeof(".orig")); - strcpy(filename_plus_orig_suffix, file); - strcpy(filename_plus_orig_suffix + filename_len, ".orig"); + filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig")); + strcpy (filename_plus_orig_suffix, file); + strcpy (filename_plus_orig_suffix + filename_len, ".orig"); } + if (!converted_files) + converted_files = make_string_hash_table (0); + /* We can get called twice on the same URL thanks to the convert_all_links() call in main(). If we write the .orig file each time in such a case, it'll end up containing the first-pass conversion, not the original file. So, see if we've already been called on this file. */ - converted_file_ptr = converted_files; - while (converted_file_ptr != NULL) - if (strcmp(converted_file_ptr->string, file) == 0) - { - already_wrote_backup_file = 1; - break; - } - else - converted_file_ptr = converted_file_ptr->next; - - if (!already_wrote_backup_file) + if (!string_set_contains (converted_files, file)) { /* Rename to .orig before former gets written over. */ - if (rename(file, filename_plus_orig_suffix) != 0) + if (rename (file, filename_plus_orig_suffix) != 0) logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"), file, filename_plus_orig_suffix, strerror (errno)); @@ -466,10 +461,7 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return) list. -- Hrvoje Niksic */ - converted_file_ptr = xmalloc (sizeof (*converted_file_ptr)); - converted_file_ptr->string = xstrdup (file); - converted_file_ptr->next = converted_files; - converted_files = converted_file_ptr; + string_set_add (converted_files, file); } } @@ -839,14 +831,7 @@ register_html (const char *url, const char *file) { if (!downloaded_html_set) downloaded_html_set = make_string_hash_table (0); - else if (hash_table_contains (downloaded_html_set, file)) - return; - - /* The set and the list should use the same copy of FILE, but the - slist interface insists on strduping the string it gets. Oh - well. */ string_set_add (downloaded_html_set, file); - downloaded_html_list = slist_prepend (downloaded_html_list, file); } /* Cleanup the data structures associated with recursive retrieving @@ -868,8 +853,6 @@ convert_cleanup (void) } if (downloaded_html_set) string_set_free (downloaded_html_set); - slist_free (downloaded_html_list); - downloaded_html_list = NULL; } /* Book-keeping code for downloaded files that enables extension diff --git a/src/convert.h b/src/convert.h index e9ca0401..53b0c97a 100644 --- a/src/convert.h +++ b/src/convert.h @@ -30,6 +30,9 @@ so, delete this exception statement from your version. */ #ifndef CONVERT_H #define CONVERT_H +struct hash_table; /* forward decl */ +extern struct hash_table *downloaded_html_set; + enum convert_options { CO_NOCONVERT = 0, /* don't convert this URL */ CO_CONVERT_TO_RELATIVE, /* convert to relative, e.g. to diff --git a/src/utils.c b/src/utils.c index 50de97c6..1d2c5c53 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1080,91 +1080,6 @@ merge_vecs (char **v1, char **v2) xfree (v2); return v1; } - -/* A set of simple-minded routines to store strings in a linked list. - This used to also be used for searching, but now we have hash - tables for that. */ - -/* It's a shame that these simple things like linked lists and hash - tables (see hash.c) need to be implemented over and over again. It - would be nice to be able to use the routines from glib -- see - www.gtk.org for details. However, that would make Wget depend on - glib, and I want to avoid dependencies to external libraries for - reasons of convenience and portability (I suspect Wget is more - portable than anything ever written for Gnome). */ - -/* Append an element to the list. If the list has a huge number of - elements, this can get slow because it has to find the list's - ending. If you think you have to call slist_append in a loop, - think about calling slist_prepend() followed by slist_nreverse(). */ - -slist * -slist_append (slist *l, const char *s) -{ - slist *newel = xnew (slist); - slist *beg = l; - - newel->string = xstrdup (s); - newel->next = NULL; - - if (!l) - return newel; - /* Find the last element. */ - while (l->next) - l = l->next; - l->next = newel; - return beg; -} - -/* Prepend S to the list. Unlike slist_append(), this is O(1). */ - -slist * -slist_prepend (slist *l, const char *s) -{ - slist *newel = xnew (slist); - newel->string = xstrdup (s); - newel->next = l; - return newel; -} - -/* Destructively reverse L. */ - -slist * -slist_nreverse (slist *l) -{ - slist *prev = NULL; - while (l) - { - slist *next = l->next; - l->next = prev; - prev = l; - l = next; - } - return prev; -} - -/* Is there a specific entry in the list? */ -int -slist_contains (slist *l, const char *s) -{ - for (; l; l = l->next) - if (!strcmp (l->string, s)) - return 1; - return 0; -} - -/* Free the whole slist. */ -void -slist_free (slist *l) -{ - while (l) - { - slist *n = l->next; - xfree (l->string); - xfree (l); - l = n; - } -} /* Sometimes it's useful to create "sets" of strings, i.e. special hash tables where you want to store strings as keys and merely @@ -1195,6 +1110,22 @@ string_set_contains (struct hash_table *ht, const char *s) return hash_table_contains (ht, s); } +static int +string_set_to_array_mapper (void *key, void *value_ignored, void *arg) +{ + char ***arrayptr = (char ***) arg; + *(*arrayptr)++ = (char *) key; + return 0; +} + +/* Convert the specified string set to array. ARRAY should be large + enough to hold hash_table_count(ht) char pointers. */ + +void string_set_to_array (struct hash_table *ht, char **array) +{ + hash_table_map (ht, string_set_to_array_mapper, &array); +} + static int string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored) { diff --git a/src/utils.h b/src/utils.h index 5845f80f..f93d2ae9 100644 --- a/src/utils.h +++ b/src/utils.h @@ -1,5 +1,5 @@ /* Declarations for utils.c. - Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -34,13 +34,6 @@ enum accd { ALLABS = 1 }; -/* A linked list of strings. The list is ordered alphabetically. */ -typedef struct _slist -{ - char *string; - struct _slist *next; -} slist; - struct hash_table; struct file_memory { @@ -101,14 +94,10 @@ void read_file_free PARAMS ((struct file_memory *)); void free_vec PARAMS ((char **)); char **merge_vecs PARAMS ((char **, char **)); -slist *slist_append PARAMS ((slist *, const char *)); -slist *slist_prepend PARAMS ((slist *, const char *)); -slist *slist_nreverse PARAMS ((slist *)); -int slist_contains PARAMS ((slist *, const char *)); -void slist_free PARAMS ((slist *)); void string_set_add PARAMS ((struct hash_table *, const char *)); int string_set_contains PARAMS ((struct hash_table *, const char *)); +void string_set_to_array PARAMS ((struct hash_table *, char **)); void string_set_free PARAMS ((struct hash_table *)); void free_keys_and_values PARAMS ((struct hash_table *)); -- 2.39.2