/* Hash tables.
Copyright (C) 2000, 2001 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+along with Wget; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#ifdef HAVE_CONFIG_H
# include <config.h>
# define xmalloc malloc
# define xrealloc realloc
# define xfree free
+
+# undef TOLOWER
+/* ASCII-only lowercasing: 'A'..'Z' map to 'a'..'z', and every other
+   value is returned unchanged.  X is evaluated more than once, so it
+   must not have side effects.  */
+# define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) - 'A' + 'a' : (x))
#endif
/* INTERFACE:
The basics are all covered. hash_table_new creates a hash table,
and hash_table_destroy deletes it. hash_table_put establishes a
mapping between a key and a value. hash_table_get retrieves the
- value that corresponds to a key. hash_table_exists queries whether
- a key is stored in a table at all. hash_table_remove removes a
- mapping that corresponds to a key. hash_table_map allows you to
- map through all the entries in a hash table. hash_table_clear
- clears all the entries from the hash table.
+ value that corresponds to a key. hash_table_contains queries
+ whether a key is stored in a table at all. hash_table_remove
+ removes a mapping that corresponds to a key. hash_table_map allows
+ you to map through all the entries in a hash table.
+ hash_table_clear clears all the entries from the hash table.
The number of mappings in a table is not limited, except by the
amount of memory. As you add new elements to a table, it regrows
};
struct hash_table {
- unsigned long (*hash_function) (const void *);
- int (*test_function) (const void *, const void *);
+ unsigned long (*hash_function) PARAMS ((const void *));
+ int (*test_function) PARAMS ((const void *, const void *));
int size; /* size of the array */
int count; /* number of non-empty, non-deleted
int resize_threshold; /* after size exceeds this number of
entries, resize the table. */
+ int prime_offset; /* the offset of the current prime in
+ the prime table. */
- struct mapping *mappings;
+ struct mapping *mappings; /* the array of mapping pairs. */
};
#define EMPTY_MAPPING_P(mp) ((mp)->key == NULL)
#define LOOP_NON_EMPTY(mp, mappings, size) \
for (; !EMPTY_MAPPING_P (mp); mp = NEXT_MAPPING (mp, mappings, size))
+/* #### We might want to multiply with the "golden ratio" here to get
+ better randomness for keys that do not result from a good hash
+ function. This is currently not a problem in Wget because we only
+ use the string hash tables. */
+
#define HASH_POSITION(ht, key) (ht->hash_function (key) % ht->size)
-/* Find a prime near, but greather than or equal to SIZE. */
+/* Find a prime near, but greater than or equal to SIZE. Of course,
+ the primes are not calculated, but looked up from a table. The
+ table does not contain all primes in range, just a selection useful
+ for this purpose.
+
+ PRIME_OFFSET is a micro-optimization: if specified, it starts the
+ search for the prime number beginning with the specific offset in
+ the prime number table. The final offset is stored in the same
+ variable. */
static int
-prime_size (int size)
+prime_size (int size, int *prime_offset)
{
static const unsigned long primes [] = {
- 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
+ 13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
- 1174703521, 1527114613, 1985248999, 2580823717UL, 3355070839UL
+ 1174703521, 1527114613, 1985248999,
+ (unsigned long)0x99d43ea5, (unsigned long)0xc7fa5177
};
- int i;
- for (i = 0; i < ARRAY_SIZE (primes); i++)
+ int i = *prime_offset;
+
+ for (; i < ARRAY_SIZE (primes); i++)
if (primes[i] >= size)
- return primes[i];
- /* huh? */
- return size;
+ {
+ /* Set the offset to the next prime. That is safe because,
+ next time we are called, it will be with a larger SIZE,
+ which means we could never return the same prime anyway.
+ (If that is not the case, the caller can simply reset
+ *prime_offset.) */
+ *prime_offset = i + 1;
+ return primes[i];
+ }
+
+ abort ();
+ return 0;
}
/* Create a hash table of INITIAL_SIZE with hash function
ht->hash_function = hash_function;
ht->test_function = test_function;
- ht->size = prime_size (initial_size);
+ ht->prime_offset = 0;
+ ht->size = prime_size (initial_size, &ht->prime_offset);
ht->resize_threshold = ht->size * 3 / 4;
- ht->count = 0;
+ ht->count = 0;
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
struct mapping *mappings = ht->mappings;
int size = ht->size;
struct mapping *mp = mappings + HASH_POSITION (ht, key);
- int (*equals) (const void *, const void *) = ht->test_function;
+ int (*equals) PARAMS ((const void *, const void *)) = ht->test_function;
LOOP_NON_EMPTY (mp, mappings, size)
if (equals (key, mp->key))
/* Get the value that corresponds to the key KEY in the hash table HT.
If no value is found, return NULL. Note that NULL is a legal value
for value; if you are storing NULLs in your hash table, you can use
- hash_table_exists to be sure that a (possibly NULL) value exists in
- the table. Or, you can use hash_table_get_pair instead of this
+ hash_table_contains to be sure that a (possibly NULL) value exists
+ in the table. Or, you can use hash_table_get_pair instead of this
function. */
void *
return 0;
}
-/* Return 1 if KEY exists in HT, 0 otherwise. */
+/* Return 1 if HT contains KEY, 0 otherwise. */
int
-hash_table_exists (struct hash_table *ht, const void *key)
+hash_table_contains (struct hash_table *ht, const void *key)
{
return find_mapping (ht, key) != NULL;
}
struct mapping *mp, *mappings;
int newsize;
- newsize = prime_size (ht->size * 2);
+ newsize = prime_size (ht->size * 2, &ht->prime_offset);
#if 0
- printf ("growing from %d to %d\n", ht->size, newsize);
+ printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
+ ht->size, newsize,
+ (double)100 * ht->count / ht->size,
+ (double)100 * ht->count / newsize);
#endif
ht->size = newsize;
{
struct mapping *mappings = ht->mappings;
int size = ht->size;
- int (*equals) (const void *, const void *) = ht->test_function;
+ int (*equals) PARAMS ((const void *, const void *)) = ht->test_function;
struct mapping *mp = mappings + HASH_POSITION (ht, key);
return ht->count;
}
\f
-/* Support for hash tables whose keys are strings. */
+/* Functions from this point onward are meant for convenience and
+ don't strictly belong to this file. However, this is as good a
+ place for them as any. */
+
+/* ========
+ Support for hash tables whose keys are strings.
+ ======== */
/* 31 bit hash function. Taken from Gnome's glib, modified to use
standard C types.
return h;
}
+/* Frontend for strcmp usable for hash tables. */
+
+int
+string_cmp (const void *s1, const void *s2)
+{
+  const char *lhs = s1;
+  const char *rhs = s2;
+
+  /* Return 1 when the two strings are equal and 0 otherwise, i.e. the
+     inverse of strcmp's "0 means equal" convention.  */
+  return strcmp (lhs, rhs) == 0;
+}
+
+/* Return a hash table of initial size INITIAL_SIZE suitable to use
+ strings as keys. */
+
+struct hash_table *
+make_string_hash_table (int initial_size)
+{
+  struct hash_table *table;
+
+  /* Pair string_hash with string_cmp as the table's hash and test
+     functions.  */
+  table = hash_table_new (initial_size, string_hash, string_cmp);
+  return table;
+}
+
+/* ========
+ Support for hash tables whose keys are strings, but which are
+ compared case-insensitively.
+ ======== */
+
+/* Like string_hash, but produce the same hash regardless of the case. */
+
+static unsigned long
+string_hash_nocase (const void *key)
+{
+  const char *s = key;
+  unsigned int hash = TOLOWER (*s);
+
+  /* Nothing more to fold in when the first character lowercases to
+     zero (e.g. an empty key).  */
+  if (hash == 0)
+    return hash;
+
+  /* Fold each remaining character in as hash * 31 + TOLOWER (c);
+     in unsigned arithmetic hash * 31 equals (hash << 5) - hash.  */
+  while (*++s != '\0')
+    hash = hash * 31 + TOLOWER (*s);
+
+  return hash;
+}
+
+/* Like string_cmp, but doing case-insensitive comparison. */
+
+static int
+string_cmp_nocase (const void *s1, const void *s2)
+{
+  const char *lhs = s1;
+  const char *rhs = s2;
+
+  /* Return 1 when the strings match ignoring case, 0 otherwise.  */
+  return strcasecmp (lhs, rhs) == 0;
+}
+
+/* Like make_string_hash_table, but uses string_hash_nocase and
+ string_cmp_nocase. */
+
+struct hash_table *
+make_nocase_string_hash_table (int initial_size)
+{
+  struct hash_table *table;
+
+  /* Pair the case-insensitive hash with the case-insensitive
+     comparison as the table's hash and test functions.  */
+  table = hash_table_new (initial_size, string_hash_nocase,
+                          string_cmp_nocase);
+  return table;
+}
+
#if 0
/* If I ever need it: hashing of integers. */
return key;
}
#endif
-
-int
-string_cmp (const void *s1, const void *s2)
-{
- return !strcmp ((const char *)s1, (const char *)s2);
-}
-
-/* Return a hash table of initial size INITIAL_SIZE suitable to use
- strings as keys. */
-
-struct hash_table *
-make_string_hash_table (int initial_size)
-{
- return hash_table_new (initial_size, string_hash, string_cmp);
-}
-
\f
#ifdef STANDALONE
if (len <= 1)
continue;
line[--len] = '\0';
- if (!hash_table_exists (ht, line))
+ if (!hash_table_contains (ht, line))
hash_table_put (ht, strdup (line), "here I am!");
#if 1
if (len % 5 == 0)