/* Hash tables.
- Copyright (C) 2000-2005 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+ 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at
+the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
/* With -DSTANDALONE, this file can be compiled outside Wget source
tree. To test, also use -DTEST. */
-#ifdef HAVE_CONFIG_H
-# include <config.h>
+#ifndef STANDALONE
+# include "wget.h"
#endif
#include <stdio.h>
#ifndef STANDALONE
/* Get Wget's utility headers. */
-# include "wget.h"
# include "utils.h"
#else
/* Make do without them. */
# define xnew_array(type, x) xmalloc (sizeof (type) * (x))
# define xmalloc malloc
# define xfree free
-# define countof(x) (sizeof (x) / sizeof ((x)[0]))
-# define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) - 32 : (x))
+# ifndef countof
+# define countof(x) (sizeof (x) / sizeof ((x)[0]))
+# endif
+# include <ctype.h>
+# define c_tolower(x) tolower ((unsigned char) (x))
+# if __STDC_VERSION__ >= 199901L
+# include <stdint.h> /* for uintptr_t */
+# else
+ typedef unsigned long uintptr_t;
+# endif
#endif
#include "hash.h"
hashfun_t hash_function;
testfun_t test_function;
- struct cell *cells; /* contiguous array of cells. */
- int size; /* size of the array. */
+ struct cell *cells; /* contiguous array of cells. */
+ int size; /* size of the array. */
- int count; /* number of occupied entries. */
- int resize_threshold; /* after size exceeds this number of
- entries, resize the table. */
- int prime_offset; /* the offset of the current prime in
- the prime table. */
+ int count; /* number of occupied entries. */
+ int resize_threshold; /* after size exceeds this number of
+ entries, resize the table. */
+ int prime_offset; /* the offset of the current prime in
+ the prime table. */
};
/* We use the all-bits-set constant (INVALID_PTR) marker to mean that
-1. This is acceptable because it still allows the use of
nonnegative integer keys. */
-#define INVALID_PTR ((void *) ~0UL)
+#define INVALID_PTR ((void *) ~(uintptr_t) 0)
#ifndef UCHAR_MAX
# define UCHAR_MAX 0xff
#endif
/* Loop over occupied cells starting at C, terminating the loop when
an empty cell is encountered. */
-#define FOREACH_OCCUPIED_ADJACENT(c, cells, size) \
+#define FOREACH_OCCUPIED_ADJACENT(c, cells, size) \
for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
/* Return the position of KEY in hash table SIZE large, hash function
243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
1174703521, 1527114613, 1837299131, 2147483647
};
- int i;
+ size_t i;
for (i = *prime_offset; i < countof (primes); i++)
if (primes[i] >= size)
{
- /* Set the offset to the next prime. That is safe because,
- next time we are called, it will be with a larger SIZE,
- which means we could never return the same prime anyway.
- (If that is not the case, the caller can simply reset
- *prime_offset.) */
- *prime_offset = i + 1;
- return primes[i];
+ /* Set the offset to the next prime. That is safe because,
+ next time we are called, it will be with a larger SIZE,
+ which means we could never return the same prime anyway.
+ (If that is not the case, the caller can simply reset
+ *prime_offset.) */
+ *prime_offset = i + 1;
+ return primes[i];
}
abort ();
struct hash_table *
hash_table_new (int items,
- unsigned long (*hash_function) (const void *),
- int (*test_function) (const void *, const void *))
+ unsigned long (*hash_function) (const void *),
+ int (*test_function) (const void *, const void *))
{
int size;
struct hash_table *ht = xnew (struct hash_table);
int
hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
- void *orig_key, void *value)
+ void *orig_key, void *value)
{
struct cell *c = find_cell (ht, lookup_key);
if (CELL_OCCUPIED (c))
{
if (orig_key)
- *(void **)orig_key = c->key;
+ *(void **)orig_key = c->key;
if (value)
- *(void **)value = c->value;
+ *(void **)value = c->value;
return 1;
}
else
newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
#if 0
printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
- ht->size, newsize,
- 100.0 * ht->count / ht->size,
- 100.0 * ht->count / newsize);
+ ht->size, newsize,
+ 100.0 * ht->count / ht->size,
+ 100.0 * ht->count / newsize);
#endif
ht->size = newsize;
for (c = old_cells; c < old_end; c++)
if (CELL_OCCUPIED (c))
{
- struct cell *new_c;
- /* We don't need to test for uniqueness of keys because they
- come from the hash table and are therefore known to be
- unique. */
- new_c = cells + HASH_POSITION (c->key, hasher, newsize);
- FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
- ;
- *new_c = *c;
+ struct cell *new_c;
+ /* We don't need to test for uniqueness of keys because they
+ come from the hash table and are therefore known to be
+ unique. */
+ new_c = cells + HASH_POSITION (c->key, hasher, newsize);
+ FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
+ ;
+ *new_c = *c;
}
xfree (old_cells);
/* add new item */
++ht->count;
- c->key = (void *)key; /* const? */
+ c->key = (void *)key; /* const? */
c->value = value;
}
--ht->count;
/* Rehash all the entries following C. The alternative
- approach is to mark the entry as deleted, i.e. create a
- "tombstone". That speeds up removal, but leaves a lot of
- garbage and slows down hash_table_get and hash_table_put. */
+ approach is to mark the entry as deleted, i.e. create a
+ "tombstone". That speeds up removal, but leaves a lot of
+ garbage and slows down hash_table_get and hash_table_put. */
c = NEXT_CELL (c, cells, size);
FOREACH_OCCUPIED_ADJACENT (c, cells, size)
- {
- const void *key2 = c->key;
- struct cell *c_new;
-
- /* Find the new location for the key. */
- c_new = cells + HASH_POSITION (key2, hasher, size);
- FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
- if (key2 == c_new->key)
- /* The cell C (key2) is already where we want it (in
- C_NEW's "chain" of keys.) */
- goto next_rehash;
-
- *c_new = *c;
- CLEAR_CELL (c);
-
- next_rehash:
- ;
- }
+ {
+ const void *key2 = c->key;
+ struct cell *c_new;
+
+ /* Find the new location for the key. */
+ c_new = cells + HASH_POSITION (key2, hasher, size);
+ FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
+ if (key2 == c_new->key)
+ /* The cell C (key2) is already where we want it (in
+ C_NEW's "chain" of keys.) */
+ goto next_rehash;
+
+ *c_new = *c;
+ CLEAR_CELL (c);
+
+ next_rehash:
+ ;
+ }
return 1;
}
}
void
hash_table_for_each (struct hash_table *ht,
- int (*fn) (void *, void *, void *), void *arg)
+ int (*fn) (void *, void *, void *), void *arg)
{
struct cell *c = ht->cells;
struct cell *end = ht->cells + ht->size;
for (; c < end; c++)
if (CELL_OCCUPIED (c))
{
- void *key;
+ void *key;
repeat:
- key = c->key;
- if (fn (key, c->value, arg))
- return;
- /* hash_table_remove might have moved the adjacent cells. */
- if (c->key != key && CELL_OCCUPIED (c))
- goto repeat;
+ key = c->key;
+ if (fn (key, c->value, arg))
+ return;
+ /* hash_table_remove might have moved the adjacent cells. */
+ if (c->key != key && CELL_OCCUPIED (c))
+ goto repeat;
}
}
-/* Initiate iteration over HT. Get the next entry using
- hash_table_iter_next. The typical loop looks like this:
+/* Initiate iteration over HT. Entries are obtained with
+ hash_table_iter_next, a typical iteration loop looking like this:
hash_table_iterator iter;
for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
ITER->value respectively and 1 is returned. When there are no more
entries, 0 is returned.
- The hash table must not be modified between calls to this
- function. */
+ If the hash table is modified between calls to this function, the
+ result is undefined. */
int
hash_table_iter_next (hash_table_iterator *iter)
for (; c < end; c++)
if (CELL_OCCUPIED (c))
{
- iter->key = c->key;
- iter->value = c->value;
- iter->pos = c + 1;
- return 1;
+ iter->key = c->key;
+ iter->value = c->value;
+ iter->pos = c + 1;
+ return 1;
}
return 0;
}
a hash function that returns 0 for any given object is a
perfectly valid (albeit extremely bad) hash function. A hash
function that hashes a string by adding up all its characters is
- another example of a valid (but quite bad) hash function.
+ another example of a valid (but still quite bad) hash function.
It is not hard to make hash and test functions agree about
equality. For example, if the test function compares strings
characters when calculating the hash value. That ensures that
two strings differing only in case will hash the same.
- - If you care about performance, choose a hash function with as
- good "spreading" as possible. A good hash function will use all
- the bits of the input when calculating the hash, and will react
- to even small changes in input with a completely different
- output. Finally, don't make the hash function itself overly
- slow, because you'll be incurring a non-negligible overhead to
- all hash table operations. */
+ - To prevent performance degradation, choose a hash function with
+ as good "spreading" as possible. A good hash function will use
+ all the bits of the input when calculating the hash, and will
+ react to even small changes in input with a completely different
+ output. But don't make the hash function itself overly slow,
+ because you'll be incurring a non-negligible overhead to all hash
+ table operations. */
/*
* Support for hash tables whose keys are strings.
*
*/
-/* 31 bit hash function. Taken from Gnome's glib, modified to use
+/* Base 31 hash function. Taken from Gnome's glib, modified to use
standard C types.
We used to use the popular hash function from the Dragon Book, but
- this one seems to perform much better. */
+ this one seems to perform much better, both by being faster and by
+ generating less collisions. */
static unsigned long
hash_string (const void *key)
hash_string_nocase (const void *key)
{
const char *p = key;
- unsigned int h = TOLOWER (*p);
+ unsigned int h = c_tolower (*p);
if (h)
for (p += 1; *p != '\0'; p++)
- h = (h << 5) - h + TOLOWER (*p);
+ h = (h << 5) - h + c_tolower (*p);
return h;
}
unsigned long
hash_pointer (const void *ptr)
{
- unsigned long key = (unsigned long) ptr;
+ uintptr_t key = (uintptr_t) ptr;
key += (key << 12);
key ^= (key >> 22);
key += (key << 4);
key ^= (key >> 2);
key += (key << 7);
key ^= (key >> 12);
-#if SIZEOF_LONG > 4
+#if SIZEOF_VOID_P > 4
key += (key << 44);
key ^= (key >> 54);
key += (key << 36);
key += (key << 39);
key ^= (key >> 44);
#endif
- return key;
+ return (unsigned long) key;
}
static int
{
int len = strlen (line);
if (len <= 1)
- continue;
+ continue;
line[--len] = '\0';
if (!hash_table_contains (ht, line))
- hash_table_put (ht, strdup (line), "here I am!");
+ hash_table_put (ht, strdup (line), "here I am!");
#if 1
if (len % 5 == 0)
- {
- char *line_copy;
- if (hash_table_get_pair (ht, line, &line_copy, NULL))
- {
- hash_table_remove (ht, line);
- xfree (line_copy);
- }
- }
+ {
+ char *line_copy;
+ if (hash_table_get_pair (ht, line, &line_copy, NULL))
+ {
+ hash_table_remove (ht, line);
+ xfree (line_copy);
+ }
+ }
#endif
}
#if 0