2 Copyright (C) 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
33 # define xmalloc malloc
34 # define xrealloc realloc
37 /* This file implements simple hash tables based on linear probing.
38 The hash table stores key-value pairs in a contiguous array. Both
39 key and value are void pointers that the hash and test functions
42 Although Knuth & co. recommend double hashing over linear probing,
43 we use the latter because it accesses array elements sequentially
44 in case of a collision, yielding in better cache behaviour and
45 ultimately in better speed. To avoid collision problems with
46 linear probing, we make sure that the table grows as soon as the
47 fullness/size ratio exceeds 75%. */
55 unsigned long (*hash_function) (const void *);
56 int (*test_function) (const void *, const void *);
58 int size; /* size of the array */
59 int fullness; /* number of non-empty fields */
60 int count; /* number of non-empty, non-deleted
63 struct ht_pair *pairs;
66 #define ENTRY_DELETED ((void *)0xdeadbeef)
68 #define DELETED_ENTRY_P(ptr) ((ptr) == ENTRY_DELETED)
69 #define EMPTY_ENTRY_P(ptr) ((ptr) == NULL)
71 /* Find a prime near, but greather than or equal to SIZE. */
76 static const unsigned long primes [] = {
77 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
78 1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
79 19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
80 204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
81 1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
82 10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
83 50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
84 243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
85 1174703521, 1527114613, 1985248999, 2580823717UL, 3355070839UL
88 for (i = 0; i < ARRAY_SIZE (primes); i++)
89 if (primes[i] >= size)
95 /* Create a hash table of INITIAL_SIZE with hash function
96 HASH_FUNCTION and test function TEST_FUNCTION. If you wish to
97 start out with a "small" table which will be regrown as needed,
98 specify 0 as INITIAL_SIZE. */
101 hash_table_new (int initial_size,
102 unsigned long (*hash_function) (const void *),
103 int (*test_function) (const void *, const void *))
105 struct hash_table *ht
106 = (struct hash_table *)xmalloc (sizeof (struct hash_table));
107 ht->hash_function = hash_function;
108 ht->test_function = test_function;
109 ht->size = prime_size (initial_size);
112 ht->pairs = xmalloc (ht->size * sizeof (struct ht_pair));
113 memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
117 /* Free the data associated with hash table HT. */
120 hash_table_destroy (struct hash_table *ht)
126 /* Get the value that corresponds to the key KEY in the hash table HT.
127 If no value is found, return NULL. Note that NULL is a legal value
128 for value; if you are storing NULLs in your hash table, you can use
129 hash_table_exists to be sure that a (possibly NULL) value exists in
133 hash_table_get (struct hash_table *ht, const void *key)
135 int location = ht->hash_function (key) % ht->size;
138 struct ht_pair *the_pair = ht->pairs + location;
139 if (EMPTY_ENTRY_P (the_pair->key))
141 else if (DELETED_ENTRY_P (the_pair->key)
142 || !ht->test_function (key, the_pair->key))
145 if (location == ht->size)
149 return the_pair->value;
153 /* Return 1 if KEY exists in HT, 0 otherwise. */
156 hash_table_exists (struct hash_table *ht, const void *key)
158 int location = ht->hash_function (key) % ht->size;
161 struct ht_pair *the_pair = ht->pairs + location;
162 if (EMPTY_ENTRY_P (the_pair->key))
164 else if (DELETED_ENTRY_P (the_pair->key)
165 || !ht->test_function (key, the_pair->key))
168 if (location == ht->size)
176 #define MAX(i, j) (((i) >= (j)) ? (i) : (j))
178 /* Grow hash table HT as necessary, and rehash all the key-value
182 grow_hash_table (struct hash_table *ht)
185 struct ht_pair *old_pairs = ht->pairs;
186 int old_count = ht->count; /* for assert() below */
187 int old_size = ht->size;
189 /* Normally, the idea is to double ht->size (and round it to next
190 prime) on each regrow:
192 ht->size = prime_size (ht->size * 2);
194 But it is possible that the table has large fullness because of
195 the many deleted entries. If that is the case, we don't want to
196 blindly grow the table; we just want to rehash it. For that
197 reason, we use ht->count as the relevant parameter. MAX is used
198 only because we don't want to actually shrink the table. (But
199 maybe that's wrong.) */
201 int needed_size = prime_size (ht->count * 2);
202 ht->size = MAX (old_size, needed_size);
204 ht->pairs = xmalloc (ht->size * sizeof (struct ht_pair));
205 memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
207 /* Need to reset these two; hash_table_put will reinitialize them. */
210 for (i = 0; i < old_size; i++)
212 struct ht_pair *the_pair = old_pairs + i;
213 if (!EMPTY_ENTRY_P (the_pair->key)
214 && !DELETED_ENTRY_P (the_pair->key))
215 hash_table_put (ht, the_pair->key, the_pair->value);
217 assert (ht->count == old_count);
221 /* Put VALUE in the hash table HT under the key KEY. This regrows the
222 table if necessary. */
225 hash_table_put (struct hash_table *ht, const void *key, void *value)
227 int location = ht->hash_function (key) % ht->size;
230 struct ht_pair *the_pair = ht->pairs + location;
231 if (EMPTY_ENTRY_P (the_pair->key))
236 the_pair->key = (void *)key; /* const? */
237 the_pair->value = value;
240 else if (DELETED_ENTRY_P (the_pair->key))
242 /* We're replacing a deleteed entry, so ht->count gets
243 increased, but ht->fullness remains unchanged. */
247 else if (ht->test_function (key, the_pair->key))
249 /* We're replacing an existing entry, so ht->count and
250 ht->fullness remain unchanged. */
256 if (location == ht->size)
260 if (ht->fullness * 4 > ht->size * 3)
261 /* When fullness exceeds 75% of size, regrow the table. */
262 grow_hash_table (ht);
265 /* Remove KEY from HT. */
268 hash_table_remove (struct hash_table *ht, const void *key)
270 int location = ht->hash_function (key) % ht->size;
273 struct ht_pair *the_pair = ht->pairs + location;
274 if (EMPTY_ENTRY_P (the_pair->key))
276 else if (DELETED_ENTRY_P (the_pair->key)
277 || !ht->test_function (key, the_pair->key))
280 if (location == ht->size)
285 /* We don't really remove an entry from the hash table: we
286 just mark it as deleted. This is because there may be
287 other entries located after this entry whose hash number
288 points to a location before this entry. (Example: keys
289 A, B and C have the same hash. If you were to really
290 *delete* B from the table, C could no longer be found.)
292 As an optimization, it might be worthwhile to check
293 whether the immediately preceding entry is empty and, if
294 so, really delete the pair (set it to empty and decrease
295 the fullness along with the count). I *think* it should
297 the_pair->key = ENTRY_DELETED;
305 hash_table_clear (struct hash_table *ht)
307 memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
313 hash_table_map (struct hash_table *ht,
314 int (*mapfun) (void *, void *, void *),
318 for (i = 0; i < ht->size; i++)
320 struct ht_pair *the_pair = ht->pairs + i;
321 if (!EMPTY_ENTRY_P (the_pair->key)
322 && !DELETED_ENTRY_P (the_pair->key))
323 if (mapfun (the_pair->key, the_pair->value, closure))
328 /* Support for hash tables whose keys are strings. */
330 /* supposedly from the Dragon Book P436. */
332 string_hash (const void *sv)
335 unsigned const char *x = (unsigned const char *) sv;
341 if ((g = h & 0xf0000000) != 0)
342 h = (h ^ (g >> 24)) ^ g;
349 string_cmp (const void *s1, const void *s2)
351 return !strcmp ((const char *)s1, (const char *)s2);
355 make_string_hash_table (int initial_size)
357 return hash_table_new (initial_size, string_hash, string_cmp);
367 print_hash_table_mapper (const void *key, void *value, void *count)
370 printf ("%s: %s\n", (const char *)key, (char *)value);
375 print_hash (struct hash_table *sht)
378 hash_table_map (sht, print_hash_table_mapper, &debug_count);
379 assert (debug_count == sht->count);
385 struct hash_table *ht = make_string_hash_table (0);
387 while ((fgets (line, sizeof (line), stdin)))
389 int len = strlen (line);
393 hash_table_put (ht, strdup (line), "here I am!");
395 hash_table_remove (ht, line);
399 printf ("%d %d %d\n", ht->count, ht->fullness, ht->size);