sjero.net Git - wget/blob - src/hash.c

   1 /* Hash tables.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   3    2009, 2010, 2011 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 /* With -DSTANDALONE, this file can be compiled outside Wget source
  32    tree.  To test, also use -DTEST.  */
  33
  34 #ifndef STANDALONE
  35 # include "wget.h"
  36 #endif
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <assert.h>
  41 #include <string.h>
  42 #include <limits.h>
  43
  44 #ifndef STANDALONE
  45 /* Get Wget's utility headers. */
  46 # include "utils.h"
  47 #else
  48 /* Make do without them. */
  49 # define xnew(x) xmalloc (sizeof (x))
  50 # define xnew_array(type, x) xmalloc (sizeof (type) * (x))
  51 # define xmalloc malloc
  52 # define xfree free
  53 # ifndef countof
  54 #  define countof(x) (sizeof (x) / sizeof ((x)[0]))
  55 # endif
  56 # include <ctype.h>
  57 # define c_tolower(x) tolower ((unsigned char) (x))
  58 # ifdef HAVE_STDINT_H
  59 #  include <stdint.h>
  60 # else
  61    typedef unsigned long uintptr_t;
  62 # endif
  63 #endif
  64
  65 #include "hash.h"
  66
  67 /* INTERFACE:
  68
  69    Hash tables are a technique used to implement mapping between
  70    objects with near-constant-time access and storage.  The table
  71    associates keys to values, and a value can be very quickly
  72    retrieved by providing the key.  Fast lookup tables are typically
  73    implemented as hash tables.
  74
  75    The entry points are
  76      hash_table_new       -- creates the table.
  77      hash_table_destroy   -- destroys the table.
  78      hash_table_put       -- establishes or updates key->value mapping.
  79      hash_table_get       -- retrieves value of key.
  80      hash_table_get_pair  -- get key/value pair for key.
  81      hash_table_contains  -- test whether the table contains key.
  82      hash_table_remove    -- remove key->value mapping for given key.
  83      hash_table_for_each  -- call function for each table entry.
  84      hash_table_iterate   -- iterate over entries in hash table.
  85      hash_table_iter_next -- return next element during iteration.
  86      hash_table_clear     -- clear hash table contents.
  87      hash_table_count     -- return the number of entries in the table.
  88
  89    The hash table grows internally as new entries are added and is not
  90    limited in size, except by available memory.  The table doubles
  91    with each resize, which ensures that the amortized time per
  92    operation remains constant.
  93
  94    If not instructed otherwise, tables created by hash_table_new
  95    consider the keys to be equal if their pointer values are the same.
  96    You can use make_string_hash_table to create tables whose keys are
  97    considered equal if their string contents are the same.  In the
  98    general case, the criterion of equality used to compare keys is
  99    specified at table creation time with two callback functions,
 100    "hash" and "test".  The hash function transforms the key into an
 101    arbitrary number that must be the same for two equal keys.  The
 102    test function accepts two keys and returns non-zero if they are to
 103    be considered equal.
 104
 105    Note that neither keys nor values are copied when inserted into the
 106    hash table, so they must exist for the lifetime of the table.  This
 107    means that e.g. the use of static strings is OK, but objects with a
 108    shorter life-time probably need to be copied (with strdup() or the
 109    like in the case of strings) before being inserted.  */
 110
 111 /* IMPLEMENTATION:
 112
 113    The hash table is implemented as an open-addressed table with
 114    linear probing collision resolution.
 115
 116    The above means that all the cells (each cell containing a key and
 117    a value pointer) are stored in a contiguous array.  Array position
 118    of each cell is determined by the hash value of its key and the
 119    size of the table: location := hash(key) % size.  If two different
 120    keys end up on the same position (collide), the one that came
 121    second is stored in the first unoccupied cell that follows it.
 122    This collision resolution technique is called "linear probing".
 123
 124    There are more advanced collision resolution methods (quadratic
 125    probing, double hashing), but we don't use them because they incur
 126    more non-sequential access to the array, which results in worse CPU
 127    cache behavior.  Linear probing works well as long as the
 128    count/size ratio (fullness) is kept below 75%.  We make sure to
 129    grow and rehash the table whenever this threshold is exceeded.
 130
 131    Collisions complicate deletion because simply clearing a cell
 132    followed by previously collided entries would cause those neighbors
 133    to not be picked up by find_cell later.  One solution is to leave a
 134    "tombstone" marker instead of clearing the cell, and another is to
 135    recalculate the positions of adjacent cells.  We take the latter
 136    approach because it results in less bookkeeping garbage and faster
 137    retrieval at the (slight) expense of deletion.  */
 138
 139 /* Maximum allowed fullness: when hash table's fullness exceeds this
 140    value, the table is resized.  */
 141 #define HASH_MAX_FULLNESS 0.75
 142
 143 /* The hash table size is multiplied by this factor (and then rounded
 144    to the next prime) with each resize.  This guarantees infrequent
 145    resizes.  */
 146 #define HASH_RESIZE_FACTOR 2
 147
 148 struct cell {
 149   void *key;
 150   void *value;
 151 };
 152
 153 typedef unsigned long (*hashfun_t) (const void *);
 154 typedef int (*testfun_t) (const void *, const void *);
 155
 156 struct hash_table {
 157   hashfun_t hash_function;
 158   testfun_t test_function;
 159
 160   struct cell *cells;           /* contiguous array of cells. */
 161   int size;                     /* size of the array. */
 162
 163   int count;                    /* number of occupied entries. */
 164   int resize_threshold;         /* after size exceeds this number of
 165                                    entries, resize the table.  */
 166   int prime_offset;             /* the offset of the current prime in
 167                                    the prime table. */
 168 };
 169
 170 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
 171    a cell is empty.  It is unaligned and therefore illegal as a
 172    pointer.  INVALID_PTR_CHAR (0xff) is the single-character constant
 173    used to initialize the entire cells array as empty.
 174
 175    The all-bits-set value is a better choice than NULL because it
 176    allows the use of NULL/0 keys.  Since the keys are either integers
 177    or pointers, the only key that cannot be used is the integer value
 178    -1.  This is acceptable because it still allows the use of
 179    nonnegative integer keys.  */
 180
 181 #define INVALID_PTR ((void *) ~(uintptr_t) 0)
 182 #ifndef UCHAR_MAX
 183 # define UCHAR_MAX 0xff
 184 #endif
 185 #define INVALID_PTR_CHAR UCHAR_MAX
 186
 187 /* Whether the cell C is occupied (non-empty). */
 188 #define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
 189
 190 /* Clear the cell C, i.e. mark it as empty (unoccupied). */
 191 #define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
 192
 193 /* "Next" cell is the cell following C, but wrapping back to CELLS
 194    when C would reach CELLS+SIZE.  */
 195 #define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
 196
 197 /* Loop over occupied cells starting at C, terminating the loop when
 198    an empty cell is encountered.  */
 199 #define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
 200   for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
 201
 202 /* Return the position of KEY in hash table SIZE large, hash function
 203    being HASHFUN.  */
 204 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
 205
 206 /* Find a prime near, but greather than or equal to SIZE.  The primes
 207    are looked up from a table with a selection of primes convenient
 208    for this purpose.
 209
 210    PRIME_OFFSET is a minor optimization: it specifies start position
 211    for the search for the large enough prime.  The final offset is
 212    stored in the same variable.  That way the list of primes does not
 213    have to be scanned from the beginning each time around.  */
 214
 215 static int
 216 prime_size (int size, int *prime_offset)
 217 {
 218   static const int primes[] = {
 219     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
 220     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
 221     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
 222     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
 223     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
 224     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
 225     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
 226     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
 227     1174703521, 1527114613, 1837299131, 2147483647
 228   };
 229   size_t i;
 230
 231   for (i = *prime_offset; i < countof (primes); i++)
 232     if (primes[i] >= size)
 233       {
 234         /* Set the offset to the next prime.  That is safe because,
 235            next time we are called, it will be with a larger SIZE,
 236            which means we could never return the same prime anyway.
 237            (If that is not the case, the caller can simply reset
 238            *prime_offset.)  */
 239         *prime_offset = i + 1;
 240         return primes[i];
 241       }
 242
 243   abort ();
 244 }
 245
 246 static int cmp_pointer (const void *, const void *);
 247
 248 /* Create a hash table with hash function HASH_FUNCTION and test
 249    function TEST_FUNCTION.  The table is empty (its count is 0), but
 250    pre-allocated to store at least ITEMS items.
 251
 252    ITEMS is the number of items that the table can accept without
 253    needing to resize.  It is useful when creating a table that is to
 254    be immediately filled with a known number of items.  In that case,
 255    the regrows are a waste of time, and specifying ITEMS correctly
 256    will avoid them altogether.
 257
 258    Note that hash tables grow dynamically regardless of ITEMS.  The
 259    only use of ITEMS is to preallocate the table and avoid unnecessary
 260    dynamic regrows.  Don't bother making ITEMS prime because it's not
 261    used as size unchanged.  To start with a small table that grows as
 262    needed, simply specify zero ITEMS.
 263
 264    If hash and test callbacks are not specified, identity mapping is
 265    assumed, i.e. pointer values are used for key comparison.  (Common
 266    Lisp calls such tables EQ hash tables, and Java calls them
 267    IdentityHashMaps.)  If your keys require different comparison,
 268    specify hash and test functions.  For easy use of C strings as hash
 269    keys, you can use the convenience functions make_string_hash_table
 270    and make_nocase_string_hash_table.  */
 271
 272 struct hash_table *
 273 hash_table_new (int items,
 274                 unsigned long (*hash_function) (const void *),
 275                 int (*test_function) (const void *, const void *))
 276 {
 277   int size;
 278   struct hash_table *ht = xnew (struct hash_table);
 279
 280   ht->hash_function = hash_function ? hash_function : hash_pointer;
 281   ht->test_function = test_function ? test_function : cmp_pointer;
 282
 283   /* If the size of struct hash_table ever becomes a concern, this
 284      field can go.  (Wget doesn't create many hashes.)  */
 285   ht->prime_offset = 0;
 286
 287   /* Calculate the size that ensures that the table will store at
 288      least ITEMS keys without the need to resize.  */
 289   size = 1 + items / HASH_MAX_FULLNESS;
 290   size = prime_size (size, &ht->prime_offset);
 291   ht->size = size;
 292   ht->resize_threshold = size * HASH_MAX_FULLNESS;
 293   /*assert (ht->resize_threshold >= items);*/
 294
 295   ht->cells = xnew_array (struct cell, ht->size);
 296
 297   /* Mark cells as empty.  We use 0xff rather than 0 to mark empty
 298      keys because it allows us to use NULL/0 as keys.  */
 299   memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
 300
 301   ht->count = 0;
 302
 303   return ht;
 304 }
 305
 306 /* Free the data associated with hash table HT. */
 307
 308 void
 309 hash_table_destroy (struct hash_table *ht)
 310 {
 311   xfree (ht->cells);
 312   xfree (ht);
 313 }
 314
 315 /* The heart of most functions in this file -- find the cell whose
 316    KEY is equal to key, using linear probing.  Returns the cell
 317    that matches KEY, or the first empty cell if none matches.  */
 318
 319 static inline struct cell *
 320 find_cell (const struct hash_table *ht, const void *key)
 321 {
 322   struct cell *cells = ht->cells;
 323   int size = ht->size;
 324   struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
 325   testfun_t equals = ht->test_function;
 326
 327   FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 328     if (equals (key, c->key))
 329       break;
 330   return c;
 331 }
 332
 333 /* Get the value that corresponds to the key KEY in the hash table HT.
 334    If no value is found, return NULL.  Note that NULL is a legal value
 335    for value; if you are storing NULLs in your hash table, you can use
 336    hash_table_contains to be sure that a (possibly NULL) value exists
 337    in the table.  Or, you can use hash_table_get_pair instead of this
 338    function.  */
 339
 340 void *
 341 hash_table_get (const struct hash_table *ht, const void *key)
 342 {
 343   struct cell *c = find_cell (ht, key);
 344   if (CELL_OCCUPIED (c))
 345     return c->value;
 346   else
 347     return NULL;
 348 }
 349
 350 /* Like hash_table_get, but writes out the pointers to both key and
 351    value.  Returns non-zero on success.  */
 352
 353 int
 354 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
 355                      void *orig_key, void *value)
 356 {
 357   struct cell *c = find_cell (ht, lookup_key);
 358   if (CELL_OCCUPIED (c))
 359     {
 360       if (orig_key)
 361         *(void **)orig_key = c->key;
 362       if (value)
 363         *(void **)value = c->value;
 364       return 1;
 365     }
 366   else
 367     return 0;
 368 }
 369
 370 /* Return 1 if HT contains KEY, 0 otherwise. */
 371
 372 int
 373 hash_table_contains (const struct hash_table *ht, const void *key)
 374 {
 375   struct cell *c = find_cell (ht, key);
 376   return CELL_OCCUPIED (c);
 377 }
 378
 379 /* Grow hash table HT as necessary, and rehash all the key-value
 380    mappings.  */
 381
 382 static void
 383 grow_hash_table (struct hash_table *ht)
 384 {
 385   hashfun_t hasher = ht->hash_function;
 386   struct cell *old_cells = ht->cells;
 387   struct cell *old_end   = ht->cells + ht->size;
 388   struct cell *c, *cells;
 389   int newsize;
 390
 391   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
 392 #if 0
 393   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
 394           ht->size, newsize,
 395           100.0 * ht->count / ht->size,
 396           100.0 * ht->count / newsize);
 397 #endif
 398
 399   ht->size = newsize;
 400   ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
 401
 402   cells = xnew_array (struct cell, newsize);
 403   memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
 404   ht->cells = cells;
 405
 406   for (c = old_cells; c < old_end; c++)
 407     if (CELL_OCCUPIED (c))
 408       {
 409         struct cell *new_c;
 410         /* We don't need to test for uniqueness of keys because they
 411            come from the hash table and are therefore known to be
 412            unique.  */
 413         new_c = cells + HASH_POSITION (c->key, hasher, newsize);
 414         FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
 415           ;
 416         *new_c = *c;
 417       }
 418
 419   xfree (old_cells);
 420 }
 421
 422 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
 423    table if necessary.  */
 424
 425 void
 426 hash_table_put (struct hash_table *ht, const void *key, const void *value)
 427 {
 428   struct cell *c = find_cell (ht, key);
 429   if (CELL_OCCUPIED (c))
 430     {
 431       /* update existing item */
 432       c->key   = (void *)key; /* const? */
 433       c->value = (void *)value;
 434       return;
 435     }
 436
 437   /* If adding the item would make the table exceed max. fullness,
 438      grow the table first.  */
 439   if (ht->count >= ht->resize_threshold)
 440     {
 441       grow_hash_table (ht);
 442       c = find_cell (ht, key);
 443     }
 444
 445   /* add new item */
 446   ++ht->count;
 447   c->key   = (void *)key;       /* const? */
 448   c->value = (void *)value;
 449 }
 450
 451 /* Remove KEY->value mapping from HT.  Return 0 if there was no such
 452    entry; return 1 if an entry was removed.  */
 453
 454 int
 455 hash_table_remove (struct hash_table *ht, const void *key)
 456 {
 457   struct cell *c = find_cell (ht, key);
 458   if (!CELL_OCCUPIED (c))
 459     return 0;
 460   else
 461     {
 462       int size = ht->size;
 463       struct cell *cells = ht->cells;
 464       hashfun_t hasher = ht->hash_function;
 465
 466       CLEAR_CELL (c);
 467       --ht->count;
 468
 469       /* Rehash all the entries following C.  The alternative
 470          approach is to mark the entry as deleted, i.e. create a
 471          "tombstone".  That speeds up removal, but leaves a lot of
 472          garbage and slows down hash_table_get and hash_table_put.  */
 473
 474       c = NEXT_CELL (c, cells, size);
 475       FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 476         {
 477           const void *key2 = c->key;
 478           struct cell *c_new;
 479
 480           /* Find the new location for the key. */
 481           c_new = cells + HASH_POSITION (key2, hasher, size);
 482           FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
 483             if (key2 == c_new->key)
 484               /* The cell C (key2) is already where we want it (in
 485                  C_NEW's "chain" of keys.)  */
 486               goto next_rehash;
 487
 488           *c_new = *c;
 489           CLEAR_CELL (c);
 490
 491         next_rehash:
 492           ;
 493         }
 494       return 1;
 495     }
 496 }
 497
 498 /* Clear HT of all entries.  After calling this function, the count
 499    and the fullness of the hash table will be zero.  The size will
 500    remain unchanged.  */
 501
 502 void
 503 hash_table_clear (struct hash_table *ht)
 504 {
 505   memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
 506   ht->count = 0;
 507 }
 508
 509 /* Call FN for each entry in HT.  FN is called with three arguments:
 510    the key, the value, and ARG.  When FN returns a non-zero value, the
 511    mapping stops.
 512
 513    It is undefined what happens if you add or remove entries in the
 514    hash table while hash_table_for_each is running.  The exception is
 515    the entry you're currently mapping over; you may call
 516    hash_table_put or hash_table_remove on that entry's key.  That is
 517    also the reason why this function cannot be implemented in terms of
 518    hash_table_iterate.  */
 519
 520 void
 521 hash_table_for_each (struct hash_table *ht,
 522                      int (*fn) (void *, void *, void *), void *arg)
 523 {
 524   struct cell *c = ht->cells;
 525   struct cell *end = ht->cells + ht->size;
 526
 527   for (; c < end; c++)
 528     if (CELL_OCCUPIED (c))
 529       {
 530         void *key;
 531       repeat:
 532         key = c->key;
 533         if (fn (key, c->value, arg))
 534           return;
 535         /* hash_table_remove might have moved the adjacent cells. */
 536         if (c->key != key && CELL_OCCUPIED (c))
 537           goto repeat;
 538       }
 539 }
 540
 541 /* Initiate iteration over HT.  Entries are obtained with
 542    hash_table_iter_next, a typical iteration loop looking like this:
 543
 544        hash_table_iterator iter;
 545        for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
 546          ... do something with iter.key and iter.value ...
 547
 548    The iterator does not need to be deallocated after use.  The hash
 549    table must not be modified while being iterated over.  */
 550
 551 void
 552 hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
 553 {
 554   iter->pos = ht->cells;
 555   iter->end = ht->cells + ht->size;
 556 }
 557
 558 /* Get the next hash table entry.  ITER is an iterator object
 559    initialized using hash_table_iterate.  While there are more
 560    entries, the key and value pointers are stored to ITER->key and
 561    ITER->value respectively and 1 is returned.  When there are no more
 562    entries, 0 is returned.
 563
 564    If the hash table is modified between calls to this function, the
 565    result is undefined.  */
 566
 567 int
 568 hash_table_iter_next (hash_table_iterator *iter)
 569 {
 570   struct cell *c = iter->pos;
 571   struct cell *end = iter->end;
 572   for (; c < end; c++)
 573     if (CELL_OCCUPIED (c))
 574       {
 575         iter->key = c->key;
 576         iter->value = c->value;
 577         iter->pos = c + 1;
 578         return 1;
 579       }
 580   return 0;
 581 }
 582
 583 /* Return the number of elements in the hash table.  This is not the
 584    same as the physical size of the hash table, which is always
 585    greater than the number of elements.  */
 586
 587 int
 588 hash_table_count (const struct hash_table *ht)
 589 {
 590   return ht->count;
 591 }
 592 \f
 593 /* Functions from this point onward are meant for convenience and
 594    don't strictly belong to this file.  However, this is as good a
 595    place for them as any.  */
 596
 597 /* Guidelines for creating custom hash and test functions:
 598
 599    - The test function returns non-zero for keys that are considered
 600      "equal", zero otherwise.
 601
 602    - The hash function returns a number that represents the
 603      "distinctness" of the object.  In more precise terms, it means
 604      that for any two objects that test "equal" under the test
 605      function, the hash function MUST produce the same result.
 606
 607      This does not mean that all different objects must produce
 608      different values (that would be "perfect" hashing), only that
 609      non-distinct objects must produce the same values!  For instance,
 610      a hash function that returns 0 for any given object is a
 611      perfectly valid (albeit extremely bad) hash function.  A hash
 612      function that hashes a string by adding up all its characters is
 613      another example of a valid (but still quite bad) hash function.
 614
 615      It is not hard to make hash and test functions agree about
 616      equality.  For example, if the test function compares strings
 617      case-insensitively, the hash function can lower-case the
 618      characters when calculating the hash value.  That ensures that
 619      two strings differing only in case will hash the same.
 620
 621    - To prevent performance degradation, choose a hash function with
 622      as good "spreading" as possible.  A good hash function will use
 623      all the bits of the input when calculating the hash, and will
 624      react to even small changes in input with a completely different
 625      output.  But don't make the hash function itself overly slow,
 626      because you'll be incurring a non-negligible overhead to all hash
 627      table operations.  */
 628
 629 /*
 630  * Support for hash tables whose keys are strings.
 631  *
 632  */
 633
 634 /* Base 31 hash function.  Taken from Gnome's glib, modified to use
 635    standard C types.
 636
 637    We used to use the popular hash function from the Dragon Book, but
 638    this one seems to perform much better, both by being faster and by
 639    generating less collisions.  */
 640
 641 static unsigned long
 642 hash_string (const void *key)
 643 {
 644   const char *p = key;
 645   unsigned int h = *p;
 646
 647   if (h)
 648     for (p += 1; *p != '\0'; p++)
 649       h = (h << 5) - h + *p;
 650
 651   return h;
 652 }
 653
 654 /* Frontend for strcmp usable for hash tables. */
 655
 656 static int
 657 cmp_string (const void *s1, const void *s2)
 658 {
 659   return !strcmp ((const char *)s1, (const char *)s2);
 660 }
 661
 662 /* Return a hash table of preallocated to store at least ITEMS items
 663    suitable to use strings as keys.  */
 664
 665 struct hash_table *
 666 make_string_hash_table (int items)
 667 {
 668   return hash_table_new (items, hash_string, cmp_string);
 669 }
 670
 671 /*
 672  * Support for hash tables whose keys are strings, but which are
 673  * compared case-insensitively.
 674  *
 675  */
 676
 677 /* Like hash_string, but produce the same hash regardless of the case. */
 678
 679 static unsigned long
 680 hash_string_nocase (const void *key)
 681 {
 682   const char *p = key;
 683   unsigned int h = c_tolower (*p);
 684
 685   if (h)
 686     for (p += 1; *p != '\0'; p++)
 687       h = (h << 5) - h + c_tolower (*p);
 688
 689   return h;
 690 }
 691
 692 /* Like string_cmp, but doing case-insensitive compareison. */
 693
 694 static int
 695 string_cmp_nocase (const void *s1, const void *s2)
 696 {
 697   return !strcasecmp ((const char *)s1, (const char *)s2);
 698 }
 699
 700 /* Like make_string_hash_table, but uses string_hash_nocase and
 701    string_cmp_nocase.  */
 702
 703 struct hash_table *
 704 make_nocase_string_hash_table (int items)
 705 {
 706   return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
 707 }
 708
 709 /* Hashing of numeric values, such as pointers and integers.
 710
 711    This implementation is the Robert Jenkins' 32 bit Mix Function,
 712    with a simple adaptation for 64-bit values.  According to Jenkins
 713    it should offer excellent spreading of values.  Unlike the popular
 714    Knuth's multiplication hash, this function doesn't need to know the
 715    hash table size to work.  */
 716
 717 unsigned long
 718 hash_pointer (const void *ptr)
 719 {
 720   uintptr_t key = (uintptr_t) ptr;
 721   key += (key << 12);
 722   key ^= (key >> 22);
 723   key += (key << 4);
 724   key ^= (key >> 9);
 725   key += (key << 10);
 726   key ^= (key >> 2);
 727   key += (key << 7);
 728   key ^= (key >> 12);
 729 #if SIZEOF_VOID_P > 4
 730   key += (key << 44);
 731   key ^= (key >> 54);
 732   key += (key << 36);
 733   key ^= (key >> 41);
 734   key += (key << 42);
 735   key ^= (key >> 34);
 736   key += (key << 39);
 737   key ^= (key >> 44);
 738 #endif
 739   return (unsigned long) key;
 740 }
 741
 742 static int
 743 cmp_pointer (const void *ptr1, const void *ptr2)
 744 {
 745   return ptr1 == ptr2;
 746 }
 747 \f
 748 #ifdef TEST
 749
 750 #include <stdio.h>
 751 #include <string.h>
 752
 753 void
 754 print_hash (struct hash_table *sht)
 755 {
 756   hash_table_iterator iter;
 757   int count = 0;
 758
 759   for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
 760        ++count)
 761     printf ("%s: %s\n", iter.key, iter.value);
 762   assert (count == sht->count);
 763 }
 764
 765 int
 766 main (void)
 767 {
 768   struct hash_table *ht = make_string_hash_table (0);
 769   char line[80];
 770   while ((fgets (line, sizeof (line), stdin)))
 771     {
 772       int len = strlen (line);
 773       if (len <= 1)
 774         continue;
 775       line[--len] = '\0';
 776       if (!hash_table_contains (ht, line))
 777         hash_table_put (ht, strdup (line), "here I am!");
 778 #if 1
 779       if (len % 5 == 0)
 780         {
 781           char *line_copy;
 782           if (hash_table_get_pair (ht, line, &line_copy, NULL))
 783             {
 784               hash_table_remove (ht, line);
 785               xfree (line_copy);
 786             }
 787         }
 788 #endif
 789     }
 790 #if 0
 791   print_hash (ht);
 792 #endif
 793 #if 1
 794   printf ("%d %d\n", ht->count, ht->size);
 795 #endif
 796   return 0;
 797 }
 798 #endif /* TEST */