sjero.net Git - wget/blob - src/hash.c

   1 /* Hash tables.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
   3    2008 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 /* With -DSTANDALONE, this file can be compiled outside Wget source
  32    tree.  To test, also use -DTEST.  */
  33
  34 #define USE_GNULIB_ALLOC
  35
  36 #ifndef STANDALONE
  37 # include "wget.h"
  38 #endif
  39
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <assert.h>
  43 #include <string.h>
  44 #include <limits.h>
  45
  46 #ifndef STANDALONE
  47 /* Get Wget's utility headers. */
  48 # include "utils.h"
  49 #else
  50 /* Make do without them. */
  51 # define xnew(x) xmalloc (sizeof (x))
  52 # define xnew_array(type, x) xmalloc (sizeof (type) * (x))
  53 # define xmalloc malloc
  54 # define xfree free
  55 # ifndef countof
  56 #  define countof(x) (sizeof (x) / sizeof ((x)[0]))
  57 # endif
  58 # include <ctype.h>
  59 # define c_tolower(x) tolower ((unsigned char) (x))
  60 # if __STDC_VERSION__ >= 199901L
  61 #  include <stdint.h>  /* for uintptr_t */
  62 # else
  63    typedef unsigned long uintptr_t;
  64 # endif
  65 #endif
  66
  67 #include "hash.h"
  68
  69 /* INTERFACE:
  70
  71    Hash tables are a technique used to implement mapping between
  72    objects with near-constant-time access and storage.  The table
  73    associates keys to values, and a value can be very quickly
  74    retrieved by providing the key.  Fast lookup tables are typically
  75    implemented as hash tables.
  76
  77    The entry points are
  78      hash_table_new       -- creates the table.
  79      hash_table_destroy   -- destroys the table.
  80      hash_table_put       -- establishes or updates key->value mapping.
  81      hash_table_get       -- retrieves value of key.
  82      hash_table_get_pair  -- get key/value pair for key.
  83      hash_table_contains  -- test whether the table contains key.
  84      hash_table_remove    -- remove key->value mapping for given key.
  85      hash_table_for_each  -- call function for each table entry.
  86      hash_table_iterate   -- iterate over entries in hash table.
  87      hash_table_iter_next -- return next element during iteration.
  88      hash_table_clear     -- clear hash table contents.
  89      hash_table_count     -- return the number of entries in the table.
  90
  91    The hash table grows internally as new entries are added and is not
  92    limited in size, except by available memory.  The table doubles
  93    with each resize, which ensures that the amortized time per
  94    operation remains constant.
  95
  96    If not instructed otherwise, tables created by hash_table_new
  97    consider the keys to be equal if their pointer values are the same.
  98    You can use make_string_hash_table to create tables whose keys are
  99    considered equal if their string contents are the same.  In the
 100    general case, the criterion of equality used to compare keys is
 101    specified at table creation time with two callback functions,
 102    "hash" and "test".  The hash function transforms the key into an
 103    arbitrary number that must be the same for two equal keys.  The
 104    test function accepts two keys and returns non-zero if they are to
 105    be considered equal.
 106
 107    Note that neither keys nor values are copied when inserted into the
 108    hash table, so they must exist for the lifetime of the table.  This
 109    means that e.g. the use of static strings is OK, but objects with a
 110    shorter life-time probably need to be copied (with strdup() or the
 111    like in the case of strings) before being inserted.  */
 112
 113 /* IMPLEMENTATION:
 114
 115    The hash table is implemented as an open-addressed table with
 116    linear probing collision resolution.
 117
 118    The above means that all the cells (each cell containing a key and
 119    a value pointer) are stored in a contiguous array.  Array position
 120    of each cell is determined by the hash value of its key and the
 121    size of the table: location := hash(key) % size.  If two different
 122    keys end up on the same position (collide), the one that came
 123    second is stored in the first unoccupied cell that follows it.
 124    This collision resolution technique is called "linear probing".
 125
 126    There are more advanced collision resolution methods (quadratic
 127    probing, double hashing), but we don't use them because they incur
 128    more non-sequential access to the array, which results in worse CPU
 129    cache behavior.  Linear probing works well as long as the
 130    count/size ratio (fullness) is kept below 75%.  We make sure to
 131    grow and rehash the table whenever this threshold is exceeded.
 132
 133    Collisions complicate deletion because simply clearing a cell
 134    followed by previously collided entries would cause those neighbors
 135    to not be picked up by find_cell later.  One solution is to leave a
 136    "tombstone" marker instead of clearing the cell, and another is to
 137    recalculate the positions of adjacent cells.  We take the latter
 138    approach because it results in less bookkeeping garbage and faster
 139    retrieval at the (slight) expense of deletion.  */
 140
 141 /* Maximum allowed fullness: when hash table's fullness exceeds this
 142    value, the table is resized.  */
 143 #define HASH_MAX_FULLNESS 0.75
 144
 145 /* The hash table size is multiplied by this factor (and then rounded
 146    to the next prime) with each resize.  This guarantees infrequent
 147    resizes.  */
 148 #define HASH_RESIZE_FACTOR 2
 149
 150 struct cell {
 151   void *key;
 152   void *value;
 153 };
 154
 155 typedef unsigned long (*hashfun_t) (const void *);
 156 typedef int (*testfun_t) (const void *, const void *);
 157
 158 struct hash_table {
 159   hashfun_t hash_function;
 160   testfun_t test_function;
 161
 162   struct cell *cells;           /* contiguous array of cells. */
 163   int size;                     /* size of the array. */
 164
 165   int count;                    /* number of occupied entries. */
 166   int resize_threshold;         /* after size exceeds this number of
 167                                    entries, resize the table.  */
 168   int prime_offset;             /* the offset of the current prime in
 169                                    the prime table. */
 170 };
 171
 172 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
 173    a cell is empty.  It is unaligned and therefore illegal as a
 174    pointer.  INVALID_PTR_CHAR (0xff) is the single-character constant
 175    used to initialize the entire cells array as empty.
 176
 177    The all-bits-set value is a better choice than NULL because it
 178    allows the use of NULL/0 keys.  Since the keys are either integers
 179    or pointers, the only key that cannot be used is the integer value
 180    -1.  This is acceptable because it still allows the use of
 181    nonnegative integer keys.  */
 182
 183 #define INVALID_PTR ((void *) ~(uintptr_t) 0)
 184 #ifndef UCHAR_MAX
 185 # define UCHAR_MAX 0xff
 186 #endif
 187 #define INVALID_PTR_CHAR UCHAR_MAX
 188
 189 /* Whether the cell C is occupied (non-empty). */
 190 #define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
 191
 192 /* Clear the cell C, i.e. mark it as empty (unoccupied). */
 193 #define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
 194
 195 /* "Next" cell is the cell following C, but wrapping back to CELLS
 196    when C would reach CELLS+SIZE.  */
 197 #define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
 198
 199 /* Loop over occupied cells starting at C, terminating the loop when
 200    an empty cell is encountered.  */
 201 #define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
 202   for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
 203
 204 /* Return the position of KEY in hash table SIZE large, hash function
 205    being HASHFUN.  */
 206 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
 207
 208 /* Find a prime near, but greather than or equal to SIZE.  The primes
 209    are looked up from a table with a selection of primes convenient
 210    for this purpose.
 211
 212    PRIME_OFFSET is a minor optimization: it specifies start position
 213    for the search for the large enough prime.  The final offset is
 214    stored in the same variable.  That way the list of primes does not
 215    have to be scanned from the beginning each time around.  */
 216
 217 static int
 218 prime_size (int size, int *prime_offset)
 219 {
 220   static const int primes[] = {
 221     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
 222     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
 223     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
 224     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
 225     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
 226     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
 227     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
 228     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
 229     1174703521, 1527114613, 1837299131, 2147483647
 230   };
 231   int i;
 232
 233   for (i = *prime_offset; i < countof (primes); i++)
 234     if (primes[i] >= size)
 235       {
 236         /* Set the offset to the next prime.  That is safe because,
 237            next time we are called, it will be with a larger SIZE,
 238            which means we could never return the same prime anyway.
 239            (If that is not the case, the caller can simply reset
 240            *prime_offset.)  */
 241         *prime_offset = i + 1;
 242         return primes[i];
 243       }
 244
 245   abort ();
 246 }
 247
 248 static int cmp_pointer (const void *, const void *);
 249
 250 /* Create a hash table with hash function HASH_FUNCTION and test
 251    function TEST_FUNCTION.  The table is empty (its count is 0), but
 252    pre-allocated to store at least ITEMS items.
 253
 254    ITEMS is the number of items that the table can accept without
 255    needing to resize.  It is useful when creating a table that is to
 256    be immediately filled with a known number of items.  In that case,
 257    the regrows are a waste of time, and specifying ITEMS correctly
 258    will avoid them altogether.
 259
 260    Note that hash tables grow dynamically regardless of ITEMS.  The
 261    only use of ITEMS is to preallocate the table and avoid unnecessary
 262    dynamic regrows.  Don't bother making ITEMS prime because it's not
 263    used as size unchanged.  To start with a small table that grows as
 264    needed, simply specify zero ITEMS.
 265
 266    If hash and test callbacks are not specified, identity mapping is
 267    assumed, i.e. pointer values are used for key comparison.  (Common
 268    Lisp calls such tables EQ hash tables, and Java calls them
 269    IdentityHashMaps.)  If your keys require different comparison,
 270    specify hash and test functions.  For easy use of C strings as hash
 271    keys, you can use the convenience functions make_string_hash_table
 272    and make_nocase_string_hash_table.  */
 273
 274 struct hash_table *
 275 hash_table_new (int items,
 276                 unsigned long (*hash_function) (const void *),
 277                 int (*test_function) (const void *, const void *))
 278 {
 279   int size;
 280   struct hash_table *ht = xnew (struct hash_table);
 281
 282   ht->hash_function = hash_function ? hash_function : hash_pointer;
 283   ht->test_function = test_function ? test_function : cmp_pointer;
 284
 285   /* If the size of struct hash_table ever becomes a concern, this
 286      field can go.  (Wget doesn't create many hashes.)  */
 287   ht->prime_offset = 0;
 288
 289   /* Calculate the size that ensures that the table will store at
 290      least ITEMS keys without the need to resize.  */
 291   size = 1 + items / HASH_MAX_FULLNESS;
 292   size = prime_size (size, &ht->prime_offset);
 293   ht->size = size;
 294   ht->resize_threshold = size * HASH_MAX_FULLNESS;
 295   /*assert (ht->resize_threshold >= items);*/
 296
 297   ht->cells = xnew_array (struct cell, ht->size);
 298
 299   /* Mark cells as empty.  We use 0xff rather than 0 to mark empty
 300      keys because it allows us to use NULL/0 as keys.  */
 301   memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
 302
 303   ht->count = 0;
 304
 305   return ht;
 306 }
 307
 308 /* Free the data associated with hash table HT. */
 309
 310 void
 311 hash_table_destroy (struct hash_table *ht)
 312 {
 313   xfree (ht->cells);
 314   xfree (ht);
 315 }
 316
 317 /* The heart of most functions in this file -- find the cell whose
 318    KEY is equal to key, using linear probing.  Returns the cell
 319    that matches KEY, or the first empty cell if none matches.  */
 320
 321 static inline struct cell *
 322 find_cell (const struct hash_table *ht, const void *key)
 323 {
 324   struct cell *cells = ht->cells;
 325   int size = ht->size;
 326   struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
 327   testfun_t equals = ht->test_function;
 328
 329   FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 330     if (equals (key, c->key))
 331       break;
 332   return c;
 333 }
 334
 335 /* Get the value that corresponds to the key KEY in the hash table HT.
 336    If no value is found, return NULL.  Note that NULL is a legal value
 337    for value; if you are storing NULLs in your hash table, you can use
 338    hash_table_contains to be sure that a (possibly NULL) value exists
 339    in the table.  Or, you can use hash_table_get_pair instead of this
 340    function.  */
 341
 342 void *
 343 hash_table_get (const struct hash_table *ht, const void *key)
 344 {
 345   struct cell *c = find_cell (ht, key);
 346   if (CELL_OCCUPIED (c))
 347     return c->value;
 348   else
 349     return NULL;
 350 }
 351
 352 /* Like hash_table_get, but writes out the pointers to both key and
 353    value.  Returns non-zero on success.  */
 354
 355 int
 356 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
 357                      void *orig_key, void *value)
 358 {
 359   struct cell *c = find_cell (ht, lookup_key);
 360   if (CELL_OCCUPIED (c))
 361     {
 362       if (orig_key)
 363         *(void **)orig_key = c->key;
 364       if (value)
 365         *(void **)value = c->value;
 366       return 1;
 367     }
 368   else
 369     return 0;
 370 }
 371
 372 /* Return 1 if HT contains KEY, 0 otherwise. */
 373
 374 int
 375 hash_table_contains (const struct hash_table *ht, const void *key)
 376 {
 377   struct cell *c = find_cell (ht, key);
 378   return CELL_OCCUPIED (c);
 379 }
 380
 381 /* Grow hash table HT as necessary, and rehash all the key-value
 382    mappings.  */
 383
 384 static void
 385 grow_hash_table (struct hash_table *ht)
 386 {
 387   hashfun_t hasher = ht->hash_function;
 388   struct cell *old_cells = ht->cells;
 389   struct cell *old_end   = ht->cells + ht->size;
 390   struct cell *c, *cells;
 391   int newsize;
 392
 393   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
 394 #if 0
 395   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
 396           ht->size, newsize,
 397           100.0 * ht->count / ht->size,
 398           100.0 * ht->count / newsize);
 399 #endif
 400
 401   ht->size = newsize;
 402   ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
 403
 404   cells = xnew_array (struct cell, newsize);
 405   memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
 406   ht->cells = cells;
 407
 408   for (c = old_cells; c < old_end; c++)
 409     if (CELL_OCCUPIED (c))
 410       {
 411         struct cell *new_c;
 412         /* We don't need to test for uniqueness of keys because they
 413            come from the hash table and are therefore known to be
 414            unique.  */
 415         new_c = cells + HASH_POSITION (c->key, hasher, newsize);
 416         FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
 417           ;
 418         *new_c = *c;
 419       }
 420
 421   xfree (old_cells);
 422 }
 423
 424 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
 425    table if necessary.  */
 426
 427 void
 428 hash_table_put (struct hash_table *ht, const void *key, void *value)
 429 {
 430   struct cell *c = find_cell (ht, key);
 431   if (CELL_OCCUPIED (c))
 432     {
 433       /* update existing item */
 434       c->key   = (void *)key; /* const? */
 435       c->value = value;
 436       return;
 437     }
 438
 439   /* If adding the item would make the table exceed max. fullness,
 440      grow the table first.  */
 441   if (ht->count >= ht->resize_threshold)
 442     {
 443       grow_hash_table (ht);
 444       c = find_cell (ht, key);
 445     }
 446
 447   /* add new item */
 448   ++ht->count;
 449   c->key   = (void *)key;       /* const? */
 450   c->value = value;
 451 }
 452
 453 /* Remove KEY->value mapping from HT.  Return 0 if there was no such
 454    entry; return 1 if an entry was removed.  */
 455
 456 int
 457 hash_table_remove (struct hash_table *ht, const void *key)
 458 {
 459   struct cell *c = find_cell (ht, key);
 460   if (!CELL_OCCUPIED (c))
 461     return 0;
 462   else
 463     {
 464       int size = ht->size;
 465       struct cell *cells = ht->cells;
 466       hashfun_t hasher = ht->hash_function;
 467
 468       CLEAR_CELL (c);
 469       --ht->count;
 470
 471       /* Rehash all the entries following C.  The alternative
 472          approach is to mark the entry as deleted, i.e. create a
 473          "tombstone".  That speeds up removal, but leaves a lot of
 474          garbage and slows down hash_table_get and hash_table_put.  */
 475
 476       c = NEXT_CELL (c, cells, size);
 477       FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 478         {
 479           const void *key2 = c->key;
 480           struct cell *c_new;
 481
 482           /* Find the new location for the key. */
 483           c_new = cells + HASH_POSITION (key2, hasher, size);
 484           FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
 485             if (key2 == c_new->key)
 486               /* The cell C (key2) is already where we want it (in
 487                  C_NEW's "chain" of keys.)  */
 488               goto next_rehash;
 489
 490           *c_new = *c;
 491           CLEAR_CELL (c);
 492
 493         next_rehash:
 494           ;
 495         }
 496       return 1;
 497     }
 498 }
 499
 500 /* Clear HT of all entries.  After calling this function, the count
 501    and the fullness of the hash table will be zero.  The size will
 502    remain unchanged.  */
 503
 504 void
 505 hash_table_clear (struct hash_table *ht)
 506 {
 507   memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
 508   ht->count = 0;
 509 }
 510
 511 /* Call FN for each entry in HT.  FN is called with three arguments:
 512    the key, the value, and ARG.  When FN returns a non-zero value, the
 513    mapping stops.
 514
 515    It is undefined what happens if you add or remove entries in the
 516    hash table while hash_table_for_each is running.  The exception is
 517    the entry you're currently mapping over; you may call
 518    hash_table_put or hash_table_remove on that entry's key.  That is
 519    also the reason why this function cannot be implemented in terms of
 520    hash_table_iterate.  */
 521
 522 void
 523 hash_table_for_each (struct hash_table *ht,
 524                      int (*fn) (void *, void *, void *), void *arg)
 525 {
 526   struct cell *c = ht->cells;
 527   struct cell *end = ht->cells + ht->size;
 528
 529   for (; c < end; c++)
 530     if (CELL_OCCUPIED (c))
 531       {
 532         void *key;
 533       repeat:
 534         key = c->key;
 535         if (fn (key, c->value, arg))
 536           return;
 537         /* hash_table_remove might have moved the adjacent cells. */
 538         if (c->key != key && CELL_OCCUPIED (c))
 539           goto repeat;
 540       }
 541 }
 542
 543 /* Initiate iteration over HT.  Entries are obtained with
 544    hash_table_iter_next, a typical iteration loop looking like this:
 545
 546        hash_table_iterator iter;
 547        for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
 548          ... do something with iter.key and iter.value ...
 549
 550    The iterator does not need to be deallocated after use.  The hash
 551    table must not be modified while being iterated over.  */
 552
 553 void
 554 hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
 555 {
 556   iter->pos = ht->cells;
 557   iter->end = ht->cells + ht->size;
 558 }
 559
 560 /* Get the next hash table entry.  ITER is an iterator object
 561    initialized using hash_table_iterate.  While there are more
 562    entries, the key and value pointers are stored to ITER->key and
 563    ITER->value respectively and 1 is returned.  When there are no more
 564    entries, 0 is returned.
 565
 566    If the hash table is modified between calls to this function, the
 567    result is undefined.  */
 568
 569 int
 570 hash_table_iter_next (hash_table_iterator *iter)
 571 {
 572   struct cell *c = iter->pos;
 573   struct cell *end = iter->end;
 574   for (; c < end; c++)
 575     if (CELL_OCCUPIED (c))
 576       {
 577         iter->key = c->key;
 578         iter->value = c->value;
 579         iter->pos = c + 1;
 580         return 1;
 581       }
 582   return 0;
 583 }
 584
 585 /* Return the number of elements in the hash table.  This is not the
 586    same as the physical size of the hash table, which is always
 587    greater than the number of elements.  */
 588
 589 int
 590 hash_table_count (const struct hash_table *ht)
 591 {
 592   return ht->count;
 593 }
 594 \f
 595 /* Functions from this point onward are meant for convenience and
 596    don't strictly belong to this file.  However, this is as good a
 597    place for them as any.  */
 598
 599 /* Guidelines for creating custom hash and test functions:
 600
 601    - The test function returns non-zero for keys that are considered
 602      "equal", zero otherwise.
 603
 604    - The hash function returns a number that represents the
 605      "distinctness" of the object.  In more precise terms, it means
 606      that for any two objects that test "equal" under the test
 607      function, the hash function MUST produce the same result.
 608
 609      This does not mean that all different objects must produce
 610      different values (that would be "perfect" hashing), only that
 611      non-distinct objects must produce the same values!  For instance,
 612      a hash function that returns 0 for any given object is a
 613      perfectly valid (albeit extremely bad) hash function.  A hash
 614      function that hashes a string by adding up all its characters is
 615      another example of a valid (but still quite bad) hash function.
 616
 617      It is not hard to make hash and test functions agree about
 618      equality.  For example, if the test function compares strings
 619      case-insensitively, the hash function can lower-case the
 620      characters when calculating the hash value.  That ensures that
 621      two strings differing only in case will hash the same.
 622
 623    - To prevent performance degradation, choose a hash function with
 624      as good "spreading" as possible.  A good hash function will use
 625      all the bits of the input when calculating the hash, and will
 626      react to even small changes in input with a completely different
 627      output.  But don't make the hash function itself overly slow,
 628      because you'll be incurring a non-negligible overhead to all hash
 629      table operations.  */
 630
 631 /*
 632  * Support for hash tables whose keys are strings.
 633  *
 634  */
 635
 636 /* Base 31 hash function.  Taken from Gnome's glib, modified to use
 637    standard C types.
 638
 639    We used to use the popular hash function from the Dragon Book, but
 640    this one seems to perform much better, both by being faster and by
 641    generating less collisions.  */
 642
 643 static unsigned long
 644 hash_string (const void *key)
 645 {
 646   const char *p = key;
 647   unsigned int h = *p;
 648
 649   if (h)
 650     for (p += 1; *p != '\0'; p++)
 651       h = (h << 5) - h + *p;
 652
 653   return h;
 654 }
 655
 656 /* Frontend for strcmp usable for hash tables. */
 657
 658 static int
 659 cmp_string (const void *s1, const void *s2)
 660 {
 661   return !strcmp ((const char *)s1, (const char *)s2);
 662 }
 663
 664 /* Return a hash table of preallocated to store at least ITEMS items
 665    suitable to use strings as keys.  */
 666
 667 struct hash_table *
 668 make_string_hash_table (int items)
 669 {
 670   return hash_table_new (items, hash_string, cmp_string);
 671 }
 672
 673 /*
 674  * Support for hash tables whose keys are strings, but which are
 675  * compared case-insensitively.
 676  *
 677  */
 678
 679 /* Like hash_string, but produce the same hash regardless of the case. */
 680
 681 static unsigned long
 682 hash_string_nocase (const void *key)
 683 {
 684   const char *p = key;
 685   unsigned int h = c_tolower (*p);
 686
 687   if (h)
 688     for (p += 1; *p != '\0'; p++)
 689       h = (h << 5) - h + c_tolower (*p);
 690
 691   return h;
 692 }
 693
 694 /* Like string_cmp, but doing case-insensitive compareison. */
 695
 696 static int
 697 string_cmp_nocase (const void *s1, const void *s2)
 698 {
 699   return !strcasecmp ((const char *)s1, (const char *)s2);
 700 }
 701
 702 /* Like make_string_hash_table, but uses string_hash_nocase and
 703    string_cmp_nocase.  */
 704
 705 struct hash_table *
 706 make_nocase_string_hash_table (int items)
 707 {
 708   return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
 709 }
 710
 711 /* Hashing of numeric values, such as pointers and integers.
 712
 713    This implementation is the Robert Jenkins' 32 bit Mix Function,
 714    with a simple adaptation for 64-bit values.  According to Jenkins
 715    it should offer excellent spreading of values.  Unlike the popular
 716    Knuth's multiplication hash, this function doesn't need to know the
 717    hash table size to work.  */
 718
 719 unsigned long
 720 hash_pointer (const void *ptr)
 721 {
 722   uintptr_t key = (uintptr_t) ptr;
 723   key += (key << 12);
 724   key ^= (key >> 22);
 725   key += (key << 4);
 726   key ^= (key >> 9);
 727   key += (key << 10);
 728   key ^= (key >> 2);
 729   key += (key << 7);
 730   key ^= (key >> 12);
 731 #if SIZEOF_VOID_P > 4
 732   key += (key << 44);
 733   key ^= (key >> 54);
 734   key += (key << 36);
 735   key ^= (key >> 41);
 736   key += (key << 42);
 737   key ^= (key >> 34);
 738   key += (key << 39);
 739   key ^= (key >> 44);
 740 #endif
 741   return (unsigned long) key;
 742 }
 743
 744 static int
 745 cmp_pointer (const void *ptr1, const void *ptr2)
 746 {
 747   return ptr1 == ptr2;
 748 }
 749 \f
 750 #ifdef TEST
 751
 752 #include <stdio.h>
 753 #include <string.h>
 754
 755 void
 756 print_hash (struct hash_table *sht)
 757 {
 758   hash_table_iterator iter;
 759   int count = 0;
 760
 761   for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
 762        ++count)
 763     printf ("%s: %s\n", iter.key, iter.value);
 764   assert (count == sht->count);
 765 }
 766
 767 int
 768 main (void)
 769 {
 770   struct hash_table *ht = make_string_hash_table (0);
 771   char line[80];
 772   while ((fgets (line, sizeof (line), stdin)))
 773     {
 774       int len = strlen (line);
 775       if (len <= 1)
 776         continue;
 777       line[--len] = '\0';
 778       if (!hash_table_contains (ht, line))
 779         hash_table_put (ht, strdup (line), "here I am!");
 780 #if 1
 781       if (len % 5 == 0)
 782         {
 783           char *line_copy;
 784           if (hash_table_get_pair (ht, line, &line_copy, NULL))
 785             {
 786               hash_table_remove (ht, line);
 787               xfree (line_copy);
 788             }
 789         }
 790 #endif
 791     }
 792 #if 0
 793   print_hash (ht);
 794 #endif
 795 #if 1
 796   printf ("%d %d\n", ht->count, ht->size);
 797 #endif
 798   return 0;
 799 }
 800 #endif /* TEST */