sjero.net Git - wget/blob - src/hash.c

   1 /* Hash tables.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free
   3    Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 /* With -DSTANDALONE, this file can be compiled outside Wget source
  32    tree.  To test, also use -DTEST.  */
  33
  34 #ifdef HAVE_CONFIG_H
  35 # include <config.h>
  36 #endif
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <assert.h>
  41 #include <string.h>
  42 #include <limits.h>
  43
  44 #ifndef STANDALONE
  45 /* Get Wget's utility headers. */
  46 # include "wget.h"
  47 # include "utils.h"
  48 #else
  49 /* Make do without them. */
  50 # define xnew(x) xmalloc (sizeof (x))
  51 # define xnew_array(type, x) xmalloc (sizeof (type) * (x))
  52 # define xmalloc malloc
  53 # define xfree free
  54 # ifndef countof
  55 #  define countof(x) (sizeof (x) / sizeof ((x)[0]))
  56 # endif
  57 # include <ctype.h>
  58 # define TOLOWER(x) tolower ((unsigned char) (x))
  59 # if __STDC_VERSION__ >= 199901L
  60 #  include <stdint.h>  /* for uintptr_t */
  61 # else
  62    typedef unsigned long uintptr_t;
  63 # endif
  64 #endif
  65
  66 #include "hash.h"
  67
  68 /* INTERFACE:
  69
  70    Hash tables are a technique used to implement mapping between
  71    objects with near-constant-time access and storage.  The table
  72    associates keys to values, and a value can be very quickly
  73    retrieved by providing the key.  Fast lookup tables are typically
  74    implemented as hash tables.
  75
  76    The entry points are
  77      hash_table_new       -- creates the table.
  78      hash_table_destroy   -- destroys the table.
  79      hash_table_put       -- establishes or updates key->value mapping.
  80      hash_table_get       -- retrieves value of key.
  81      hash_table_get_pair  -- get key/value pair for key.
  82      hash_table_contains  -- test whether the table contains key.
  83      hash_table_remove    -- remove key->value mapping for given key.
  84      hash_table_for_each  -- call function for each table entry.
  85      hash_table_iterate   -- iterate over entries in hash table.
  86      hash_table_iter_next -- return next element during iteration.
  87      hash_table_clear     -- clear hash table contents.
  88      hash_table_count     -- return the number of entries in the table.
  89
  90    The hash table grows internally as new entries are added and is not
  91    limited in size, except by available memory.  The table doubles
  92    with each resize, which ensures that the amortized time per
  93    operation remains constant.
  94
  95    If not instructed otherwise, tables created by hash_table_new
  96    consider the keys to be equal if their pointer values are the same.
  97    You can use make_string_hash_table to create tables whose keys are
  98    considered equal if their string contents are the same.  In the
  99    general case, the criterion of equality used to compare keys is
 100    specified at table creation time with two callback functions,
 101    "hash" and "test".  The hash function transforms the key into an
 102    arbitrary number that must be the same for two equal keys.  The
 103    test function accepts two keys and returns non-zero if they are to
 104    be considered equal.
 105
 106    Note that neither keys nor values are copied when inserted into the
 107    hash table, so they must exist for the lifetime of the table.  This
 108    means that e.g. the use of static strings is OK, but objects with a
 109    shorter life-time probably need to be copied (with strdup() or the
 110    like in the case of strings) before being inserted.  */
 111
 112 /* IMPLEMENTATION:
 113
 114    The hash table is implemented as an open-addressed table with
 115    linear probing collision resolution.
 116
 117    The above means that all the cells (each cell containing a key and
 118    a value pointer) are stored in a contiguous array.  Array position
 119    of each cell is determined by the hash value of its key and the
 120    size of the table: location := hash(key) % size.  If two different
 121    keys end up on the same position (collide), the one that came
 122    second is stored in the first unoccupied cell that follows it.
 123    This collision resolution technique is called "linear probing".
 124
 125    There are more advanced collision resolution methods (quadratic
 126    probing, double hashing), but we don't use them because they incur
 127    more non-sequential access to the array, which results in worse CPU
 128    cache behavior.  Linear probing works well as long as the
 129    count/size ratio (fullness) is kept below 75%.  We make sure to
 130    grow and rehash the table whenever this threshold is exceeded.
 131
 132    Collisions complicate deletion because simply clearing a cell
 133    followed by previously collided entries would cause those neighbors
 134    to not be picked up by find_cell later.  One solution is to leave a
 135    "tombstone" marker instead of clearing the cell, and another is to
 136    recalculate the positions of adjacent cells.  We take the latter
 137    approach because it results in less bookkeeping garbage and faster
 138    retrieval at the (slight) expense of deletion.  */
 139
 140 /* Maximum allowed fullness: when hash table's fullness exceeds this
 141    value, the table is resized.  */
 142 #define HASH_MAX_FULLNESS 0.75
 143
 144 /* The hash table size is multiplied by this factor (and then rounded
 145    to the next prime) with each resize.  This guarantees infrequent
 146    resizes.  */
 147 #define HASH_RESIZE_FACTOR 2
 148
 149 struct cell {
 150   void *key;
 151   void *value;
 152 };
 153
 154 typedef unsigned long (*hashfun_t) (const void *);
 155 typedef int (*testfun_t) (const void *, const void *);
 156
 157 struct hash_table {
 158   hashfun_t hash_function;
 159   testfun_t test_function;
 160
 161   struct cell *cells;           /* contiguous array of cells. */
 162   int size;                     /* size of the array. */
 163
 164   int count;                    /* number of occupied entries. */
 165   int resize_threshold;         /* after size exceeds this number of
 166                                    entries, resize the table.  */
 167   int prime_offset;             /* the offset of the current prime in
 168                                    the prime table. */
 169 };
 170
 171 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
 172    a cell is empty.  It is unaligned and therefore illegal as a
 173    pointer.  INVALID_PTR_CHAR (0xff) is the single-character constant
 174    used to initialize the entire cells array as empty.
 175
 176    The all-bits-set value is a better choice than NULL because it
 177    allows the use of NULL/0 keys.  Since the keys are either integers
 178    or pointers, the only key that cannot be used is the integer value
 179    -1.  This is acceptable because it still allows the use of
 180    nonnegative integer keys.  */
 181
 182 #define INVALID_PTR ((void *) ~(uintptr_t) 0)
 183 #ifndef UCHAR_MAX
 184 # define UCHAR_MAX 0xff
 185 #endif
 186 #define INVALID_PTR_CHAR UCHAR_MAX
 187
 188 /* Whether the cell C is occupied (non-empty). */
 189 #define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
 190
 191 /* Clear the cell C, i.e. mark it as empty (unoccupied). */
 192 #define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
 193
 194 /* "Next" cell is the cell following C, but wrapping back to CELLS
 195    when C would reach CELLS+SIZE.  */
 196 #define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
 197
 198 /* Loop over occupied cells starting at C, terminating the loop when
 199    an empty cell is encountered.  */
 200 #define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
 201   for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
 202
 203 /* Return the position of KEY in hash table SIZE large, hash function
 204    being HASHFUN.  */
 205 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
 206
 207 /* Find a prime near, but greather than or equal to SIZE.  The primes
 208    are looked up from a table with a selection of primes convenient
 209    for this purpose.
 210
 211    PRIME_OFFSET is a minor optimization: it specifies start position
 212    for the search for the large enough prime.  The final offset is
 213    stored in the same variable.  That way the list of primes does not
 214    have to be scanned from the beginning each time around.  */
 215
 216 static int
 217 prime_size (int size, int *prime_offset)
 218 {
 219   static const int primes[] = {
 220     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
 221     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
 222     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
 223     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
 224     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
 225     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
 226     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
 227     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
 228     1174703521, 1527114613, 1837299131, 2147483647
 229   };
 230   int i;
 231
 232   for (i = *prime_offset; i < countof (primes); i++)
 233     if (primes[i] >= size)
 234       {
 235         /* Set the offset to the next prime.  That is safe because,
 236            next time we are called, it will be with a larger SIZE,
 237            which means we could never return the same prime anyway.
 238            (If that is not the case, the caller can simply reset
 239            *prime_offset.)  */
 240         *prime_offset = i + 1;
 241         return primes[i];
 242       }
 243
 244   abort ();
 245 }
 246
 247 static int cmp_pointer (const void *, const void *);
 248
 249 /* Create a hash table with hash function HASH_FUNCTION and test
 250    function TEST_FUNCTION.  The table is empty (its count is 0), but
 251    pre-allocated to store at least ITEMS items.
 252
 253    ITEMS is the number of items that the table can accept without
 254    needing to resize.  It is useful when creating a table that is to
 255    be immediately filled with a known number of items.  In that case,
 256    the regrows are a waste of time, and specifying ITEMS correctly
 257    will avoid them altogether.
 258
 259    Note that hash tables grow dynamically regardless of ITEMS.  The
 260    only use of ITEMS is to preallocate the table and avoid unnecessary
 261    dynamic regrows.  Don't bother making ITEMS prime because it's not
 262    used as size unchanged.  To start with a small table that grows as
 263    needed, simply specify zero ITEMS.
 264
 265    If hash and test callbacks are not specified, identity mapping is
 266    assumed, i.e. pointer values are used for key comparison.  (Common
 267    Lisp calls such tables EQ hash tables, and Java calls them
 268    IdentityHashMaps.)  If your keys require different comparison,
 269    specify hash and test functions.  For easy use of C strings as hash
 270    keys, you can use the convenience functions make_string_hash_table
 271    and make_nocase_string_hash_table.  */
 272
 273 struct hash_table *
 274 hash_table_new (int items,
 275                 unsigned long (*hash_function) (const void *),
 276                 int (*test_function) (const void *, const void *))
 277 {
 278   int size;
 279   struct hash_table *ht = xnew (struct hash_table);
 280
 281   ht->hash_function = hash_function ? hash_function : hash_pointer;
 282   ht->test_function = test_function ? test_function : cmp_pointer;
 283
 284   /* If the size of struct hash_table ever becomes a concern, this
 285      field can go.  (Wget doesn't create many hashes.)  */
 286   ht->prime_offset = 0;
 287
 288   /* Calculate the size that ensures that the table will store at
 289      least ITEMS keys without the need to resize.  */
 290   size = 1 + items / HASH_MAX_FULLNESS;
 291   size = prime_size (size, &ht->prime_offset);
 292   ht->size = size;
 293   ht->resize_threshold = size * HASH_MAX_FULLNESS;
 294   /*assert (ht->resize_threshold >= items);*/
 295
 296   ht->cells = xnew_array (struct cell, ht->size);
 297
 298   /* Mark cells as empty.  We use 0xff rather than 0 to mark empty
 299      keys because it allows us to use NULL/0 as keys.  */
 300   memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
 301
 302   ht->count = 0;
 303
 304   return ht;
 305 }
 306
 307 /* Free the data associated with hash table HT. */
 308
 309 void
 310 hash_table_destroy (struct hash_table *ht)
 311 {
 312   xfree (ht->cells);
 313   xfree (ht);
 314 }
 315
 316 /* The heart of most functions in this file -- find the cell whose
 317    KEY is equal to key, using linear probing.  Returns the cell
 318    that matches KEY, or the first empty cell if none matches.  */
 319
 320 static inline struct cell *
 321 find_cell (const struct hash_table *ht, const void *key)
 322 {
 323   struct cell *cells = ht->cells;
 324   int size = ht->size;
 325   struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
 326   testfun_t equals = ht->test_function;
 327
 328   FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 329     if (equals (key, c->key))
 330       break;
 331   return c;
 332 }
 333
 334 /* Get the value that corresponds to the key KEY in the hash table HT.
 335    If no value is found, return NULL.  Note that NULL is a legal value
 336    for value; if you are storing NULLs in your hash table, you can use
 337    hash_table_contains to be sure that a (possibly NULL) value exists
 338    in the table.  Or, you can use hash_table_get_pair instead of this
 339    function.  */
 340
 341 void *
 342 hash_table_get (const struct hash_table *ht, const void *key)
 343 {
 344   struct cell *c = find_cell (ht, key);
 345   if (CELL_OCCUPIED (c))
 346     return c->value;
 347   else
 348     return NULL;
 349 }
 350
 351 /* Like hash_table_get, but writes out the pointers to both key and
 352    value.  Returns non-zero on success.  */
 353
 354 int
 355 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
 356                      void *orig_key, void *value)
 357 {
 358   struct cell *c = find_cell (ht, lookup_key);
 359   if (CELL_OCCUPIED (c))
 360     {
 361       if (orig_key)
 362         *(void **)orig_key = c->key;
 363       if (value)
 364         *(void **)value = c->value;
 365       return 1;
 366     }
 367   else
 368     return 0;
 369 }
 370
 371 /* Return 1 if HT contains KEY, 0 otherwise. */
 372
 373 int
 374 hash_table_contains (const struct hash_table *ht, const void *key)
 375 {
 376   struct cell *c = find_cell (ht, key);
 377   return CELL_OCCUPIED (c);
 378 }
 379
 380 /* Grow hash table HT as necessary, and rehash all the key-value
 381    mappings.  */
 382
 383 static void
 384 grow_hash_table (struct hash_table *ht)
 385 {
 386   hashfun_t hasher = ht->hash_function;
 387   struct cell *old_cells = ht->cells;
 388   struct cell *old_end   = ht->cells + ht->size;
 389   struct cell *c, *cells;
 390   int newsize;
 391
 392   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
 393 #if 0
 394   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
 395           ht->size, newsize,
 396           100.0 * ht->count / ht->size,
 397           100.0 * ht->count / newsize);
 398 #endif
 399
 400   ht->size = newsize;
 401   ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
 402
 403   cells = xnew_array (struct cell, newsize);
 404   memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
 405   ht->cells = cells;
 406
 407   for (c = old_cells; c < old_end; c++)
 408     if (CELL_OCCUPIED (c))
 409       {
 410         struct cell *new_c;
 411         /* We don't need to test for uniqueness of keys because they
 412            come from the hash table and are therefore known to be
 413            unique.  */
 414         new_c = cells + HASH_POSITION (c->key, hasher, newsize);
 415         FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
 416           ;
 417         *new_c = *c;
 418       }
 419
 420   xfree (old_cells);
 421 }
 422
 423 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
 424    table if necessary.  */
 425
 426 void
 427 hash_table_put (struct hash_table *ht, const void *key, void *value)
 428 {
 429   struct cell *c = find_cell (ht, key);
 430   if (CELL_OCCUPIED (c))
 431     {
 432       /* update existing item */
 433       c->key   = (void *)key; /* const? */
 434       c->value = value;
 435       return;
 436     }
 437
 438   /* If adding the item would make the table exceed max. fullness,
 439      grow the table first.  */
 440   if (ht->count >= ht->resize_threshold)
 441     {
 442       grow_hash_table (ht);
 443       c = find_cell (ht, key);
 444     }
 445
 446   /* add new item */
 447   ++ht->count;
 448   c->key   = (void *)key;       /* const? */
 449   c->value = value;
 450 }
 451
 452 /* Remove KEY->value mapping from HT.  Return 0 if there was no such
 453    entry; return 1 if an entry was removed.  */
 454
 455 int
 456 hash_table_remove (struct hash_table *ht, const void *key)
 457 {
 458   struct cell *c = find_cell (ht, key);
 459   if (!CELL_OCCUPIED (c))
 460     return 0;
 461   else
 462     {
 463       int size = ht->size;
 464       struct cell *cells = ht->cells;
 465       hashfun_t hasher = ht->hash_function;
 466
 467       CLEAR_CELL (c);
 468       --ht->count;
 469
 470       /* Rehash all the entries following C.  The alternative
 471          approach is to mark the entry as deleted, i.e. create a
 472          "tombstone".  That speeds up removal, but leaves a lot of
 473          garbage and slows down hash_table_get and hash_table_put.  */
 474
 475       c = NEXT_CELL (c, cells, size);
 476       FOREACH_OCCUPIED_ADJACENT (c, cells, size)
 477         {
 478           const void *key2 = c->key;
 479           struct cell *c_new;
 480
 481           /* Find the new location for the key. */
 482           c_new = cells + HASH_POSITION (key2, hasher, size);
 483           FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
 484             if (key2 == c_new->key)
 485               /* The cell C (key2) is already where we want it (in
 486                  C_NEW's "chain" of keys.)  */
 487               goto next_rehash;
 488
 489           *c_new = *c;
 490           CLEAR_CELL (c);
 491
 492         next_rehash:
 493           ;
 494         }
 495       return 1;
 496     }
 497 }
 498
 499 /* Clear HT of all entries.  After calling this function, the count
 500    and the fullness of the hash table will be zero.  The size will
 501    remain unchanged.  */
 502
 503 void
 504 hash_table_clear (struct hash_table *ht)
 505 {
 506   memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
 507   ht->count = 0;
 508 }
 509
 510 /* Call FN for each entry in HT.  FN is called with three arguments:
 511    the key, the value, and ARG.  When FN returns a non-zero value, the
 512    mapping stops.
 513
 514    It is undefined what happens if you add or remove entries in the
 515    hash table while hash_table_for_each is running.  The exception is
 516    the entry you're currently mapping over; you may call
 517    hash_table_put or hash_table_remove on that entry's key.  That is
 518    also the reason why this function cannot be implemented in terms of
 519    hash_table_iterate.  */
 520
 521 void
 522 hash_table_for_each (struct hash_table *ht,
 523                      int (*fn) (void *, void *, void *), void *arg)
 524 {
 525   struct cell *c = ht->cells;
 526   struct cell *end = ht->cells + ht->size;
 527
 528   for (; c < end; c++)
 529     if (CELL_OCCUPIED (c))
 530       {
 531         void *key;
 532       repeat:
 533         key = c->key;
 534         if (fn (key, c->value, arg))
 535           return;
 536         /* hash_table_remove might have moved the adjacent cells. */
 537         if (c->key != key && CELL_OCCUPIED (c))
 538           goto repeat;
 539       }
 540 }
 541
 542 /* Initiate iteration over HT.  Entries are obtained with
 543    hash_table_iter_next, a typical iteration loop looking like this:
 544
 545        hash_table_iterator iter;
 546        for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
 547          ... do something with iter.key and iter.value ...
 548
 549    The iterator does not need to be deallocated after use.  The hash
 550    table must not be modified while being iterated over.  */
 551
 552 void
 553 hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
 554 {
 555   iter->pos = ht->cells;
 556   iter->end = ht->cells + ht->size;
 557 }
 558
 559 /* Get the next hash table entry.  ITER is an iterator object
 560    initialized using hash_table_iterate.  While there are more
 561    entries, the key and value pointers are stored to ITER->key and
 562    ITER->value respectively and 1 is returned.  When there are no more
 563    entries, 0 is returned.
 564
 565    If the hash table is modified between calls to this function, the
 566    result is undefined.  */
 567
 568 int
 569 hash_table_iter_next (hash_table_iterator *iter)
 570 {
 571   struct cell *c = iter->pos;
 572   struct cell *end = iter->end;
 573   for (; c < end; c++)
 574     if (CELL_OCCUPIED (c))
 575       {
 576         iter->key = c->key;
 577         iter->value = c->value;
 578         iter->pos = c + 1;
 579         return 1;
 580       }
 581   return 0;
 582 }
 583
 584 /* Return the number of elements in the hash table.  This is not the
 585    same as the physical size of the hash table, which is always
 586    greater than the number of elements.  */
 587
 588 int
 589 hash_table_count (const struct hash_table *ht)
 590 {
 591   return ht->count;
 592 }
 593 \f
 594 /* Functions from this point onward are meant for convenience and
 595    don't strictly belong to this file.  However, this is as good a
 596    place for them as any.  */
 597
 598 /* Guidelines for creating custom hash and test functions:
 599
 600    - The test function returns non-zero for keys that are considered
 601      "equal", zero otherwise.
 602
 603    - The hash function returns a number that represents the
 604      "distinctness" of the object.  In more precise terms, it means
 605      that for any two objects that test "equal" under the test
 606      function, the hash function MUST produce the same result.
 607
 608      This does not mean that all different objects must produce
 609      different values (that would be "perfect" hashing), only that
 610      non-distinct objects must produce the same values!  For instance,
 611      a hash function that returns 0 for any given object is a
 612      perfectly valid (albeit extremely bad) hash function.  A hash
 613      function that hashes a string by adding up all its characters is
 614      another example of a valid (but still quite bad) hash function.
 615
 616      It is not hard to make hash and test functions agree about
 617      equality.  For example, if the test function compares strings
 618      case-insensitively, the hash function can lower-case the
 619      characters when calculating the hash value.  That ensures that
 620      two strings differing only in case will hash the same.
 621
 622    - To prevent performance degradation, choose a hash function with
 623      as good "spreading" as possible.  A good hash function will use
 624      all the bits of the input when calculating the hash, and will
 625      react to even small changes in input with a completely different
 626      output.  But don't make the hash function itself overly slow,
 627      because you'll be incurring a non-negligible overhead to all hash
 628      table operations.  */
 629
 630 /*
 631  * Support for hash tables whose keys are strings.
 632  *
 633  */
 634
 635 /* Base 31 hash function.  Taken from Gnome's glib, modified to use
 636    standard C types.
 637
 638    We used to use the popular hash function from the Dragon Book, but
 639    this one seems to perform much better, both by being faster and by
 640    generating less collisions.  */
 641
 642 static unsigned long
 643 hash_string (const void *key)
 644 {
 645   const char *p = key;
 646   unsigned int h = *p;
 647
 648   if (h)
 649     for (p += 1; *p != '\0'; p++)
 650       h = (h << 5) - h + *p;
 651
 652   return h;
 653 }
 654
 655 /* Frontend for strcmp usable for hash tables. */
 656
 657 static int
 658 cmp_string (const void *s1, const void *s2)
 659 {
 660   return !strcmp ((const char *)s1, (const char *)s2);
 661 }
 662
 663 /* Return a hash table of preallocated to store at least ITEMS items
 664    suitable to use strings as keys.  */
 665
 666 struct hash_table *
 667 make_string_hash_table (int items)
 668 {
 669   return hash_table_new (items, hash_string, cmp_string);
 670 }
 671
 672 /*
 673  * Support for hash tables whose keys are strings, but which are
 674  * compared case-insensitively.
 675  *
 676  */
 677
 678 /* Like hash_string, but produce the same hash regardless of the case. */
 679
 680 static unsigned long
 681 hash_string_nocase (const void *key)
 682 {
 683   const char *p = key;
 684   unsigned int h = TOLOWER (*p);
 685
 686   if (h)
 687     for (p += 1; *p != '\0'; p++)
 688       h = (h << 5) - h + TOLOWER (*p);
 689
 690   return h;
 691 }
 692
 693 /* Like string_cmp, but doing case-insensitive compareison. */
 694
 695 static int
 696 string_cmp_nocase (const void *s1, const void *s2)
 697 {
 698   return !strcasecmp ((const char *)s1, (const char *)s2);
 699 }
 700
 701 /* Like make_string_hash_table, but uses string_hash_nocase and
 702    string_cmp_nocase.  */
 703
 704 struct hash_table *
 705 make_nocase_string_hash_table (int items)
 706 {
 707   return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
 708 }
 709
 710 /* Hashing of numeric values, such as pointers and integers.
 711
 712    This implementation is the Robert Jenkins' 32 bit Mix Function,
 713    with a simple adaptation for 64-bit values.  According to Jenkins
 714    it should offer excellent spreading of values.  Unlike the popular
 715    Knuth's multiplication hash, this function doesn't need to know the
 716    hash table size to work.  */
 717
 718 unsigned long
 719 hash_pointer (const void *ptr)
 720 {
 721   uintptr_t key = (uintptr_t) ptr;
 722   key += (key << 12);
 723   key ^= (key >> 22);
 724   key += (key << 4);
 725   key ^= (key >> 9);
 726   key += (key << 10);
 727   key ^= (key >> 2);
 728   key += (key << 7);
 729   key ^= (key >> 12);
 730 #if SIZEOF_VOID_P > 4
 731   key += (key << 44);
 732   key ^= (key >> 54);
 733   key += (key << 36);
 734   key ^= (key >> 41);
 735   key += (key << 42);
 736   key ^= (key >> 34);
 737   key += (key << 39);
 738   key ^= (key >> 44);
 739 #endif
 740   return (unsigned long) key;
 741 }
 742
 743 static int
 744 cmp_pointer (const void *ptr1, const void *ptr2)
 745 {
 746   return ptr1 == ptr2;
 747 }
 748 \f
 749 #ifdef TEST
 750
 751 #include <stdio.h>
 752 #include <string.h>
 753
 754 void
 755 print_hash (struct hash_table *sht)
 756 {
 757   hash_table_iterator iter;
 758   int count = 0;
 759
 760   for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
 761        ++count)
 762     printf ("%s: %s\n", iter.key, iter.value);
 763   assert (count == sht->count);
 764 }
 765
 766 int
 767 main (void)
 768 {
 769   struct hash_table *ht = make_string_hash_table (0);
 770   char line[80];
 771   while ((fgets (line, sizeof (line), stdin)))
 772     {
 773       int len = strlen (line);
 774       if (len <= 1)
 775         continue;
 776       line[--len] = '\0';
 777       if (!hash_table_contains (ht, line))
 778         hash_table_put (ht, strdup (line), "here I am!");
 779 #if 1
 780       if (len % 5 == 0)
 781         {
 782           char *line_copy;
 783           if (hash_table_get_pair (ht, line, &line_copy, NULL))
 784             {
 785               hash_table_remove (ht, line);
 786               xfree (line_copy);
 787             }
 788         }
 789 #endif
 790     }
 791 #if 0
 792   print_hash (ht);
 793 #endif
 794 #if 1
 795   printf ("%d %d\n", ht->count, ht->size);
 796 #endif
 797   return 0;
 798 }
 799 #endif /* TEST */