sjero.net Git - wget/blob - src/hash.c

   1 /* Hash tables.
   2    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #ifdef HAVE_CONFIG_H
  31 # include <config.h>
  32 #endif
  33
  34 #ifdef HAVE_STRING_H
  35 # include <string.h>
  36 #else
  37 # include <strings.h>
  38 #endif
  39 #ifdef HAVE_LIMITS_H
  40 # include <limits.h>
  41 #endif
  42 #include <stdlib.h>
  43 #include <assert.h>
  44
  45 #include "wget.h"
  46 #include "utils.h"
  47
  48 #include "hash.h"
  49
  50 #ifdef STANDALONE
  51 # undef xmalloc
  52 # undef xrealloc
  53 # undef xfree
  54
  55 # define xmalloc malloc
  56 # define xrealloc realloc
  57 # define xfree free
  58
  59 # undef TOLOWER
  60 # define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) - 32 : (x))
  61 #endif
  62
  63 /* INTERFACE:
  64
  65    Hash tables are a technique used to implement mapping between
  66    objects with near-constant-time access and storage.  The table
  67    associates keys to values, and a value can be very quickly
  68    retrieved by providing the key.  Fast lookup tables are typically
  69    implemented as hash tables.
  70
  71    The entry points are
  72      hash_table_new       -- creates the table.
  73      hash_table_destroy   -- destroys the table.
  74      hash_table_put       -- establishes or updates key->value mapping.
  75      hash_table_get       -- retrieves value of key.
  76      hash_table_get_pair  -- get key/value pair for key.
  77      hash_table_contains  -- test whether the table contains key.
  78      hash_table_remove    -- remove the key->value mapping for key.
  79      hash_table_map       -- iterate through table mappings.
  80      hash_table_clear     -- clear hash table contents.
  81      hash_table_count     -- return the number of entries in the table.
  82
  83    The hash table grows internally as new entries are added and is not
  84    limited in size, except by available memory.  The table doubles
  85    with each resize, which ensures that the amortized time per
  86    operation remains constant.
  87
  88    By default, tables created by hash_table_new consider the keys to
  89    be equal if their pointer values are the same.  You can use
  90    make_string_hash_table to create tables whose keys are considered
  91    equal if their string contents are the same.  In the general case,
  92    the criterion of equality used to compare keys is specified at
  93    table creation time with two callback functions, "hash" and "test".
  94    The hash function transforms the key into an arbitrary number that
  95    must be the same for two equal keys.  The test function accepts two
  96    keys and returns non-zero if they are to be considered equal.
  97
  98    Note that neither keys nor values are copied when inserted into the
  99    hash table, so they must exist for the lifetime of the table.  This
 100    means that e.g. the use of static strings is OK, but objects with a
 101    shorter life-time need to be copied (with strdup() or the like in
 102    the case of strings) before being inserted.  */
 103
 104 /* IMPLEMENTATION:
 105
 106    The hash table is implemented as an open-addressed table with
 107    linear probing collision resolution.
 108
 109    The above means that all the hash entries (pairs of pointers, key
 110    and value) are stored in a contiguous array.  The position of each
 111    mapping is determined by the hash value of its key and the size of
 112    the table: location := hash(key) % size.  If two different keys end
 113    up on the same position (collide), the one that came second is
 114    placed at the next empty position following the occupied place.
 115    This collision resolution technique is called "linear probing".
 116
 117    There are more advanced collision resolution methods (quadratic
 118    probing, double hashing), but we don't use them because they incur
 119    more non-sequential access to the array, which results in worse CPU
 120    cache behavior.  Linear probing works well as long as the
 121    count/size ratio (fullness) is kept below 75%.  We make sure to
 122    grow and rehash the table whenever this threshold is exceeded.
 123
 124    Collisions make deletion tricky because clearing a position
 125    followed by a colliding entry would make the position seem empty
 126    and the colliding entry not found.  One solution is to leave a
 127    "tombstone" instead of clearing the entry, and another is to
 128    carefully rehash the entries immediately following the deleted one.
 129    We use the latter method because it results in less bookkeeping and
 130    faster retrieval at the (slight) expense of deletion.  */
 131
 132 /* Maximum allowed fullness: when hash table's fullness exceeds this
 133    value, the table is resized.  */
 134 #define HASH_MAX_FULLNESS 0.75
 135
 136 /* The hash table size is multiplied by this factor (and then rounded
 137    to the next prime) with each resize.  This guarantees infrequent
 138    resizes.  */
 139 #define HASH_RESIZE_FACTOR 2
 140
 141 struct mapping {
 142   void *key;
 143   void *value;
 144 };
 145
 146 typedef unsigned long (*hashfun_t) PARAMS ((const void *));
 147 typedef int (*testfun_t) PARAMS ((const void *, const void *));
 148
 149 struct hash_table {
 150   hashfun_t hash_function;
 151   testfun_t test_function;
 152
 153   struct mapping *mappings;     /* pointer to the table entries. */
 154   int size;                     /* size of the array. */
 155
 156   int count;                    /* number of non-empty entries. */
 157   int resize_threshold;         /* after size exceeds this number of
 158                                    entries, resize the table.  */
 159   int prime_offset;             /* the offset of the current prime in
 160                                    the prime table. */
 161 };
 162
 163 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
 164    a mapping is empty.  It is unaligned and therefore illegal as a
 165    pointer.  INVALID_PTR_BYTE (0xff) is the one-byte value used to
 166    initialize the mappings array as empty.
 167
 168    The all-bits-set value is a better choice than NULL because it
 169    allows the use of NULL/0 keys.  Since the keys are either integers
 170    or pointers, the only key that cannot be used is the integer value
 171    -1.  This is acceptable because it still allows the use of
 172    nonnegative integer keys.  */
 173
 174 #define INVALID_PTR ((void *) ~(unsigned long)0)
 175 #ifndef UCHAR_MAX
 176 # define UCHAR_MAX 0xff
 177 #endif
 178 #define INVALID_PTR_BYTE UCHAR_MAX
 179
 180 #define NON_EMPTY(mp) ((mp)->key != INVALID_PTR)
 181 #define MARK_AS_EMPTY(mp) ((mp)->key = INVALID_PTR)
 182
 183 /* "Next" mapping is the mapping after MP, but wrapping back to
 184    MAPPINGS when MP would reach MAPPINGS+SIZE.  */
 185 #define NEXT_MAPPING(mp, mappings, size) (mp != mappings + (size - 1)   \
 186                                           ? mp + 1 : mappings)
 187
 188 /* Loop over non-empty mappings starting at MP. */
 189 #define LOOP_NON_EMPTY(mp, mappings, size)                              \
 190   for (; NON_EMPTY (mp); mp = NEXT_MAPPING (mp, mappings, size))
 191
 192 /* Return the position of KEY in hash table SIZE large, hash function
 193    being HASHFUN.  */
 194 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
 195
 196 /* Find a prime near, but greather than or equal to SIZE.  The primes
 197    are looked up from a table with a selection of primes convenient
 198    for this purpose.
 199
 200    PRIME_OFFSET is a minor optimization: it specifies start position
 201    for the search for the large enough prime.  The final offset is
 202    stored in the same variable.  That way the list of primes does not
 203    have to be scanned from the beginning each time around.  */
 204
 205 static int
 206 prime_size (int size, int *prime_offset)
 207 {
 208   static const unsigned long primes [] = {
 209     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
 210     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
 211     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
 212     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
 213     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
 214     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
 215     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
 216     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
 217     1174703521, 1527114613, 1985248999,
 218     (unsigned long)0x99d43ea5, (unsigned long)0xc7fa5177
 219   };
 220   int i;
 221
 222   for (i = *prime_offset; i < countof (primes); i++)
 223     if (primes[i] >= size)
 224       {
 225         /* Set the offset to the next prime.  That is safe because,
 226            next time we are called, it will be with a larger SIZE,
 227            which means we could never return the same prime anyway.
 228            (If that is not the case, the caller can simply reset
 229            *prime_offset.)  */
 230         *prime_offset = i + 1;
 231         return primes[i];
 232       }
 233
 234   abort ();
 235   return 0;
 236 }
 237
 238 static unsigned long ptrhash PARAMS ((const void *));
 239 static int ptrcmp PARAMS ((const void *, const void *));
 240
 241 /* Create a hash table with hash function HASH_FUNCTION and test
 242    function TEST_FUNCTION.  The table is empty (its count is 0), but
 243    pre-allocated to store at least ITEMS items.
 244
 245    ITEMS is the number of items that the table can accept without
 246    needing to resize.  It is useful when creating a table that is to
 247    be immediately filled with a known number of items.  In that case,
 248    the regrows are a waste of time, and specifying ITEMS correctly
 249    will avoid them altogether.
 250
 251    Note that hash tables grow dynamically regardless of ITEMS.  The
 252    only use of ITEMS is to preallocate the table and avoid unnecessary
 253    dynamic regrows.  Don't bother making ITEMS prime because it's not
 254    used as size unchanged.  To start with a small table that grows as
 255    needed, simply specify zero ITEMS.
 256
 257    If hash and test callbacks are not specified, identity mapping is
 258    assumed, i.e. pointer values are used for key comparison.  (Common
 259    Lisp calls such tables EQ hash tables, and Java calls them
 260    IdentityHashMaps.)  If your keys require different comparison,
 261    specify hash and test functions.  For easy use of C strings as hash
 262    keys, you can use the convenience functions make_string_hash_table
 263    and make_nocase_string_hash_table.  */
 264
 265 struct hash_table *
 266 hash_table_new (int items,
 267                 unsigned long (*hash_function) (const void *),
 268                 int (*test_function) (const void *, const void *))
 269 {
 270   int size;
 271   struct hash_table *ht = xnew (struct hash_table);
 272
 273   ht->hash_function = hash_function ? hash_function : ptrhash;
 274   ht->test_function = test_function ? test_function : ptrcmp;
 275
 276   /* If the size of struct hash_table ever becomes a concern, this
 277      field can go.  (Wget doesn't create many hashes.)  */
 278   ht->prime_offset = 0;
 279
 280   /* Calculate the size that ensures that the table will store at
 281      least ITEMS keys without the need to resize.  */
 282   size = 1 + items / HASH_MAX_FULLNESS;
 283   size = prime_size (size, &ht->prime_offset);
 284   ht->size = size;
 285   ht->resize_threshold = size * HASH_MAX_FULLNESS;
 286   /*assert (ht->resize_threshold >= items);*/
 287
 288   ht->mappings = xnew_array (struct mapping, ht->size);
 289   /* Mark mappings as empty.  We use 0xff rather than 0 to mark empty
 290      keys because it allows us to use NULL/0 as keys.  */
 291   memset (ht->mappings, INVALID_PTR_BYTE, size * sizeof (struct mapping));
 292
 293   ht->count = 0;
 294
 295   return ht;
 296 }
 297
 298 /* Free the data associated with hash table HT. */
 299
 300 void
 301 hash_table_destroy (struct hash_table *ht)
 302 {
 303   xfree (ht->mappings);
 304   xfree (ht);
 305 }
 306
 307 /* The heart of most functions in this file -- find the mapping whose
 308    KEY is equal to key, using linear probing.  Returns the mapping
 309    that matches KEY, or the first empty mapping if none matches.  */
 310
 311 static inline struct mapping *
 312 find_mapping (const struct hash_table *ht, const void *key)
 313 {
 314   struct mapping *mappings = ht->mappings;
 315   int size = ht->size;
 316   struct mapping *mp = mappings + HASH_POSITION (key, ht->hash_function, size);
 317   testfun_t equals = ht->test_function;
 318
 319   LOOP_NON_EMPTY (mp, mappings, size)
 320     if (equals (key, mp->key))
 321       break;
 322   return mp;
 323 }
 324
 325 /* Get the value that corresponds to the key KEY in the hash table HT.
 326    If no value is found, return NULL.  Note that NULL is a legal value
 327    for value; if you are storing NULLs in your hash table, you can use
 328    hash_table_contains to be sure that a (possibly NULL) value exists
 329    in the table.  Or, you can use hash_table_get_pair instead of this
 330    function.  */
 331
 332 void *
 333 hash_table_get (const struct hash_table *ht, const void *key)
 334 {
 335   struct mapping *mp = find_mapping (ht, key);
 336   if (NON_EMPTY (mp))
 337     return mp->value;
 338   else
 339     return NULL;
 340 }
 341
 342 /* Like hash_table_get, but writes out the pointers to both key and
 343    value.  Returns non-zero on success.  */
 344
 345 int
 346 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
 347                      void *orig_key, void *value)
 348 {
 349   struct mapping *mp = find_mapping (ht, lookup_key);
 350   if (NON_EMPTY (mp))
 351     {
 352       if (orig_key)
 353         *(void **)orig_key = mp->key;
 354       if (value)
 355         *(void **)value = mp->value;
 356       return 1;
 357     }
 358   else
 359     return 0;
 360 }
 361
 362 /* Return 1 if HT contains KEY, 0 otherwise. */
 363
 364 int
 365 hash_table_contains (const struct hash_table *ht, const void *key)
 366 {
 367   struct mapping *mp = find_mapping (ht, key);
 368   return NON_EMPTY (mp);
 369 }
 370
 371 /* Grow hash table HT as necessary, and rehash all the key-value
 372    mappings.  */
 373
 374 static void
 375 grow_hash_table (struct hash_table *ht)
 376 {
 377   hashfun_t hasher = ht->hash_function;
 378   struct mapping *old_mappings = ht->mappings;
 379   struct mapping *old_end      = ht->mappings + ht->size;
 380   struct mapping *mp, *mappings;
 381   int newsize;
 382
 383   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
 384 #if 0
 385   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
 386           ht->size, newsize,
 387           100.0 * ht->count / ht->size,
 388           100.0 * ht->count / newsize);
 389 #endif
 390
 391   ht->size = newsize;
 392   ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
 393
 394   mappings = xnew_array (struct mapping, newsize);
 395   memset (mappings, INVALID_PTR_BYTE, newsize * sizeof (struct mapping));
 396   ht->mappings = mappings;
 397
 398   for (mp = old_mappings; mp < old_end; mp++)
 399     if (NON_EMPTY (mp))
 400       {
 401         struct mapping *new_mp;
 402         /* We don't need to test for uniqueness of keys because they
 403            come from the hash table and are therefore known to be
 404            unique.  */
 405         new_mp = mappings + HASH_POSITION (mp->key, hasher, newsize);
 406         LOOP_NON_EMPTY (new_mp, mappings, newsize)
 407           ;
 408         *new_mp = *mp;
 409       }
 410
 411   xfree (old_mappings);
 412 }
 413
 414 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
 415    table if necessary.  */
 416
 417 void
 418 hash_table_put (struct hash_table *ht, const void *key, void *value)
 419 {
 420   struct mapping *mp = find_mapping (ht, key);
 421   if (NON_EMPTY (mp))
 422     {
 423       /* update existing item */
 424       mp->key   = (void *)key; /* const? */
 425       mp->value = value;
 426       return;
 427     }
 428
 429   /* If adding the item would make the table exceed max. fullness,
 430      grow the table first.  */
 431   if (ht->count >= ht->resize_threshold)
 432     {
 433       grow_hash_table (ht);
 434       mp = find_mapping (ht, key);
 435     }
 436
 437   /* add new item */
 438   ++ht->count;
 439   mp->key   = (void *)key;      /* const? */
 440   mp->value = value;
 441 }
 442
 443 /* Remove a mapping that matches KEY from HT.  Return 0 if there was
 444    no such entry; return 1 if an entry was removed.  */
 445
 446 int
 447 hash_table_remove (struct hash_table *ht, const void *key)
 448 {
 449   struct mapping *mp = find_mapping (ht, key);
 450   if (!NON_EMPTY (mp))
 451     return 0;
 452   else
 453     {
 454       int size = ht->size;
 455       struct mapping *mappings = ht->mappings;
 456       hashfun_t hasher = ht->hash_function;
 457
 458       MARK_AS_EMPTY (mp);
 459       --ht->count;
 460
 461       /* Rehash all the entries following MP.  The alternative
 462          approach is to mark the entry as deleted, i.e. create a
 463          "tombstone".  That speeds up removal, but leaves a lot of
 464          garbage and slows down hash_table_get and hash_table_put.  */
 465
 466       mp = NEXT_MAPPING (mp, mappings, size);
 467       LOOP_NON_EMPTY (mp, mappings, size)
 468         {
 469           const void *key2 = mp->key;
 470           struct mapping *mp_new;
 471
 472           /* Find the new location for the key. */
 473           mp_new = mappings + HASH_POSITION (key2, hasher, size);
 474           LOOP_NON_EMPTY (mp_new, mappings, size)
 475             if (key2 == mp_new->key)
 476               /* The mapping MP (key2) is already where we want it (in
 477                  MP_NEW's "chain" of keys.)  */
 478               goto next_rehash;
 479
 480           *mp_new = *mp;
 481           MARK_AS_EMPTY (mp);
 482
 483         next_rehash:
 484           ;
 485         }
 486       return 1;
 487     }
 488 }
 489
 490 /* Clear HT of all entries.  After calling this function, the count
 491    and the fullness of the hash table will be zero.  The size will
 492    remain unchanged.  */
 493
 494 void
 495 hash_table_clear (struct hash_table *ht)
 496 {
 497   memset (ht->mappings, INVALID_PTR_BYTE, ht->size * sizeof (struct mapping));
 498   ht->count = 0;
 499 }
 500
 501 /* Map MAPFUN over all the mappings in hash table HT.  MAPFUN is
 502    called with three arguments: the key, the value, and MAPARG.
 503
 504    It is undefined what happens if you add or remove entries in the
 505    hash table while hash_table_map is running.  The exception is the
 506    entry you're currently mapping over; you may remove or change that
 507    entry.  */
 508
 509 void
 510 hash_table_map (struct hash_table *ht,
 511                 int (*mapfun) (void *, void *, void *),
 512                 void *maparg)
 513 {
 514   struct mapping *mp  = ht->mappings;
 515   struct mapping *end = ht->mappings + ht->size;
 516
 517   for (; mp < end; mp++)
 518     if (NON_EMPTY (mp))
 519       {
 520         void *key;
 521       repeat:
 522         key = mp->key;
 523         if (mapfun (key, mp->value, maparg))
 524           return;
 525         /* hash_table_remove might have moved the adjacent
 526            mappings. */
 527         if (mp->key != key && NON_EMPTY (mp))
 528           goto repeat;
 529       }
 530 }
 531
 532 /* Return the number of elements in the hash table.  This is not the
 533    same as the physical size of the hash table, which is always
 534    greater than the number of elements.  */
 535
 536 int
 537 hash_table_count (const struct hash_table *ht)
 538 {
 539   return ht->count;
 540 }
 541 \f
 542 /* Functions from this point onward are meant for convenience and
 543    don't strictly belong to this file.  However, this is as good a
 544    place for them as any.  */
 545
 546 /* Rules for creating custom hash and test functions:
 547
 548    - The test function returns non-zero for keys that are considered
 549      "equal", zero otherwise.
 550
 551    - The hash function returns a number that represents the
 552      "distinctness" of the object.  In more precise terms, it means
 553      that for any two objects that test "equal" under the test
 554      function, the hash function MUST produce the same result.
 555
 556      This does not mean that all different objects must produce
 557      different values (that would be "perfect" hashing), only that
 558      non-distinct objects must produce the same values!  For instance,
 559      a hash function that returns 0 for any given object is a
 560      perfectly valid (albeit extremely bad) hash function.  A hash
 561      function that hashes a string by adding up all its characters is
 562      another example of a valid (but quite bad) hash function.
 563
 564      It is not hard to make hash and test functions agree about
 565      equality.  For example, if the test function compares strings
 566      case-insensitively, the hash function can lower-case the
 567      characters when calculating the hash value.  That ensures that
 568      two strings differing only in case will hash the same.
 569
 570    - If you care about performance, choose a hash function with as
 571      good "spreading" as possible.  A good hash function will use all
 572      the bits of the input when calculating the hash, and will react
 573      to even small changes in input with a completely different
 574      output.  Finally, don't make the hash function itself overly
 575      slow, because you'll be incurring a non-negligible overhead to
 576      all hash table operations.  */
 577
 578 /*
 579  * Support for hash tables whose keys are strings.
 580  *
 581  */
 582
 583 /* 31 bit hash function.  Taken from Gnome's glib, modified to use
 584    standard C types.
 585
 586    We used to use the popular hash function from the Dragon Book, but
 587    this one seems to perform much better.  */
 588
 589 unsigned long
 590 string_hash (const void *key)
 591 {
 592   const char *p = key;
 593   unsigned int h = *p;
 594
 595   if (h)
 596     for (p += 1; *p != '\0'; p++)
 597       h = (h << 5) - h + *p;
 598
 599   return h;
 600 }
 601
 602 /* Frontend for strcmp usable for hash tables. */
 603
 604 int
 605 string_cmp (const void *s1, const void *s2)
 606 {
 607   return !strcmp ((const char *)s1, (const char *)s2);
 608 }
 609
 610 /* Return a hash table of preallocated to store at least ITEMS items
 611    suitable to use strings as keys.  */
 612
 613 struct hash_table *
 614 make_string_hash_table (int items)
 615 {
 616   return hash_table_new (items, string_hash, string_cmp);
 617 }
 618
 619 /*
 620  * Support for hash tables whose keys are strings, but which are
 621  * compared case-insensitively.
 622  *
 623  */
 624
 625 /* Like string_hash, but produce the same hash regardless of the case. */
 626
 627 static unsigned long
 628 string_hash_nocase (const void *key)
 629 {
 630   const char *p = key;
 631   unsigned int h = TOLOWER (*p);
 632
 633   if (h)
 634     for (p += 1; *p != '\0'; p++)
 635       h = (h << 5) - h + TOLOWER (*p);
 636
 637   return h;
 638 }
 639
 640 /* Like string_cmp, but doing case-insensitive compareison. */
 641
 642 static int
 643 string_cmp_nocase (const void *s1, const void *s2)
 644 {
 645   return !strcasecmp ((const char *)s1, (const char *)s2);
 646 }
 647
 648 /* Like make_string_hash_table, but uses string_hash_nocase and
 649    string_cmp_nocase.  */
 650
 651 struct hash_table *
 652 make_nocase_string_hash_table (int items)
 653 {
 654   return hash_table_new (items, string_hash_nocase, string_cmp_nocase);
 655 }
 656
 657 /* Hashing of numeric values, such as pointers and integers.
 658
 659    This implementation is the Robert Jenkins' 32 bit Mix Function,
 660    with a simple adaptation for 64-bit values.  It offers excellent
 661    spreading of values and doesn't need to know the hash table size to
 662    work (unlike the very popular Knuth's multiplication hash).  */
 663
 664 static unsigned long
 665 ptrhash (const void *ptr)
 666 {
 667   unsigned long key = (unsigned long)ptr;
 668   key += (key << 12);
 669   key ^= (key >> 22);
 670   key += (key << 4);
 671   key ^= (key >> 9);
 672   key += (key << 10);
 673   key ^= (key >> 2);
 674   key += (key << 7);
 675   key ^= (key >> 12);
 676 #if SIZEOF_LONG > 4
 677   key += (key << 44);
 678   key ^= (key >> 54);
 679   key += (key << 36);
 680   key ^= (key >> 41);
 681   key += (key << 42);
 682   key ^= (key >> 34);
 683   key += (key << 39);
 684   key ^= (key >> 44);
 685 #endif
 686   return key;
 687 }
 688
 689 static int
 690 ptrcmp (const void *ptr1, const void *ptr2)
 691 {
 692   return ptr1 == ptr2;
 693 }
 694 \f
 695 #ifdef STANDALONE
 696
 697 #include <stdio.h>
 698 #include <string.h>
 699
 700 int
 701 print_hash_table_mapper (void *key, void *value, void *count)
 702 {
 703   ++*(int *)count;
 704   printf ("%s: %s\n", (const char *)key, (char *)value);
 705   return 0;
 706 }
 707
 708 void
 709 print_hash (struct hash_table *sht)
 710 {
 711   int debug_count = 0;
 712   hash_table_map (sht, print_hash_table_mapper, &debug_count);
 713   assert (debug_count == sht->count);
 714 }
 715
 716 int
 717 main (void)
 718 {
 719   struct hash_table *ht = make_string_hash_table (0);
 720   char line[80];
 721   while ((fgets (line, sizeof (line), stdin)))
 722     {
 723       int len = strlen (line);
 724       if (len <= 1)
 725         continue;
 726       line[--len] = '\0';
 727       if (!hash_table_contains (ht, line))
 728         hash_table_put (ht, strdup (line), "here I am!");
 729 #if 1
 730       if (len % 5 == 0)
 731         {
 732           char *line_copy;
 733           if (hash_table_get_pair (ht, line, &line_copy, NULL))
 734             {
 735               hash_table_remove (ht, line);
 736               xfree (line_copy);
 737             }
 738         }
 739 #endif
 740     }
 741 #if 0
 742   print_hash (ht);
 743 #endif
 744 #if 1
 745   printf ("%d %d\n", ht->count, ht->size);
 746 #endif
 747   return 0;
 748 }
 749 #endif