sjero.net Git - wget/blob - src/hash.c

   1 /* Hash tables.
   2    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #ifdef HAVE_CONFIG_H
  31 # include <config.h>
  32 #endif
  33
  34 #ifdef HAVE_STRING_H
  35 # include <string.h>
  36 #else
  37 # include <strings.h>
  38 #endif /* HAVE_STRING_H */
  39 #include <stdlib.h>
  40 #include <assert.h>
  41
  42 #include "wget.h"
  43 #include "utils.h"
  44
  45 #include "hash.h"
  46
  47 #ifdef STANDALONE
  48 # undef xmalloc
  49 # undef xrealloc
  50 # undef xfree
  51
  52 # define xmalloc malloc
  53 # define xrealloc realloc
  54 # define xfree free
  55
  56 # undef TOLOWER
  57 # define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) - 32 : (x))
  58 #endif
  59
  60 /* INTERFACE:
  61
  62    Hash tables are a technique used to implement mapping between
  63    objects with near-constant-time access and storage.  The table
  64    associates keys to values, and a value can be very quickly
  65    retrieved by providing the key.  Fast lookup tables are typically
  66    implemented as hash tables.
  67
  68    The entry points are
  69      hash_table_new       -- creates the table.
  70      hash_table_destroy   -- destroys the table.
  71      hash_table_put       -- establishes or updates key->value mapping.
  72      hash_table_get       -- retrieves value of key.
  73      hash_table_get_pair  -- get key/value pair for key.
  74      hash_table_contains  -- test whether the table contains key.
  75      hash_table_remove    -- remove the key->value mapping for key.
  76      hash_table_map       -- iterate through table mappings.
  77      hash_table_clear     -- clear hash table contents.
  78      hash_table_count     -- return the number of entries in the table.
  79
  80    The hash table grows internally as new entries are added and is not
  81    limited in size, except by available memory.  The table doubles
  82    with each resize, which ensures that the amortized time per
  83    operation remains constant.
  84
  85    By default, tables created by hash_table_new consider the keys to
  86    be equal if their pointer values are the same.  You can use
  87    make_string_hash_table to create tables whose keys are considered
  88    equal if their string contents are the same.  In the general case,
  89    the criterion of equality used to compare keys is specified at
  90    table creation time with two callback functions, "hash" and "test".
  91    The hash function transforms the key into an arbitrary number that
  92    must be the same for two equal keys.  The test function accepts two
  93    keys and returns non-zero if they are to be considered equal.
  94
  95    Note that neither keys nor values are copied when inserted into the
  96    hash table, so they must exist for the lifetime of the table.  This
  97    means that e.g. the use of static strings is OK, but objects with a
  98    shorter life-time need to be copied (with strdup() or the like in
  99    the case of strings) before being inserted.  */
 100
 101 /* IMPLEMENTATION:
 102
 103    The hash table is implemented as an open-addressed table with
 104    linear probing collision resolution.
 105
 106    For those not up to CS parlance, it means that all the hash entries
 107    (pairs of pointers key and value) are stored in a contiguous array.
 108    The position of each mapping is determined by the hash value of its
 109    key and the size of the table: location := hash(key) % size.  If
 110    two different keys end up on the same position (collide), the one
 111    that came second is placed at the next empty position following the
 112    occupied place.  This collision resolution technique is called
 113    "linear probing".
 114
 115    There are more advanced collision resolution methods (quadratic
 116    probing, double hashing), but we don't use them because they incur
 117    more non-sequential access to the array, which results in worse CPU
 118    cache behavior.  Linear probing works well as long as the
 119    count/size ratio (fullness) is kept below 75%.  We make sure to
 120    grow and rehash the table whenever this threshold is exceeded.
 121
 122    Collisions make deletion tricky because clearing a position
 123    followed by a colliding entry would make the position seem empty
 124    and the colliding entry not found.  One solution is to leave a
 125    "tombstone" instead of clearing the entry, and another is to
 126    carefully rehash the entries immediately following the deleted one.
 127    We use the latter method because it results in less bookkeeping and
 128    faster retrieval at the (slight) expense of deletion.  */
 129
 130 /* Maximum allowed fullness: when hash table's fullness exceeds this
 131    value, the table is resized.  */
 132 #define HASH_MAX_FULLNESS 0.75
 133
 134 /* The hash table size is multiplied by this factor (and then rounded
 135    to the next prime) with each resize.  This guarantees infrequent
 136    resizes.  */
 137 #define HASH_RESIZE_FACTOR 2
 138
 139 struct mapping {
 140   void *key;
 141   void *value;
 142 };
 143
 144 struct hash_table {
 145   unsigned long (*hash_function) PARAMS ((const void *));
 146   int (*test_function) PARAMS ((const void *, const void *));
 147
 148   int size;                     /* size of the array. */
 149   int count;                    /* number of non-empty entries. */
 150
 151   int resize_threshold;         /* after size exceeds this number of
 152                                    entries, resize the table.  */
 153   int prime_offset;             /* the offset of the current prime in
 154                                    the prime table. */
 155
 156   struct mapping *mappings;     /* the array of mapping pairs. */
 157 };
 158
 159 /* We use all-bit-set marker to mean that a mapping is empty.  It is
 160    (hopefully) illegal as a pointer, and it allows the users to use
 161    NULL (as well as any non-negative integer) as key.  */
 162 #define NON_EMPTY(mp) (mp->key != (void *)~(unsigned long)0)
 163
 164 /* "Next" mapping is the mapping after MP, but wrapping back to
 165    MAPPINGS when MP would reach MAPPINGS+SIZE.  */
 166 #define NEXT_MAPPING(mp, mappings, size) (mp != mappings + (size - 1)   \
 167                                           ? mp + 1 : mappings)
 168
 169 /* Loop over non-empty mappings starting at MP. */
 170 #define LOOP_NON_EMPTY(mp, mappings, size)                              \
 171   for (; NON_EMPTY (mp); mp = NEXT_MAPPING (mp, mappings, size))
 172
 173 /* #### Some implementations multiply the hash with the "golden ratio"
 174    of the table to get better spread for keys that do not come from a
 175    good hashing source.  I'm not sure if that is necessary for the
 176    hash functions we use.  */
 177
 178 #define HASH_POSITION(ht, key) (ht->hash_function (key) % ht->size)
 179
 180 /* Find a prime near, but greather than or equal to SIZE.  Of course,
 181    the primes are not calculated, but looked up from a table.  The
 182    table does not contain all primes in range, just a selection useful
 183    for this purpose.
 184
 185    PRIME_OFFSET is a minor optimization: if specified, it starts the
 186    search for the prime number beginning with the specific offset in
 187    the prime number table.  The final offset is stored in the same
 188    variable.  */
 189
 190 static int
 191 prime_size (int size, int *prime_offset)
 192 {
 193   static const unsigned long primes [] = {
 194     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
 195     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
 196     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
 197     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
 198     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
 199     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
 200     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
 201     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
 202     1174703521, 1527114613, 1985248999,
 203     (unsigned long)0x99d43ea5, (unsigned long)0xc7fa5177
 204   };
 205   int i = *prime_offset;
 206
 207   for (; i < countof (primes); i++)
 208     if (primes[i] >= size)
 209       {
 210         /* Set the offset to the next prime.  That is safe because,
 211            next time we are called, it will be with a larger SIZE,
 212            which means we could never return the same prime anyway.
 213            (If that is not the case, the caller can simply reset
 214            *prime_offset.)  */
 215         *prime_offset = i + 1;
 216         return primes[i];
 217       }
 218
 219   abort ();
 220   return 0;
 221 }
 222
 223 static unsigned long ptrhash PARAMS ((const void *));
 224 static int ptrcmp PARAMS ((const void *, const void *));
 225
 226 /* Create a hash table with hash function HASH_FUNCTION and test
 227    function TEST_FUNCTION.  The table is empty (its count is 0), but
 228    pre-allocated to store at least ITEMS items.
 229
 230    ITEMS is the number of items that the table can accept without
 231    needing to resize.  It is useful when creating a table that is to
 232    be immediately filled with a known number of items.  In that case,
 233    the regrows are a waste of time, and specifying ITEMS correctly
 234    will avoid them altogether.
 235
 236    Note that hash tables grow dynamically regardless of ITEMS.  The
 237    only use of ITEMS is to preallocate the table and avoid unnecessary
 238    dynamic regrows.  Don't bother making ITEMS prime because it's not
 239    used as size unchanged.  To start with a small table that grows as
 240    needed, simply specify zero ITEMS.
 241
 242    If HASH_FUNCTION is not provided, identity table is assumed,
 243    i.e. key pointers are compared as keys.  If you want strings with
 244    equal contents to hash the same, use make_string_hash_table.  */
 245
 246 struct hash_table *
 247 hash_table_new (int items,
 248                 unsigned long (*hash_function) (const void *),
 249                 int (*test_function) (const void *, const void *))
 250 {
 251   int size;
 252   struct hash_table *ht = xnew (struct hash_table);
 253
 254   ht->hash_function = hash_function ? hash_function : ptrhash;
 255   ht->test_function = test_function ? test_function : ptrcmp;
 256
 257   /* If the size of struct hash_table ever becomes a concern, this
 258      field can go.  (Wget doesn't create many hashes.)  */
 259   ht->prime_offset = 0;
 260
 261   /* Calculate the size that ensures that the table will store at
 262      least ITEMS keys without the need to resize.  */
 263   size = 1 + items / HASH_MAX_FULLNESS;
 264   size = prime_size (size, &ht->prime_offset);
 265   ht->size = size;
 266   ht->resize_threshold = size * HASH_MAX_FULLNESS;
 267   /*assert (ht->resize_threshold >= items);*/
 268
 269   ht->mappings = xnew_array (struct mapping, ht->size);
 270   /* Mark mappings as empty.  We use 0xff rather than 0 to mark empty
 271      keys because it allows us to store NULL keys to the table.  */
 272   memset (ht->mappings, 255, size * sizeof (struct mapping));
 273
 274   ht->count = 0;
 275
 276   return ht;
 277 }
 278
 279 /* Free the data associated with hash table HT. */
 280
 281 void
 282 hash_table_destroy (struct hash_table *ht)
 283 {
 284   xfree (ht->mappings);
 285   xfree (ht);
 286 }
 287
 288 /* The heart of most functions in this file -- find the mapping whose
 289    KEY is equal to key, using linear probing.  Returns the mapping
 290    that matches KEY, or the first empty mapping if none matches.  */
 291
 292 static inline struct mapping *
 293 find_mapping (const struct hash_table *ht, const void *key)
 294 {
 295   struct mapping *mappings = ht->mappings;
 296   int size = ht->size;
 297   struct mapping *mp = mappings + HASH_POSITION (ht, key);
 298   int (*equals) PARAMS ((const void *, const void *)) = ht->test_function;
 299
 300   LOOP_NON_EMPTY (mp, mappings, size)
 301     if (equals (key, mp->key))
 302       break;
 303   return mp;
 304 }
 305
 306 /* Get the value that corresponds to the key KEY in the hash table HT.
 307    If no value is found, return NULL.  Note that NULL is a legal value
 308    for value; if you are storing NULLs in your hash table, you can use
 309    hash_table_contains to be sure that a (possibly NULL) value exists
 310    in the table.  Or, you can use hash_table_get_pair instead of this
 311    function.  */
 312
 313 void *
 314 hash_table_get (const struct hash_table *ht, const void *key)
 315 {
 316   struct mapping *mp = find_mapping (ht, key);
 317   if (NON_EMPTY (mp))
 318     return mp->value;
 319   else
 320     return NULL;
 321 }
 322
 323 /* Like hash_table_get, but writes out the pointers to both key and
 324    value.  Returns non-zero on success.  */
 325
 326 int
 327 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
 328                      void *orig_key, void *value)
 329 {
 330   struct mapping *mp = find_mapping (ht, lookup_key);
 331   if (NON_EMPTY (mp))
 332     {
 333       if (orig_key)
 334         *(void **)orig_key = mp->key;
 335       if (value)
 336         *(void **)value = mp->value;
 337       return 1;
 338     }
 339   else
 340     return 0;
 341 }
 342
 343 /* Return 1 if HT contains KEY, 0 otherwise. */
 344
 345 int
 346 hash_table_contains (const struct hash_table *ht, const void *key)
 347 {
 348   struct mapping *mp = find_mapping (ht, key);
 349   return NON_EMPTY (mp);
 350 }
 351
 352 /* Grow hash table HT as necessary, and rehash all the key-value
 353    mappings.  */
 354
 355 static void
 356 grow_hash_table (struct hash_table *ht)
 357 {
 358   struct mapping *old_mappings = ht->mappings;
 359   struct mapping *old_end      = ht->mappings + ht->size;
 360   struct mapping *mp, *mappings;
 361   int newsize;
 362
 363   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
 364 #if 0
 365   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
 366           ht->size, newsize,
 367           100.0 * ht->count / ht->size,
 368           100.0 * ht->count / newsize);
 369 #endif
 370
 371   ht->size = newsize;
 372   ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
 373
 374   mappings = xnew_array (struct mapping, newsize);
 375   memset (mappings, 255, newsize * sizeof (struct mapping));
 376   ht->mappings = mappings;
 377
 378   for (mp = old_mappings; mp < old_end; mp++)
 379     if (NON_EMPTY (mp))
 380       {
 381         struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
 382         /* We don't need to test for uniqueness of keys because they
 383            come from the hash table and are therefore known to be
 384            unique.  */
 385         LOOP_NON_EMPTY (new_mp, mappings, newsize)
 386           ;
 387         *new_mp = *mp;
 388       }
 389
 390   xfree (old_mappings);
 391 }
 392
 393 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
 394    table if necessary.  */
 395
 396 void
 397 hash_table_put (struct hash_table *ht, const void *key, void *value)
 398 {
 399   struct mapping *mp = find_mapping (ht, key);
 400   if (NON_EMPTY (mp))
 401     {
 402       /* update existing item */
 403       mp->key   = (void *)key; /* const? */
 404       mp->value = value;
 405       return;
 406     }
 407
 408   /* If adding the item would make the table exceed max. fullness,
 409      grow the table first.  */
 410   if (ht->count >= ht->resize_threshold)
 411     {
 412       grow_hash_table (ht);
 413       mp = find_mapping (ht, key);
 414     }
 415
 416   /* add new item */
 417   ++ht->count;
 418   mp->key   = (void *)key;      /* const? */
 419   mp->value = value;
 420 }
 421
 422 /* Remove a mapping that matches KEY from HT.  Return 0 if there was
 423    no such entry; return 1 if an entry was removed.  */
 424
 425 int
 426 hash_table_remove (struct hash_table *ht, const void *key)
 427 {
 428   struct mapping *mp = find_mapping (ht, key);
 429   if (!NON_EMPTY (mp))
 430     return 0;
 431   else
 432     {
 433       int size = ht->size;
 434       struct mapping *mappings = ht->mappings;
 435
 436       mp->key = NULL;
 437       --ht->count;
 438
 439       /* Rehash all the entries following MP.  The alternative
 440          approach is to mark the entry as deleted, i.e. create a
 441          "tombstone".  That makes remove faster, but leaves a lot of
 442          garbage and slows down hash_table_get and hash_table_put.  */
 443
 444       mp = NEXT_MAPPING (mp, mappings, size);
 445       LOOP_NON_EMPTY (mp, mappings, size)
 446         {
 447           const void *key2 = mp->key;
 448           struct mapping *mp_new = mappings + HASH_POSITION (ht, key2);
 449
 450           /* Find the new location for the key. */
 451
 452           LOOP_NON_EMPTY (mp_new, mappings, size)
 453             if (key2 == mp_new->key)
 454               /* The mapping MP (key2) is already where we want it (in
 455                  MP_NEW's "chain" of keys.)  */
 456               goto next_rehash;
 457
 458           *mp_new = *mp;
 459           mp->key = NULL;
 460
 461         next_rehash:
 462           ;
 463         }
 464       return 1;
 465     }
 466 }
 467
 468 /* Clear HT of all entries.  After calling this function, the count
 469    and the fullness of the hash table will be zero.  The size will
 470    remain unchanged.  */
 471
 472 void
 473 hash_table_clear (struct hash_table *ht)
 474 {
 475   memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
 476   ht->count = 0;
 477 }
 478
 479 /* Map MAPFUN over all the mappings in hash table HT.  MAPFUN is
 480    called with three arguments: the key, the value, and MAPARG.
 481
 482    It is undefined what happens if you add or remove entries in the
 483    hash table while hash_table_map is running.  The exception is the
 484    entry you're currently mapping over; you may remove or change that
 485    entry.  */
 486
 487 void
 488 hash_table_map (struct hash_table *ht,
 489                 int (*mapfun) (void *, void *, void *),
 490                 void *maparg)
 491 {
 492   struct mapping *mp  = ht->mappings;
 493   struct mapping *end = ht->mappings + ht->size;
 494
 495   for (; mp < end; mp++)
 496     if (NON_EMPTY (mp))
 497       {
 498         void *key;
 499       repeat:
 500         key = mp->key;
 501         if (mapfun (key, mp->value, maparg))
 502           return;
 503         /* hash_table_remove might have moved the adjacent
 504            mappings. */
 505         if (mp->key != key && NON_EMPTY (mp))
 506           goto repeat;
 507       }
 508 }
 509
 510 /* Return the number of elements in the hash table.  This is not the
 511    same as the physical size of the hash table, which is always
 512    greater than the number of elements.  */
 513
 514 int
 515 hash_table_count (const struct hash_table *ht)
 516 {
 517   return ht->count;
 518 }
 519 \f
 520 /* Functions from this point onward are meant for convenience and
 521    don't strictly belong to this file.  However, this is as good a
 522    place for them as any.  */
 523
 524 /* Rules for creating custom hash and test functions:
 525
 526    - The test function returns non-zero for keys that are considered
 527      "equal", zero otherwise.
 528
 529    - The hash function returns a number that represents the
 530      "distinctness" of the object.  In more precise terms, it means
 531      that for any two objects that test "equal" under the test
 532      function, the hash function MUST produce the same result.
 533
 534      This does not mean that all different objects must produce
 535      different values (that would be "perfect" hashing), only that
 536      non-distinct objects must produce the same values!  For instance,
 537      a hash function that returns 0 for any given object is a
 538      perfectly valid (albeit extremely bad) hash function.  A hash
 539      function that hashes a string by adding up all its characters is
 540      another example of a valid (but quite bad) hash function.
 541
 542      It is not hard to make hash and test functions agree about
 543      equality.  For example, if the test function compares strings
 544      case-insensitively, the hash function can lower-case the
 545      characters when calculating the hash value.  That ensures that
 546      two strings differing only in case will hash the same.
 547
 548    - If you care about performance, choose a hash function with as
 549      good "spreading" as possible.  A good hash function will use all
 550      the bits of the input when calculating the hash, and will react
 551      to even small changes in input with a completely different
 552      output.  Finally, don't make the hash function itself overly
 553      slow, because you'll be incurring a non-negligible overhead to
 554      all hash table operations.  */
 555
 556 /*
 557  * Support for hash tables whose keys are strings.
 558  *
 559  */
 560
 561 /* 31 bit hash function.  Taken from Gnome's glib, modified to use
 562    standard C types.
 563
 564    We used to use the popular hash function from the Dragon Book, but
 565    this one seems to perform much better.  */
 566
 567 unsigned long
 568 string_hash (const void *key)
 569 {
 570   const char *p = key;
 571   unsigned int h = *p;
 572
 573   if (h)
 574     for (p += 1; *p != '\0'; p++)
 575       h = (h << 5) - h + *p;
 576
 577   return h;
 578 }
 579
 580 /* Frontend for strcmp usable for hash tables. */
 581
 582 int
 583 string_cmp (const void *s1, const void *s2)
 584 {
 585   return !strcmp ((const char *)s1, (const char *)s2);
 586 }
 587
 588 /* Return a hash table of preallocated to store at least ITEMS items
 589    suitable to use strings as keys.  */
 590
 591 struct hash_table *
 592 make_string_hash_table (int items)
 593 {
 594   return hash_table_new (items, string_hash, string_cmp);
 595 }
 596
 597 /*
 598  * Support for hash tables whose keys are strings, but which are
 599  * compared case-insensitively.
 600  *
 601  */
 602
 603 /* Like string_hash, but produce the same hash regardless of the case. */
 604
 605 static unsigned long
 606 string_hash_nocase (const void *key)
 607 {
 608   const char *p = key;
 609   unsigned int h = TOLOWER (*p);
 610
 611   if (h)
 612     for (p += 1; *p != '\0'; p++)
 613       h = (h << 5) - h + TOLOWER (*p);
 614
 615   return h;
 616 }
 617
 618 /* Like string_cmp, but doing case-insensitive compareison. */
 619
 620 static int
 621 string_cmp_nocase (const void *s1, const void *s2)
 622 {
 623   return !strcasecmp ((const char *)s1, (const char *)s2);
 624 }
 625
 626 /* Like make_string_hash_table, but uses string_hash_nocase and
 627    string_cmp_nocase.  */
 628
 629 struct hash_table *
 630 make_nocase_string_hash_table (int items)
 631 {
 632   return hash_table_new (items, string_hash_nocase, string_cmp_nocase);
 633 }
 634
 635 /* Hashing of pointers.  Used for hash tables that are keyed by
 636    pointer identity.  (Common Lisp calls them EQ hash tables, and Java
 637    calls them IdentityHashMaps.)  */
 638
 639 static unsigned long
 640 ptrhash (const void *ptr)
 641 {
 642   unsigned long key = (unsigned long)ptr;
 643   key += (key << 12);
 644   key ^= (key >> 22);
 645   key += (key << 4);
 646   key ^= (key >> 9);
 647   key += (key << 10);
 648   key ^= (key >> 2);
 649   key += (key << 7);
 650   key ^= (key >> 12);
 651 #if SIZEOF_LONG > 4
 652   key += (key << 44);
 653   key ^= (key >> 54);
 654   key += (key << 36);
 655   key ^= (key >> 41);
 656   key += (key << 42);
 657   key ^= (key >> 34);
 658   key += (key << 39);
 659   key ^= (key >> 44);
 660 #endif
 661   return key;
 662 }
 663
 664 static int
 665 ptrcmp (const void *ptr1, const void *ptr2)
 666 {
 667   return ptr1 == ptr2;
 668 }
 669 \f
 670 #ifdef STANDALONE
 671
 672 #include <stdio.h>
 673 #include <string.h>
 674
 675 int
 676 print_hash_table_mapper (void *key, void *value, void *count)
 677 {
 678   ++*(int *)count;
 679   printf ("%s: %s\n", (const char *)key, (char *)value);
 680   return 0;
 681 }
 682
 683 void
 684 print_hash (struct hash_table *sht)
 685 {
 686   int debug_count = 0;
 687   hash_table_map (sht, print_hash_table_mapper, &debug_count);
 688   assert (debug_count == sht->count);
 689 }
 690
 691 int
 692 main (void)
 693 {
 694   struct hash_table *ht = make_string_hash_table (0);
 695   char line[80];
 696   while ((fgets (line, sizeof (line), stdin)))
 697     {
 698       int len = strlen (line);
 699       if (len <= 1)
 700         continue;
 701       line[--len] = '\0';
 702       if (!hash_table_contains (ht, line))
 703         hash_table_put (ht, strdup (line), "here I am!");
 704 #if 1
 705       if (len % 5 == 0)
 706         {
 707           char *line_copy;
 708           if (hash_table_get_pair (ht, line, &line_copy, NULL))
 709             {
 710               hash_table_remove (ht, line);
 711               xfree (line_copy);
 712             }
 713         }
 714 #endif
 715     }
 716 #if 0
 717   print_hash (ht);
 718 #endif
 719 #if 1
 720   printf ("%d %d\n", ht->count, ht->size);
 721 #endif
 722   return 0;
 723 }
 724 #endif