iri.h is already included in wget.h, so don't include it in C files

[wget] / src / hash.c
diff --git a/src/hash.c b/src/hash.c

index 36a90be82c9cffcc408f5a2846f4b775a12f7954..1e19fd63dc234283ce105d0e3e68b891a4ea303b 100644 (file)
--- a/src/hash.c
+++ b/src/hash.c
@@ -1,11 +1,12 @@
  /* Hash tables.
-   Copyright (C) 2000-2005 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+   2008 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
  GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at
+the Free Software Foundation; either version 3 of the License, or (at
  your option) any later version.
  
  GNU Wget is distributed in the hope that it will be useful,
@@ -14,24 +15,24 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables.  You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL".  If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so.  If you do not wish to do
-so, delete this exception statement from your version.  */
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
+
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work.  */
  
  /* With -DSTANDALONE, this file can be compiled outside Wget source
     tree.  To test, also use -DTEST.  */
  
-#ifdef HAVE_CONFIG_H
-# include <config.h>
+#ifndef STANDALONE
+# include "wget.h"
  #endif
  
  #include <stdio.h>
@@ -42,7 +43,6 @@ so, delete this exception statement from your version.  */
  
  #ifndef STANDALONE
  /* Get Wget's utility headers. */
-# include "wget.h"
  # include "utils.h"
  #else
  /* Make do without them. */
@@ -50,8 +50,16 @@ so, delete this exception statement from your version.  */
  # define xnew_array(type, x) xmalloc (sizeof (type) * (x))
  # define xmalloc malloc
  # define xfree free
-# define countof(x) (sizeof (x) / sizeof ((x)[0]))
-# define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) - 32 : (x))
+# ifndef countof
+#  define countof(x) (sizeof (x) / sizeof ((x)[0]))
+# endif
+# include <ctype.h>
+# define c_tolower(x) tolower ((unsigned char) (x))
+# if __STDC_VERSION__ >= 199901L
+#  include <stdint.h>  /* for uintptr_t */
+# else
+   typedef unsigned long uintptr_t;
+# endif
  #endif
  
  #include "hash.h"
@@ -149,14 +157,14 @@ struct hash_table {
    hashfun_t hash_function;
    testfun_t test_function;
  
-  struct cell *cells;          /* contiguous array of cells. */
-  int size;                    /* size of the array. */
+  struct cell *cells;           /* contiguous array of cells. */
+  int size;                     /* size of the array. */
  
-  int count;                   /* number of occupied entries. */
-  int resize_threshold;                /* after size exceeds this number of
-                                  entries, resize the table.  */
-  int prime_offset;            /* the offset of the current prime in
-                                  the prime table. */
+  int count;                    /* number of occupied entries. */
+  int resize_threshold;         /* after size exceeds this number of
+                                   entries, resize the table.  */
+  int prime_offset;             /* the offset of the current prime in
+                                   the prime table. */
  };
  
  /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
@@ -170,7 +178,7 @@ struct hash_table {
     -1.  This is acceptable because it still allows the use of
     nonnegative integer keys.  */
  
-#define INVALID_PTR ((void *) ~0UL)
+#define INVALID_PTR ((void *) ~(uintptr_t) 0)
  #ifndef UCHAR_MAX
  # define UCHAR_MAX 0xff
  #endif
@@ -188,7 +196,7 @@ struct hash_table {
  
  /* Loop over occupied cells starting at C, terminating the loop when
     an empty cell is encountered.  */
-#define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                              \
+#define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
    for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
  
  /* Return the position of KEY in hash table SIZE large, hash function
@@ -218,18 +226,18 @@ prime_size (int size, int *prime_offset)
      243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
      1174703521, 1527114613, 1837299131, 2147483647
    };
-  int i;
+  size_t i;
  
    for (i = *prime_offset; i < countof (primes); i++)
      if (primes[i] >= size)
        {
-       /* Set the offset to the next prime.  That is safe because,
-          next time we are called, it will be with a larger SIZE,
-          which means we could never return the same prime anyway.
-          (If that is not the case, the caller can simply reset
-          *prime_offset.)  */
-       *prime_offset = i + 1;
-       return primes[i];
+        /* Set the offset to the next prime.  That is safe because,
+           next time we are called, it will be with a larger SIZE,
+           which means we could never return the same prime anyway.
+           (If that is not the case, the caller can simply reset
+           *prime_offset.)  */
+        *prime_offset = i + 1;
+        return primes[i];
        }
  
    abort ();
@@ -263,8 +271,8 @@ static int cmp_pointer (const void *, const void *);
  
  struct hash_table *
  hash_table_new (int items,
-               unsigned long (*hash_function) (const void *),
-               int (*test_function) (const void *, const void *))
+                unsigned long (*hash_function) (const void *),
+                int (*test_function) (const void *, const void *))
  {
    int size;
    struct hash_table *ht = xnew (struct hash_table);
@@ -344,15 +352,15 @@ hash_table_get (const struct hash_table *ht, const void *key)
  
  int
  hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
-                    void *orig_key, void *value)
+                     void *orig_key, void *value)
  {
    struct cell *c = find_cell (ht, lookup_key);
    if (CELL_OCCUPIED (c))
      {
        if (orig_key)
-       *(void **)orig_key = c->key;
+        *(void **)orig_key = c->key;
        if (value)
-       *(void **)value = c->value;
+        *(void **)value = c->value;
        return 1;
      }
    else
@@ -383,9 +391,9 @@ grow_hash_table (struct hash_table *ht)
    newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
  #if 0
    printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
-         ht->size, newsize,
-         100.0 * ht->count / ht->size,
-         100.0 * ht->count / newsize);
+          ht->size, newsize,
+          100.0 * ht->count / ht->size,
+          100.0 * ht->count / newsize);
  #endif
  
    ht->size = newsize;
@@ -398,14 +406,14 @@ grow_hash_table (struct hash_table *ht)
    for (c = old_cells; c < old_end; c++)
      if (CELL_OCCUPIED (c))
        {
-       struct cell *new_c;
-       /* We don't need to test for uniqueness of keys because they
-          come from the hash table and are therefore known to be
-          unique.  */
-       new_c = cells + HASH_POSITION (c->key, hasher, newsize);
-       FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
-         ;
-       *new_c = *c;
+        struct cell *new_c;
+        /* We don't need to test for uniqueness of keys because they
+           come from the hash table and are therefore known to be
+           unique.  */
+        new_c = cells + HASH_POSITION (c->key, hasher, newsize);
+        FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
+          ;
+        *new_c = *c;
        }
  
    xfree (old_cells);
@@ -436,7 +444,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
  
    /* add new item */
    ++ht->count;
-  c->key   = (void *)key;      /* const? */
+  c->key   = (void *)key;       /* const? */
    c->value = value;
  }
  
@@ -459,30 +467,30 @@ hash_table_remove (struct hash_table *ht, const void *key)
        --ht->count;
  
        /* Rehash all the entries following C.  The alternative
-        approach is to mark the entry as deleted, i.e. create a
-        "tombstone".  That speeds up removal, but leaves a lot of
-        garbage and slows down hash_table_get and hash_table_put.  */
+         approach is to mark the entry as deleted, i.e. create a
+         "tombstone".  That speeds up removal, but leaves a lot of
+         garbage and slows down hash_table_get and hash_table_put.  */
  
        c = NEXT_CELL (c, cells, size);
        FOREACH_OCCUPIED_ADJACENT (c, cells, size)
-       {
-         const void *key2 = c->key;
-         struct cell *c_new;
-
-         /* Find the new location for the key. */
-         c_new = cells + HASH_POSITION (key2, hasher, size);
-         FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
-           if (key2 == c_new->key)
-             /* The cell C (key2) is already where we want it (in
-                C_NEW's "chain" of keys.)  */
-             goto next_rehash;
-
-         *c_new = *c;
-         CLEAR_CELL (c);
-
-       next_rehash:
-         ;
-       }
+        {
+          const void *key2 = c->key;
+          struct cell *c_new;
+
+          /* Find the new location for the key. */
+          c_new = cells + HASH_POSITION (key2, hasher, size);
+          FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
+            if (key2 == c_new->key)
+              /* The cell C (key2) is already where we want it (in
+                 C_NEW's "chain" of keys.)  */
+              goto next_rehash;
+
+          *c_new = *c;
+          CLEAR_CELL (c);
+
+        next_rehash:
+          ;
+        }
        return 1;
      }
  }
@@ -511,7 +519,7 @@ hash_table_clear (struct hash_table *ht)
  
  void
  hash_table_for_each (struct hash_table *ht,
-                    int (*fn) (void *, void *, void *), void *arg)
+                     int (*fn) (void *, void *, void *), void *arg)
  {
    struct cell *c = ht->cells;
    struct cell *end = ht->cells + ht->size;
@@ -519,19 +527,19 @@ hash_table_for_each (struct hash_table *ht,
    for (; c < end; c++)
      if (CELL_OCCUPIED (c))
        {
-       void *key;
+        void *key;
        repeat:
-       key = c->key;
-       if (fn (key, c->value, arg))
-         return;
-       /* hash_table_remove might have moved the adjacent cells. */
-       if (c->key != key && CELL_OCCUPIED (c))
-         goto repeat;
+        key = c->key;
+        if (fn (key, c->value, arg))
+          return;
+        /* hash_table_remove might have moved the adjacent cells. */
+        if (c->key != key && CELL_OCCUPIED (c))
+          goto repeat;
        }
  }
  
-/* Initiate iteration over HT.  Get the next entry using
-   hash_table_iter_next.  The typical loop looks like this:
+/* Initiate iteration over HT.  Entries are obtained with
+   hash_table_iter_next, a typical iteration loop looking like this:
  
         hash_table_iterator iter;
         for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
@@ -553,8 +561,8 @@ hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
     ITER->value respectively and 1 is returned.  When there are no more
     entries, 0 is returned.
  
-   The hash table must not be modified between calls to this
-   function.  */
+   If the hash table is modified between calls to this function, the
+   result is undefined.  */
  
  int
  hash_table_iter_next (hash_table_iterator *iter)
@@ -564,10 +572,10 @@ hash_table_iter_next (hash_table_iterator *iter)
    for (; c < end; c++)
      if (CELL_OCCUPIED (c))
        {
-       iter->key = c->key;
-       iter->value = c->value;
-       iter->pos = c + 1;
-       return 1;
+        iter->key = c->key;
+        iter->value = c->value;
+        iter->pos = c + 1;
+        return 1;
        }
    return 0;
  }
@@ -602,7 +610,7 @@ hash_table_count (const struct hash_table *ht)
       a hash function that returns 0 for any given object is a
       perfectly valid (albeit extremely bad) hash function.  A hash
       function that hashes a string by adding up all its characters is
-     another example of a valid (but quite bad) hash function.
+     another example of a valid (but still quite bad) hash function.
  
       It is not hard to make hash and test functions agree about
       equality.  For example, if the test function compares strings
@@ -610,24 +618,25 @@ hash_table_count (const struct hash_table *ht)
       characters when calculating the hash value.  That ensures that
       two strings differing only in case will hash the same.
  
-   - If you care about performance, choose a hash function with as
-     good "spreading" as possible.  A good hash function will use all
-     the bits of the input when calculating the hash, and will react
-     to even small changes in input with a completely different
-     output.  Finally, don't make the hash function itself overly
-     slow, because you'll be incurring a non-negligible overhead to
-     all hash table operations.  */
+   - To prevent performance degradation, choose a hash function with
+     as good "spreading" as possible.  A good hash function will use
+     all the bits of the input when calculating the hash, and will
+     react to even small changes in input with a completely different
+     output.  But don't make the hash function itself overly slow,
+     because you'll be incurring a non-negligible overhead to all hash
+     table operations.  */
  
  /*
   * Support for hash tables whose keys are strings.
   *
   */
     
-/* 31 bit hash function.  Taken from Gnome's glib, modified to use
+/* Base 31 hash function.  Taken from Gnome's glib, modified to use
     standard C types.
  
     We used to use the popular hash function from the Dragon Book, but
-   this one seems to perform much better.  */
+   this one seems to perform much better, both by being faster and by
+   generating less collisions.  */
  
  static unsigned long
  hash_string (const void *key)
@@ -671,11 +680,11 @@ static unsigned long
  hash_string_nocase (const void *key)
  {
    const char *p = key;
-  unsigned int h = TOLOWER (*p);
+  unsigned int h = c_tolower (*p);
    
    if (h)
      for (p += 1; *p != '\0'; p++)
-      h = (h << 5) - h + TOLOWER (*p);
+      h = (h << 5) - h + c_tolower (*p);
    
    return h;
  }
@@ -708,7 +717,7 @@ make_nocase_string_hash_table (int items)
  unsigned long
  hash_pointer (const void *ptr)
  {
-  unsigned long key = (unsigned long) ptr;
+  uintptr_t key = (uintptr_t) ptr;
    key += (key << 12);
    key ^= (key >> 22);
    key += (key << 4);
@@ -717,7 +726,7 @@ hash_pointer (const void *ptr)
    key ^= (key >> 2);
    key += (key << 7);
    key ^= (key >> 12);
-#if SIZEOF_LONG > 4
+#if SIZEOF_VOID_P > 4
    key += (key << 44);
    key ^= (key >> 54);
    key += (key << 36);
@@ -727,7 +736,7 @@ hash_pointer (const void *ptr)
    key += (key << 39);
    key ^= (key >> 44);
  #endif
-  return key;
+  return (unsigned long) key;
  }
  
  static int
@@ -762,20 +771,20 @@ main (void)
      {
        int len = strlen (line);
        if (len <= 1)
-       continue;
+        continue;
        line[--len] = '\0';
        if (!hash_table_contains (ht, line))
-       hash_table_put (ht, strdup (line), "here I am!");
+        hash_table_put (ht, strdup (line), "here I am!");
  #if 1
        if (len % 5 == 0)
-       {
-         char *line_copy;
-         if (hash_table_get_pair (ht, line, &line_copy, NULL))
-           {
-             hash_table_remove (ht, line);
-             xfree (line_copy);
-           }
-       }
+        {
+          char *line_copy;
+          if (hash_table_get_pair (ht, line, &line_copy, NULL))
+            {
+              hash_table_remove (ht, line);
+              xfree (line_copy);
+            }
+        }
  #endif
      }
  #if 0