# include <string.h>
#else
# include <strings.h>
-#endif /* HAVE_STRING_H */
+#endif
+#ifdef HAVE_LIMITS_H
+# include <limits.h>
+#endif
#include <stdlib.h>
#include <assert.h>
The hash table is implemented as an open-addressed table with
linear probing collision resolution.
- For those not up to CS parlance, it means that all the hash entries
- (pairs of pointers key and value) are stored in a contiguous array.
- The position of each mapping is determined by the hash value of its
- key and the size of the table: location := hash(key) % size. If
- two different keys end up on the same position (collide), the one
- that came second is placed at the next empty position following the
+ In regular language, it means that all the hash entries (pairs of
+ pointers key and value) are stored in a contiguous array. The
+ position of each mapping is determined by the hash value of its key
+ and the size of the table: location := hash(key) % size. If two
+ different keys end up on the same position (collide), the one that
+ came second is placed at the next empty position following the
occupied place. This collision resolution technique is called
"linear probing".
the prime table. */
};
-/* We use all-bit-set marker to mean that a mapping is empty. It is
- (hopefully) illegal as a pointer, and it allows the users to use
- NULL (as well as any non-negative integer) as key. */
+/* We use the all-bits-set constant (INVALID_PTR) marker to mean that
+ a mapping is empty. It is unaligned and therefore illegal as a
+ pointer. INVALID_PTR_BYTE (0xff) is the one-byte value used to
+ initialize the mappings array as empty.
+
+ The all-bits-set value is a better choice than NULL because it
+ allows the use of NULL/0 keys. Since the keys are either integers
+ or pointers, the only key that cannot be used is the integer value
+ -1. This is acceptable because it still allows the use of
+ nonnegative integer keys. */
+
+#define INVALID_PTR ((void *) ~(unsigned long)0)
+#ifndef UCHAR_MAX
+# define UCHAR_MAX 0xff
+#endif
+#define INVALID_PTR_BYTE UCHAR_MAX
-#define NON_EMPTY(mp) (mp->key != (void *)~(unsigned long)0)
-#define MARK_AS_EMPTY(mp) (mp->key = (void *)~(unsigned long)0)
+#define NON_EMPTY(mp) ((mp)->key != INVALID_PTR)
+#define MARK_AS_EMPTY(mp) ((mp)->key = INVALID_PTR)
/* "Next" mapping is the mapping after MP, but wrapping back to
MAPPINGS when MP would reach MAPPINGS+SIZE. */
being HASHFUN. */
#define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
-/* Find a prime near, but greather than or equal to SIZE. Of course,
- the primes are not calculated, but looked up from a table. The
- table does not contain all primes in range, just a selection useful
+/* Find a prime near, but greather than or equal to SIZE. The primes
+ are looked up from a table with a selection of primes convenient
for this purpose.
PRIME_OFFSET is a minor optimization: it specifies start position
needed, simply specify zero ITEMS.
If hash and test callbacks are not specified, identity mapping is
- assumed, i.e. pointer values are used for key comparison. If,
- instead of that, you want strings with equal contents to hash the
- same, use make_string_hash_table. */
+ assumed, i.e. pointer values are used for key comparison. (Common
+ Lisp calls such tables EQ hash tables, and Java calls them
+ IdentityHashMaps.) If your keys require different comparison,
+ specify hash and test functions. For easy use of C strings as hash
+ keys, you can use the convenience functions make_string_hash_table
+ and make_nocase_string_hash_table. */
struct hash_table *
hash_table_new (int items,
ht->mappings = xnew_array (struct mapping, ht->size);
/* Mark mappings as empty. We use 0xff rather than 0 to mark empty
- keys because it allows us to store NULL keys to the table. */
- memset (ht->mappings, 0xff, size * sizeof (struct mapping));
+ keys because it allows us to use NULL/0 as keys. */
+ memset (ht->mappings, INVALID_PTR_BYTE, size * sizeof (struct mapping));
ht->count = 0;
ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
mappings = xnew_array (struct mapping, newsize);
- memset (mappings, 0xff, newsize * sizeof (struct mapping));
+ memset (mappings, INVALID_PTR_BYTE, newsize * sizeof (struct mapping));
ht->mappings = mappings;
for (mp = old_mappings; mp < old_end; mp++)
void
hash_table_clear (struct hash_table *ht)
{
- memset (ht->mappings, 0xff, ht->size * sizeof (struct mapping));
+ memset (ht->mappings, INVALID_PTR_BYTE, ht->size * sizeof (struct mapping));
ht->count = 0;
}
return hash_table_new (items, string_hash_nocase, string_cmp_nocase);
}
-/* Hashing of numeric values, such as pointers and integers. Used for
- hash tables that are keyed by pointer identity. (Common Lisp calls
- them EQ hash tables, and Java calls them IdentityHashMaps.)
+/* Hashing of numeric values, such as pointers and integers.
This implementation is the Robert Jenkins' 32 bit Mix Function,
with a simple adaptation for 64-bit values. It offers excellent