commit 25e61caedcce78bbbc1f0be6d52115f2d932b2c8
parent d2e2ea2be72d5c6e8621cb3c8aa6e5f56adf9ff4
Author: vaplv <vaplv@free.fr>
Date: Sat, 5 Apr 2014 17:05:10 +0200
Implement and test the hash table data structure
Diffstat:
6 files changed, 552 insertions(+), 2 deletions(-)
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -107,6 +107,7 @@ endmacro(new_test)
new_test(test_atomic)
new_test(test_dynamic_array rsys)
new_test(test_free_list rsys)
+new_test(test_hash_table rsys)
new_test(test_library rsys)
new_test(test_list rsys)
new_test(test_mem_allocator rsys)
diff --git a/src/dynamic_array_char.h b/src/dynamic_array_char.h
@@ -0,0 +1,10 @@
+#ifndef DYNAMIC_ARRRAY_CHAR_H
+#define DYNAMIC_ARRRAY_CHAR_H
+
+#include "dynamic_array.h"
+
+#define DARRAY_NAME char
+#define DARRAY_DATA char
+#include "dynamic_array.h"
+
+#endif /* DYNAMIC_ARRRAY_CHAR_H */
diff --git a/src/hash.h b/src/hash.h
@@ -0,0 +1,45 @@
+#ifndef HASH_H
+#define HASH_H
+
+#include "hash.h"
+
+/* 32-bits Fowler/Noll/Vo hash function */
+static INLINE uint32_t
+hash_fnv32(const void* data, const size_t len)
+{
+ const uint32_t FNV32_PRIME =
+ (uint32_t)(((uint32_t)1<<24) + ((uint32_t)1<<8) + 0x93);
+ const uint32_t OFFSET32_BASIS = 2166136261u;
+ const char* octets = (const char*)data;
+ uint32_t hash = OFFSET32_BASIS;
+ size_t i;
+ ASSERT(data);
+
+ FOR_EACH(i, 0, len) {
+ hash = hash ^ (uint32_t)((unsigned char)octets[i]);
+ hash = hash * FNV32_PRIME;
+ }
+ return hash;
+}
+
+/* 64-bits Fowler/Noll/Vo hash function */
+static INLINE uint64_t
+hash_fnv64(const void* data, const size_t len)
+{
+ const uint64_t FNV64_PRIME =
+ (uint64_t)(((uint64_t)1<<40) + ((uint64_t)1<<8) + 0xB3);
+ const uint64_t OFFSET64_BASIS = 14695981039346656037u;
+ const char* octets = (const char*)data;
+ uint64_t hash = OFFSET64_BASIS;
+ size_t i;
+ ASSERT(data);
+
+ FOR_EACH(i, 0, len) {
+ hash = hash ^ (uint64_t)((unsigned char)octets[i]);
+ hash = hash * FNV64_PRIME;
+ }
+ return hash;
+}
+
+#endif /* HASH_H */
+
diff --git a/src/hash_table.h b/src/hash_table.h
@@ -0,0 +1,421 @@
+#if !defined(HTABLE_NAME) && !defined(HTABLE_KEY) && !defined(HTABLE_DATA)
+#ifndef HASH_TABLE_H
+#define HASH_TABLE_H
+
+/* Pre include required headers */
+#include "dynamic_array_char.h"
+#include "hash.h"
+#include "mem_allocator.h"
+#include "math.h"
+#include "rsys.h"
+#include <string.h>
+
+#endif /* HASH_TABLE_H */
+#else
+/*
+ * Generate the hash table data types and functions with respect to the
+ * following macros:
+ * - HTABLE_NAME: prefix of the hash table functions & types;
+ * - HTABLE_DATA: type of the data registered into the hash table;
+ * - HTABLE_KEY: type of the key indexing the hash table data;
+ * TODO
+ * - HTABLE_FUNCTOR_INIT: init functor on HTABLE_DATA. If not defined, no
+ * specific treatment is performed on the created data;
+ * - HTABLE_FUNCTOR_COPY: copy functor on HTABLE_DATA. If not defined a
+ * bitwise copy is used instead;
+ * - HTABLE_FUNCTOR_RELEASE: release functor on HTABLE_DATA. If not defined
+ * nothing is done on the release of an element;
+ * - HTABLE_FUNCTOR_COPY_AND_RELEASE: Copy and release of a HTABLE_DATA. If
+ * not defined the copy and the release functors is used.
+ *
+ * The name of the generated types are:
+ * struct htable_<HTABLE_NAME>[_pair|_iterator]
+ *
+ * while the generated functions are:
+ * htable_<HTABLE_NAME>_<FUNCTION_NAME>
+ *
+ * The hash table collisions are resolved by the "open addressing" strategy
+ * with a fixed linear probing of one.
+ */
+#if !defined( HTABLE_NAME )
+# error "Undefined HTABLE_NAME macro defining the hash table suffix"
+#endif
+#if !defined( HTABLE_KEY)
+# error "Undefined HTABLE_KEY macro defining the hash key type"
+#endif
+#if !defined( HTABLE_DATA )
+# error "Undefined HTABLE_DATA macro defining the hash data type"
+#endif
+
+#define HTABLE__ CONCAT(htable_, HTABLE_NAME)
+#define HTABLE_PAIR__ CONCAT(CONCAT(HTABLE__, _), pair)
+#define HTABLE_INTERATOR__ CONCAT(HTABLE, iterator)
+#define HTABLE_FUNC__(Func) \
+ CONCAT(CONCAT(CONCAT(CONCAT(htable, _), HTABLE_NAME), _), Func)
+
+/* Internal data structure */
+struct HTABLE_PAIR__
+{
+ HTABLE_DATA data;
+ HTABLE_KEY key;
+};
+
+#define DARRAY_NAME CONCAT(HTABLE_NAME, __)
+#define DARRAY_DATA struct HTABLE_PAIR__
+#include "dynamic_array.h"
+
+#define HTABLE_DATA__ CONCAT(CONCAT(darray_, HTABLE_NAME), __)
+#define HTABLE_DATA_FUNC__(Func) CONCAT(CONCAT(HTABLE_DATA__, _), Func)
+
+struct HTABLE__
+{
+ struct HTABLE_DATA__ table;
+ struct darray_char table_slot_is_used;
+ size_t table_size_in_use;
+
+ size_t (*hash)(HTABLE_KEY const*);
+ char (*eq_key)(HTABLE_KEY const*, HTABLE_KEY const*);
+
+ struct mem_allocator* allocator;
+};
+
+struct HTABLE_INTERATOR__
+{
+ struct HTABLE__* hash_table;
+ size_t slot;
+ size_t entries_scanned;
+};
+
+/*******************************************************************************
+ * Internal helper functions
+ ******************************************************************************/
+static INLINE size_t
+HTABLE_FUNC__(find_slot__)(const struct HTABLE__* htbl, HTABLE_KEY const* key)
+{
+ size_t slot = 0;
+ const struct HTABLE_PAIR__* pairs = NULL;
+ const char* used_pairs = NULL;
+ ASSERT(htbl && key);
+ /* The tbl size must be a pow of 2 */
+ ASSERT(is_power_of_2(HTABLE_DATA_FUNC__(size_get)(&htbl->table)));
+ /* The tbl is not full */
+ ASSERT(htbl->table_size_in_use < HTABLE_DATA_FUNC__(size_get)(&htbl->table));
+
+ pairs = HTABLE_DATA_FUNC__(cdata_get)(&htbl->table);
+ used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used);
+ /* slot = hash % size */
+ slot = htbl->hash(key) & (HTABLE_DATA_FUNC__(size_get)(&htbl->table) - 1);
+
+ while(used_pairs[slot] && !htbl->eq_key(key, &pairs[slot].key)) {
+ /* slot = (slot + 1) % size */
+ slot = (slot + 1) & (HTABLE_DATA_FUNC__(size_get)(&htbl->table) - 1);
+ }
+ return slot;
+}
+
+static INLINE size_t
+HTABLE_FUNC__(hash__)(HTABLE_KEY const* key)
+{
+#ifdef ARCH_32BITS
+ return (size_t)hash_fnv32(key, sizeof(HTABLE_KEY));
+#elif defined(ARCH_64BITS)
+ return (size_t)hash_fnv64(key, sizeof(HTABLE_KEY));
+#else
+ #error "Unexpected architecture"
+#endif
+}
+
+/*******************************************************************************
+ * Hash table API
+ ******************************************************************************/
+static INLINE void
+HTABLE_FUNC__(init)
+ (struct HTABLE__* htbl,
+ /* May be NULL <=> use default allocator */
+ struct mem_allocator* allocator,
+ /* May be NULL <=> use default hash function */
+ size_t (*hash)(HTABLE_KEY const*),
+ char (*eq_key)(HTABLE_KEY const*, HTABLE_KEY const*))
+{
+ ASSERT( htbl && eq_key );
+ HTABLE_DATA_FUNC__(init)(allocator, &htbl->table);
+ darray_char_init(allocator, &htbl->table_slot_is_used);
+ htbl->table_size_in_use = 0;
+ htbl->hash = hash ? hash : HTABLE_FUNC__(hash__);
+ htbl->eq_key = eq_key;
+ htbl->allocator = allocator ? allocator : &mem_default_allocator;
+}
+
+static INLINE void
+HTABLE_FUNC__(release)(struct HTABLE__* htbl)
+{
+ ASSERT(htbl);
+ HTABLE_DATA_FUNC__(release)(&htbl->table);
+ darray_char_release(&htbl->table_slot_is_used);
+}
+
+static INLINE void
+HTABLE_FUNC__(clear)(struct HTABLE__* htbl)
+{
+ ASSERT( htbl );
+ htbl->table_size_in_use = 0;
+ memset
+ (darray_char_data_get(&htbl->table_slot_is_used), 0,
+ darray_char_size_get(&htbl->table_slot_is_used) * sizeof(char));
+}
+
+/* Return 0 on success and -1 otherwise */
+static int
+HTABLE_FUNC__(reserve)(struct HTABLE__* htbl, const size_t size_submitted)
+{
+ struct HTABLE_DATA__ tbl;
+ struct darray_char tbl_slot_is_used;
+ const struct HTABLE_PAIR__* old_pairs = NULL;
+ struct HTABLE_PAIR__* new_pairs = NULL;
+ const char* old_used_pairs = NULL;
+ char* new_used_pairs = NULL;
+ size_t islot = 0;
+ size_t in_use = 0;
+ size_t size = 0;
+ int res = 0;
+ ASSERT(htbl);
+
+ size = round_up_pow2( size_submitted );
+ if( size <= HTABLE_DATA_FUNC__(size_get)(&htbl->table))
+ goto exit;
+
+ HTABLE_DATA_FUNC__(init)(htbl->allocator, &tbl);
+ if(HTABLE_DATA_FUNC__(resize)(&tbl, size))
+ goto error;
+
+ darray_char_init(htbl->allocator, &tbl_slot_is_used);
+ if(darray_char_resize(&tbl_slot_is_used, size))
+ goto error;
+ memset(darray_char_data_get(&tbl_slot_is_used), 0, size*sizeof(char));
+
+ /* Rehash the hash table */
+ old_pairs = HTABLE_DATA_FUNC__(cdata_get)(&htbl->table);
+ new_pairs = HTABLE_DATA_FUNC__(data_get)(&tbl);
+ old_used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used);
+ new_used_pairs = darray_char_data_get(&tbl_slot_is_used);
+ FOR_EACH(islot, 0, HTABLE_DATA_FUNC__(size_get)(&htbl->table)) {
+ size_t islot_new = 0;
+
+ if(in_use == htbl->table_size_in_use)
+ break; /* Early stop the rehasing since there is no more slot in htbl */
+
+ if(!old_used_pairs[islot])
+ continue;
+
+ islot_new = htbl->hash(&old_pairs[islot].key) & (size - 1);
+ while(new_used_pairs[islot_new]) {
+ ASSERT /* There is at most 1 entry for a given key */
+ (!htbl->eq_key(&new_pairs[islot_new].key, &old_pairs[islot].key ));
+ islot_new = (islot_new + 1) & (size - 1); /* (islot_new + 1) % size */
+ }
+ new_pairs[islot_new] = old_pairs[islot];
+ new_used_pairs[islot_new] = 1;
+ ++in_use;
+ }
+
+ HTABLE_DATA_FUNC__(copy_and_release)(&htbl->table, &tbl);
+ darray_char_copy_and_release(&htbl->table_slot_is_used, &tbl_slot_is_used);
+
+exit:
+ return res;
+error:
+ HTABLE_DATA_FUNC__(release)(&tbl);
+ darray_char_release(&tbl_slot_is_used);
+ res = -1;
+ goto exit;
+}
+
+/* Return 0 on success and -1 otherwise */
+static INLINE int
+HTABLE_FUNC__(set)
+ ( struct HTABLE__* htbl,
+ HTABLE_KEY const* key,
+ HTABLE_DATA const* data )
+{
+ size_t tbl_size = 0;
+ size_t i = 0;
+ int res = 0;
+ ASSERT(htbl && key && data);
+
+ /* Increase hash table size when the load factor is too high */
+ tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table);
+ if(htbl->table_size_in_use >= tbl_size * 3/4) {
+ if(!tbl_size) {
+ res = HTABLE_FUNC__(reserve)(htbl, 32);
+ } else {
+ res = HTABLE_FUNC__(reserve)(htbl, tbl_size * 2 );
+ }
+ if(res)
+ return res;
+ }
+
+ i = HTABLE_FUNC__(find_slot__)(htbl, key);
+ if(darray_char_cdata_get(&htbl->table_slot_is_used)[i]) {
+ HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data = *data;
+ } else {
+ HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data = *data;
+ HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].key = *key;
+ darray_char_data_get(&htbl->table_slot_is_used)[i] = 1;
+ ++htbl->table_size_in_use;
+ }
+ return 0;
+}
+
+/* Return the number of erased elements, i.e. 0 or 1 */
+static INLINE size_t
+HTABLE_FUNC__(erase)(struct HTABLE__* htbl, HTABLE_KEY const* key)
+{
+ size_t i = 0, j = 0, tbl_size = 0;
+ struct HTABLE_PAIR__* pairs = NULL;
+ char* used_pairs = NULL;
+ ASSERT(htbl && key );
+
+ pairs = HTABLE_DATA_FUNC__(data_get)(&htbl->table);
+ tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table);
+ ASSERT(is_power_of_2(tbl_size));
+
+ used_pairs = darray_char_data_get(&htbl->table_slot_is_used);
+ i = HTABLE_FUNC__(find_slot__)(htbl, key);
+ if(!used_pairs[i])
+ return 0; /* No entry */
+
+ /* Remove the entry */
+ used_pairs[i] = 0;
+ --htbl->table_size_in_use;
+
+ for(j = (i + 1) & (tbl_size - 1); /* <=> j = (i+1) % size */
+ used_pairs[j];
+ j = (j + 1) & (tbl_size - 1) ) { /* <=> j = (j+1) % size */
+ const size_t k = htbl->hash(&pairs[j].key) & (tbl_size- 1);
+
+ if(i <= j ? (i < k && k <= j) : (i < k || k <= j))
+ continue;
+
+ pairs[i] = pairs[j];
+ used_pairs[i] = 1;
+ used_pairs[j] = 0;
+ i = j;
+ }
+ return 1;
+}
+
+static INLINE char
+HTABLE_FUNC__(is_empty)(const struct HTABLE__* htbl)
+{
+ ASSERT(htbl);
+ return htbl->table_size_in_use == 0;
+}
+
+static INLINE HTABLE_DATA*
+HTABLE_FUNC__(find)(struct HTABLE__* htbl, HTABLE_KEY const* key)
+{
+ size_t i = 0;
+ ASSERT(htbl && key);
+ if(HTABLE_FUNC__(is_empty)(htbl))
+ return NULL;
+
+ i = HTABLE_FUNC__(find_slot__)(htbl, key);
+ if(darray_char_cdata_get(&htbl->table_slot_is_used)[i]) {
+ return &HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data;
+ } else {
+ return NULL;
+ }
+}
+
+static INLINE size_t
+HTABLE_FUNC__(size_get)(const struct HTABLE__* htbl)
+{
+ ASSERT(htbl);
+ return htbl->table_size_in_use;
+}
+
+static INLINE void
+HTABLE_FUNC__(begin)
+ (struct HTABLE__* htbl,
+ struct HTABLE_INTERATOR__* it)
+{
+ const char* used_pairs = NULL;
+ size_t i = 0;
+ size_t tbl_size = 0;
+ ASSERT(htbl && it);
+
+ tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table);
+ used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used);
+
+ it->slot = it->entries_scanned = 0;
+ for( i = 0; i < tbl_size && !used_pairs[i]; ++i) {}
+ it->slot = i;
+ it->entries_scanned = i < tbl_size;
+ it->hash_table = htbl;
+}
+
+static INLINE void
+HTABLE_FUNC__(end)
+ (struct HTABLE__* htbl,
+ struct HTABLE_INTERATOR__* it)
+{
+ ASSERT(htbl && it);
+ it->slot = HTABLE_DATA_FUNC__(size_get)(&htbl->table);
+ it->entries_scanned = htbl->table_size_in_use;
+ it->hash_table = htbl;
+}
+
+static INLINE void
+HTABLE_FUNC__(iterator_next)(struct HTABLE_INTERATOR__* it)
+{
+ size_t tbl_size = 0;
+ ASSERT(it);
+
+ tbl_size = HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table);
+ if(it->entries_scanned >= it->hash_table->table_size_in_use) {
+ it->slot = tbl_size;
+ } else {
+ size_t i = 0;
+ FOR_EACH(i, it->slot + 1, tbl_size)
+ if(darray_char_cdata_get(&it->hash_table->table_slot_is_used)[i]) break;
+ it->slot = i;
+ it->entries_scanned += i < tbl_size;
+ }
+}
+
+static INLINE char
+HTABLE_FUNC__(iterator_eq)
+ (const struct HTABLE_INTERATOR__* it0,
+ const struct HTABLE_INTERATOR__* it1)
+{
+ ASSERT(it0 && it1);
+ return
+ it0->slot == it1->slot
+ && it0->entries_scanned == it1->entries_scanned
+ && it0->hash_table == it1->hash_table;
+}
+
+static INLINE HTABLE_KEY*
+HTABLE_FUNC__(iterator_key_get)(const struct HTABLE_INTERATOR__* it)
+{
+ ASSERT(it && it->slot < HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table));
+ return &HTABLE_DATA_FUNC__(data_get)(&it->hash_table->table)[it->slot].key;
+}
+
+static INLINE HTABLE_DATA*
+HTABLE_FUNC__(iterator_data_get)(const struct HTABLE_INTERATOR__* it)
+{
+ ASSERT(it && it->slot < HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table));
+ return &HTABLE_DATA_FUNC__(data_get)(&it->hash_table->table)[it->slot].data;
+}
+
+#undef HTABLE_KEY
+#undef HTABLE_DATA
+#undef HTABLE_NAME
+#undef HTABLE
+#undef HTABLE_PAIR__
+#undef HTABLE_INTERATOR__
+#undef HTABLE_FUNC__
+
+#endif /* Pre inclusion */
+
diff --git a/src/math.h b/src/math.h
@@ -3,8 +3,8 @@
#include "rsys.h"
-#define max(A, B) (A) > (B) ? (A) : (B)
-#define min(A, B) (A) < (B) ? (A) : (B)
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
static FINLINE char
is_power_of_2(const size_t i)
diff --git a/src/test_hash_table.c b/src/test_hash_table.c
@@ -0,0 +1,73 @@
+#include "hash_table.h"
+#include <string.h>
+
+#define HTABLE_KEY int
+#define HTABLE_DATA float
+#define HTABLE_NAME int_float
+#include "hash_table.h"
+
+static INLINE char
+eq_int( const int* a, const int* b )
+{
+ return *a == *b;
+}
+
+static void
+test_htbl_int_float( void )
+{
+ struct mem_allocator allocator_proxy;
+ struct htable_int_float htbl;
+ int i;
+ const int n = 678;
+
+ htable_int_float_init(&htbl, NULL, NULL, eq_int);
+ htable_int_float_release(&htbl);
+
+ mem_init_proxy_allocator(&allocator_proxy, &mem_default_allocator);
+
+ htable_int_float_init(&htbl, &allocator_proxy, NULL, eq_int);
+ htable_int_float_reserve(&htbl, 30);
+
+ FOR_EACH(i, 0, n) {
+ float f = (float)i;
+ CHECK(htable_int_float_set(&htbl, &i, &f), 0);
+ }
+
+ FOR_EACH( i, 0, n ) {
+ float* p = htable_int_float_find(&htbl, &i);
+ NCHECK(p, NULL);
+ CHECK(*p, (float) i);
+ }
+ CHECK(htable_int_float_size_get(&htbl), (size_t)n);
+ FOR_EACH(i, 0, n / 2) {
+ CHECK(htable_int_float_erase(&htbl, &i), 1);
+ }
+ CHECK(htable_int_float_size_get(&htbl), (size_t)(n/2));
+ FOR_EACH(i, 0, n/2) {
+ float* p = htable_int_float_find(&htbl, &i);
+ CHECK(p, NULL);
+ }
+ FOR_EACH(i, n/2, n) {
+ float* p = htable_int_float_find(&htbl, &i);
+ NCHECK(p, NULL);
+ CHECK(*p, (float) i);
+ }
+ htable_int_float_release(&htbl);
+
+ if(MEM_ALLOCATED_SIZE(&allocator_proxy)) {
+ char dump[512];
+ MEM_DUMP(&allocator_proxy, dump, sizeof(dump)/sizeof(char));
+ fprintf(stderr, "%s\n", dump);
+ FATAL("Memory leaks\n");
+ }
+ mem_shutdown_proxy_allocator(&allocator_proxy);
+}
+
+int
+main(int argc, char** argv)
+{
+ (void)argc, (void)argv;
+ test_htbl_int_float();
+ return 0;
+}
+