From 9df451de619c7467355a707e242c570eb7f5fe5f Mon Sep 17 00:00:00 2001 From: Vftdan Date: Wed, 23 Oct 2024 14:03:51 +0200 Subject: [PATCH] Copy hashtable implementation from another project --- Makefile | 2 +- src/common/defs.h | 23 +++ src/common/util/hash_table.c | 360 +++++++++++++++++++++++++++++++++++ src/common/util/hash_table.h | 71 +++++++ src/server/main.c | 1 + 5 files changed, 456 insertions(+), 1 deletion(-) create mode 100644 src/common/defs.h create mode 100644 src/common/util/hash_table.c create mode 100644 src/common/util/hash_table.h diff --git a/Makefile b/Makefile index 818fe94..442f685 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ include common.mk all: server -server: $(BUILD_DIR)/server/main.o +server: $(BUILD_DIR)/server/main.o $(BUILD_DIR)/common/util/hash_table.o $(COMPILE_EXE) run: server diff --git a/src/common/defs.h b/src/common/defs.h new file mode 100644 index 0000000..6eedee3 --- /dev/null +++ b/src/common/defs.h @@ -0,0 +1,23 @@ +#ifndef COMMON_DEFS_H_ +#define COMMON_DEFS_H_ + +#include +#include +#include +#include + +#define lengthof(arr) (sizeof(arr) / sizeof(*arr)) +#define containerof(ptr, contype, membpath) ((contype*)(0 ? (void)(((contype*)NULL)->membpath = *(ptr)) : (void)0, ((char *)(ptr)) - offsetof(contype, membpath))) +// Assuming child type has a field for the base type +// So for structs it is usually actual downcast, but for unions it is an upcast +#define DOWNCAST(contype, basename, ptr) containerof(ptr, contype, as_##basename) +#define T_ALLOC(count, T) ((T*)calloc((count), sizeof(T))) +#define HEADER_FN __attribute__((unused)) inline static + +#ifdef DEBUG +# define DEBUG_PRINT_VALUE(x, fmt) (void) (fprintf(stderr, #x " = " fmt "\n", x), fflush(stderr)) +#else +# define DEBUG_PRINT_VALUE(...) (void) (0) +#endif + +#endif /* end of include guard: COMMON_DEFS_H_ */ diff --git a/src/common/util/hash_table.c b/src/common/util/hash_table.c new file mode 100644 index 0000000..849fca5 --- /dev/null +++ b/src/common/util/hash_table.c @@ -0,0 +1,360 @@ +#include +#include "hash_table.h" + +#define FNV_OFFSET_BASIS 0xCBF29CE484222325 +#define FNV_PRIME 0x00000100000001B3 + +inline static uint64_t +fnv_1a(uint8_t *bytes, size_t length) +{ + uint64_t hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < length; ++i) { + uint8_t b = bytes[i]; + hash ^= b; + hash *= FNV_PRIME; + } + return hash; +} + +#define NULL_KEY ((HashTableKey){.length = 0, .bytes = NULL, .pre_hash = 0}) + +HashTableKey +hash_table_key_from_bytes(const char *bytes, size_t size) +{ + if (!bytes) { + return NULL_KEY; + } + HashTableKey key = { + .length = size, + .bytes = bytes, + .pre_hash = fnv_1a((uint8_t*) bytes, size), + }; + return key; +} + +HashTableKey +hash_table_key_copy(const HashTableKey old) +{ + if (!old.bytes) { + return NULL_KEY; + } + char *new_bytes = malloc(old.length + 1); + if (!new_bytes) { + return NULL_KEY; + } + memcpy(new_bytes, old.bytes, old.length); + new_bytes[old.length] = 0; + return (HashTableKey) { + .length = old.length, + .bytes = new_bytes, + .pre_hash = old.pre_hash, + }; +} + +void +hash_table_key_deinit_copied(HashTableKey *key) +{ + if (key->bytes) { + free((char*) key->bytes); + key->bytes = NULL; + } + key->length = 0; + key->pre_hash = 0; +} + +#define FAMILY_PRIME 0x1FFFFFFFFFFFFFFF + +inline static HashFamilyMember +family_random_member() +{ + uint64_t n[2]; + for (int i = 0; i < 2; ++i) { + n[i] = (uint64_t)((rand() / (double) RAND_MAX) * (FAMILY_PRIME - RAND_MAX) + rand()) % FAMILY_PRIME; + } + if (!n[0]) { + n[0] = 1; + } + return (HashFamilyMember) { + .a = n[0], + .b = n[1], + }; +} + +inline static size_t +family_map(HashFamilyMember member, uint64_t pre_hash) +{ + if (!member.a) + ++member.a; + uint64_t hash = (pre_hash * member.a + member.b) % FAMILY_PRIME; // Integer overflows may influence the properties of this family + return hash; +} + +bool +hash_table_key_equals(const HashTableKey lhs, const HashTableKey rhs) +{ + if (lhs.pre_hash != rhs.pre_hash) { + return false; + } + if (lhs.length != rhs.length) { + return false; + } + if (lhs.bytes == rhs.bytes) { + return true; + } + if (!lhs.bytes || !rhs.bytes) { + return false; + } + size_t length = lhs.length; + return memcmp(lhs.bytes, rhs.bytes, length) == 0; +} + +inline static size_t +initial_position(const HashTableKey key, const HashTableDynamicData * ht) +{ + uint64_t hash = family_map(ht->family_member, key.pre_hash); + return hash % ht->capacity; +} + +inline static size_t +probe_next(size_t current_index, size_t base_index, size_t iteration, size_t modulo) +{ + (void) base_index; + (void) iteration; + ++current_index; + current_index %= modulo; + return current_index; +} + +static HashTableDynamicData +create_table(size_t capacity, size_t value_size, HashFamilyMember family_member) +{ + HashTableDynamicData data = { + .value_array = NULL, + .capacity = 0, + .length = 0, + .key_array = NULL, + .family_member = family_member, + .value_size = value_size, + }; + void *value_array = calloc(capacity, value_size); + if (!value_array) { + return data; + } + HashTableKeyEntry *key_array = T_ALLOC(capacity, HashTableKeyEntry); + if (!key_array) { + free(value_array); + return data; + } + data.value_array = value_array; + data.capacity = capacity; + data.key_array = key_array; + return data; +} + +void +hash_table_init_impl(HashTableDynamicData * data, size_t value_size, void (*value_deinit)(void*)) +{ + *data = create_table(5, value_size, family_random_member()); + data->value_deinit = value_deinit; +} + +void +hash_table_deinit_impl(HashTableDynamicData * data) +{ + if (data->key_array && data->value_array && data->length) { + void (*value_deinit)(void*) = data->value_deinit; + for (HashTableIndex i = 0; i < (ssize_t) data->capacity; ++i) { + if (data->key_array[i].key.bytes) { + if (value_deinit) { + value_deinit(data->value_array + (data->value_size * (size_t) i)); + } + hash_table_key_deinit_copied(&data->key_array[i].key); + } + } + } + if (data->key_array) { + free(data->key_array); + data->key_array = NULL; + } + if (data->value_array) { + free(data->value_array); + data->value_array = NULL; + } + data->capacity = 0; + data->length = 0; +} + +static void +hash_table_grow(HashTableDynamicData * old_ht) +{ + size_t capacity = old_ht->capacity; + capacity += (capacity >> 1) + 1; + const size_t value_size = old_ht->value_size; + HashTableDynamicData new_ht = create_table(capacity, value_size, old_ht->family_member); + if (!new_ht.key_array) { + return; + } + + for (size_t i = 0; i < old_ht->capacity; ++i) { + if (!old_ht->key_array[i].key.bytes) { + continue; + } + if (hash_table_insert_impl(&new_ht, old_ht->key_array[i].key, old_ht->value_array + (i * value_size)) < 0) { + hash_table_deinit_impl(&new_ht); + return; + } + } + + new_ht.value_deinit = old_ht->value_deinit; + old_ht->value_deinit = NULL; + // TODO avoid duplication-deletion of the keys + hash_table_deinit_impl(old_ht); + *old_ht = new_ht; +} + +static void +hash_table_change_hash(HashTableDynamicData * old_ht) +{ + const size_t capacity = old_ht->capacity; + const size_t value_size = old_ht->value_size; + HashTableDynamicData new_ht = create_table(capacity, value_size, family_random_member()); + if (!new_ht.key_array) { + return; + } + + for (size_t i = 0; i < old_ht->capacity; ++i) { + if (!old_ht->key_array[i].key.bytes) { + continue; + } + if (hash_table_insert_impl(&new_ht, old_ht->key_array[i].key, old_ht->value_array + (i * value_size)) < 0) { + hash_table_deinit_impl(&new_ht); + return; + } + } + + new_ht.value_deinit = old_ht->value_deinit; + old_ht->value_deinit = NULL; + // TODO avoid duplication-deletion of the keys + hash_table_deinit_impl(old_ht); + *old_ht = new_ht; +} + +HashTableIndex +hash_table_insert_impl(HashTableDynamicData * ht, const HashTableKey key, const void * value_ptr) +{ + if (!key.bytes) { + return -1; + } + size_t length = ht->length; + size_t capacity = ht->capacity; + if (length + (length >> 1) >= ht->capacity) { + hash_table_grow(ht); + capacity = ht->capacity; + } + if (length >= capacity) { + return -1; + } + + const size_t base_index = initial_position(key, ht); + size_t current_index = base_index; + size_t collision_offset = 0; + bool found = false; + bool overwrite = false; + bool pre_hash_collision = false; + for (; collision_offset < capacity; current_index = probe_next(current_index, base_index, ++collision_offset, capacity)) { + if (!ht->key_array[current_index].key.bytes) { + found = true; + break; + } + if (hash_table_key_equals(key, ht->key_array[current_index].key)) { + overwrite = true; + found = true; + break; + } + if (!pre_hash_collision) { + pre_hash_collision = key.pre_hash == ht->key_array[current_index].key.pre_hash; + } + } + if (!found) { + return -1; + } + + void *value_target_ptr = ht->value_array + (ht->value_size * current_index); + if (ht->key_array[base_index].max_collision_offset < collision_offset) { + ht->key_array[base_index].max_collision_offset = collision_offset; + } + if (overwrite) { + // Deinitialize the old value + void (*value_deinit)(void*) = ht->value_deinit; + if (value_deinit) { + value_deinit(value_target_ptr); + } + } else { + // Make a local copy of the key + ht->key_array[current_index].key = hash_table_key_copy(key); + } + memcpy(value_target_ptr, value_ptr, ht->value_size); // Assign the value + ht->length = ++length; + + if (!pre_hash_collision && (collision_offset << 1) > capacity + 6) { + hash_table_change_hash(ht); + } + + return current_index; +} + +HashTableIndex +hash_table_find_impl(const HashTableDynamicData * ht, const HashTableKey key) +{ + if (!key.bytes) { + return -1; + } + + const size_t capacity = ht->capacity; + const size_t base_index = initial_position(key, ht); + const size_t max_collision_offset = ht->key_array[base_index].max_collision_offset; + size_t collision_offset = 0; + size_t current_index = base_index; + + for (; collision_offset <= max_collision_offset; current_index = probe_next(current_index, base_index, ++collision_offset, capacity)) { + if (hash_table_key_equals(key, ht->key_array[current_index].key)) { + return current_index; + break; + } + } + + return -1; +} + +bool +hash_table_delete_at_index_impl(HashTableDynamicData * ht, const HashTableIndex index) +{ + if (!ht) { + return false; + } + + if (index < 0 || (size_t) index > ht->capacity) { + return false; + } + + HashTableKeyEntry *key_entry_ptr = ht->key_array + index; + void *value_ptr = ht->value_array + (ht->value_size * index); + + if (!key_entry_ptr->key.bytes) { + // No entry at index + return false; + } + + // TODO find base_index and reduce its max_collision_offset when provably justified + + hash_table_key_deinit_copied(&key_entry_ptr->key); + ht->length -= 1; + + void (*value_deinit)(void*) = ht->value_deinit; + if (value_deinit) { + value_deinit(value_ptr); + } + memset(value_ptr, 0, ht->value_size); + + return true; +} diff --git a/src/common/util/hash_table.h b/src/common/util/hash_table.h new file mode 100644 index 0000000..cc8ae50 --- /dev/null +++ b/src/common/util/hash_table.h @@ -0,0 +1,71 @@ +#ifndef COMMON_UTIL_HASH_TABLE_H_ +#define COMMON_UTIL_HASH_TABLE_H_ + +#include +#include "common/defs.h" + +#define HASH_TABLE_INTERFACE_FIELDS \ + size_t capacity; \ + size_t length; \ + HashTableKeyEntry *key_array; \ + HashFamilyMember family_member; \ +; + +#define TYPED_HASH_TABLE(T) union { HashTableDynamicData as_HashTableDynamicData; struct { typeof(T) *value_array; HASH_TABLE_INTERFACE_FIELDS; void (*value_deinit)(typeof(T)*); }; } + +typedef ptrdiff_t HashTableIndex; // Invalidated after hashtable modification + +typedef struct { + size_t length; + const char *bytes; + uint64_t pre_hash; +} HashTableKey; + +typedef struct { + HashTableKey key; + size_t max_collision_offset; +} HashTableKeyEntry; + +typedef struct { + uint64_t a, b; +} HashFamilyMember; + +typedef struct { + void *value_array; + HASH_TABLE_INTERFACE_FIELDS; + void (*value_deinit)(void*); + size_t value_size; +} HashTableDynamicData; + +HashTableKey hash_table_key_from_bytes(const char *bytes, size_t size); +HashTableKey hash_table_key_copy(const HashTableKey old); +void hash_table_key_deinit_copied(HashTableKey *key); +bool hash_table_key_equals(const HashTableKey lhs, const HashTableKey rhs); + +__attribute__((unused)) inline static HashTableKey +hash_table_key_from_cstr(const char *s) +{ + size_t length = strlen(s); + return hash_table_key_from_bytes(s, length); +} + +void hash_table_init_impl(HashTableDynamicData * dyndata, size_t value_size, void (*value_deinit)(void*)); +void hash_table_deinit_impl(HashTableDynamicData * dyndata); +HashTableIndex hash_table_insert_impl(HashTableDynamicData * dyndata, const HashTableKey key, const void * value_ptr); +HashTableIndex hash_table_find_impl(const HashTableDynamicData * dyndata, const HashTableKey key); +bool hash_table_delete_at_index_impl(HashTableDynamicData * dyndata, const HashTableIndex index); + +__attribute__((unused)) inline static bool +hash_table_delete_by_key_impl(HashTableDynamicData * dyndata, const HashTableKey key) +{ + return hash_table_delete_at_index_impl(dyndata, hash_table_find_impl(dyndata, key)); +} + +#define hash_table_init(ht, value_deinit) hash_table_init_impl(&(ht)->as_HashTableDynamicData, sizeof(*(ht)->value_array), IMPLICIT_CAST(void(void*), void(typeof((ht)->value_array)), value_deinit)) +#define hash_table_deinit(ht) hash_table_deinit_impl(&(ht)->as_HashTableDynamicData) +#define hash_table_insert(ht, key, value_ptr) hash_table_insert_impl(&(ht)->as_HashTableDynamicData, key, IMPLICIT_CAST(const void, const typeof(*(ht)->value_array), value_ptr)) +#define hash_table_find(ht, key) hash_table_find_impl(&(ht)->as_HashTableDynamicData, key) +#define hash_table_delete_at_index(ht, index) hash_table_delete_at_index_impl(&(ht)->as_HashTableDynamicData, index) +#define hash_table_delete_by_key(ht, key) hash_table_delete_by_key_impl(&(ht)->as_HashTableDynamicData, key) + +#endif /* end of include guard: COMMON_UTIL_HASH_TABLE_H_ */ diff --git a/src/server/main.c b/src/server/main.c index 9a19362..89d9c9c 100644 --- a/src/server/main.c +++ b/src/server/main.c @@ -1,6 +1,7 @@ #include "main.h" #include +#include "common/util/hash_table.h" int main(int argc, char **argv)