From 71e04c7f2db0f1d80667c8265fedc5037fa1d3f0 Mon Sep 17 00:00:00 2001
From: Vftdan
Date: Thu, 15 Aug 2024 17:35:51 +0200
Subject: [PATCH] Implement hash table and use it for node lookup

---
 Makefile     |   2 +-
 defs.h       |   2 +
 hash_table.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hash_table.h |  69 ++++++++++
 main.c       |  17 +--
 5 files changed, 414 insertions(+), 8 deletions(-)
 create mode 100644 hash_table.c
 create mode 100644 hash_table.h

diff --git a/Makefile b/Makefile
index 7feaec3..4abfe67 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ CPPFLAGS += $(shell pkg-config --cflags $(DEPS))
 LDLIBS += $(shell pkg-config --libs $(DEPS))
 INTERP ?=
 MAIN = main
-OBJS = main.o events.o processing.o graph.o config.o nodes/getchar.o nodes/print.o nodes/evdev.o
+OBJS = main.o events.o processing.o graph.o config.o hash_table.o nodes/getchar.o nodes/print.o nodes/evdev.o
 
 all: $(MAIN)
 
diff --git a/defs.h b/defs.h
index fc87fe8..4a32a87 100644
--- a/defs.h
+++ b/defs.h
@@ -11,6 +11,8 @@
 // Assuming child type has a field for the base type
 // So for structs it is usually actual downcast, but for unions it is an upcast
 #define DOWNCAST(contype, basename, ptr) containerof(ptr, contype, as_##basename)
+// Expects ptr to be of type srctype* or void*, returns (dsttype*)ptr
+#define IMPLICIT_CAST(dsttype, srctype, ptr) (((union { typeof(srctype) *src; typeof(dsttype) *dst; }){.src = (ptr)}).dst)
 #define T_ALLOC(count, T) ((T*)calloc(count, sizeof(T)))
 
 #define DEBUG_PRINT_VALUE(x, fmt) fprintf(stderr, #x " = " fmt "\n", x); fflush(stderr)
diff --git a/hash_table.c b/hash_table.c
new file mode 100644
index 0000000..471f2fc
--- /dev/null
+++ b/hash_table.c
@@ -0,0 +1,332 @@
+#include <stdlib.h>
+#include <string.h>
+#include "hash_table.h"
+
+// 64-bit FNV-1a parameters
+#define FNV_OFFSET_BASIS UINT64_C(0xCBF29CE484222325)
+#define FNV_PRIME UINT64_C(0x00000100000001B3)
+
+// FNV-1a: xor each byte into the hash, then multiply by the prime
+inline static uint64_t
+fnv_1a(const uint8_t *bytes, size_t length)
+{
+	uint64_t hash = FNV_OFFSET_BASIS;
+	for (size_t i = 0; i < length; ++i) {
+		hash ^= bytes[i];
+		hash *= FNV_PRIME;
+	}
+	return hash;
+}
+
+// Key with no bytes; rejected by insert and find
+#define NULL_KEY ((HashTableKey){.length = 0, .bytes = NULL, .pre_hash = 0})
+
+// Wraps (without copying) size bytes into a key and precomputes their FNV-1a hash
+HashTableKey
+hash_table_key_from_bytes(const char *bytes, size_t size)
+{
+	if (!bytes) {
+		return NULL_KEY;
+	}
+	return (HashTableKey) {
+		.length = size,
+		.bytes = bytes,
+		.pre_hash = fnv_1a((const uint8_t*) bytes, size),
+	};
+}
+
+// Returns a key owning a heap copy of the bytes (with a trailing zero byte appended),
+// or NULL_KEY if old has no bytes or the allocation fails
+HashTableKey
+hash_table_key_copy(const HashTableKey old)
+{
+	if (!old.bytes) {
+		return NULL_KEY;
+	}
+	char *new_bytes = malloc(old.length + 1);
+	if (!new_bytes) {
+		return NULL_KEY;
+	}
+	memcpy(new_bytes, old.bytes, old.length);
+	new_bytes[old.length] = 0;
+	return (HashTableKey) {
+		.length = old.length,
+		.bytes = new_bytes,
+		.pre_hash = old.pre_hash,
+	};
+}
+
+// Frees the bytes of a key produced by hash_table_key_copy and resets it to the null key
+void
+hash_table_key_deinit_copied(HashTableKey *key)
+{
+	free((char*) key->bytes);
+	key->bytes = NULL;
+	key->length = 0;
+	key->pre_hash = 0;
+}
+
+#define FAMILY_PRIME 0x1FFFFFFFFFFFFFFF
+
+// Picks random a in [1, FAMILY_PRIME) and b in [0, FAMILY_PRIME) using rand()
+inline static HashFamilyMember
+family_random_member(void)
+{
+	uint64_t n[2];
+	for (int i = 0; i < 2; ++i) {
+		n[i] = (uint64_t)((rand() / (double) RAND_MAX) * (FAMILY_PRIME - RAND_MAX) + rand()) % FAMILY_PRIME;
+	}
+	if (!n[0]) {
+		n[0] = 1;
+	}
+	return (HashFamilyMember) {
+		.a = n[0],
+		.b = n[1],
+	};
+}
+
+// Maps a key pre-hash through the chosen (a * x + b) mod p family member
+inline static size_t
+family_map(HashFamilyMember member, uint64_t pre_hash)
+{
+	if (!member.a) {
+		++member.a;
+	}
+	uint64_t hash = (pre_hash * member.a + member.b) % FAMILY_PRIME; // Integer overflows may influence the properties of this family
+	return hash;
+}
+
+// Keys are equal when pre-hash, length and bytes all match; two null keys compare equal
+bool
+hash_table_key_equals(const HashTableKey lhs, const HashTableKey rhs)
+{
+	if (lhs.pre_hash != rhs.pre_hash) {
+		return false;
+	}
+	if (lhs.length != rhs.length) {
+		return false;
+	}
+	if (lhs.bytes == rhs.bytes) {
+		return true;
+	}
+	if (!lhs.bytes || !rhs.bytes) {
+		return false;
+	}
+	return memcmp(lhs.bytes, rhs.bytes, lhs.length) == 0;
+}
+
+// Slot where probing for the key starts; ht->capacity must be non-zero
+inline static size_t
+initial_position(const HashTableKey key, const HashTableDynamicData * ht)
+{
+	uint64_t hash = family_map(ht->family_member, key.pre_hash);
+	return hash % ht->capacity;
+}
+
+// Linear probing: the next slot, wrapping around at modulo
+inline static size_t
+probe_next(size_t current_index, size_t base_index, size_t iteration, size_t modulo)
+{
+	(void) base_index;
+	(void) iteration;
+	++current_index;
+	current_index %= modulo;
+	return current_index;
+}
+
+// Allocates zeroed key and value arrays; on allocation failure the returned table has capacity 0 and NULL arrays
+static HashTableDynamicData
+create_table(size_t capacity, size_t value_size, HashFamilyMember family_member)
+{
+	HashTableDynamicData data = {
+		.value_array = NULL,
+		.capacity = 0,
+		.length = 0,
+		.key_array = NULL,
+		.family_member = family_member,
+		.value_size = value_size,
+	};
+	void *value_array = calloc(capacity, value_size);
+	if (!value_array) {
+		return data;
+	}
+	HashTableKeyEntry *key_array = T_ALLOC(capacity, HashTableKeyEntry);
+	if (!key_array) {
+		free(value_array);
+		return data;
+	}
+	data.value_array = value_array;
+	data.capacity = capacity;
+	data.key_array = key_array;
+	return data;
+}
+
+// Initializes *data as an empty table; value_deinit (may be NULL) is called on values that are overwritten or destroyed
+void
+hash_table_init_impl(HashTableDynamicData * data, size_t value_size, void (*value_deinit)(void*))
+{
+	*data = create_table(5, value_size, family_random_member());
+	data->value_deinit = value_deinit;
+}
+
+// Destroys stored values and key copies, frees both arrays and leaves the table empty with capacity 0
+void
+hash_table_deinit_impl(HashTableDynamicData * data)
+{
+	if (data->key_array && data->value_array && data->length) {
+		void (*value_deinit)(void*) = data->value_deinit;
+		for (size_t i = 0; i < data->capacity; ++i) {
+			if (!data->key_array[i].key.bytes) {
+				continue;
+			}
+			if (value_deinit) {
+				value_deinit((char*) data->value_array + (data->value_size * i));
+			}
+			hash_table_key_deinit_copied(&data->key_array[i].key);
+		}
+	}
+	free(data->key_array);
+	data->key_array = NULL;
+	free(data->value_array);
+	data->value_array = NULL;
+	data->capacity = 0;
+	data->length = 0;
+}
+
+// Re-inserts every entry into a fresh table with the given capacity and hash family member,
+// then replaces *old_ht with it; on allocation failure *old_ht is left untouched
+static void
+hash_table_rebuild(HashTableDynamicData * old_ht, size_t capacity, HashFamilyMember family_member)
+{
+	const size_t value_size = old_ht->value_size;
+	HashTableDynamicData new_ht = create_table(capacity, value_size, family_member);
+	if (!new_ht.key_array) {
+		return;
+	}
+
+	for (size_t i = 0; i < old_ht->capacity; ++i) {
+		if (!old_ht->key_array[i].key.bytes) {
+			continue;
+		}
+		if (hash_table_insert_impl(&new_ht, old_ht->key_array[i].key, (char*) old_ht->value_array + (i * value_size)) < 0) {
+			hash_table_deinit_impl(&new_ht);
+			return;
+		}
+	}
+
+	// Values were moved bytewise, so the old table must not deinitialize them
+	new_ht.value_deinit = old_ht->value_deinit;
+	old_ht->value_deinit = NULL;
+	// TODO avoid duplication-deletion of the keys
+	hash_table_deinit_impl(old_ht);
+	*old_ht = new_ht;
+}
+
+// Grows the capacity by about one half, keeping the hash function
+static void
+hash_table_grow(HashTableDynamicData * old_ht)
+{
+	const size_t capacity = old_ht->capacity;
+	hash_table_rebuild(old_ht, capacity + (capacity >> 1) + 1, old_ht->family_member);
+}
+
+// Rebuilds the table at the same capacity with a newly drawn hash family member
+static void
+hash_table_change_hash(HashTableDynamicData * old_ht)
+{
+	hash_table_rebuild(old_ht, old_ht->capacity, family_random_member());
+}
+
+// Inserts or overwrites the value for key, copying value_size bytes from value_ptr.
+// Returns the slot index of the entry, or -1 if the key is null or memory could not be allocated.
+HashTableIndex
+hash_table_insert_impl(HashTableDynamicData * ht, const HashTableKey key, const void * value_ptr)
+{
+	if (!key.bytes) {
+		return -1;
+	}
+	size_t capacity = ht->capacity;
+	if (ht->length + (ht->length >> 1) >= capacity) {
+		hash_table_grow(ht);
+		capacity = ht->capacity;
+	}
+	if (ht->length >= capacity) {
+		return -1;
+	}
+
+	const size_t base_index = initial_position(key, ht);
+	size_t current_index = base_index;
+	size_t collision_offset = 0;
+	bool found = false;
+	bool overwrite = false;
+	bool pre_hash_collision = false;
+	for (; collision_offset < capacity; current_index = probe_next(current_index, base_index, ++collision_offset, capacity)) {
+		if (!ht->key_array[current_index].key.bytes) {
+			found = true;
+			break;
+		}
+		if (hash_table_key_equals(key, ht->key_array[current_index].key)) {
+			overwrite = true;
+			found = true;
+			break;
+		}
+		if (!pre_hash_collision) {
+			pre_hash_collision = key.pre_hash == ht->key_array[current_index].key.pre_hash;
+		}
+	}
+	if (!found) {
+		return -1;
+	}
+
+	void *value_target_ptr = (char*) ht->value_array + (ht->value_size * current_index);
+	if (overwrite) {
+		// Deinitialize the old value
+		void (*value_deinit)(void*) = ht->value_deinit;
+		if (value_deinit) {
+			value_deinit(value_target_ptr);
+		}
+	} else {
+		// Make a local copy of the key; store nothing if that fails
+		const HashTableKey key_copy = hash_table_key_copy(key);
+		if (!key_copy.bytes) {
+			return -1;
+		}
+		ht->key_array[current_index].key = key_copy;
+		++ht->length; // Only a new entry changes the element count
+	}
+	if (ht->key_array[base_index].max_collision_offset < collision_offset) {
+		ht->key_array[base_index].max_collision_offset = collision_offset;
+	}
+	memcpy(value_target_ptr, value_ptr, ht->value_size); // Assign the value
+
+	if (!pre_hash_collision && (collision_offset << 1) > capacity + 6) {
+		hash_table_change_hash(ht);
+		// Rehashing moves the entries, so look up where this one ended up
+		return hash_table_find_impl(ht, key);
+	}
+
+	return current_index;
+}
+
+// Returns the slot index holding key, or -1 if the key is null, absent, or the table has no storage
+HashTableIndex
+hash_table_find_impl(const HashTableDynamicData * ht, const HashTableKey key)
+{
+	if (!key.bytes || !ht->key_array || !ht->capacity) {
+		return -1;
+	}
+
+	const size_t capacity = ht->capacity;
+	const size_t base_index = initial_position(key, ht);
+	const size_t max_collision_offset = ht->key_array[base_index].max_collision_offset;
+	size_t collision_offset = 0;
+	size_t current_index = base_index;
+
+	for (; collision_offset <= max_collision_offset; current_index = probe_next(current_index, base_index, ++collision_offset, capacity)) {
+		if (hash_table_key_equals(key, ht->key_array[current_index].key)) {
+			return current_index;
+		}
+	}
+
+	return -1;
+}
diff --git a/hash_table.h b/hash_table.h
new file mode 100644
index 0000000..dc29193
--- /dev/null
+++ b/hash_table.h
@@ -0,0 +1,69 @@
+#ifndef HASH_TABLE_H_
+#define HASH_TABLE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include "defs.h"
+
+// Fields shared by HashTableDynamicData and TYPED_HASH_TABLE; both must keep them in the same order
+#define HASH_TABLE_INTERFACE_FIELDS \
+	size_t capacity; \
+	size_t length; \
+	HashTableKeyEntry *key_array; \
+	HashFamilyMember family_member
+
+// Hash table with values of type T, overlaid on the untyped HashTableDynamicData used by the *_impl functions
+#define TYPED_HASH_TABLE(T) union { HashTableDynamicData as_HashTableDynamicData; struct { typeof(T) *value_array; HASH_TABLE_INTERFACE_FIELDS; void (*value_deinit)(typeof(T)*); }; }
+
+typedef ssize_t HashTableIndex; // Invalidated after hashtable modification
+
+typedef struct {
+	size_t length;
+	const char *bytes;
+	uint64_t pre_hash;
+} HashTableKey;
+
+typedef struct {
+	HashTableKey key;
+	size_t max_collision_offset; // Largest probe distance of any key whose initial slot is this one
+} HashTableKeyEntry;
+
+typedef struct {
+	uint64_t a, b;
+} HashFamilyMember;
+
+typedef struct {
+	void *value_array;
+	HASH_TABLE_INTERFACE_FIELDS;
+	void (*value_deinit)(void*);
+	size_t value_size;
+} HashTableDynamicData;
+
+HashTableKey hash_table_key_from_bytes(const char *bytes, size_t size);
+HashTableKey hash_table_key_copy(const HashTableKey old);
+void hash_table_key_deinit_copied(HashTableKey *key);
+bool hash_table_key_equals(const HashTableKey lhs, const HashTableKey rhs);
+
+// Key over a zero-terminated string (terminator excluded); a NULL string yields a null key
+__attribute__((unused)) inline static HashTableKey
+hash_table_key_from_cstr(const char *s)
+{
+	if (!s) {
+		return hash_table_key_from_bytes(NULL, 0);
+	}
+	return hash_table_key_from_bytes(s, strlen(s));
+}
+
+void hash_table_init_impl(HashTableDynamicData * dyndata, size_t value_size, void (*value_deinit)(void*));
+void hash_table_deinit_impl(HashTableDynamicData * dyndata);
+HashTableIndex hash_table_insert_impl(HashTableDynamicData * dyndata, const HashTableKey key, const void * value_ptr);
+HashTableIndex hash_table_find_impl(const HashTableDynamicData * dyndata, const HashTableKey key);
+
+#define hash_table_init(ht, value_deinit) hash_table_init_impl(&(ht)->as_HashTableDynamicData, sizeof(*(ht)->value_array), IMPLICIT_CAST(void(void*), void(typeof((ht)->value_array)), value_deinit))
+#define hash_table_deinit(ht) hash_table_deinit_impl(&(ht)->as_HashTableDynamicData)
+#define hash_table_insert(ht, key, value_ptr) hash_table_insert_impl(&(ht)->as_HashTableDynamicData, key, IMPLICIT_CAST(const void, const typeof(*(ht)->value_array), value_ptr))
+#define hash_table_find(ht, key) hash_table_find_impl(&(ht)->as_HashTableDynamicData, key)
+
+#endif /* end of include guard: HASH_TABLE_H_ */
diff --git a/main.c b/main.c
index b0873f5..258e025 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,5 @@
 #include "processing.h"
+#include "hash_table.h"
 #include "nodes/print.h"
 #include "nodes/getchar.h"
 #include "nodes/evdev.h"
@@ -33,6 +34,8 @@ main(int argc, char ** argv)
 	}
 
 	GraphNode **nodes = T_ALLOC(loaded_config.nodes.length, GraphNode*);
+	TYPED_HASH_TABLE(size_t) named_nodes;
+	hash_table_init(&named_nodes, NULL);
 	for (size_t i = 0; i < loaded_config.nodes.length; ++i) {
 		const char* type_name = loaded_config.nodes.items[i].type;
 		if (!type_name) {
@@ -55,6 +58,9 @@ main(int argc, char ** argv)
 			fprintf(stderr, "%ld \"%s\"\n", i, loaded_config.nodes.items[i].name);
 			exit(1);
 		}
+		if (loaded_config.nodes.items[i].name) {
+			hash_table_insert(&named_nodes, hash_table_key_from_cstr(loaded_config.nodes.items[i].name), &i);
+		}
 	}
 
 	GraphChannel *channels = T_ALLOC(loaded_config.channels.length, GraphChannel);
@@ -64,16 +70,12 @@ main(int argc, char ** argv)
 		node_names[0] = loaded_config.channels.items[i].from.name;
 		node_names[1] = loaded_config.channels.items[i].to.name;
 		for (int j = 0; j < 2; ++j) {
-			for (size_t k = 0; k < loaded_config.nodes.length; ++k) {
-				if (strcmp(loaded_config.nodes.items[k].name, node_names[j]) == 0) {
-					end_nodes[j] = nodes[k];
-					break;
-				}
-			}
-			if (!end_nodes[j]) {
+			HashTableIndex k = hash_table_find(&named_nodes, hash_table_key_from_cstr(node_names[j]));
+			if (k < 0) {
 				fprintf(stderr, "No node named \"%s\"\n", node_names[j]);
 				exit(1);
 			}
+			end_nodes[j] = nodes[named_nodes.value_array[k]];
 		}
 		graph_channel_init(&channels[i],
 				end_nodes[0], loaded_config.channels.items[i].from.index,
@@ -89,6 +91,7 @@ main(int argc, char ** argv)
 		process_iteration(&state);
 	}
 
+	hash_table_deinit(&named_nodes);
 	for (ssize_t i = loaded_config.nodes.length - 1; i >= 0; --i) {
 		graph_node_delete(nodes[i]);
 	}