hash table

Run Settings
LanguageC++
Language Version
Run Command
#include <string.h> #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> #define forI(index, length) \ for(uint64_t index = 0; index < (length); ++index) #define CONCAT_(a, b) a##b #define CONCAT(a, b) CONCAT_(a, b) #include "misc_string.hpp" #include "hash_table_core.hpp" #define VAR_NAME int #define VAR_TYPE int #include "hash_table.hpp" #define VAR_NAME float #define VAR_TYPE float #include "hash_table.hpp" int main() { hash_table_float test = hash_table_init_float(); test.insert(string_init("ter0"), 3.0f); test.insert(string_init("ter1"), 3.0f); test.insert(string_init("ter2"), 3.0f); test.insert(string_init("ter3"), 3.0f); test.insert(string_init("ter15"), 3.0f); test.insert(string_init("ter5"), 3.0f); test.insert(string_init("ter6"), 72.3f); bool ye; float teg = test.get(string_init("ter6"), &ye); assert(ye); printf("Value: %g\n", teg); hash_print_table(&test); printf("Table size: %llu\n", test.size ); test.remove(string_init("ter0")); test.remove(string_init("ter1")); test.remove(string_init("ter2")); test.remove(string_init("ter3")); test.remove(string_init("ter4")); test.remove(string_init("ter5")); test.remove(string_init("ter6")); hash_print_table(&test); return 0; }
struct string{ uint64_t length; char *data; }; string string_init(const char *in) { string result; result.length = strlen(in) + 1; result.data = (char *)malloc(result.length); assert(result.data); for(uint64_t i = 0; i < result.length; ++i) { result.data[i] = in[i]; } return result; }
#define GET_POINTER_TO_hash_key(ptr, size, index) \ ((hash_key_void *)((size_t)(ptr) + (size_t)(index) * (size_t)(size))) struct hash_key_void{ char *str; uint32_t hash; }; enum struct hash_slot_state{ empty, match, collision }; static uint32_t hash_func(char *str, uint64_t length) { uint32_t hash = 5381; forI(i, length) { hash = ((hash << 5) + hash) + (uint32_t)str[i]; } return hash; } static uint32_t hash_distance(uint64_t mask, uint32_t hash, uint32_t slot) { int32_t result = (int32_t)((slot & mask) - (hash & mask)); if(result < 0) result += mask + 1; return (uint32_t)result; } static hash_slot_state hash_find_slot(void *main_table, size_t size, uint64_t mask, uint32_t *slot, uint32_t hash, char *key) { uint32_t original_slot = *slot; uint32_t new_distance = 0; while(true) { hash_key_void *entry = GET_POINTER_TO_hash_key(main_table, size, *slot); uint32_t old_distance = new_distance; new_distance = hash_distance(mask, entry->hash, *slot); if(entry->str == 0) { return hash_slot_state::empty; } // Entry hash match else if(hash_distance(mask, entry->hash, original_slot) == 0) { break; } // Current entry need to fit inbetween the previous slot and this slot. else if(old_distance > new_distance) { return hash_slot_state::collision; } *slot = (*slot + 1) & mask; } while(true) { hash_key_void *entry = GET_POINTER_TO_hash_key(main_table, size, *slot); if(entry->str == 0) { return hash_slot_state::empty; } else if(hash == entry->hash && (key == entry->str || strcmp(key, entry->str) == 0)) { return hash_slot_state::match; } else if(hash_distance(mask, hash, entry->hash) != 0) { return hash_slot_state::collision; } *slot = (*slot + 1) & mask; } } // optional: hash_table_bool struct hash_internal_nil_type{}; #define VAR_NAME internal_bool_type #define VAR_TYPE hash_internal_nil_type #include "hash_table.hpp" struct hash_table_bool{ uint64_t size; // mask == mask - 1; Must be pow2 uint64_t count; hash_key_internal_bool_type *table; bool insert(string key) { bool exist; ((hash_table_internal_bool_type *)this)->insert(key, hash_internal_nil_type{}, &exist); return exist; } bool get(string key) { bool exist; ((hash_table_internal_bool_type *)this)->get(key, &exist); return exist; } bool remove(string key) { bool exist; ((hash_table_internal_bool_type *)this)->remove(key, &exist); return exist; } }; hash_table_bool hash_table_init_bool() { hash_table_internal_bool_type result = hash_table_init_internal_bool_type(); return *(hash_table_bool *)&result; }
#ifndef VAR_NAME #error Must define VAR_NAME #endif #ifndef VAR_TYPE #error Must define VAR_TYPE #endif #define hash_key CONCAT(hash_key_, VAR_NAME) struct hash_key{ char *str; uint32_t hash; VAR_TYPE value; }; #define hash_table CONCAT(hash_table_, VAR_NAME) struct hash_table; static void hash_insert(hash_table *main_table, char *key, VAR_TYPE value, uint32_t hash, bool *key_exist, bool call_from_resize); static VAR_TYPE hash_get(hash_table *main_table, char *key, uint32_t hash, bool *key_exist); static void hash_remove(hash_table *main_table, char *key, uint32_t hash, bool *key_exist); struct hash_table{ uint64_t size; // mask == mask - 1; Must be pow2 uint64_t count; hash_key *table; void insert(string key, VAR_TYPE value, bool *key_exist = 0) { hash_insert( this, key.data, value, hash_func(key.data, key.length), key_exist, false); } VAR_TYPE get(string key, bool *key_exist = 0) { return hash_get( this, key.data, hash_func(key.data, key.length), key_exist); } void remove(string key, bool *key_exist = 0) { hash_remove( this, key.data, hash_func(key.data, key.length), key_exist); } }; #define hash_table_init CONCAT(hash_table_init_, VAR_NAME) hash_table hash_table_init() { hash_table result = {4, 0}; result.table = (hash_key *)calloc(result.size, sizeof(hash_key)); assert(result.table); return result; } static void hash_resize(hash_table *main_table) { hash_table new_table = {}; if(main_table->count > 0.7 * main_table->size) { new_table.size = main_table->size * 2; } else if(main_table->size > 4 && main_table->count < 0.2 * main_table->size) { new_table.size = main_table->size / 2; } else { return; } new_table.table = (hash_key *)calloc(new_table.size, sizeof(hash_key)); assert(new_table.table); forI(i, main_table->size) { if(main_table->table[i].str) { hash_insert( &new_table, main_table->table[i].str, main_table->table[i].value, main_table->table[i].hash, 0, true); } } free(main_table->table); main_table->size = new_table.size; assert(main_table->count == new_table.count); main_table->table = new_table.table; } static void hash_insert(hash_table *main_table, char *key, VAR_TYPE value, uint32_t hash, bool *key_exist = 0, bool call_from_resize = false) { uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; hash_slot_state slot_state = hash_find_slot(main_table->table, sizeof(hash_key), mask, &slot, hash, key); // Move slots in front forwards if there's a collision. if(slot_state == hash_slot_state::collision) { uint64_t move_slot = slot; hash_key load_key[2] = {}; load_key[0] = main_table->table[move_slot]; bool break_after_this_loop = false; while(break_after_this_loop == false) { move_slot = (move_slot + 1) & mask; if(main_table->table[move_slot].str == 0) { break_after_this_loop = true; } load_key[1] = main_table->table[move_slot]; main_table->table[move_slot] = load_key[0]; load_key[0] = load_key[1]; } } // Add entry if(key_exist) *key_exist = true; if(slot_state != hash_slot_state::match) { if(key_exist) *key_exist = false; ++main_table->count; } main_table->table[slot] = hash_key{key, hash, value}; if(call_from_resize == false) hash_resize(main_table); } static VAR_TYPE hash_get(hash_table *main_table, char *key, uint32_t hash, bool *key_exist = 0) { uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; struct{VAR_TYPE value;} result = {}; // Universal init to zero. if(key_exist) *key_exist = false; hash_slot_state slot_state = hash_find_slot(main_table->table, sizeof(hash_key), mask, &slot, hash, key); if(slot_state == hash_slot_state::empty || slot_state == hash_slot_state::collision) { return result.value; } if(key_exist) *key_exist = true; result.value = main_table->table[slot].value; return result.value; } static void hash_remove(hash_table *main_table, char *key, uint32_t hash, bool *key_exist = 0) { uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; if(key_exist) *key_exist = false; hash_slot_state slot_state = hash_find_slot(main_table->table, sizeof(hash_key), mask, &slot, hash, key); if(slot_state == hash_slot_state::empty || slot_state == hash_slot_state::collision) { return; } // delete entry if(key_exist) *key_exist = true; --main_table->count; main_table->table[slot].str = 0; // Move slots in front back one slot if they can move closer to their original hashed slot. while(true) { uint32_t next_slot = (slot + 1) & mask; if(main_table->table[next_slot].str == 0) { break; } else if(hash_distance(mask, main_table->table[next_slot].hash, next_slot) == 0) { break; } main_table->table[slot] = main_table->table[next_slot]; main_table->table[next_slot].str = 0; slot = next_slot; } hash_resize(main_table); } void hash_print_table(hash_table *main_table) { bool first_value = true; forI(i, main_table->size) { if(main_table->table[i].str != 0) { //if(first_value == false) printf(", "); first_value = false; printf("index:%llu, distance:%i, slot:%llu hash:%u\n", i, hash_distance(main_table->size-1, main_table->table[i].hash, (uint32_t)i), main_table->table[i].hash & main_table->size-1, main_table->table[i].hash); } } printf("\n"); } void hash_test_integrity(hash_table *main_table) { bool found_first = false; uint64_t first_slot = 0; uint64_t last_slot = 0; uint64_t acc = 0; uint64_t mask = main_table->size - 1; uint64_t prev_desired_slot = 0; bool wrap_point_passed = false; forI(i, main_table->size) { if(main_table->table[i].str != 0) { uint64_t current_desired_slot = main_table->table[i].hash & mask; if(found_first == false) { first_slot = i; found_first = true; } last_slot = i; if(current_desired_slot < prev_desired_slot) { assert(wrap_point_passed == false); wrap_point_passed = true; } prev_desired_slot = current_desired_slot; ++acc; } } assert(acc == main_table->count); if(acc > 1) { uint64_t first_desired_slot = main_table->table[first_slot].hash & mask; uint64_t last_desired_slot = main_table->table[last_slot].hash & mask; if(wrap_point_passed == true) { assert(first_desired_slot >= last_desired_slot); } else { assert(first_desired_slot <= last_desired_slot); } } } #undef VAR_NAME #undef VAR_TYPE #undef hash_key #undef hash_return #undef hash_table
Editor Settings
Theme
Key bindings
Full width
Lines