Hash Table Grouped

Run Settings
LanguageC++
Language Version
Run Command
#include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <math.h> #include <string.h> #include <assert.h> #include "ev_small_function.hpp" #include "ev_alloc.hpp" #include "ev_string.hpp" #include "hash_table_core.hpp" #define VAR_NAME int #define VAR_TYPE int #include "hash_table.hpp" int main() { hash::Table__int a = hash::table_init__int(); a.insert(ev::string_init("Hei"), 32); a.insert(ev::string_init("Hallo"), 91); printf("%i", a.get(ev::string_init("Hei"))); return 0; }
/* -- Pseudo code concept of how insertion, get and removal is handled. Evine variation on Robin Hood hashing with Backwards Shift Deletion EvvoRHhwBSD insert(uint8_t hash) get(uint8_t hash) remove(uint8_t hash) S = Slot // Desired slot where search will start. F = Found // Key already exist N = Not Found // Key not found, Stops search I = Insert // Key is inserted M = Move // Key have moved R = Removed // Key have been removed Hash table empty, 16 Slots Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X X X X X X X X X X X X X Operation: insert(0x5B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X X X X X X X X 5B X X X X Operation: SNI^ insert(0x7B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X X X X X X X X 5B 7B X X X Operation: S^ NI^ insert(0xA4) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 5B 7B X X X Operation: SNI^ insert(0x7B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 5B 7B X X X Operation: S^ FI^ insert(0xBC) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 5B 7B BC X X Operation: S^ NI^ insert(0x9B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 5B 7B 9B BC X Operation: S^ NI^ M^ remove(0x5B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 7B 9B BC X X Operation: SFM^ M^ M^ R^ insert(0x1D) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 7B 9B BC 1D X Operation: S^ NI^ get(0x9B) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 7B 9B BC 1D X Operation: S^ F^ get(0xAC) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 7B 9B BC 1D X Operation: S^ N^ insert(0x5D) Slot: 0 1 2 3 4 5 6 7 8 9 A B C D E F Entries: X X X X A4 X X X X X X 7B 9B BC 1D 5D Operation: S^ NI^ A closer look a how these entries group up. The important part is how the hash keys is ordered. By keeping the hash grouped like this it reduces the probe length for the keys. Slot: 0 1 2 3 | 4 | 5 6 7 8 9 A | B | C | D | E F | | | \ \ \______ | | | \ \ \ | | | \ \ | Entries: X X X X |A4 | X X X X X X |7B 9B |BC |1D 5D | // Create "int" hash table #define VAR_NAME int #define VAR_TYPE int #include "hash_table.cpp" // Or a "int *" type #define VAR_NAME int_ptr #define VAR_TYPE int * #include "hash_table.cpp" struct hash::Table__bool{ uint64_t size; //<-- number of slots in the current table. uint64_t count; //<-- number of inserted elements. void *table; //<-- free this pointer to release memory. // Member Functions // Returns true if the key already exist. bool insert(ev::String key); bool get(ev::String key); bool remove(ev::String key); }; // Note the VAR_NAME you defined is concatenated to the end. struct hash::Table__##VAR_NAME{ uint64_t size; //<-- number of slots in the current table. uint64_t count; //<-- number of inserted elements. void *table; //<-- free this pointer to release memory. // Member Functions void insert(ev::String key, VAR_TYPE value, bool *key_exist); VAR_TYPE get(ev::String key, bool *key_exist); void remove(ev::String key, bool *key_exist); }; // Functions hash::Table__bool hash::table_init__bool(); hash::Table__##VAR_TYPE hash::table_init__##VAR_TYPE(); // Iterator. Note that the keys will only be "char *" and not "ev::String" // because the length is not stored in the table. forI(i, table_name->size) { if(table_name->table[i].str != 0) { char *key = table_name->table[i].str; VAR_TYPE value = table_name->table[i].value; // your code } } */ #define GET_POINTER_TO_hash_key(ptr, size, index) \ ((Hash_Key_Void *)((size_t)(ptr) + (size_t)(index) * (size_t)(size))) namespace hash{ struct Hash_Key_Void{ char *str; uint32_t hash; }; enum struct Hash_Slot_State{ empty, match, collision }; static uint32_t hash_func(char *str, uint64_t length) { uint32_t hash = 5381; forI(i, length) { hash = ((hash << 5) + hash) + (uint32_t)str[i]; } return hash; } static uint32_t distance(uint64_t mask, uint32_t hash, uint32_t slot) { int32_t result = (int32_t)((slot & mask) - (hash & mask)); if(result < 0) result += mask + 1; return (uint32_t)result; } static Hash_Slot_State find_slot(void *main_table, size_t size, uint64_t mask, uint32_t *slot, uint32_t hash, char *key) { uint32_t original_slot = *slot; uint32_t new_distance = 0; while(true) { Hash_Key_Void *entry = GET_POINTER_TO_hash_key(main_table, size, *slot); uint32_t old_distance = new_distance; new_distance = distance(mask, entry->hash, *slot); if(entry->str == 0) { return Hash_Slot_State::empty; } // Entry hash match else if(distance(mask, entry->hash, original_slot) == 0) { break; } // Current entry need to fit inbetween the previous slot and this slot. else if(old_distance > new_distance) { return Hash_Slot_State::collision; } *slot = (*slot + 1) & mask; } while(true) { Hash_Key_Void *entry = GET_POINTER_TO_hash_key(main_table, size, *slot); if(entry->str == 0) { return Hash_Slot_State::empty; } else if(hash == entry->hash && (key == entry->str || strcmp(key, entry->str) == 0)) { return Hash_Slot_State::match; } else if(distance(mask, hash, entry->hash) != 0) { return Hash_Slot_State::collision; } *slot = (*slot + 1) & mask; } } } namespace hash{ // optional: hash_table_bool struct hash_internal_nil_type{}; } #define VAR_NAME internal_bool_type #define VAR_TYPE hash_internal_nil_type #include "hash_table.hpp" namespace hash{ struct Table__bool{ uint64_t size; // mask == mask - 1; Must be pow2 uint64_t count; hash::Key__internal_bool_type *table; bool insert(ev::String key) { bool exist; ((hash::Table__internal_bool_type *)this)->insert(key, hash_internal_nil_type{}, &exist); return exist; } bool get(ev::String key) { bool exist; ((hash::Table__internal_bool_type *)this)->get(key, &exist); return exist; } bool remove(ev::String key) { bool exist; ((hash::Table__internal_bool_type *)this)->remove(key, &exist); return exist; } }; Table__bool table_init__bool() { hash::Table__internal_bool_type result = hash::table_init__internal_bool_type(); return *(Table__bool *)&result; } void test_print_table(Table__bool *main_table) { test_print_table((hash::Table__internal_bool_type *)main_table); } void test_integrity(Table__bool *main_table) { test_integrity((hash::Table__internal_bool_type *)main_table); } }
#ifndef VAR_NAME #error Must define VAR_NAME #endif #ifndef VAR_TYPE #error Must define VAR_TYPE #endif namespace hash{ #define Hash_Key CONCAT(Key__, VAR_NAME) struct Hash_Key{ char *str; uint32_t hash; VAR_TYPE value; }; #define Hash_Table CONCAT(Table__, VAR_NAME) struct Hash_Table; static void direct_insert(Hash_Table *main_table, char *key, VAR_TYPE value, uint32_t hash, bool *key_exist, bool call_from_resize); static VAR_TYPE direct_get(Hash_Table *main_table, char *key, uint32_t hash, bool *key_exist); static void direct_remove(Hash_Table *main_table, char *key, uint32_t hash, bool *key_exist); struct Hash_Table{ uint64_t size; // mask == mask - 1; Must be pow2 uint64_t count; Hash_Key *table; void insert(ev::String key, VAR_TYPE value, bool *key_exist = 0) { direct_insert( this, key.data, value, hash_func(key.data, key.length), key_exist, false); } VAR_TYPE get(ev::String key, bool *key_exist = 0) { return direct_get( this, key.data, hash_func(key.data, key.length), key_exist); } void remove(ev::String key, bool *key_exist = 0) { direct_remove( this, key.data, hash_func(key.data, key.length), key_exist); } }; #define hash_table_init CONCAT(table_init__, VAR_NAME) Hash_Table hash_table_init() { Hash_Table result = {4, 0}; result.table = (Hash_Key *)ev::z_alloc(result.size, sizeof(Hash_Key)); assert(result.table); return result; } static void check_resize(Hash_Table *main_table) { Hash_Table new_table = {}; if(main_table->count > 0.7 * main_table->size) { new_table.size = main_table->size * 2; } else if(main_table->size > 4 && main_table->count < 0.2 * main_table->size) { new_table.size = main_table->size / 2; } else { return; } new_table.table = (Hash_Key *)ev::z_alloc(new_table.size, sizeof(Hash_Key)); assert(new_table.table); forI(i, main_table->size) { if(main_table->table[i].str) { direct_insert( &new_table, main_table->table[i].str, main_table->table[i].value, main_table->table[i].hash, 0, true); } } main_table->size = new_table.size; assert(main_table->count == new_table.count); ev_free(&main_table->table); main_table->table = new_table.table; } static void direct_insert(Hash_Table *main_table, char *key, VAR_TYPE value, uint32_t hash, bool *key_exist = 0, bool call_from_resize = false) { assert(main_table); assert(main_table->table); assert(key); uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; Hash_Slot_State slot_state = find_slot(main_table->table, sizeof(Hash_Key), mask, &slot, hash, key); // Move slots in front forwards if there's a collision. if(slot_state == Hash_Slot_State::collision) { uint64_t move_slot = slot; Hash_Key load_key[2] = {}; load_key[0] = main_table->table[move_slot]; bool break_after_this_loop = false; while(break_after_this_loop == false) { move_slot = (move_slot + 1) & mask; if(main_table->table[move_slot].str == 0) { break_after_this_loop = true; } load_key[1] = main_table->table[move_slot]; main_table->table[move_slot] = load_key[0]; load_key[0] = load_key[1]; } } // Add entry if(key_exist) *key_exist = true; if(slot_state != Hash_Slot_State::match) { if(key_exist) *key_exist = false; ++main_table->count; } main_table->table[slot] = Hash_Key{key, hash, value}; if(call_from_resize == false) check_resize(main_table); } static VAR_TYPE direct_get(Hash_Table *main_table, char *key, uint32_t hash, bool *key_exist = 0) { assert(main_table); assert(main_table->table); assert(key); uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; struct{VAR_TYPE value;} result = {}; // Universal init to zero. if(key_exist) *key_exist = false; Hash_Slot_State slot_state = find_slot(main_table->table, sizeof(Hash_Key), mask, &slot, hash, key); if(slot_state == Hash_Slot_State::empty || slot_state == Hash_Slot_State::collision) { return result.value; } if(key_exist) *key_exist = true; result.value = main_table->table[slot].value; return result.value; } static void direct_remove(Hash_Table *main_table, char *key, uint32_t hash, bool *key_exist = 0) { assert(main_table); assert(main_table->table); assert(key); uint64_t mask = main_table->size - 1; uint32_t slot = hash & mask; if(key_exist) *key_exist = false; Hash_Slot_State slot_state = find_slot(main_table->table, sizeof(Hash_Key), mask, &slot, hash, key); if(slot_state == Hash_Slot_State::empty || slot_state == Hash_Slot_State::collision) { return; } // delete entry if(key_exist) *key_exist = true; --main_table->count; main_table->table[slot].str = 0; // Move slots in front back one slot if they can move closer to their original hashed slot. while(true) { uint32_t next_slot = (slot + 1) & mask; if(main_table->table[next_slot].str == 0) { break; } else if(distance(mask, main_table->table[next_slot].hash, next_slot) == 0) { break; } main_table->table[slot] = main_table->table[next_slot]; main_table->table[next_slot].str = 0; slot = next_slot; } check_resize(main_table); } void test_print_table(Hash_Table *main_table) { bool first_value = true; forI(i, main_table->size) { if(main_table->table[i].str != 0) { first_value = false; printf("Slot stored: %llu, Distance to desired: %i, Desired: %llu Hash: %u\n", i, distance(main_table->size-1, main_table->table[i].hash, (uint32_t)i), main_table->table[i].hash & main_table->size-1, main_table->table[i].hash); } } printf("\n"); } // Todo: verify the slot is correct in regards to desired hash slot. void test_integrity(Hash_Table *main_table) { bool found_first = false; uint64_t first_slot = 0; uint64_t last_slot = 0; uint64_t acc = 0; uint64_t mask = main_table->size - 1; uint64_t prev_desired_slot = 0; bool wrap_point_passed = false; forI(i, main_table->size) { if(main_table->table[i].str != 0) { uint64_t current_desired_slot = main_table->table[i].hash & mask; if(found_first == false) { first_slot = i; found_first = true; } last_slot = i; if(current_desired_slot < prev_desired_slot) { assert(wrap_point_passed == false); wrap_point_passed = true; } prev_desired_slot = current_desired_slot; ++acc; } } assert(acc == main_table->count); if(acc > 1) { uint64_t first_desired_slot = main_table->table[first_slot].hash & mask; uint64_t last_desired_slot = main_table->table[last_slot].hash & mask; if(wrap_point_passed == true) { assert(first_desired_slot >= last_desired_slot); } else { assert(first_desired_slot <= last_desired_slot); } } } } #undef VAR_NAME #undef VAR_TYPE #undef Hash_Key #undef Hash_Table
namespace ev{ struct{ size_t index; char *data; }global_string_memory; struct String{ uint64_t length; char *data; }; String string_init(const char *in) { String result; result.length = strlen(in) + 1; result.data = (char *)ev::g_alloc(result.length, 1); assert(result.data); for(uint64_t i = 0; i < result.length; ++i) { result.data[i] = in[i]; } return result; } }
namespace ev{ void *g_alloc(size_t size, size_t element_size) { void *result = malloc(size * element_size); assert(result); return result; } void *z_alloc(size_t size, size_t element_size) { void *result = calloc(size, element_size); assert(result); return result; } #define ev_free(in) ev::free_((void **)(in), *(in)) void free_(void **in, void *JUST_A_VOID_PTR_CHECKER_BECAUSE_YOU_CANT_IMPLICIT_CAST_TO_VOID_PTR_PTR) { free(*in); *in = 0; } }
#define forI(index, length) \ for(uint64_t index = 0; index < (length); ++index) #define CONCAT_(a, b) a##b #define CONCAT(a, b) CONCAT_(a, b) #define MIN(a, b) ((a)<(b)?(a):(b)) #define MAX(a, b) ((a)>(b)?(a):(b)) #define OFFSET_MEMBER(type, member) (&((type *)0)->member) #define KB(a) ((a)*0x400ULL) #define MB(a) ((a)*0x100000ULL) #define GB(a) ((a)*0x40000000ULL) #define TB(a) ((a)*0x10000000000ULL) namespace ev{ int64_t dual_uint32_t_to_int64_t_copy(uint32_t low, uint32_t high) { int64_t result = low; result += ((int64_t)high) << 32; assert(result >= 0); return result; } uint64_t ceil_align(uint64_t in, uint64_t ceil_val) { uint64_t result; if(in % ceil_val == 0) result = in; else result = (in / ceil_val) * ceil_val + ceil_val; return result; } uint64_t floor_align(uint64_t in, uint64_t floor_val) { return (in / floor_val) * floor_val; } void *ptr_mask(void *a, size_t mask) { return (void *)((size_t)a & mask); } }
Editor Settings
Theme
Key bindings
Full width
Lines