While posting this, I noticed there are quite a few similar C hash table implementations here already. So I figured one more wouldn't hurt. :-) In any case, I'm writing a teaching article on "how to implement a hash table in C", and I'm wondering if I can get some feedback on my implementation (ht.h
and ht.c
, with a usage example in demo.c
). The goal is not maximum performance, but simplicity and good style.
This hash table is a very simple array of entries that uses open addressing and linear probing, and the FNV-1 hash function. The capacity is always a power of two, and it automatically expands and re-hashes when it's half full.
A few notes about the C API design:
- For simplicity, we use C-style NUL-terminated strings. I know there are more efficient approaches to string handling, but this fits with C's standard library.
- The
ht_set
function allocates and copies the key (if inserting for the first time). Usually you don't want the caller to have to worry about this, or ensuring the key memory stays around. Note thatht_set
returns a pointer to the duplicated key. This is mainly used as an "out of memory" error signal -- it returns NULL on failure. - Values can't be NULL. This makes the return value of
ht_get
slightly simpler (NULL means not present and can't mean a NULL value). I'm not sure whether this is a good idea or not. - There are various ways I could have done iteration. Using an explicit iterator type with a while loop seems simple and natural in C (see the example below). The value returned from
ht_iterator
is a value, not a pointer, both for efficiency and so the caller doesn't have to free anything. - There's no
ht_remove
to remove an item from the hash table. It wouldn't be hard to write, but I don't often need to remove items when using a hash table, so for simplicity I've left it out.
ht.h:
#ifndef _HT_H
#define _HT_H
#include <stdbool.h>
#include <stddef.h>
// Hash table entry (this is not actually part of the public API).
typedef struct {
char* key;
void* value;
} _ht_entry;
// Hash table structure: create with ht_create, free with ht_destroy.
typedef struct {
// Don't use these fields directly.
_ht_entry* _entries; // hash buckets (entry.key != NULL if populated)
size_t _capacity; // size of _entries array
size_t _length; // number of items in hash table
} ht;
// Hash table iterator: create with ht_iterator, iterate with ht_next.
typedef struct {
char* key; // current key
void* value; // current value
// Don't use these fields directly.
ht* _table; // reference to hash table being iterated
size_t _index; // current index into ht._entries
} hti;
// Create new hash table and return pointer to it, or NULL if out of
// memory.
ht* ht_create(void);
// Free memory allocated for hash table, including allocated keys.
void ht_destroy(ht* table);
// Get item with given key (NUL-terminated) from hash table. Return
// value (which was set with ht_set), or NULL if key not found.
void* ht_get(ht* table, const char* key);
// Set item with given key (NUL-terminated) to value (which must not
// be NULL). If not already present in table, key is copied to newly
// allocated memory (keys are freed automatically when ht_destroy is
// called). Return address of copied key, or NULL if out of memory.
const char* ht_set(ht* table, const char* key, void* value);
// Return number of items in hash table.
size_t ht_length(ht* table);
// Return new hash table iterator (for use with ht_next).
hti ht_iterator(ht* table);
// Move iterator to next item in hash table, update iterator's key
// and value to current item, and return true. If there are no more
// items, return false. Don't call ht_set during iteration.
bool ht_next(hti* it);
#endif // _HT_H
ht.c:
#include "ht.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define INITIAL_SIZE 8
static ht* _ht_create(size_t size) {
// Allocate space for hash table struct.
ht* table = malloc(sizeof(ht));
if (table == NULL) {
return NULL;
}
table->_length = 0;
table->_capacity = size * 2;
// Allocate (zero'd) space for entry buckets.
table->_entries = calloc(table->_capacity, sizeof(_ht_entry));
if (table->_entries == NULL) {
free(table); // error, free table before we return!
return NULL;
}
return table;
}
ht* ht_create(void) {
return _ht_create(INITIAL_SIZE);
}
void ht_destroy(ht* table) {
// First free allocated keys.
for (size_t i = 0; i < table->_capacity; i++) {
if (table->_entries[i].key != NULL) {
free(table->_entries[i].key);
}
}
// Then free entries array and table itself.
free(table->_entries);
free(table);
}
#define FNV_OFFSET 14695981039346656037UL
#define FNV_PRIME 1099511628211UL
// Return 64-bit FNV-1 hash for key (NUL-terminated). See description at:
// https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
static uint64_t _hash(const char* key) {
uint64_t hash = FNV_OFFSET;
for (const char* p = key; *p; p++) {
hash *= FNV_PRIME;
hash ^= (uint64_t)(*p);
}
return hash;
}
void* ht_get(ht* table, const char* key) {
// AND hash with capacity-1 to ensure it's within entries array.
uint64_t hash = _hash(key);
size_t index = (size_t)(hash & (uint64_t)(table->_capacity - 1));
// Loop till we find an empty entry.
while (table->_entries[index].key != NULL) {
if (strcmp(key, table->_entries[index].key) == 0) {
// Found key, return value.
return table->_entries[index].value;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= table->_capacity) {
// At end of entries array, wrap around.
index = 0;
}
}
return NULL;
}
// Expand hash table to twice its current size. Return true on success,
// false if out of memory.
static bool _ht_expand(ht* table) {
// Creating new table so we can use ht_set to move items into it.
ht* new_table = _ht_create(table->_capacity);
if (new_table == NULL) {
return false;
}
// Iterate entries, move all non-empty ones to new table's entries.
for (size_t i = 0; i < table->_capacity; i++) {
_ht_entry entry = table->_entries[i];
if (entry.key != NULL) {
const char* key = ht_set(new_table, entry.key, entry.value);
if (key == NULL) {
ht_destroy(new_table);
return false;
}
}
}
// Free old entries array and update this table's details.
free(table->_entries);
table->_entries = new_table->_entries;
table->_capacity = new_table->_capacity;
// Free new table structure (we've updated the existing one).
free(new_table);
return true;
}
const char* ht_set(ht* table, const char* key, void* value) {
assert(value != NULL);
if (value == NULL) {
return NULL;
}
// AND hash with capacity-1 to ensure it's within entries array.
uint64_t hash = _hash(key);
size_t index = (size_t)(hash & (uint64_t)(table->_capacity - 1));
// If length will exceed half of current capacity, expand it.
if (table->_length >= table->_capacity / 2) {
if (!_ht_expand(table)) {
return NULL;
}
// Recalculate index as capacity has changed.
index = (size_t)(hash & (uint64_t)(table->_capacity - 1));
}
// Loop till we find an empty entry.
while (table->_entries[index].key != NULL) {
if (strcmp(key, table->_entries[index].key) == 0) {
// Found key (it already exists), update value.
table->_entries[index].value = value;
return table->_entries[index].key;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= table->_capacity) {
// At end of entries array, wrap around.
index = 0;
}
}
// Didn't find key, allocate/copy key and insert it.
char* key_copy = strdup(key);
if (key_copy == NULL) {
return NULL;
}
table->_entries[index].key = key_copy;
table->_entries[index].value = value;
table->_length++; // be sure to update length
return key_copy;
}
size_t ht_length(ht* table) {
return table->_length;
}
hti ht_iterator(ht* table) {
hti it;
it._table = table;
it._index = 0;
return it;
}
bool ht_next(hti* it) {
// Loop till we've hit end of entries array.
ht* table = it->_table;
while (it->_index < table->_capacity) {
size_t i = it->_index;
it->_index++;
if (table->_entries[i].key != NULL) {
// Found next non-empty item, update iterator key and value.
_ht_entry entry = table->_entries[i];
it->key = entry.key;
it->value = entry.value;
return true;
}
}
return false;
}
And now for a simple example of usage:
demo.c:
#include "../ht.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
// Example:
// $ echo 'foo bar the bar bar bar the' | ./demo
// bar 4
// foo 1
// the 2
// 3
void exit_nomem(void) {
fprintf(stderr, "out of memory\n");
exit(1);
}
int main(void) {
ht* counts = ht_create();
if (counts == NULL) {
exit_nomem();
}
// Read next word from stdin (at most 100 chars long).
char word[101];
while (scanf("%100s", word) != EOF) {
// Look up word.
void* value = ht_get(counts, word);
if (value != NULL) {
// Already exists, increment int that value points to.
int* pcount = (int*)value;
(*pcount)++;
continue;
}
// Word not found, allocate space for new int and set to 1.
int* pcount = malloc(sizeof(int));
if (pcount == NULL) {
exit_nomem();
}
*pcount = 1;
if (ht_set(counts, word, pcount) == NULL) {
exit_nomem();
}
}
// Print out words and frequencies, freeing values as we go.
hti it = ht_iterator(counts);
while (ht_next(&it)) {
printf("%s %d\n", it.key, *(int*)it.value);
free(it.value);
}
// Show the number of unique words.
printf("%d\n", (int)ht_length(counts));
ht_destroy(counts);
return 0;
}
ht.c
, dropping the_
prefix on internal names, and fixing the sign extension issue in_hash
. Thanks for the feedback! \$\endgroup\$