Hash tables are a type of data structure that provides a mechanism to store and retrieve values based on a key. This is achieved using an array of lists, where each list is known as a ‘bucket’. In the event of collisions, when two different keys hash to the same bucket, we need to have a mechanism to distinguish between the two. One such mechanism is to use chaining, where each bucket points to a list of all entries that hash to the same bucket. This article presents a hash table implementation in C using singly-linked lists to manage such collisions.

Structure of the Hash Table

Here are the core components of our hash table:

  • hashnode: This represents a node in the singly-linked list;
  • hashtable: This is the main hash table structure. It contains an array of hashnode pointers (the ‘buckets’), along with metadata such as the total number of entries, the size of the table, and function pointers for hashing and comparison operations.

Key Functions

  • hashtable_create: Initializes a new hash table;
  • hashtable_add: Adds a new entry to the hash table;
  • hashtable_find: Finds an entry in the hash table based on its key;
  • hashtable_remove: Removes an entry from the hash table based on its key;
  • hashtable_empty: Empties the hash table, removing all entries;
  • hashtable_delete: Deallocates the hash table and all its contents;
  • hashtable_get_load_factor: Calculates the load factor of the hash table, which is a measure of how full the table is;
  • hashtable_for_each: Iterates over each entry in the hash table;
  • hashtable_set_hashfn: Sets a new hash function for the table.

Hashing Mechanism

The hash function used in this implementation is the sdbm hash function, which is a simple and effective string hashing function. However, the design allows for a custom hash function to be set, catering to different requirements.

persons with computer screens, surrounded by programming icons like HTML, CSS, and "phyton"

Example Program

Here’s a sample program:

#include <stdio.h>
#include <string.h>
 
#include <hashtable.h>
 
int main(void)
{
    hashtable * table;
    const char * result;
    unsigned int e;
    char * elements[] = {"A", "B", "C", "D", "E", "F"};
    const unsigned int n = sizeof(elements) / sizeof(const char*);
 
    table = hashtable_create(7, (hashtable_cmpfn)strcmp);
    for (e = 0; e < n; e++) {
        hashtable_add(table, elements[e]);
    }
    hashtable_for_each(table, (hashtable_forfn)puts);
    for (e = 0; e < n; e++) {
        result = hashtable_find(table, elements[e]);
        if (result) {
            printf("Found: %s\n", result);
        }
        else {
            printf("Couldn't find %s\n", elements[e]);
        }
    }
    printf("The load factor is %.2f\n", hashtable_get_load_factor(table));
    for (e = 0; e < n; e++) {
        result = hashtable_remove(table, elements[e]);
        if (result) {
            printf("Removed: %s\n", result);
        }
        else {
            printf("Couldn't remove %s\n", elements[e]);
        }
    }
    hashtable_delete(table);
 
    return 0;
}

The header file:

#include <stdlib.h>
 
#include <hashtable.h>
 
hashnode * hashnode_create(void * data)
{
    hashnode * node = malloc(sizeof(hashnode));
    if (node) {
        node->data = data;
        node->next = NULL;
    }
    return node;
}
 
void hashnode_delete(hashnode * node)
{
    free(node);
}
 
static unsigned long sdbm(const char *str)
{
    unsigned long hash = 0;
    int c;
 
    while ((c = *str++))
        hash = c + (hash << 6) + (hash << 16) - hash;
 
    return hash;
}
 
hashtable * hashtable_create(size_t size, hashtable_cmpfn compare)
{
    hashtable * table = malloc(sizeof (hashtable));
    if (table) {
        table->size = size;
        table->hash = (hashtable_hashfn)sdbm;
        table->compare = compare;
        table->count = 0;
        table->buckets = malloc(size * sizeof(hashnode *));
        if (table->buckets) {
            unsigned int b;
            for (b = 0; b < size; b++) {
                table->buckets[b] = NULL;
            }
        }
        else {
            free(table);
            table = NULL;
        }
    }
    return table;
}
 
void hashtable_empty(hashtable * table)
{
    unsigned int i;
    hashnode * temp;
    for (i = 0; i < table->size; i++) {
        hashnode * current = table->buckets[i];
        while (current != NULL) {
            temp = current->next;
            hashnode_delete(current);
            current = temp;
        }
        table->buckets[i] = NULL;
    }
    table->count = 0;
}
 
void hashtable_delete(hashtable * table)
{
    if (table) {
        hashtable_empty(table);
        free(table->buckets);
        free(table);
    }
}
 
void * hashtable_add(hashtable * table, void * data)
{
    const unsigned int bucket = table->hash(data) % table->size;
    void * found = NULL;
 
    if (table->buckets[bucket] == NULL) {
        /* An empty bucket */
        table->buckets[bucket] = hashnode_create(data);
    }
    else {
        unsigned int added = 0;
        hashnode * current, * previous = NULL;
        for (current = table->buckets[bucket]; current != NULL && !found && !added; current = current->next) {
            const int result = table->compare(current->data, data);
            if (result == 0) {
                /* Changing an existing entry */
                found = current->data;
                current->data = data;
            }
            else if (result > 0) {
                /* Add before current */
                hashnode * node = hashnode_create(data);
                node->next = current;
                if (previous == NULL) {
                    /* Adding at the front */
                    table->buckets[bucket] = node;
                }
                else {
                    previous->next = node;
                }
                added = 1;
            }
            previous = current;
        }
        if (!found && !added && current == NULL) {
            /* Adding at the end */
            previous->next = hashnode_create(data);
        }
    }
    if (found == NULL) {
        table->count++;
    }
 
    return found;
}
 
void * hashtable_find(const hashtable * table, const void * data)
{
    hashnode * current;
    const unsigned int bucket = table->hash(data) % table->size;
    void * found = NULL;
    unsigned int passed = 0;
    for (current = table->buckets[bucket]; current != NULL && !found && !passed; current = current->next) {
        const int result = table->compare(current->data, data);
        if (result == 0) {
            found = current->data;
        }
        else if (result > 0) {
            passed = 1;
        }
    }
    return found;
}
 
void * hashtable_remove(hashtable * table, const void * data)
{
    hashnode * current, * previous = NULL;
    const unsigned int bucket = table->hash(data) % table->size;
    void * found = NULL;
    unsigned int passed = 0;
 
    current = table->buckets[bucket];
    while (current != NULL && !found && !passed) {
        const int result = table->compare(current->data, data);
        if (result == 0) {
            found = current->data;
            if (previous == NULL) {
                /* Removing the first entry */
                table->buckets[bucket] = current->next;
            }
            else {
                previous->next = current->next;
            }
            hashnode_delete(current);
            table->count--;
        }
        else if (result > 0) {
            passed = 1;
        }
        else {
            previous = current;
            current = current->next;
        }
    }
    return found;
}
 
 
float hashtable_get_load_factor(const hashtable * table)
{
    unsigned int touched = 0;
    float loadfactor;
    unsigned int b;
    for (b = 0; b < table->size; b++) {
        if (table->buckets[b] != NULL) {
            touched++;
        }
    }
    loadfactor = (float)touched / (float)table->size;
    return loadfactor;
}
 
unsigned int hashtable_get_count(const hashtable * table)
{
    return table->count;
}
 
unsigned int hashtable_find_count(const hashtable *table)
{
    unsigned int b;
    const hashnode *node;
    unsigned int count = 0;
    for (b = 0; b < table->size; b++) {
        for (node = table->buckets[b]; node != NULL; node = node->next) {
            count++;
        }
    }
    return count;
}
 
void hashtable_for_each(const hashtable * table, hashtable_forfn fun)
{
    unsigned int b;
 
    for (b = 0; b < table->size; b++) {
        const hashnode *node;
        for (node = table->buckets[b]; node != NULL; node = node->next) {
            fun(node->data);
        }
    }
}
 
 
void hashtable_for_each2(const hashtable * table, hashtable_forfn2 fun, void *data)
{
    unsigned int b;
 
    for (b = 0; b < table->size; b++) {
        const hashnode *node;
        for (node = table->buckets[b]; node != NULL; node = node->next) {
            fun(node->data, data);
        }
    }
}
 
void hashtable_set_hashfn(hashtable * table, hashtable_hashfn hash)
{
    table->hash = hash;
}

Conclusion

Hash tables are an indispensable data structure with a wide variety of applications, from database indexing to caching. This implementation, using singly-linked lists for overflow chains, provides an effective method to handle collisions and offers flexibility with custom hash functions. The functions provided cater to most of the essential operations one might need to perform on a hash table. For those who wish to further explore the intricacies of data structures, our article on the graph data structure in C provides an in-depth look into another fundamental area of computational design and its applications.

Leave a Reply