Tag Archives: Huffman Coding

Huffman coding in C

Huffman coding is a compression method which generates variable-length codes for data – the more frequent the data item, the shorter the code generated. This allows more efficient compression than fixed-length codes. This is an implementation of the algorithm in C. The function huffman() takes arrays of letters and their frequencies, the length of the arrays, and a callback which is called for each code generated. The algorithm requires a priority queue, and I used a min-heap for the purpose.

#include <stdlib.h>

#include <minheap.h>

struct huffman_node {
    char data;
    unsigned int frequency;
    struct huffman_node *left;
    struct huffman_node *right;
};
typedef struct huffman_node huffman_node;

huffman_node *huffman_node_create(char data, unsigned int frequency)
{
    huffman_node *node = malloc(sizeof(huffman_node));
    if (node) {
        node->data = data;
        node->frequency = frequency;
        node->left = NULL;
        node->right = NULL;
    }
    return node;
}

void huffman_node_delete(huffman_node *node)
{
    if (node) {
        huffman_node_delete(node->left);
        huffman_node_delete(node->right);
        free(node);
    }
}

unsigned int max(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

unsigned int huffman_node_height(const huffman_node *node)
{
    unsigned int height = 0;
    if (node->left || node->right) {
        height = max(node->left ? huffman_node_height(node->left) : 0,
                node->right ? huffman_node_height(node->right) : 0) + 1;
    }
    return height;
}

void huffman_node_print(const huffman_node *node, unsigned int indent)
{
    unsigned int i;
    for (i = 0; i < indent; i++) {
        printf("  ");
    }
    printf("%c %u\n", node->data, node->frequency);
    if (node->left != NULL) {
        huffman_node_print(node->left, indent + 1);
    }
    if (node->right != NULL) {
        huffman_node_print(node->right, indent + 1);
    }
}

typedef void huffmanfn(char, const unsigned int *, size_t);

void huffman_node_encodings(const huffman_node *node, unsigned int *arr, 
        unsigned int pos, huffmanfn fun)
{
    if (node->left) {
        arr[pos] = 0;
        huffman_node_encodings(node->left, arr, pos + 1, fun);
    }
    if (node->right) {
        arr[pos] = 1;
        huffman_node_encodings(node->right, arr, pos + 1, fun);
    }
    if (!(node->left || node->right)) {
        fun(node->data, arr, pos);
    }
}

void huffman(const char *letters, const int *frequencies, size_t size, huffmanfn fun)
{
    minheap *heap = minheap_create();
    unsigned int i;
    huffman_node *top;
    unsigned int *arr;
    /* Populate the heap */
    for (i = 0; i < size; i++) {
        minheap_add(heap, huffman_node_create(letters[i], frequencies[i]), frequencies[i]);
    }
    /* Build the tree */
    while (minheap_get_count(heap) != 1) {
        huffman_node *left = minheap_remove_min(heap);
        huffman_node *right = minheap_remove_min(heap);
        top = huffman_node_create('$', left->frequency + right->frequency);
        top->left = left;
        top->right = right;
        minheap_add(heap, top, top->frequency);
    }
    top = minheap_remove_min(heap);
    /* Generate the encodings */
    arr = malloc(huffman_node_height(top) * sizeof(unsigned int));
    huffman_node_encodings(top, arr, 0, fun);
    /* Clean up */
    huffman_node_delete(top);
    free(arr);
    minheap_delete(heap);
}

Example program:

void print(char letter, const unsigned int *arr, size_t len)
{
    unsigned int i;
    printf("%c: ", letter);
    for (i = 0; i < len; i++) {
        printf("%u", arr[i]);
    }
    putchar('\n');
}

int main(void)
{
    char letters[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int frequencies[] = {45, 13, 12, 16, 9, 5};
    const size_t size = sizeof(letters) / sizeof(letters[0]);
    huffman(letters, frequencies, size, print);
    return 0;
}

The output:

a: 0
c: 100
b: 101
f: 1100
e: 1101
d: 111