Huffman coding in C

Huffman coding is a compression method which generates variable-length codes for data – the more frequent the data item, the shorter the code generated. This allows more efficient compression than fixed-length codes. This is an implementation of the algorithm in C. The function huffman() takes arrays of letters and their frequencies, the length of the arrays, and a callback which is called for each code generated. The algorithm requires a priority queue, and I used a min-heap for the purpose.

#include <stdlib.h>

#include <minheap.h>

struct huffman_node {
char data;
unsigned int frequency;
struct huffman_node *left;
struct huffman_node *right;
};
typedef struct huffman_node huffman_node;

huffman_node *huffman_node_create(char data, unsigned int frequency)
{
huffman_node *node = malloc(sizeof(huffman_node));
if (node) {
node->data = data;
node->frequency = frequency;
node->left = NULL;
node->right = NULL;
}
return node;
}

void huffman_node_delete(huffman_node *node)
{
if (node) {
huffman_node_delete(node->left);
huffman_node_delete(node->right);
free(node);
}
}

unsigned int max(unsigned int a, unsigned int b)
{
return a > b ? a : b;
}

unsigned int huffman_node_height(const huffman_node *node)
{
unsigned int height = 0;
if (node->left || node->right) {
height = max(node->left ? huffman_node_height(node->left) : 0,
node->right ? huffman_node_height(node->right) : 0) + 1;
}
return height;
}

void huffman_node_print(const huffman_node *node, unsigned int indent)
{
unsigned int i;
for (i = 0; i < indent; i++) {
printf("  ");
}
printf("%c %u\n", node->data, node->frequency);
if (node->left != NULL) {
huffman_node_print(node->left, indent + 1);
}
if (node->right != NULL) {
huffman_node_print(node->right, indent + 1);
}
}

typedef void huffmanfn(char, const unsigned int *, size_t);

void huffman_node_encodings(const huffman_node *node, unsigned int *arr,
unsigned int pos, huffmanfn fun)
{
if (node->left) {
arr[pos] = 0;
huffman_node_encodings(node->left, arr, pos + 1, fun);
}
if (node->right) {
arr[pos] = 1;
huffman_node_encodings(node->right, arr, pos + 1, fun);
}
if (!(node->left || node->right)) {
fun(node->data, arr, pos);
}
}

void huffman(const char *letters, const int *frequencies, size_t size, huffmanfn fun)
{
minheap *heap = minheap_create();
unsigned int i;
huffman_node *top;
unsigned int *arr;
/* Populate the heap */
for (i = 0; i < size; i++) {
}
/* Build the tree */
while (minheap_get_count(heap) != 1) {
huffman_node *left = minheap_remove_min(heap);
huffman_node *right = minheap_remove_min(heap);
top = huffman_node_create('\$', left->frequency + right->frequency);
top->left = left;
top->right = right;
}
top = minheap_remove_min(heap);
/* Generate the encodings */
arr = malloc(huffman_node_height(top) * sizeof(unsigned int));
huffman_node_encodings(top, arr, 0, fun);
/* Clean up */
huffman_node_delete(top);
free(arr);
minheap_delete(heap);
}


Example program:

void print(char letter, const unsigned int *arr, size_t len)
{
unsigned int i;
printf("%c: ", letter);
for (i = 0; i < len; i++) {
printf("%u", arr[i]);
}
putchar('\n');
}

int main(void)
{
char letters[] = {'a', 'b', 'c', 'd', 'e', 'f'};
int frequencies[] = {45, 13, 12, 16, 9, 5};
const size_t size = sizeof(letters) / sizeof(letters[0]);
huffman(letters, frequencies, size, print);
return 0;
}


The output:

a: 0
c: 100
b: 101
f: 1100
e: 1101
d: 111