Tag Archives: Algorithms

Huffman coding in C

Huffman coding is a compression method which generates variable-length codes for data – the more frequent the data item, the shorter the code generated. This allows more efficient compression than fixed-length codes. This is an implementation of the algorithm in C. The function huffman() takes arrays of letters and their frequencies, the length of the arrays, and a callback which is called for each code generated. The algorithm requires a priority queue, and I used a min-heap for the purpose.

#include <stdlib.h>

#include <minheap.h>

struct huffman_node {
    char data;
    unsigned int frequency;
    struct huffman_node *left;
    struct huffman_node *right;
};
typedef struct huffman_node huffman_node;

huffman_node *huffman_node_create(char data, unsigned int frequency)
{
    huffman_node *node = malloc(sizeof(huffman_node));
    if (node) {
        node->data = data;
        node->frequency = frequency;
        node->left = NULL;
        node->right = NULL;
    }
    return node;
}

void huffman_node_delete(huffman_node *node)
{
    if (node) {
        huffman_node_delete(node->left);
        huffman_node_delete(node->right);
        free(node);
    }
}

unsigned int max(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

unsigned int huffman_node_height(const huffman_node *node)
{
    unsigned int height = 0;
    if (node->left || node->right) {
        height = max(node->left ? huffman_node_height(node->left) : 0,
                node->right ? huffman_node_height(node->right) : 0) + 1;
    }
    return height;
}

void huffman_node_print(const huffman_node *node, unsigned int indent)
{
    unsigned int i;
    for (i = 0; i < indent; i++) {
        printf("  ");
    }
    printf("%c %u\n", node->data, node->frequency);
    if (node->left != NULL) {
        huffman_node_print(node->left, indent + 1);
    }
    if (node->right != NULL) {
        huffman_node_print(node->right, indent + 1);
    }
}

typedef void huffmanfn(char, const unsigned int *, size_t);

void huffman_node_encodings(const huffman_node *node, unsigned int *arr, 
        unsigned int pos, huffmanfn fun)
{
    if (node->left) {
        arr[pos] = 0;
        huffman_node_encodings(node->left, arr, pos + 1, fun);
    }
    if (node->right) {
        arr[pos] = 1;
        huffman_node_encodings(node->right, arr, pos + 1, fun);
    }
    if (!(node->left || node->right)) {
        fun(node->data, arr, pos);
    }
}

void huffman(const char *letters, const int *frequencies, size_t size, huffmanfn fun)
{
    minheap *heap = minheap_create();
    unsigned int i;
    huffman_node *top;
    unsigned int *arr;
    /* Populate the heap */
    for (i = 0; i < size; i++) {
        minheap_add(heap, huffman_node_create(letters[i], frequencies[i]), frequencies[i]);
    }
    /* Build the tree */
    while (minheap_get_count(heap) != 1) {
        huffman_node *left = minheap_remove_min(heap);
        huffman_node *right = minheap_remove_min(heap);
        top = huffman_node_create('$', left->frequency + right->frequency);
        top->left = left;
        top->right = right;
        minheap_add(heap, top, top->frequency);
    }
    top = minheap_remove_min(heap);
    /* Generate the encodings */
    arr = malloc(huffman_node_height(top) * sizeof(unsigned int));
    huffman_node_encodings(top, arr, 0, fun);
    /* Clean up */
    huffman_node_delete(top);
    free(arr);
    minheap_delete(heap);
}

Example program:

void print(char letter, const unsigned int *arr, size_t len)
{
    unsigned int i;
    printf("%c: ", letter);
    for (i = 0; i < len; i++) {
        printf("%u", arr[i]);
    }
    putchar('\n');
}

int main(void)
{
    char letters[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int frequencies[] = {45, 13, 12, 16, 9, 5};
    const size_t size = sizeof(letters) / sizeof(letters[0]);
    huffman(letters, frequencies, size, print);
    return 0;
}

The output:

a: 0
c: 100
b: 101
f: 1100
e: 1101
d: 111

Subset-Sum using dynamic programming in C

The Subset-Sum problem is to determine, given a set of integers, whether there is a subset that sums to a given value. The problem is NP-complete, but can be solved in pseudo-polynomial time using dynamic programming.

Below is an implementation in C. The algorithm works by filling in a table. The rows of the table correspond to values from 0 to the target value, while the columns correspond to elements of the set. Each cell in the table repesents a solution for that row’s total using the elements chosen from the columns to the left. When the table has been filled, the truth value in the bottom-right-hand cell will indicate whether there is a successful solution. I have used chains of pointers within the table to allow the elements in the solution to be read back.

struct entry {
    unsigned int truth;
    int element;
    unsigned int count;
    struct entry *prev;
};
typedef struct entry entry;
 
unsigned int subset_sum(const unsigned int *weights, size_t len, unsigned int target,
        unsigned int **solution)
{
    entry **table;
    unsigned int i, j;
    entry *head;
    unsigned int count = 0;

    table = malloc((target + 1) * sizeof(entry *));
    for (i = 0; i <= target; i++) {
        table[i] = malloc((len + 1 ) * sizeof(entry));
    }
 
    for (i = 0; i <= target; i++) {
        for (j = 0; j <= len; j++) {
            /* First row */
            if (i == 0) {
                table[i][j].truth = 1;
                table[i][j].element = -1;
                table[i][j].count = 0;
                table[i][j].prev = NULL;
            }
            else if (j == 0) {
                /* First column */
                table[i][j].truth = 0;
                table[i][j].element = -1;
                table[i][j].count = 0;
                table[i][j].prev = NULL;
            }
            else {
                /* Initialise for don't add element case */
                table[i][j].truth = table[i][j-1].truth;
                table[i][j].prev = &table[i][j - 1];
                table[i][j].element = -1;
                table[i][j].count = table[i][j - 1].count;
                if (i >= weights[j-1]) {
                    /* Can add element */
                    if (!table[i][j].truth) {
                        /* Add element */
                        table[i][j].truth = table[i - weights[j-1]][j-1].truth;
                        table[i][j].element = j - 1;
                        table[i][j].count = table[i - weights[j-1]][j-1].count + 1;
                        table[i][j].prev = &table[i - weights[j-1]][j-1];
                    }
                }
            }
        }
    }
 
    if (!table[target][len].truth) {
        /* No solution */
        *solution = NULL;
    }
    else {
        /* Solution */
        *solution = calloc(len, sizeof(unsigned int));
    }
    if (*solution) {
        /* Read back elements */
        count = table[target][len].count;
        for (head = &table[target][len]; head != NULL; head = head->prev) {
            if (head->element != -1) {
                (*solution)[head->element]++;
            }
        }
    }
 
    for (i = 0; i <= target; i++) {
        free(table[i]);
    }
    free(table);

    return count;
}

Here is an example program:

int main(void)
{
    unsigned int weights[] = {5, 7, 1, 3, 11, 9};
    unsigned int target = 14;
    const size_t len = sizeof(weights) / sizeof(weights[0]);
    unsigned int *solution;
    const unsigned int count = subset_sum(weights, len, target, &solution);
    if (count) {
        unsigned int i;
        printf("Found a solution\n");
        printf("Elements:\n");
        for (i = 0; i < len; i++) {
            if (solution[i]) {
                printf("%u ", weights[i]);
            }
        }
        putchar('\n');
    }
    else {
        printf("No solution\n");
    }
    free(solution);

    return 0;
}

The output:

Found a solution
Elements:
3 11

Making change using dynamic programming in C

A coin system is canonical if the greedy algorithm for making change is optimal for all values. All coin systems in the world are canonical for obvious reasons. If a coin system is not canonical, the change-making problem becomes NP-hard. We can use dynamic programming to solve the change-making problem for abitrary coin systems.

This is another table-filling algorithm. The table has a column for every value from 0 to the target value, and a column for every coin. Each cell will be filled with the minimum number of the coins from that row upwards that are needed to make that value. When the table has been filled, the minimum number of coins for the target value can be read from the bottom-right-hand cell.

Below is an implemention in C. As with other dynamic programming algorithms, I have used pointers within the table to link each cell to the cell from which its value was derived. These pointers can then be used to read back how many of each coin was used in producing the solution. The function make_change() takes an array of coin values, its length, a target value, and the address of a pointer to which to assign the solution array. The solution array has an entry for each coin and indicates how many of each coin were used in the solution.

#include <stdlib.h>

struct change_entry {
    unsigned int count;
    int coin;
    struct change_entry *prev;
};
typedef struct change_entry change_entry;
 
unsigned int make_change(const unsigned int *coins, size_t len, unsigned int value,
        unsigned int **solution)
{
    unsigned int i, j;
    change_entry **table;
    unsigned int result;

    /* Allocate the table */ 
    table = malloc((len + 1) * sizeof(change_entry *));
    for (i = 0; i <= len; i++) {
        table[i] = malloc((value + 1) * sizeof(change_entry));
    }

    /* Calculate the values and build chains */ 
    for (i = 0; i <= len; i++) {
        for (j = 0; j <= value; j++) {
            if (i == 0) {
                /* Initialising the first row */
                table[i][j].count = j;
                table[i][j].coin = -1;
                table[i][j].prev = NULL;
            }
            else if (j == 0) {
                /* Initialising the first column */
                table[i][j].count = 0;
                table[i][j].coin = -1;
                table[i][j].prev = NULL;
            }
            else if (coins[i - 1] == j) {
                /* Can use coin */
                table[i][j].count = 1;
                table[i][j].coin = i - 1;
                table[i][j].prev = NULL;
            }
            else if (coins[i - 1] > j) {
                /* Can't use coin */
                table[i][j].count = table[i - 1][j].count;
                table[i][j].coin = -1;
                table[i][j].prev = &table[i - 1][j];
            }
            else {
                /* Can use coin - choose best solution */
                if (table[i - 1][j].count < table[i][j - coins[i - 1]].count + 1) {
                    /* Don't use coin */
                    table[i][j].count = table[i - 1][j].count;
                    table[i][j].coin = -1;
                    table[i][j].prev = &table[i - 1][j];
                }
                else {
                    /* Use coin */
                    table[i][j].count = table[i][j - coins[i - 1]].count + 1;
                    table[i][j].coin = i - 1;
                    table[i][j].prev = &table[i][j - coins[i - 1]];
                }
            }
        }
    }
    result = table[len][value].count;
    /* Read back the solution */
    *solution = calloc(len, sizeof(unsigned int));
    if (*solution) {
        change_entry *head;
        for (head = &table[len][value];
                head != NULL;
                head = head->prev) {
            if (head->coin != -1) {
                (*solution)[head->coin]++;
            }
        }
    }
    else {
        result = 0;
    }
    
    for (i = 0; i <= len; i++) {
        free(table[i]);
    }
    free(table);

    return result;
}
 

Here is an example program:

int main(void)
{
    unsigned int coins[] = {1, 2, 5, 10, 20, 50, 100};
    const size_t len = sizeof(coins) / sizeof(coins[0]);
    const unsigned int value = 252;
    unsigned int *solution;
    unsigned int result = make_change(coins, len, value, &solution);
    unsigned int i;
    printf("Number of coins: %u\n", result);
    printf("Coins used:\n");
    for (i = 0; i < len; i++) {
        if (solution[i] > 0) {
            printf("%u x %u\n", solution[i], coins[i]);
        }
    }
    free(solution);
    return 0;
}

The output:

Number of coins: 4
Coins used:
1 x 2
1 x 50
2 x 100

Partition Problem using dynamic programming in C

The Partition Problem is to determine if, given a set of positive integers, there is a subset that sums to precisely half of the sum of the set. For example, given the numbers from 1 to 7, their sum is 28, half of which is 14, and the subset (2, 3, 4, 5} sums to 14, so this is a positive instance. The partition problem is NP-complete, but can be solved in pseudo-polynomial time using dynamic programming.

The algorithm works by constructing an array with entries for every number from 0 to the sum of the set, and filling in the cells that are the sum of some subset. Once complete, the solution can be read back from the middle cell of the array, which corresponds to a subset that sums to half of the total sum. At first, only the 0 cell is filled in, because it is always possible to find a subset that sums to 0 — the empty set. After that, the cells to fill in are found by simulating repeated additions of the numbers from the set to the already-filled-in cells. For example, if the cell for some number \(m\) is already filled in, and the set contains the number \(n\), then the cell \(m + n\) can be filled in. When a cell is filled in, a pointer is set to point back to the number from which it has been derived so that the solution can be read back.

Below is the code in C. The function partition() takes an array of numbers, its length, and the address of a pointer to which to attach the solution array. It returns the number of elements in the solution subset if there is a partition, or 0 otherwise.

struct partition_entry {
    unsigned int value;
    unsigned int truth;
    unsigned int count;
    struct partition_entry *prev;
};
typedef struct partition_entry partition_entry;

unsigned int sum(const unsigned int *numbers, size_t len)
{
    unsigned int i, total = 0;
    for (i = 0; i < len; i++) {
        total += numbers[i];
    }
    return total;
}

int compare_unsigned_ints(const unsigned int *p1, const unsigned int *p2)
{
    if (*p1 > *p2) {
        return 1;
    }
    if (*p1 < *p2) {
        return -1;
    }
    return 0;
}

unsigned int min(unsigned int first, unsigned int second)
{
    if (first < second) {
        return first;
    }
    return second;
}

unsigned int partition(unsigned int *numbers, size_t n, unsigned int **solution)
{
    unsigned int total = sum(numbers, n);
    int i, j, rightmost = 0, size;
    partition_entry *partitions;
    if (total % 2 == 1) {
        /* Total is odd */
        *solution = NULL;
        return 0;
    }
    /* Initialise array */
    partitions = malloc((total + 1) * sizeof(partition_entry));
    if (partitions == NULL) {
        *solution = NULL;
        return 0;
    }
    for (i = 0; i <= total; i++) {
        partitions[i].value = i;
        partitions[i].truth = 0;
        partitions[i].count = 0;
        partitions[i].prev = NULL;
    }
    partitions[0].truth = 1;
    /* Sort the numbers */
    qsort(numbers, n, 
        sizeof(unsigned int), (int(*)(const void *, const void *))compare_unsigned_ints);
    /* Fill in the sums and build chains */
    for (i = 0; i < n; i++) {
        for (j = rightmost; j >= 0; j--) {
            if (partitions[j].truth && !partitions[j + numbers[i]].truth) {
                partitions[j + numbers[i]].truth = 1;
                partitions[j + numbers[i]].count = partitions[j].count + 1;
                partitions[j + numbers[i]].prev = &partitions[j];
            }
        }
        rightmost = min(total / 2, rightmost + numbers[i]);
    }
    if (partitions[total / 2].truth) {
        /* Success, read back the solution */
        partition_entry *ptr = &partitions[total / 2];
        unsigned int number = ptr->value;
        size = partitions[total / 2].count;
        *solution = malloc(size * sizeof(unsigned int));
        if (*solution) {
            for (i = 0, j = size - 1; i < size; i++, j--) {
                ptr = ptr->prev;
                (*solution)[j] = number - ptr->value;
                number = ptr->value;
            }
        }
        else {
            size = 0;
        }
    }
    free(partitions);
    return size;
}

Here is an example that looks for a partition of the numbers from 1 to 7:

int main(void)
{
    unsigned int numbers[] = {7, 6, 5, 4, 3, 2, 1};
    const size_t len = sizeof(numbers) / sizeof(numbers[0]);
    unsigned int *solution;
    unsigned int i;
    unsigned int size = partition(numbers, len, &solution);
    printf("Partition size: %d\n", size);
    for (i = 0; i < size; i++) {
        printf("%u ", solution[i]);
    }
    putchar('\n');
    free(solution);
    return 0;
}

The output:

Partition size: 4
2 3 4 5

Breadth-First Search of a graph in C

A graph

A breadth-first search (BFS) of a graph visits every neighbour of the current vertex before visiting the neighbours’ neighbours. This means that a breadth-first search extends radially from the starting vertex.

The algorithm is as follows:

  1. Create an empty set for the visited vertices
  2. Create a queue
  3. Put the starting vertex in the visited set
  4. Put the starting vertex in the queue
  5. While the queue is not empty:
    1. Remove a vertex from the queue
    2. Perform any visit actions on it
    3. For each of the vertex’s neighbours:
      • If the neighbour is not in the visited set:
        1. Put the neighbour in the visited set
        2. Add the neighbour to the queue

Here is an implementation in C using a cirque for the queue. The function breadth_first_search() takes a graph in edge list form, the number of edges (size), the number of vertices (order), and a callback function that is called for each vertex visited.

#include <stdlib.h>

#include <cirque.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

typedef void (*searchfn)(unsigned int);

void breadth_first_search(const edge *edges, unsigned int size, unsigned int order, searchfn fun)
{
    unsigned int start = 0;
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    cirque *queue = cirque_create();
    if (!(visited && queue)) {
        free(visited);
        cirque_delete(queue);
        return;
    }
    visited[start] = 1;
    cirque_insert(queue, &start);
    while (cirque_get_count(queue)) {
        unsigned int e;
        unsigned int *current = cirque_remove(queue);
        fun(*current);
        for (e = 0; e < size; e++) {
            if (edges[e].first == *current || edges[e].second == *current) {
                const unsigned int *neighbour = edges[e].first == *current ?
                    &edges[e].second : &edges[e].first;
                if (!visited[*neighbour]) {
                    visited[*neighbour] = 1;
                    cirque_insert(queue, (void*)neighbour);
                }
            }
        }
    }
    free(visited);
    cirque_delete(queue);
}

Here is an example program that constructs the graph shown at the top and then performs a BFS on it, printing the numbers of the vertices as they are visited.

#include <stdio.h>
#include <stdlib.h>



/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

void print(unsigned int vertex)
{
    printf("Visiting vertex %u\n", vertex);
}

int main(void)
{
    const unsigned int size = 12; /* Edges */
    const unsigned int order = 13; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 0, 4, &i);
    edge_connect(edges, 1, 5, &i);
    edge_connect(edges, 1, 6, &i);
    edge_connect(edges, 2, 7, &i);
    edge_connect(edges, 2, 8, &i);
    edge_connect(edges, 3, 9, &i);
    edge_connect(edges, 3, 10, &i);
    edge_connect(edges, 4, 11, &i);
    edge_connect(edges, 4, 12, &i);

    breadth_first_search(edges, size, order, print);

    free(edges);
    return 0;
}

The output:

Visiting vertex 0
Visiting vertex 1
Visiting vertex 2
Visiting vertex 3
Visiting vertex 4
Visiting vertex 5
Visiting vertex 6
Visiting vertex 7
Visiting vertex 8
Visiting vertex 9
Visiting vertex 10
Visiting vertex 11
Visiting vertex 12

Realising a graph degree sequence in C

An example graph

The degree sequence of a graph is the sequence of the degrees of each if its vertices. It is usually written in non-increasing order. For example, the degree sequence of the graph shown above is <4, 2, 2, 2, 2, 1, 1, 1, 1>. It’s easy to find the degree sequence of a graph – count the number of times each vertex occurs as an endpoint in the edge list, and then sort the results. The following C function will return the degree sequence of graph in edge list form:

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

int compare_unsigned_ints_decreasing(const unsigned int *p1, const unsigned int *p2)
{
    if (*p1 > *p2) {
        return -1;
    }
    if (*p1 < *p2) {
        return 1;
    }
    return 0;
}

unsigned int *degree_sequence(const edge *edges, unsigned int size, unsigned int order)
{
    unsigned int *sequence = calloc(order, sizeof(unsigned int));
    if (sequence != NULL) {
        unsigned int e;
        for (e = 0; e < size; e++) {
            sequence[edges[e].first]++;
            sequence[edges[e].second]++;
        }
    }
    qsort(sequence, order, sizeof(unsigned int),
            (int(*)(const void *, const void *))compare_unsigned_ints_decreasing);
    return sequence;
}

The sum of the numbers in a graph’s degree sequence will divide by 2 because every edge has 2 endpoints, but other than that it isn’t easy to tell if a given sequence of integers can be the degree sequence of a graph. A sequence that can be the degree sequence of a graph is called a graphic sequence. One way to find out if a sequence is graphic is to simulate constructing a graph out of it, treating the vertex degrees as a resource to use up by adding edges. This process is called realising the sequence. There is an algorithm by Havel and Hakimi to do this, which goes as follows:

  1. While there are non-0 numbers in the degree sequence:
    1. Connect the first vertex to as many of the following vertices as its degree, decrementing their degrees accordingly
    2. Make the first vertex’s degree 0 – it is now finished with
    3. Sort the vertices in descending order
  2. If at any point you run out of vertices because you need to decrement a 0-degree vertex or there are not enough vertices in the sequence, then the sequence is not graphical

There are many possible graphs for any given graphical degree sequence, so given the degree sequence of a graph the algorithm will not in general reconstruct the same graph. For example, given the degree sequence of the graph at the top, the algorithm will produce this graph:

Another graph with the same degree sequence

Below is an implementation of the Havel-Hakimi algorithm in C. It takes a degree sequence and it length, a callback function to call when each edge would be added, and a void pointer for user data to be passed to the callback. It doesn’t actually construct a graph data structure but instead leaves it to the client to realise edge-addition events in any way it sees fit.

#include <stdlib.h>

typedef struct {
    unsigned int id;
    unsigned int degree;
} vertex;

int compare_vertices_decreasing(const vertex *v1, const vertex *v2)
{
     return compare_unsigned_ints_decreasing(&v1->degree,
             &v2->degree);
}

unsigned int sum(const unsigned int *numbers, size_t len)
{
    unsigned int i, total = 0;
    for (i = 0; i < len; i++) {
        total += numbers[i];
    }
    return total;
}

typedef void(*realisefn)(unsigned int, unsigned int, void *);

unsigned int realise_sequence(const unsigned int *degrees, size_t len, realisefn fun, void *data)
{
    unsigned int i;
    unsigned int failed = 0;
    unsigned int unused_vertices = len;
    vertex *vertices;
    /* Check for even sum of degrees */
    if (sum(degrees, len) % 2 != 0) {
        return 0;
    }
    /* Initialise vertices */
    vertices = malloc(len * sizeof(vertex));
    if (vertices == NULL) {
        return 0;
    }
    for (i = 0; i < len; i++) {
        vertices[i].id = i;
        vertices[i].degree = degrees[i];
    }
    /* Main loop */
    while (unused_vertices > 0 && !failed) {
        /* Sort vertices in decreasing order of degree */
        qsort(vertices, len, sizeof(vertex), 
                (int(*)(const void *, const void *))compare_vertices_decreasing);
        /* Connect the first vertex to as many others as its degree */
        for (i = 1; i <= vertices[0].degree && !failed; i++) {
            if (i < len && vertices[i].degree > 0) {
                vertices[i].degree -= 1;
                /* Call callback */
                fun(vertices[0].id, vertices[i].id, data);
                if (vertices[i].degree == 0) {
                    unused_vertices--;
                }
            }
            else {
                /* Ran out of vertices */            
                failed = 1;
            }
        }
        /* Finished with first vertex */
        vertices[0].degree = 0;
        unused_vertices--;
    }
    free(vertices);
    return !failed;
}

Below is an example program. It constructs the graph shown at the top, gets its degree sequence, and then passes this to the algorithm, along with a callback and data that will construct the realisation of the degree sequence. The degree sequence of the new graph is then retrieved and is shown to be the same as that of the original graph.

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}


/* Structure to hold edge list and keep track of current edge */
typedef struct {
    edge *edges;
    unsigned int i;
} edge_list;

/* Callback function to connect two edges */
void connect(unsigned int v1, unsigned int v2, edge_list *graph)
{
    edge_connect(graph->edges, v1, v2, &graph->i);
}

int main(void)
{
    const unsigned int size = 8; /* Edges */
    const unsigned int order = 9; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int *sequence;
    unsigned int i = 0;
    edge_list graph;

    /* Construct the graph */
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 0, 4, &i);
    edge_connect(edges, 1, 5, &i);
    edge_connect(edges, 2, 6, &i);
    edge_connect(edges, 3, 7, &i);
    edge_connect(edges, 4, 8, &i);

    /* Get the degree sequence */
    sequence = degree_sequence(edges, size, order);
    for (i = 0; i < order; i++) {
        printf("%u ", sequence[i]);
    }
    putchar('\n');
    
    /* Realise the sequence */
    graph.edges = edges;
    graph.i = 0;
    realise_sequence(sequence, order, (realisefn)connect, &graph);

    /* Get the degree sequence again */
    free(sequence);
    sequence = degree_sequence(edges, size, order);
    for (i = 0; i < order; i++) {
        printf("%u ", sequence[i]);
    }
    putchar('\n');

    free(sequence);
    free(edges);
    return 0;

Output:

4 2 2 2 2 1 1 1 1
4 2 2 2 2 1 1 1 1

Topological sort in C

A digraph

A topological sort of a directed graph is an ordering of the vertices such that the starting vertex of all arcs occurs before its ending vertex. Only graphs without cycles can be topologically sorted, and attempting to topologically sort a digraph is one way of finding out if it is a directed acyclic graph (DAG).

There is a simple linear time algorithm for topologically sorting a graph:

  1. Make a collection of all of the vertices that have no incoming arcs (in-degree 0)
  2. Make a collection of all of the arcs
  3. Make an empty collection for the sorted sequence
  4. While the vertices collection is not empty:
    1. Remove the first vertex from the vertices collection
    2. Add it to the sorted sequence
    3. Remove all arcs from it to its neighbours
    4. If any of those neighbours now have no incoming arcs, add them to the vertices collection
  5. If the arcs collection is now empty, the graph is acyclic, and the sorted sequence contains a topological sort

Note that it doesn’t matter what kind of collection the vertices collection is – it can be a set, queue, or stack – the constraint of only adding vertices that have no incoming arcs ensures that the ordering produced will be valid. This means that a graph will in general have several valid topological sorts.

Below is an implementation of the algorithm in C. The input is a graph in arc list form, the number of arcs (size), the number of vertices (order), and the address of a pointer to which to assign the array of sorted vertices. Within the algorithm, the vertices collection is a set implemented as an array of integers used as booleans to indicate whether a given vertex is in the set or not. The function returns a boolean value to indicate whether or not the graph is acyclic.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} arc;

/* Find out if a vertex has no incoming arcs */
static unsigned int is_root(const arc *graph, const unsigned int *arcs, unsigned int size,
        unsigned int v)
{
    unsigned int a, root = 1;
    for (a = 0; a < size && root; a++) {
        root = !arcs[a] || graph[a].second != v;
    }
    return root;
}

/* Get the vertices with no incoming arcs */
static unsigned int get_roots(const arc *graph, const unsigned int *arcs, unsigned int size,
        unsigned int order, unsigned int *vertices)
{
    unsigned int v, vertices_size = 0;
    for (v = 0; v < order; v++) {
        if (is_root(graph, arcs, size, v)) {
            vertices[v] = 1;
            vertices_size++;
        }
    }
    return vertices_size;
}

unsigned int topological_sort(const arc *graph, unsigned int size, unsigned int order, 
        unsigned int **sorted)
{
    unsigned int *vertices = calloc(order, sizeof(unsigned int));
    unsigned int *arcs = malloc(size * sizeof(unsigned int));
    *sorted = malloc(order * sizeof(unsigned int));
    unsigned int v, a, vertices_size, sorted_size = 0,
            arcs_size = size;
    if (!(vertices && arcs && *sorted)) {
        free(vertices);
        free(arcs);
        free(*sorted);
        *sorted = NULL;
        return 0;
    }
    /* All arcs start off in the graph */
    for (a = 0; a < size; a++) {
        arcs[a] = 1;
    }
    /* Get the vertices with no incoming edges */
    vertices_size = get_roots(graph, arcs, size, order, vertices);
    /* Main loop */
    while (vertices_size > 0) {
        /* Get first vertex */
        for (v = 0; vertices[v] != 1; v++);
        /* Remove from vertex set */
        vertices[v] = 0;
        vertices_size--;
        /* Add it to the sorted array */
        (*sorted)[sorted_size++] = v;
        /* Remove all arcs connecting it to its neighbours */
        for (a = 0; a < size; a++) {
            if (arcs[a] && graph[a].first == v) {
                arcs[a] = 0;
                arcs_size--;
                /* Check if neighbour is now a root */
                if (is_root(graph, arcs, size, graph[a].second)) {
                    /* Add it to set of vertices */
                    vertices[graph[a].second] = 1;
                    vertices_size++;
                } 
            }
        }
    }
    free(vertices);
    free(arcs);
    return arcs_size == 0;
}

Here is an example program that topologically sorts the graph shown in the picture at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void arc_connect(arc *arcs, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 8; /* Arcs */
    const unsigned int order = 8; /* Vertices */
    arc *arcs = malloc(size * sizeof(arc));
    unsigned int i = 0;
    unsigned int *sorted;
    unsigned int acyclic;

    arc_connect(arcs, 0, 2, &i);
    arc_connect(arcs, 1, 3, &i);
    arc_connect(arcs, 2, 3, &i);
    arc_connect(arcs, 2, 4, &i);
    arc_connect(arcs, 2, 5, &i);
    arc_connect(arcs, 3, 6, &i);
    arc_connect(arcs, 5, 7, &i);
    arc_connect(arcs, 5, 7, &i);

    acyclic = topological_sort(arcs, size, order, &sorted);
    printf("Graph is acyclic: %u\n", acyclic);
    for (i = 0; i < order; i++) {
        printf("%u ", sorted[i]);
    }
    putchar('\n');

    free(sorted);
    free(arcs);

    return 0;
}

The output:

Graph is acyclic: 1
0 1 2 3 4 5 6 7

0-1 Knapsack using dynamic programming in C

Recall that the 0-1 Knapsack problem is to fill a knapsack of given capacity with items of given weights and values in order to maximise the value of the knapsack’s contents. This can be solved in pseudo-polynomial time using dynamic programming.

This is another table-filling algorithm. The rows of the table represent items, and the columns are weights of the knapsack.

struct knapsack_entry {
    unsigned int value;
    unsigned int item;
    struct knapsack_entry *prev;
};
typedef struct knapsack_entry knapsack_entry;
 
unsigned int knapsack(const unsigned int *weights, const unsigned int *values, size_t n,
        unsigned int capacity, unsigned int **solution)
{
    unsigned int i, j;
    knapsack_entry **table;
    unsigned int result;
    knapsack_entry *head;

    /* Allocate the table */ 
    table = malloc((n + 1) * sizeof(knapsack_entry *));
    for (i = 0; i <= n; i++) {
        table[i] = malloc((capacity + 1) * sizeof(knapsack_entry));
    }

    /* Calculate the values and build chains */ 
    for (i = 0; i <= n; i++) {
        for (j = 0; j <= capacity; j++) {
            if (i == 0 || j == 0) {
                /* Initialising the first row or column */
                table[i][j].value = 0;
                table[i][j].item = 0;
                table[i][j].prev = NULL;
            }
            else if (weights[i - 1] <= j) {
                /* Can add item */
                if (values[i - 1] + table[i - 1][j - weights[i - 1]].value 
                        > table[i - 1][j].value) {
                    /* Add item */
                    table[i][j].value = values[i - 1] + table[i - 1][j - weights[i - 1]].value;
                    table[i][j].item = i - 1;
                    table[i][j].prev = &table[i - 1][j - weights[i - 1]];
                }
                else {
                    /* Don't add item */
                    table[i][j].value = table[i - 1][j].value;
                    table[i][j].item = table[i - 1][j].item;
                    table[i][j].prev = table[i - 1][j].prev;
                }
            }
            else {
                /* Don't add item */
                table[i][j].value = table[i - 1][j].value;
                table[i][j].item = table[i - 1][j].item;
                table[i][j].prev = table[i - 1][j].prev;
            }
        }
    }

    /* Read back the solution */
    *solution = calloc(n, sizeof(unsigned int));
    for (i = 0, head = &table[n][capacity];
            head->prev != NULL;
            head = head->prev, i++) {
        (*solution)[head->item] = 1;
    }
 
    result = table[n][capacity].value;
    for (i = 0; i <= n; i++) {
        free(table[i]);
    }
    free(table);
    return result;
}

Here is an example program:

int main(void)
{
    unsigned int weights[] = {2, 3, 4, 5, 9};
    unsigned int values[] = {3, 4, 5, 8, 10};
    const unsigned int capacity = 20;
    const size_t n = sizeof(weights) / sizeof(weights[0]);
    unsigned int *solution;
    unsigned int value = knapsack(weights, values, n, capacity, &solution);
    unsigned int i;
    printf("Value: %u\n", value);
    printf("Items in knapsack:\n");
    for (i = 0; i < n; i++) {
        if (solution[i]) {
            printf("Item %u with weight %u and value %u\n", i, weights[i], values[i]);
        }
    }
    free(solution);
    return 0;
}

The output:

Value: 26
Items in knapsack:
Item 0 with weight 2 and value 3
Item 2 with weight 4 and value 5
Item 3 with weight 5 and value 8
Item 4 with weight 9 and value 10

Greedy Vertex Cover in C

A graph showing a vertex cover

The Vertex Cover Problem is to find a subset of the vertices of a graph that contains an endpoint of every edge. For example, in the graph shown above, the subset (0, 1) highlighted in red is a vertex cover. The minimisation problem of finding the smallest vertex cover is NP-hard. However a good approximation can be achieved using a greedy algorithm. The algorithm is simply:

  • While there are uncovered edges:
    1. Find the first uncovered edge
    2. Put one of its endpoints in the covering set
    3. Mark all of the edges incident upon that endpoint as covered

Below is an implementation in C. The function vertex_cover() takes a graph in edge list format, its size (number of edges) and order (number of vertices), and the address of a pointer through which to return the covering set as an out parameter. The return value is the size of the covering set.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

unsigned int vertex_cover(const edge *edges, unsigned int size, unsigned int order, 
        unsigned int **cover)
{
    unsigned int uncovered_size = size;
    unsigned int cover_size = 0;
    unsigned int *covered = calloc(size, sizeof(unsigned int));
    *cover = calloc(order, sizeof(unsigned int));
    if (covered == NULL || cover == NULL) {
        free(covered);
        free(*cover);
        *cover = NULL;
        return 0;
    }
    while (uncovered_size > 0) {
        unsigned int e, v;
        /* Find the first edge that hasn't been covered */
        for (e = 0; e < size && covered[e] == 1; e++);
        /* Add its first endpoint to the covering set */
        v = edges[e].first;
        (*cover)[v] = 1;
        cover_size++;
        /* Mark all uncovered edges containing its first endpoint as covered */
        for (e = 0; e < size; e++) {
            if (!covered[e] && (edges[e].first == v || edges[e].second == v)) {
                covered[e] = 1;
                uncovered_size--;
            }
        }
    }
    free(covered);
    return cover_size;
}

Here is an example program that finds a vertex cover on the graph shown above:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 4; /* Edges */
    const unsigned int order = 5; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 1, 4, &i);
    unsigned int *cover;
    unsigned int c = vertex_cover(edges, size, order, &cover);
    printf("Cover size is %u\n", c);
    printf("Vertices in cover:\n");
    for (i = 0; i < order; i++) {
        if (cover[i]) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(edges);
    free(cover);
    return 0;
}

The output:

Cover size is 2
Vertices in cover:
0 1

0-1 Knapsack using backtracking in C

In the 0-1 Knapsack problem, we are given a set of items with individual weights and profits and a container of fixed capacity (the knapsack), and are required to compute a loading of the knapsack with items such that the total profit is maximised. We only have 1 of each item, so there is either 0 or 1 of each item in in the knapsack, hence the 0-1 in the name of the problem.

The 0-1 knapsack problem is NP-hard, but can be solved quite efficiently using backtracking. Below is a backtracking implementation in C. The function knapsack() takes arrays of weights, and profits, their size, the capacity, and the address of a pointer through which the solution array is returned. It returns the profit of the best knapsack. The profit density of each item (weight / profit) is first calculated, and the knapsack is filled in order of decreasing profit density. A bounding function is used to determine the upper bound on the profit achievable with the current knapsack so as to reduce the search space.

#include <stdlib.h>
#include <string.h>

/* A knapsack item */
typedef struct {
    unsigned int id;
    double weight;
    double profit;
    double profit_density;
} item;

/* Compare items by lesser profit density */
static int compare_items(const item *item1, const item *item2)
{
    if (item1->profit_density > item2->profit_density) {
        return -1;
    }
    if (item1->profit_density < item2->profit_density) {
        return 1;
    }
    return 0;
}

/* Bounding function */
static double profit_bound(const item *items, size_t n, double capacity,
        double current_weight, double current_profit,
        unsigned int level)
{
    double remaining_capacity = capacity - current_weight;
    double bound = current_profit;
    unsigned int lvl = level;
    /* Fill in order of decreasing profit density */
    while (lvl < n &&
        items[lvl].weight <= remaining_capacity) 
    {
        remaining_capacity -= items[lvl].weight;
        bound += items[lvl].profit;
        lvl++;
    }
    /* Pretend we can take a fraction of the next object */
    if (lvl < n) {
        bound += items[lvl].profit_density
            * remaining_capacity;
    }
    return bound;
}

static void knapsack_recursive(const item *items, size_t n, double capacity,
        unsigned int *current_knapsack, double *current_weight, double *current_profit,
        unsigned int *max_knapsack, double *max_profit, unsigned int level)
{   
    if (level == n) {
        /* Found a new max knapsack */
        *max_profit = *current_profit;
        memcpy(max_knapsack, current_knapsack, n * sizeof(unsigned int));
        return;
    }
    if (*current_weight + items[level].weight <= capacity)
    {   /* Try adding this item */
        *current_weight += items[level].weight;
        *current_profit += items[level].profit;
        current_knapsack[items[level].id] = 1;
        knapsack_recursive(items, n, capacity, current_knapsack, current_weight, 
                current_profit, max_knapsack, max_profit, level + 1);
        *current_weight -= items[level].weight;
        *current_profit -= items[level].profit;
        current_knapsack[items[level].id] = 0;
    }
    if (profit_bound(items, n, capacity, *current_weight, 
                *current_profit, level + 1) > *max_profit) {
        /* Still promising */
        knapsack_recursive(items, n, capacity, current_knapsack, current_weight, 
                current_profit, max_knapsack, max_profit, level + 1);
    }
}

double knapsack(const double *weights, const double *profits, 
        size_t n, double capacity, unsigned int **max_knapsack)
{
    double current_weight = 0.0;
    double current_profit = 0.0;
    double max_profit = 0.0;
    unsigned int i;
    item *items  = malloc(n * sizeof(item));
    unsigned int *current_knapsack = calloc(n, sizeof(unsigned int));
    *max_knapsack = malloc(n * sizeof(unsigned int));
    if (!(items && current_knapsack && *max_knapsack)) {
        free(items);
        free(current_knapsack);
        free(*max_knapsack);
        *max_knapsack = NULL;
        return 0;
    }
    /* Populate the array of items */
    for (i = 0; i < n; i++) {
        items[i].id = i;
        items[i].weight = weights[i];
        items[i].profit = profits[i];
        items[i].profit_density = profits[i] / weights[i];
    }
    /* Sort into decreasing density order */
    qsort(items, n, sizeof(item),
            (int (*)(const void *, const void *))compare_items);
    knapsack_recursive(items, n, capacity, current_knapsack, &current_weight,
            &current_profit, *max_knapsack, &max_profit, 0);
    free(items);
    free(current_knapsack);
    return max_profit;
}

Here is an example program:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    double weights[] = {3, 5, 2, 1};
    double profits[] = {9, 10, 7, 4};
    const size_t n = sizeof(profits) / sizeof(profits[0]);
    const double capacity = 7;
    unsigned int *max_knapsack;
    double max_profit = knapsack(weights, profits, n, capacity, &max_knapsack);
    unsigned int i;
    printf("Profit: %.2f\n", max_profit);
    printf("Knapsack contains:\n");
    for (i = 0; i < n; i++) {
        if (max_knapsack[i] == 1) {
            printf("Item %u with weight %.2f and profit %.2f\n", i, weights[i], profits[i]); 
        }
    }
    free(max_knapsack);
    return 0;
}

The output:

Profit: 20.00
Knapsack contains:
Item 0 with weight 3.00 and profit 9.00
Item 2 with weight 2.00 and profit 7.00
Item 3 with weight 1.00 and profit 4.00