Tag Archives: Greedy Algorithms

Greedy partition in C

In a previous post, I showed a dynamic programming algorithm that gives an exact solution to the partition problem. It’s also possible to get an approximate solution using a greedy algorithm.

The algorithm is:

  1. Sort the elements of the set in decreasing order
  2. For each element in the set:
    1. If the sum of the first subset is greater, put the element in the second set
    2. Else, put it in the first set

You can see intuitively that this is going to make the sets as close as possible to having the same sum without using an exact algorithm. Below is an implementation in C. The function partition() takes an array of unsigned integers and its size, and returns an array of unsigned integers which are 0 if the corresponding element is in the first set of the partition, and 1 if it is in the second set. Note that the indices of this set correspond to the array of numbers after having been sorted.

#include <stdlib.h>

int compare_unsigned_ints_decreasing(const unsigned int *p1, const unsigned int *p2)
{
    if (*p1 > *p2) {
        return -1;
    }
    if (*p1 < *p2) {
        return 1;
    }
    return 0;
}

unsigned int *partition(unsigned int *numbers, size_t n)
{
    unsigned int i, sum0 = 0, sum1 = 0;
    unsigned int *solution = calloc(n, sizeof(unsigned int));
    if (solution == NULL) {
        return NULL;
    }
    qsort(numbers, n, sizeof(unsigned int),
            (int(*)(const void *, const void *))compare_unsigned_ints_decreasing);
    for (i = 0; i < n; i++) {
        if (sum1 < sum0) {
            solution[i] = 1;
            sum1 += numbers[i];
        }
        else {
            sum0 += numbers[i];
        }
    }
    return solution;
}

Here is an example program that partitions the set of numbers from 1 to 7:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    unsigned int numbers[] = {1, 2, 3, 4, 5, 6, 7};
    const size_t len = sizeof(numbers) / sizeof(numbers[0]);
    unsigned int i;
    unsigned int *solution = partition(numbers, len);
    printf("First set:\n");
    for (i = 0; i < len; i++) {
        if (solution[i] == 0) {
            printf("%u\n", numbers[i]);
        }
    }
    printf("Second set:\n");
    for (i = 0; i < len; i++) {
        if (solution[i] == 1) {
            printf("%u\n", numbers[i]);
        }
    }
    putchar('\n');
    free(solution);
    return 0;
}

Here is the output. In this case, the partition divides the set perfectly into two sets of sum 14, but that won’t generally be the case for this algorithm as it is an approximate one.

First set:
7
4
3
Second set:
6
5
2
1

Huffman coding in C

Huffman coding is a compression method which generates variable-length codes for data – the more frequent the data item, the shorter the code generated. This allows more efficient compression than fixed-length codes. This is an implementation of the algorithm in C. The function huffman() takes arrays of letters and their frequencies, the length of the arrays, and a callback which is called for each code generated. The algorithm requires a priority queue, and I used a min-heap for the purpose.

#include <stdlib.h>

#include <minheap.h>

struct huffman_node {
    char data;
    unsigned int frequency;
    struct huffman_node *left;
    struct huffman_node *right;
};
typedef struct huffman_node huffman_node;

huffman_node *huffman_node_create(char data, unsigned int frequency)
{
    huffman_node *node = malloc(sizeof(huffman_node));
    if (node) {
        node->data = data;
        node->frequency = frequency;
        node->left = NULL;
        node->right = NULL;
    }
    return node;
}

void huffman_node_delete(huffman_node *node)
{
    if (node) {
        huffman_node_delete(node->left);
        huffman_node_delete(node->right);
        free(node);
    }
}

unsigned int max(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

unsigned int huffman_node_height(const huffman_node *node)
{
    unsigned int height = 0;
    if (node->left || node->right) {
        height = max(node->left ? huffman_node_height(node->left) : 0,
                node->right ? huffman_node_height(node->right) : 0) + 1;
    }
    return height;
}

void huffman_node_print(const huffman_node *node, unsigned int indent)
{
    unsigned int i;
    for (i = 0; i < indent; i++) {
        printf("  ");
    }
    printf("%c %u\n", node->data, node->frequency);
    if (node->left != NULL) {
        huffman_node_print(node->left, indent + 1);
    }
    if (node->right != NULL) {
        huffman_node_print(node->right, indent + 1);
    }
}

typedef void huffmanfn(char, const unsigned int *, size_t);

void huffman_node_encodings(const huffman_node *node, unsigned int *arr, 
        unsigned int pos, huffmanfn fun)
{
    if (node->left) {
        arr[pos] = 0;
        huffman_node_encodings(node->left, arr, pos + 1, fun);
    }
    if (node->right) {
        arr[pos] = 1;
        huffman_node_encodings(node->right, arr, pos + 1, fun);
    }
    if (!(node->left || node->right)) {
        fun(node->data, arr, pos);
    }
}

void huffman(const char *letters, const int *frequencies, size_t size, huffmanfn fun)
{
    minheap *heap = minheap_create();
    unsigned int i;
    huffman_node *top;
    unsigned int *arr;
    /* Populate the heap */
    for (i = 0; i < size; i++) {
        minheap_add(heap, huffman_node_create(letters[i], frequencies[i]), frequencies[i]);
    }
    /* Build the tree */
    while (minheap_get_count(heap) != 1) {
        huffman_node *left = minheap_remove_min(heap);
        huffman_node *right = minheap_remove_min(heap);
        top = huffman_node_create('$', left->frequency + right->frequency);
        top->left = left;
        top->right = right;
        minheap_add(heap, top, top->frequency);
    }
    top = minheap_remove_min(heap);
    /* Generate the encodings */
    arr = malloc(huffman_node_height(top) * sizeof(unsigned int));
    huffman_node_encodings(top, arr, 0, fun);
    /* Clean up */
    huffman_node_delete(top);
    free(arr);
    minheap_delete(heap);
}

Example program:

void print(char letter, const unsigned int *arr, size_t len)
{
    unsigned int i;
    printf("%c: ", letter);
    for (i = 0; i < len; i++) {
        printf("%u", arr[i]);
    }
    putchar('\n');
}

int main(void)
{
    char letters[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int frequencies[] = {45, 13, 12, 16, 9, 5};
    const size_t size = sizeof(letters) / sizeof(letters[0]);
    huffman(letters, frequencies, size, print);
    return 0;
}

The output:

a: 0
c: 100
b: 101
f: 1100
e: 1101
d: 111

Greedy Vertex Cover in C

A graph showing a vertex cover

The Vertex Cover Problem is to find a subset of the vertices of a graph that contains an endpoint of every edge. For example, in the graph shown above, the subset (0, 1) highlighted in red is a vertex cover. The minimisation problem of finding the smallest vertex cover is NP-hard. However a good approximation can be achieved using a greedy algorithm. The algorithm is simply:

  • While there are uncovered edges:
    1. Find the first uncovered edge
    2. Put one of its endpoints in the covering set
    3. Mark all of the edges incident upon that endpoint as covered

Below is an implementation in C. The function vertex_cover() takes a graph in edge list format, its size (number of edges) and order (number of vertices), and the address of a pointer through which to return the covering set as an out parameter. The return value is the size of the covering set.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

unsigned int vertex_cover(const edge *edges, unsigned int size, unsigned int order, 
        unsigned int **cover)
{
    unsigned int uncovered_size = size;
    unsigned int cover_size = 0;
    unsigned int *covered = calloc(size, sizeof(unsigned int));
    *cover = calloc(order, sizeof(unsigned int));
    if (covered == NULL || cover == NULL) {
        free(covered);
        free(*cover);
        *cover = NULL;
        return 0;
    }
    while (uncovered_size > 0) {
        unsigned int e, v;
        /* Find the first edge that hasn't been covered */
        for (e = 0; e < size && covered[e] == 1; e++);
        /* Add its first endpoint to the covering set */
        v = edges[e].first;
        (*cover)[v] = 1;
        cover_size++;
        /* Mark all uncovered edges containing its first endpoint as covered */
        for (e = 0; e < size; e++) {
            if (!covered[e] && (edges[e].first == v || edges[e].second == v)) {
                covered[e] = 1;
                uncovered_size--;
            }
        }
    }
    free(covered);
    return cover_size;
}

Here is an example program that finds a vertex cover on the graph shown above:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 4; /* Edges */
    const unsigned int order = 5; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 1, 4, &i);
    unsigned int *cover;
    unsigned int c = vertex_cover(edges, size, order, &cover);
    printf("Cover size is %u\n", c);
    printf("Vertices in cover:\n");
    for (i = 0; i < order; i++) {
        if (cover[i]) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(edges);
    free(cover);
    return 0;
}

The output:

Cover size is 2
Vertices in cover:
0 1

Dijkstra’s Shortest Paths algorithm in C

A weighted graph

Dijksta’s algorithm finds the shortest path in a weighted graph from a single vertex to one or all other vertices. It is a greedy breadth-first search (BFS) algorithm which works as follows:

  1. Create a table of distances to all vertices
  2. Set the distance to 0 for the starting vertex, and infinity for the others
  3. Make a list of unvisited vertices, which starts off containing all vertices
  4. Set the current vertex to be the starting vertex
  5. While the unvisited list is not empty:
    1. For each neighbour of the current vertex:
      • If the distance to the current vertex + the distance to the neighbour < that the stored distance to the neighbour:
        • Update the stored distance to the neighbour to be the distance to the current vertex + the distance to the neighbour
    2. Remove the current vertex from the unvisited list
    3. Find the nearest unvisited vertex (i.e., the one with the lowest distance in the distances table)
    4. Make it the new current vertex

Below is an implemention in C. The function dijkstra(), takes an array of edges, the number of edges (size) and the number of vertices (order), and returns an array containing the distance to each vertex. For the unvisited list I used an array of integers which I set to 0 or 1 depending on whether the corresponding vertex was unvisited or visited respectively.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int *dijkstra(const weighted_edge *edges, unsigned int size, unsigned int order,
        unsigned int vertex)
{
    unsigned int i;
    unsigned int *distances = malloc(order * sizeof(unsigned int));
    unsigned int *unvisited = malloc(order * sizeof(unsigned int));
    unsigned int unvisited_count = order;
    unsigned int current = vertex;
    if (distances == NULL || unvisited == NULL) {
        free(distances);
        free(unvisited);
        return NULL;
    }
    /* All distances start infinite and all vertices start unvisited */
    for (i = 0; i < order; i++) {
        distances[i] = UINT_MAX;
        unvisited[i] = 1;
    }
    /* Distance to starting vertex is 0 */
    distances[vertex] = 0;
    while (unvisited_count > 0) {
        /* Update the distances to all neighbours */
        unsigned int e, v;
        unsigned int min_distance;
        for (e = 0; e < size; e++) {
            if (edges[e].first == current || edges[e].second == current) {
                const unsigned int neighbour = edges[e].first == current ?
                    edges[e].second : edges[e].first;
                const unsigned int distance = distances[current] + edges[e].weight;
                if (distance < distances[neighbour]) {
                    distances[neighbour] = distance;
                }
            }
        }
        /* Finished with this vertex */
        unvisited[current] = 0;
        unvisited_count--;
        /* Find the nearest unvisited vertex */
        min_distance = 0;
        for (v = 0; v < order; v++) {
            if (unvisited[v] == 1 && (min_distance == 0 || distances[v] < min_distance)) {
                min_distance = distances[v];
                current = v;
            }
        }
    }
    free(unvisited);
    return distances;
}

Here is an example program that finds all of the distances from vertex 0 in the graph pictured above:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const unsigned int size = 9; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    unsigned int *distances;
    unsigned int i = 0;

    weighted_edge_connect(edges, 0, 1, 2, &i);
    weighted_edge_connect(edges, 0, 2, 4, &i);
    weighted_edge_connect(edges, 1, 2, 1, &i);
    weighted_edge_connect(edges, 1, 3, 4, &i);
    weighted_edge_connect(edges, 1, 4, 2, &i);
    weighted_edge_connect(edges, 2, 4, 3, &i);
    weighted_edge_connect(edges, 3, 4, 3, &i);
    weighted_edge_connect(edges, 3, 5, 2, &i);
    weighted_edge_connect(edges, 4, 5, 2, &i);

    distances = dijkstra(edges, size, order, 0);

    for (i = 0; i < order; i++) {
        printf("%u: %u\n", i, distances[i]);
    }

    free(distances);
    free(edges);

    return 0;
}

The output:

0: 0
1: 2
2: 3
3: 6
4: 4
5: 6

Borůvka’s Minimal Spanning Tree (MST) algorithm in C

Borůvka’s MST algorithm is quite similar to Kruskal’s algorithm. Like Kruskal, it begins by making a set of trees that start out as single vertices. It then repeatedly iterates over these trees and finds for each one the cheapest edge that has one endpoint in the tree and the other not. It then adds all of these edges to the MST, and merges the trees they join. Notice that the difference with Kruskal’s algorithm is that while Kruskal finds the cheapest edge with an endpoint in different trees at each iteration, Borůvka finds all of the cheapest edges that have endpoints in different trees. This has the effect of halving the number of trees at each iteration.

So in summary the algorithm is:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Until there is only one tree:
    1. Find the cheapest edge for each tree that has one endpoint in the tree and one outside
    2. Add all of these edges to the MST
    3. Merge all of the affected trees

Below is an implementation in C. As with Kruskal, I used a vertices array to keep track of which tree each vertex is in. I also needed another array links to keep track of which cheapest edges had been found through the loop over the trees. I needed this because the cheapest outgoing edge for one tree is also the cheapest outgoing edge for another one, and I needed to prevent adding the same edge to the MST twice.

The code was a little bit longer than for Prim or Kruskal, so I broke it into three functions as I never write a function that doesn’t fit on the screen.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Find the cheapest edge with one endpoint in tree and one not */
static int cheapest_edge_leaving_tree(const weighted_edge *edges, unsigned int size, 
        const unsigned int *vertices, unsigned int tree)
{
    unsigned int e;
    int cheapest = -1;
    for (e = 0; e < size && cheapest == -1; e++) {
        if ((vertices[edges[e].first] == tree
                    && vertices[edges[e].second] != tree)
                || (vertices[edges[e].first] != tree 
                    && vertices[edges[e].second] == tree))
        {
            cheapest = e;
        }
    }
    return cheapest;
}

/* Merge trees for all of the edges in mst from mst_prev to mst_edges */
static void merge_trees(const weighted_edge *mst, unsigned int mst_prev, unsigned int mst_edges,
       unsigned int *vertices, unsigned int order, unsigned int *trees)
{
    unsigned int e;
    for (e = mst_prev; e < mst_edges; e++) {
        unsigned int v;
        for (v = 0; v < order; v++) {
            if (vertices[v] == mst[e].second) {
                vertices[v] = mst[e].first;
            } 
        }
        (*trees)--;
    }
}

unsigned int boruvka(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int trees = order;
    unsigned int *links = malloc(size * sizeof(unsigned int));
    unsigned int i, cost = 0, mst_edges = 0;
    if (*mst == NULL || vertices == NULL || links == NULL) {
        free(*mst);
        free(vertices);
        free(links);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    while (trees > 1) {
        unsigned int t, mst_prev = mst_edges;
        memset(links, 0, size * sizeof(unsigned int));
        for (t = 0; t < trees ; t++) {
            /* Get the cheapest edge leaving this tree */
            int cheapest = cheapest_edge_leaving_tree(edges, size, vertices, t);
            if (cheapest == -1) {
                /* Graph wasn't connected properly */
                free(*mst);
                *mst = NULL;
                free(vertices);
                free(links);
                return 0;
            }
            /* Add the edge if not there already */
            if (links[cheapest] != 1) {
                (*mst)[mst_edges++] = edges[cheapest];
                links[cheapest] = 1;
                /* Add the cost */
                cost += edges[cheapest].weight;
            }
        }
        /* Merge the trees they join */
        merge_trees(*mst, mst_prev, mst_edges, vertices, order, &trees);
    }
    free(vertices);
    free(links);
    return cost;
}

Here is an example program that finds the MST of the same graph I used for Prim and Kruskal:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = boruvka(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Kruskal’s Minimum Spanning Tree (MST) Algorithm in C

Kruskal’s MST algorithm is a greedy algorithm like Prim’s algorithm but works quite differently. Recall that Prim’s algorithm builds up a single tree by greedily choosing the cheapest edge that has one endpoint inside it and one outside. Kruskal’s algorithm, on the other hand, builds up multiple trees, which start out as single vertices, and greedily chooses the cheapest edge that has endpoints in two different trees. Adding this edge causes the two trees to merge. After adding \(Order(G) – 1\) edges in this way, all of the trees have been merged into a single spanning tree which is the MST.

In summary:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Do \(Order(G)\) – 1 times (or equivalently, until there is only one tree):
    1. Find the cheapest edge with endpoints in different trees
    2. Use it to merge those two trees into a single tree

Below is an implementation in C. I used an array of integers, vertices, to keep track of which tree the vertices are in. When trees are merged by an edge, this array is updated to put the vertices that are in the same tree as the edge’s second endpoint into the tree containing the edge’s first endpoint. This has the effect of merging the trees. When the algorithm finishes, this array contains the same tree number for every vertex, since there is now only one tree.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int kruskal(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with endpoints in different trees */
        unsigned int j;
        int cheapest = -1;
        for (j = 0; j < size && cheapest == -1; j++) {
            if (vertices[edges[j].first] != vertices[edges[j].second]) {
                cheapest = j;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Merge the trees it joins */
        for (j = 0; j < order; j++) {
            if (vertices[j] == edges[cheapest].second) {
                vertices[j] = edges[cheapest].first;
            }
        }
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that constructs a complete weighted graph and then finds the MST:

#include <stdio.h>
#include <stdlib.h>

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = kruskal(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Prim’s Minimum Spanning Tree (MST) Algorithm in C

Weighted graph showing a Minimum Spanning Tree (MST)

A minimum spanning tree (MST) on a weighted graph is a spanning tree in which the sum of the weights of the edges is at a minimum. Finding MSTs is important in areas like the design of electrical networks.

Prim’s algorithm is a greedy algorithm for finding an MST on a weighted graph \(G\) . It goes as follows:

  1. Create an empty set \(T,\) for the tree
  2. Choose a starting vertex
  3. While \(T\) contains fewer than \(Order(G) – 1\) edges:
    1. Find the cheapest edge with one endpoint in \(T\) and one endpoint outside it
    2. Add it to \(T\)

Below is an implementation in C. By sorting the edges by weight first, I have made it more efficient to find the next cheapest edge than using an unordered array. It could be made more efficient still by the use of a data structure like a heap.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Compare two weighted edges by weight */
int weighted_edge_compare(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

unsigned int prim(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = calloc(order, sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Choose a starting vertex */
    vertices[0] = 1;
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with one endpoint in the tree and one not */
        unsigned int e;
        int cheapest = -1;
        for (e = 0; e < size && cheapest == -1; e++) {
            if ((vertices[edges[e].first] == 1 && vertices[edges[e].second] == 0)
                || (vertices[edges[e].first] == 0 && vertices[edges[e].second] == 1))
            {
                cheapest = e;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Add the vertices (one will already be there) */
        vertices[edges[cheapest].first] = 1;
        vertices[edges[cheapest].second] = 1;
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that finds an MST on the graph shown at the top:

#include <stdlib.h>
#include <stdio.h>

/* Calculate the nth triangular number T(n) */
unsigned int triangular_number(unsigned int n)
{
    return (n * (n + 1)) / 2;
}

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = prim(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Repetitive Nearest Neighbour Algorithm for TSP in C

The Repetitive Nearest Neighbour Algorithm (RNNA) is a refinement of the Nearest Neighbour Algorithm. It uses the same greedy strategy of going to the nearest unvisited neighbour at each step, but instead of constructing just one tour, the RNNA constructs a tour starting from every vertex of the graph, and then selects the one with the lowest total length to return as the solution.

Here is the implementation in C:

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Check if the tour already contains an edge */
static unsigned int tour_contains(const weighted_edge *tour, unsigned int t, 
        const weighted_edge *edge)
{
    unsigned int contains = 0;
    unsigned int i;
    for (i = 0; i < t && !contains; i++) {
        contains = tour[i].first == edge->first 
            && tour[i].second == edge->second;
    }
    return contains;
}

/* Find the edge to v's nearest neighbour not in the tour already */
static unsigned int nearest_neighbour_edge(const weighted_edge *edges, unsigned int size, 
        const weighted_edge *tour, unsigned int t, unsigned int v)
{
    unsigned int min_distance = 0;
    unsigned int nearest_neighbour;
    unsigned int i;
    for (i = 0; i < size; i++) {
        if ((edges[i].first == v || edges[i].second == v)
                && (min_distance == 0 || edges[i].weight < min_distance)
                && !tour_contains(tour, t, &edges[i]))
        {
            min_distance = edges[i].weight;
            nearest_neighbour = i;
        }
    }
    return nearest_neighbour;
}

weighted_edge *repetitive_nearest_neighbour_tsp(const weighted_edge *edges, unsigned int size,
        unsigned int order)
{
    unsigned int best_tour_distance = 0;
    weighted_edge *best_tour = NULL;
    unsigned int v;
    for (v = 0; v < order; v++) {
        unsigned int t;
        unsigned int distance = 0;
        weighted_edge *tour = malloc(order * sizeof(weighted_edge));
        if (tour == NULL) {
            return NULL;
        }
        for (t = 0; t < order; t++) {
            unsigned int e = nearest_neighbour_edge(edges, size, tour, t, v);
            tour[t] = edges[e];
            distance += edges[e].weight;
            v = edges[e].first == v ? edges[e].second : edges[e].first;
        }
        if (best_tour_distance == 0 || distance < best_tour_distance) {
            best_tour_distance = distance;
            free(best_tour);
            best_tour = tour;
        }
        else {
            free(tour);
        }
    }
    return best_tour;
}

Here is an example program in C using the same graph as I used for the Nearest Neighbour and Cheapest-Link algorithms:

#include <stdio.h>
#include <stdlib.h>

void weighted_edge_connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    weighted_edge_connect(edges, 0, 1, 25, &i);
    weighted_edge_connect(edges, 0, 2, 19, &i);
    weighted_edge_connect(edges, 0, 3, 19, &i);
    weighted_edge_connect(edges, 0, 4, 16, &i);
    weighted_edge_connect(edges, 0, 5, 28, &i);
    weighted_edge_connect(edges, 1, 2, 24, &i);
    weighted_edge_connect(edges, 1, 3, 30, &i);
    weighted_edge_connect(edges, 1, 4, 27, &i);
    weighted_edge_connect(edges, 1, 5, 17, &i);
    weighted_edge_connect(edges, 2, 3, 18, &i);
    weighted_edge_connect(edges, 2, 4, 20, &i);
    weighted_edge_connect(edges, 2, 5, 23, &i);
    weighted_edge_connect(edges, 3, 4, 19, &i);
    weighted_edge_connect(edges, 3, 5, 32, &i);
    weighted_edge_connect(edges, 4, 5, 41, &i);

    tour = repetitive_nearest_neighbour_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

Here is the output:

(2, 3, 18) (0, 3, 19) (0, 4, 16) (3, 4, 19) (1, 3, 30) (1, 5, 17)

Nearest Neighbour Algorithm for TSP in C

The Nearest Neighbour Algorithm is the simplest greedy approximate algorithm for the TSP.

The steps are:

  1. Pick a starting vertex
  2. Go to its nearest neighbour in the graph
  3. Repeat, only going to unvisited vertices
  4. When all vertices have been visited, stop

Here it is in C:

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Check if the tour already contains an edge */
static unsigned int tour_contains(const weighted_edge *tour, unsigned int t, 
        const weighted_edge *edge)
{
    unsigned int contains = 0;
    unsigned int i;
    for (i = 0; i < t && !contains; i++) {
        contains = tour[i].first == edge->first 
            && tour[i].second == edge->second;
    }
    return contains;
}

/* Find the edge to v's nearest neighbour not in the tour already */
static unsigned int nearest_neighbour_edge(const weighted_edge *edges, unsigned int size, 
        const weighted_edge *tour, unsigned int t, unsigned int v)
{
    unsigned int min_distance = 0;
    unsigned int nearest_neighbour;
    unsigned int i;
    for (i = 0; i < size; i++) {
        if ((edges[i].first == v || edges[i].second == v)
                && (min_distance == 0 || edges[i].weight < min_distance)
                && !tour_contains(tour, t, &edges[i]))
        {
            min_distance = edges[i].weight;
            nearest_neighbour = i;
        }
    }
    return nearest_neighbour;
}

weighted_edge *nearest_neighbour_tsp(const weighted_edge *edges, unsigned int size,
        unsigned int order)
{
    unsigned int t, v = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    if (tour == NULL) {
        return NULL;
    }
    for (t = 0; t < order; t++) {
        unsigned int e = nearest_neighbour_edge(edges, size, tour, t, v);
        tour[t] = edges[e];
        v = edges[e].first == v ? edges[e].second : edges[e].first;
    }
    return tour;
}

Here is an example program that solves the TSP for the same graph as I used for the Cheapest-Link Algorithm:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void weighted_edge_connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    weighted_edge_connect(edges, 0, 1, 25, &i);
    weighted_edge_connect(edges, 0, 2, 19, &i);
    weighted_edge_connect(edges, 0, 3, 19, &i);
    weighted_edge_connect(edges, 0, 4, 16, &i);
    weighted_edge_connect(edges, 0, 5, 28, &i);
    weighted_edge_connect(edges, 1, 2, 24, &i);
    weighted_edge_connect(edges, 1, 3, 30, &i);
    weighted_edge_connect(edges, 1, 4, 27, &i);
    weighted_edge_connect(edges, 1, 5, 17, &i);
    weighted_edge_connect(edges, 2, 3, 18, &i);
    weighted_edge_connect(edges, 2, 4, 20, &i);
    weighted_edge_connect(edges, 2, 5, 23, &i);
    weighted_edge_connect(edges, 3, 4, 19, &i);
    weighted_edge_connect(edges, 3, 5, 32, &i);
    weighted_edge_connect(edges, 4, 5, 41, &i);

    tour = nearest_neighbour_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

The output:

(0, 4, 16) (3, 4, 19) (2, 3, 18) (0, 2, 19) (0, 3, 19) (1, 3, 30)

Cheapest-Link Algorithm for TSP in C

A weighted graph for the Traveling Salesman Problem

The Traveling Salesman Problem (TSP) is NP-Complete, but there are a few greedy approximate algorithms that are efficient. One of them is the Cheapest Link Algorithm, which I describe here.

The algorithm works by repeatedly choosing the cheapest link in the graph that:

  1. Doesn’t close the circuit
  2. Doesn’t create a vertex with three edges coming out of it

These cheapest links are added to the tour until it needs one more edge to complete it, at which point condition (1) is removed so the cheapest link that does not create a vertex with three edges will then be added and the tour is complete.

Below is the implementation in C. To prevent closing the circuit early I used the graph cycle detection algorithm I described in an earlier post. To make sure there are no vertices with three edges, I keep track of the degrees of the vertices as the tour is built, and edges that connect vertices with degree 2 are rejected.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

static int compare_weighted_edges(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

static unsigned int cyclic_recursive(const weighted_edge *edges, unsigned int n,
        unsigned int *visited, unsigned int order, unsigned int vertex, 
        unsigned int predecessor)
{
    unsigned int i;
    unsigned int cycle_found = 0;
    visited[vertex] = 1;
    for (i = 0; i < n && !cycle_found; i++) {
        if (edges[i].first == vertex || edges[i].second == vertex) {
            /* Adjacent */
            const unsigned int neighbour = edges[i].first == vertex ?
                    edges[i].second : edges[i].first;
            if (visited[neighbour] == 0) {
                /* Not yet visited */
                cycle_found = cyclic_recursive(edges, n, visited, order, neighbour, vertex);
            }
            else if (neighbour != predecessor) {
                /* Found a cycle */
                cycle_found = 1;
            }
        }
    }
    return cycle_found;
}
 
unsigned int cyclic(const weighted_edge *edges, unsigned int n, unsigned int order)
{
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    unsigned int cycle_found;
    if (visited == NULL) {
        return 0;
    }
    cycle_found  = cyclic_recursive(edges, n, visited, order, 0, 0);
    free(visited);
    return cycle_found;
}

weighted_edge *cheapest_link_tsp(weighted_edge *edges, unsigned int size, unsigned int order)
{
    unsigned int t, e = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    unsigned int *degrees = calloc(order, sizeof(unsigned int));
    if (tour == NULL || degrees == NULL) {
        free(tour);
        free(degrees);
        return NULL;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge),
            (int(*)(const void *, const void *))compare_weighted_edges);
    /* Main algorithm */
    for (t = 0; t < order; t++) {
        unsigned int added = 0;
        while (!added && e < size) {
            if (degrees[edges[e].first] < 2 && degrees[edges[e].second] < 2) {
                tour[t] = edges[e];
                if (t == order - 1 /* It's the last edge */
                    || !cyclic(tour, t + 1, order)) /* It doesn't close the circuit */ 
                { 
                    added = 1;
                    degrees[edges[e].first]++;
                    degrees[edges[e].second]++;
                }
            }
            e++;
        }
        if (!added) {
            /* Edges were not correct */
            free(tour);
            free(degrees);
            return NULL;
        }
    }
    free(degrees);
    return tour;
}

Here is an example program that finds a solution for the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

static void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    connect(edges, 0, 1, 25, &i);
    connect(edges, 0, 2, 19, &i);
    connect(edges, 0, 3, 19, &i);
    connect(edges, 0, 4, 16, &i);
    connect(edges, 0, 5, 28, &i);
    connect(edges, 1, 2, 24, &i);
    connect(edges, 1, 3, 30, &i);
    connect(edges, 1, 4, 27, &i);
    connect(edges, 1, 5, 17, &i);
    connect(edges, 2, 3, 18, &i);
    connect(edges, 2, 4, 20, &i);
    connect(edges, 2, 5, 23, &i);
    connect(edges, 3, 4, 19, &i);
    connect(edges, 3, 5, 32, &i);
    connect(edges, 4, 5, 41, &i);

    tour = cheapest_link_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

Output:

(0, 4, 16) (1, 5, 17) (2, 3, 18) (0, 2, 19) (1, 4, 27) (3, 5, 32)