Maximum Clique using backtracking in C

A graph showing a maximum clique

A clique in a graph is a subgraph in which all vertices are connected to each other – a complete subgraph. The Maximum Clique Problem is to find the largest clique of a graph. For example, in the graph shown above the maximum clique size is 3, and the vertices (1, 2, 4) are a maximum clique, as are (0, 1, 4) and (0, 3, 4).

The Maximum Clique Problem is NP-complete, but can be solved efficiently using backtracking. The following is a backtracking implementation in C. The function maximum_clique() takes a graph in adjacency matrix form and its order (the number of vertices), and will return a maximum clique and its size.

#include <stdlib.h>
#include <string.h>

static void maximum_clique_recursive(const unsigned int **adjmat, unsigned int order, 
        unsigned int *current_clique, unsigned int *current_clique_size, unsigned int *max_clique,
        unsigned int *max_clique_size, unsigned int level)
{
    unsigned int i, connected;
    if (level == order) {
        /* Found a new max clique */
        memcpy(max_clique, current_clique, order * sizeof(unsigned int));
        *max_clique_size = *current_clique_size;
        return;
    }
    /* Find out if current vertex is connected to all vertices in current clique */
    connected = 1;
    for (i = 0; i < level && connected; i++) {
        if (current_clique[i] && !adjmat[level][i]) {
            connected = 0;
        }
    }

    if (connected) {
        /* Add this vertex to the clique */
        current_clique[level] = 1;
        (*current_clique_size)++;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
        (*current_clique_size)--;
    }
    if (*current_clique_size + order - level > *max_clique_size) {
        /* Still promising */
        current_clique[level] = 0;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
    }
}

unsigned int maximum_clique(const unsigned int **adjmat, unsigned int order, 
        unsigned int **max_clique)
{
    unsigned int max_clique_size = 0;
    unsigned int current_clique_size = 0;
    unsigned int *current_clique = malloc(order * sizeof(unsigned int));
    *max_clique = malloc(order * sizeof(unsigned int));
    if (current_clique == NULL || *max_clique == NULL) {
        free(current_clique);
        free(max_clique);
        return 0;
    }
    maximum_clique_recursive(adjmat, order, current_clique, &current_clique_size, *max_clique, 
            &max_clique_size, 0);
    free(current_clique);
    return max_clique_size;
}

Here is an example program that finds a maximum clique on the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    const unsigned int order = 5; /* Vertices */
    unsigned int r1[] = {0, 1, 0, 1, 1};
    unsigned int r2[] = {1, 0, 1, 0, 1};
    unsigned int r3[] = {0, 1, 0, 0, 1};
    unsigned int r4[] = {1, 0, 0, 0, 1};
    unsigned int r5[] = {1, 1, 1, 1, 0};
    const unsigned int *adjmat[] = {r1, r2, r3, r4, r5};
    unsigned int *max_clique;
    unsigned int max_clique_size = maximum_clique(adjmat, order, &max_clique);
    unsigned int i;
    printf("Max clique size is %u\n", max_clique_size);
    for (i = 0; i < order; i++) {
        if (max_clique[i] == 1) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(max_clique);
    return 0;
}

The output:

Max clique size is 3
1 2 4

Bellman-Ford algorithm in C

Weighted graph showing all distances from vertex 0

The Bellman-Ford algorithm finds the shortest path between a single vertex and all other vertices in a graph. This is the same problem that Dijkstra’s algorithm solves, but unlike Dijkstra, the Bellman-Ford algorithm can handle graphs with negative edge weights.

One consequence of negative weights is that a graph can contain a negative cycle, and if this is the case, the shortest path to all vertices reachable from the cycle is negative infinity, because one could traverse the cycle any number of times and keep reducing the path length indefinitely. The Bellman Ford algorithm detects this by performing an extra iteration over the graph, and if this would result in a distance decreasing, it knows that there is a negative cycle.

Below is an implementation in C. It takes a set of weighted arcs, the number of arcs (size), the number of vertices (order), and a starting vertex number. It returns an array of integers containing the distance to every vertex. If the graph contains a negative cycle, it returns NULL.

#include <stdlib.h>
#include <limits.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    int weight;
} weighted_arc;

int *bellman_ford(const weighted_arc *arcs, unsigned int size, unsigned int order, 
        unsigned int vertex)
{
    unsigned int i;
    int *distances = malloc(order * sizeof(unsigned int));
    unsigned int a, v, negative_cycle = 0;
    if (distances == NULL) {
        return NULL;
    }
    /* All distances start infinite */
    for (i = 0; i < order; i++) {
        distances[i] = INT_MAX;
    }
    /* Distance to starting vertex is 0 */
    distances[vertex] = 0;
    /* Relax edges repeatedly */
    for (v = 0; v < order; v++) {
        for (a = 0; a < size; a++) {
            const unsigned int first = arcs[a].first;
            const unsigned int second = arcs[a].second;
            const int weight = arcs[a].weight;
            if (distances[first] != INT_MAX
                && distances[first] + weight < distances[second])
            {
                distances[second] = distances[first] + weight;
            } 
        }
    }
    /* Check for negative weight cycle */
    for (a = 0; a < size && !negative_cycle; a++) {
        const unsigned int first = arcs[a].first;
        const unsigned int second = arcs[a].second;
        const int weight = arcs[a].weight;
        negative_cycle = distances[first] + weight < distances[second];
    }
    if (negative_cycle) {
        free(distances);
        distances = NULL;
    }

    return distances;
}

Here is an example program that computes the distances for the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void weighted_arc_connect(weighted_arc *arcs, unsigned int first, unsigned int second, 
        int weight, unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    arcs[*pos].weight = weight;
    (*pos)++;
}

int main(void)
{
    unsigned int size = 9; /* Arcs */
    unsigned int order = 6; /* Vertices */
    unsigned int i = 0;
    int *distances;

    weighted_arc *arcs = malloc(size * sizeof(weighted_arc));
    weighted_arc_connect(arcs, 0, 1, 5, &i);
    weighted_arc_connect(arcs, 0, 3, -2, &i);
    weighted_arc_connect(arcs, 1, 2, 1, &i);
    weighted_arc_connect(arcs, 2, 3, 2, &i);
    weighted_arc_connect(arcs, 2, 4, 7, &i);
    weighted_arc_connect(arcs, 2, 5, 3, &i);
    weighted_arc_connect(arcs, 3, 1, 2, &i);
    weighted_arc_connect(arcs, 4, 3, 3, &i);
    weighted_arc_connect(arcs, 4, 5, 10, &i);

    distances = bellman_ford(arcs, size, order, 0);
    if (distances == NULL) {
        printf("Graph contains a negative cycle or ran out of memory.\n");
        return 1;
    }

    for (i = 0; i < order; i++) {
        printf("%u: %d\n", i, distances[i]);
    }

    free(arcs);
    free(distances);
    return 0;
}

The output:

0: 0
1: 0
2: 1
3: -2
4: 8
5: 4

Related

Floyd-Warshall algorithm in C

A weighted directed graph

The Floyd-Warshall algorithm calculates the distances between all pairs of vertices in a weighted graph. It is a dynamic programming algorithm very similar to Gauss-Jordan elimination.

Below is an implementation in C. The function takes an array of directed arcs, the size of the graph (number of arcs), and its order (number of vertices). It returns a dynamically allocated 2-dimensional array that is the distance table for all pairs of vertices.

One thing to notice about the implementation is that when you use INT_MAX to represent infinity, you need to be very careful not to do arithmetic on it that will cause it to roll over and become a small number. This is the reason for the checks for INT_MAX in the main calculation.

#include <stdlib.h>
#include <limits.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    int weight;
} weighted_arc;

int **floyd_warshall(const weighted_arc *arcs, unsigned int size, unsigned int order)
{
    unsigned int i, j, k;
    int **distances = malloc(order * sizeof(int *));
    /* Initialise the distance table */
    for (i = 0; i < order; i++) {
        distances[i] = malloc(order * sizeof(int));
        for (j = 0; j < order; j++) {
            if (i == j) {
                distances[i][j] = 0;
            }
            else {
                distances[i][j] = INT_MAX;
            }
        }
    }
    /* Add the distance for each arc */
    for (i = 0; i < size; i++) {
        distances[arcs[i].first][arcs[i].second] = arcs[i].weight;
    }
    /* Calculate the rest of the distances */
    for (i = 0; i < order; i++) {
        for (j = 0; j < order; j++) {
            for (k = 0; k < order; k++) {
                const int djk = distances[j][k];
                const int dji = distances[j][i];
                const int dik = distances[i][k];
                if (dji != INT_MAX && dik != INT_MAX 
                        && djk > dji + dik)
                {
                    distances[j][k] = dji + dik;
                }
            }
        }
    }
    return distances;
}

Here is an example program that calculates the distance table for the graph shown at the top of the page:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void weighted_arc_connect(weighted_arc *arcs, unsigned int first, unsigned int second, 
        int weight, unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    arcs[*pos].weight = weight;
    (*pos)++;
}

/* Print a distance table */
void print_distances(int **distances, unsigned int order)
{
    unsigned int i, j;
    for (i = 0; i < order; i++) {
        for (j = 0; j < order; j++) {
            printf("%d ", distances[i][j]);
        }
        putchar('\n');
    }
}

int main(void)
{
    unsigned int size = 5; /* Arcs */
    unsigned int order = 4; /* Vertices */
    unsigned int i = 0;
    int **distances;
    weighted_arc *arcs = malloc(size * sizeof(weighted_arc));
    weighted_arc_connect(arcs, 0, 2, -2, &i);
    weighted_arc_connect(arcs, 1, 0, 4, &i);
    weighted_arc_connect(arcs, 1, 2, 3, &i);
    weighted_arc_connect(arcs, 2, 3, 2, &i);
    weighted_arc_connect(arcs, 3, 1, -1, &i);
    distances = floyd_warshall(arcs, size, order);
    print_distances(distances, order);
    free(arcs);
    for (i = 0; i < order; i++) {
        free(distances[i]);
    }
    free(distances);
    return 0;
}

The output:

0 -1 -2 0
4 0 2 4
5 1 0 2
3 -1 1 0

Related

Dijkstra’s Shortest Paths algorithm in C

A weighted graph

Dijksta’s algorithm finds the shortest path in a weighted graph from a single vertex to one or all other vertices. It is a greedy breadth-first search (BFS) algorithm which works as follows:

  1. Create a table of distances to all vertices
  2. Set the distance to 0 for the starting vertex, and infinity for the others
  3. Make a list of unvisited vertices, which starts off containing all vertices
  4. Set the current vertex to be the starting vertex
  5. While the unvisited list is not empty:
    1. For each neighbour of the current vertex:
      • If the distance to the current vertex + the distance to the neighbour < that the stored distance to the neighbour:
        • Update the stored distance to the neighbour to be the distance to the current vertex + the distance to the neighbour
    2. Remove the current vertex from the unvisited list
    3. Find the nearest unvisited vertex (i.e., the one with the lowest distance in the distances table)
    4. Make it the new current vertex

Below is an implemention in C. The function dijkstra(), takes an array of edges, the number of edges (size) and the number of vertices (order), and returns an array containing the distance to each vertex. For the unvisited list I used an array of integers which I set to 0 or 1 depending on whether the corresponding vertex was unvisited or visited respectively.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int *dijkstra(const weighted_edge *edges, unsigned int size, unsigned int order,
        unsigned int vertex)
{
    unsigned int i;
    unsigned int *distances = malloc(order * sizeof(unsigned int));
    unsigned int *unvisited = malloc(order * sizeof(unsigned int));
    unsigned int unvisited_count = order;
    unsigned int current = vertex;
    if (distances == NULL || unvisited == NULL) {
        free(distances);
        free(unvisited);
        return NULL;
    }
    /* All distances start infinite and all vertices start unvisited */
    for (i = 0; i < order; i++) {
        distances[i] = UINT_MAX;
        unvisited[i] = 1;
    }
    /* Distance to starting vertex is 0 */
    distances[vertex] = 0;
    while (unvisited_count > 0) {
        /* Update the distances to all neighbours */
        unsigned int e, v;
        unsigned int min_distance;
        for (e = 0; e < size; e++) {
            if (edges[e].first == current || edges[e].second == current) {
                const unsigned int neighbour = edges[e].first == current ?
                    edges[e].second : edges[e].first;
                const unsigned int distance = distances[current] + edges[e].weight;
                if (distance < distances[neighbour]) {
                    distances[neighbour] = distance;
                }
            }
        }
        /* Finished with this vertex */
        unvisited[current] = 0;
        unvisited_count--;
        /* Find the nearest unvisited vertex */
        min_distance = 0;
        for (v = 0; v < order; v++) {
            if (unvisited[v] == 1 && (min_distance == 0 || distances[v] < min_distance)) {
                min_distance = distances[v];
                current = v;
            }
        }
    }
    free(unvisited);
    return distances;
}

Here is an example program that finds all of the distances from vertex 0 in the graph pictured above:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const unsigned int size = 9; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    unsigned int *distances;
    unsigned int i = 0;

    weighted_edge_connect(edges, 0, 1, 2, &i);
    weighted_edge_connect(edges, 0, 2, 4, &i);
    weighted_edge_connect(edges, 1, 2, 1, &i);
    weighted_edge_connect(edges, 1, 3, 4, &i);
    weighted_edge_connect(edges, 1, 4, 2, &i);
    weighted_edge_connect(edges, 2, 4, 3, &i);
    weighted_edge_connect(edges, 3, 4, 3, &i);
    weighted_edge_connect(edges, 3, 5, 2, &i);
    weighted_edge_connect(edges, 4, 5, 2, &i);

    distances = dijkstra(edges, size, order, 0);

    for (i = 0; i < order; i++) {
        printf("%u: %u\n", i, distances[i]);
    }

    free(distances);
    free(edges);

    return 0;
}

The output:

0: 0
1: 2
2: 3
3: 6
4: 4
5: 6

Related

Borůvka’s Minimal Spanning Tree (MST) algorithm in C

Borůvka’s MST algorithm is quite similar to Kruskal’s algorithm. Like Kruskal, it begins by making a set of trees that start out as single vertices. It then repeatedly iterates over these trees and finds for each one the cheapest edge that has one endpoint in the tree and the other not. It then adds all of these edges to the MST, and merges the trees they join. Notice that the difference with Kruskal’s algorithm is that while Kruskal finds the cheapest edge with an endpoint in different trees at each iteration, Borůvka finds all of the cheapest edges that have endpoints in different trees. This has the effect of halving the number of trees at each iteration.

So in summary the algorithm is:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Until there is only one tree:
    1. Find the cheapest edge for each tree that has one endpoint in the tree and one outside
    2. Add all of these edges to the MST
    3. Merge all of the affected trees

Below is an implementation in C. As with Kruskal, I used a vertices array to keep track of which tree each vertex is in. I also needed another array links to keep track of which cheapest edges had been found through the loop over the trees. I needed this because the cheapest outgoing edge for one tree is also the cheapest outgoing edge for another one, and I needed to prevent adding the same edge to the MST twice.

The code was a little bit longer than for Prim or Kruskal, so I broke it into three functions as I never write a function that doesn’t fit on the screen.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Find the cheapest edge with one endpoint in tree and one not */
static int cheapest_edge_leaving_tree(const weighted_edge *edges, unsigned int size, 
        const unsigned int *vertices, unsigned int tree)
{
    unsigned int e;
    int cheapest = -1;
    for (e = 0; e < size && cheapest == -1; e++) {
        if ((vertices[edges[e].first] == tree
                    && vertices[edges[e].second] != tree)
                || (vertices[edges[e].first] != tree 
                    && vertices[edges[e].second] == tree))
        {
            cheapest = e;
        }
    }
    return cheapest;
}

/* Merge trees for all of the edges in mst from mst_prev to mst_edges */
static void merge_trees(const weighted_edge *mst, unsigned int mst_prev, unsigned int mst_edges,
       unsigned int *vertices, unsigned int order, unsigned int *trees)
{
    unsigned int e;
    for (e = mst_prev; e < mst_edges; e++) {
        unsigned int v;
        for (v = 0; v < order; v++) {
            if (vertices[v] == mst[e].second) {
                vertices[v] = mst[e].first;
            } 
        }
        (*trees)--;
    }
}

unsigned int boruvka(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int trees = order;
    unsigned int *links = malloc(size * sizeof(unsigned int));
    unsigned int i, cost = 0, mst_edges = 0;
    if (*mst == NULL || vertices == NULL || links == NULL) {
        free(*mst);
        free(vertices);
        free(links);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    while (trees > 1) {
        unsigned int t, mst_prev = mst_edges;
        memset(links, 0, size * sizeof(unsigned int));
        for (t = 0; t < trees ; t++) {
            /* Get the cheapest edge leaving this tree */
            int cheapest = cheapest_edge_leaving_tree(edges, size, vertices, t);
            if (cheapest == -1) {
                /* Graph wasn't connected properly */
                free(*mst);
                *mst = NULL;
                free(vertices);
                free(links);
                return 0;
            }
            /* Add the edge if not there already */
            if (links[cheapest] != 1) {
                (*mst)[mst_edges++] = edges[cheapest];
                links[cheapest] = 1;
                /* Add the cost */
                cost += edges[cheapest].weight;
            }
        }
        /* Merge the trees they join */
        merge_trees(*mst, mst_prev, mst_edges, vertices, order, &trees);
    }
    free(vertices);
    free(links);
    return cost;
}

Here is an example program that finds the MST of the same graph I used for Prim and Kruskal:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = boruvka(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Kruskal’s Minimum Spanning Tree (MST) Algorithm in C

Kruskal’s MST algorithm is a greedy algorithm like Prim’s algorithm but works quite differently. Recall that Prim’s algorithm builds up a single tree by greedily choosing the cheapest edge that has one endpoint inside it and one outside. Kruskal’s algorithm, on the other hand, builds up multiple trees, which start out as single vertices, and greedily chooses the cheapest edge that has endpoints in two different trees. Adding this edge causes the two trees to merge. After adding \(Order(G) – 1\) edges in this way, all of the trees have been merged into a single spanning tree which is the MST.

In summary:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Do \(Order(G)\) – 1 times (or equivalently, until there is only one tree):
    1. Find the cheapest edge with endpoints in different trees
    2. Use it to merge those two trees into a single tree

Below is an implementation in C. I used an array of integers, vertices, to keep track of which tree the vertices are in. When trees are merged by an edge, this array is updated to put the vertices that are in the same tree as the edge’s second endpoint into the tree containing the edge’s first endpoint. This has the effect of merging the trees. When the algorithm finishes, this array contains the same tree number for every vertex, since there is now only one tree.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int kruskal(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with endpoints in different trees */
        unsigned int j;
        int cheapest = -1;
        for (j = 0; j < size && cheapest == -1; j++) {
            if (vertices[edges[j].first] != vertices[edges[j].second]) {
                cheapest = j;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Merge the trees it joins */
        for (j = 0; j < order; j++) {
            if (vertices[j] == edges[cheapest].second) {
                vertices[j] = edges[cheapest].first;
            }
        }
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that constructs a complete weighted graph and then finds the MST:

#include <stdio.h>
#include <stdlib.h>

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = kruskal(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Prim’s Minimum Spanning Tree (MST) Algorithm in C

Weighted graph showing a Minimum Spanning Tree (MST)

A minimum spanning tree (MST) on a weighted graph is a spanning tree in which the sum of the weights of the edges is at a minimum. Finding MSTs is important in areas like the design of electrical networks.

Prim’s algorithm is a greedy algorithm for finding an MST on a weighted graph \(G\) . It goes as follows:

  1. Create an empty set \(T,\) for the tree
  2. Choose a starting vertex
  3. While \(T\) contains fewer than \(Order(G) – 1\) edges:
    1. Find the cheapest edge with one endpoint in \(T\) and one endpoint outside it
    2. Add it to \(T\)

Below is an implementation in C. By sorting the edges by weight first, I have made it more efficient to find the next cheapest edge than using an unordered array. It could be made more efficient still by the use of a data structure like a heap.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Compare two weighted edges by weight */
int weighted_edge_compare(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

unsigned int prim(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = calloc(order, sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Choose a starting vertex */
    vertices[0] = 1;
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with one endpoint in the tree and one not */
        unsigned int e;
        int cheapest = -1;
        for (e = 0; e < size && cheapest == -1; e++) {
            if ((vertices[edges[e].first] == 1 && vertices[edges[e].second] == 0)
                || (vertices[edges[e].first] == 0 && vertices[edges[e].second] == 1))
            {
                cheapest = e;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Add the vertices (one will already be there) */
        vertices[edges[cheapest].first] = 1;
        vertices[edges[cheapest].second] = 1;
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that finds an MST on the graph shown at the top:

#include <stdlib.h>
#include <stdio.h>

/* Calculate the nth triangular number T(n) */
unsigned int triangular_number(unsigned int n)
{
    return (n * (n + 1)) / 2;
}

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = prim(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Repetitive Nearest Neighbour Algorithm for TSP in C

The Repetitive Nearest Neighbour Algorithm (RNNA) is a refinement of the Nearest Neighbour Algorithm. It uses the same greedy strategy of going to the nearest unvisited neighbour at each step, but instead of constructing just one tour, the RNNA constructs a tour starting from every vertex of the graph, and then selects the one with the lowest total length to return as the solution.

Here is the implementation in C:

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Check if the tour already contains an edge */
static unsigned int tour_contains(const weighted_edge *tour, unsigned int t, 
        const weighted_edge *edge)
{
    unsigned int contains = 0;
    unsigned int i;
    for (i = 0; i < t && !contains; i++) {
        contains = tour[i].first == edge->first 
            && tour[i].second == edge->second;
    }
    return contains;
}

/* Find the edge to v's nearest neighbour not in the tour already */
static unsigned int nearest_neighbour_edge(const weighted_edge *edges, unsigned int size, 
        const weighted_edge *tour, unsigned int t, unsigned int v)
{
    unsigned int min_distance = 0;
    unsigned int nearest_neighbour;
    unsigned int i;
    for (i = 0; i < size; i++) {
        if ((edges[i].first == v || edges[i].second == v)
                && (min_distance == 0 || edges[i].weight < min_distance)
                && !tour_contains(tour, t, &edges[i]))
        {
            min_distance = edges[i].weight;
            nearest_neighbour = i;
        }
    }
    return nearest_neighbour;
}

weighted_edge *repetitive_nearest_neighbour_tsp(const weighted_edge *edges, unsigned int size,
        unsigned int order)
{
    unsigned int best_tour_distance = 0;
    weighted_edge *best_tour = NULL;
    unsigned int v;
    for (v = 0; v < order; v++) {
        unsigned int t;
        unsigned int distance = 0;
        weighted_edge *tour = malloc(order * sizeof(weighted_edge));
        if (tour == NULL) {
            return NULL;
        }
        for (t = 0; t < order; t++) {
            unsigned int e = nearest_neighbour_edge(edges, size, tour, t, v);
            tour[t] = edges[e];
            distance += edges[e].weight;
            v = edges[e].first == v ? edges[e].second : edges[e].first;
        }
        if (best_tour_distance == 0 || distance < best_tour_distance) {
            best_tour_distance = distance;
            free(best_tour);
            best_tour = tour;
        }
        else {
            free(tour);
        }
    }
    return best_tour;
}

Here is an example program in C using the same graph as I used for the Nearest Neighbour and Cheapest-Link algorithms:

#include <stdio.h>
#include <stdlib.h>

void weighted_edge_connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    weighted_edge_connect(edges, 0, 1, 25, &i);
    weighted_edge_connect(edges, 0, 2, 19, &i);
    weighted_edge_connect(edges, 0, 3, 19, &i);
    weighted_edge_connect(edges, 0, 4, 16, &i);
    weighted_edge_connect(edges, 0, 5, 28, &i);
    weighted_edge_connect(edges, 1, 2, 24, &i);
    weighted_edge_connect(edges, 1, 3, 30, &i);
    weighted_edge_connect(edges, 1, 4, 27, &i);
    weighted_edge_connect(edges, 1, 5, 17, &i);
    weighted_edge_connect(edges, 2, 3, 18, &i);
    weighted_edge_connect(edges, 2, 4, 20, &i);
    weighted_edge_connect(edges, 2, 5, 23, &i);
    weighted_edge_connect(edges, 3, 4, 19, &i);
    weighted_edge_connect(edges, 3, 5, 32, &i);
    weighted_edge_connect(edges, 4, 5, 41, &i);

    tour = repetitive_nearest_neighbour_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

Here is the output:

(2, 3, 18) (0, 3, 19) (0, 4, 16) (3, 4, 19) (1, 3, 30) (1, 5, 17)

Nearest Neighbour Algorithm for TSP in C

The Nearest Neighbour Algorithm is the simplest greedy approximate algorithm for the TSP.

The steps are:

  1. Pick a starting vertex
  2. Go to its nearest neighbour in the graph
  3. Repeat, only going to unvisited vertices
  4. When all vertices have been visited, stop

Here it is in C:

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Check if the tour already contains an edge */
static unsigned int tour_contains(const weighted_edge *tour, unsigned int t, 
        const weighted_edge *edge)
{
    unsigned int contains = 0;
    unsigned int i;
    for (i = 0; i < t && !contains; i++) {
        contains = tour[i].first == edge->first 
            && tour[i].second == edge->second;
    }
    return contains;
}

/* Find the edge to v's nearest neighbour not in the tour already */
static unsigned int nearest_neighbour_edge(const weighted_edge *edges, unsigned int size, 
        const weighted_edge *tour, unsigned int t, unsigned int v)
{
    unsigned int min_distance = 0;
    unsigned int nearest_neighbour;
    unsigned int i;
    for (i = 0; i < size; i++) {
        if ((edges[i].first == v || edges[i].second == v)
                && (min_distance == 0 || edges[i].weight < min_distance)
                && !tour_contains(tour, t, &edges[i]))
        {
            min_distance = edges[i].weight;
            nearest_neighbour = i;
        }
    }
    return nearest_neighbour;
}

weighted_edge *nearest_neighbour_tsp(const weighted_edge *edges, unsigned int size,
        unsigned int order)
{
    unsigned int t, v = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    if (tour == NULL) {
        return NULL;
    }
    for (t = 0; t < order; t++) {
        unsigned int e = nearest_neighbour_edge(edges, size, tour, t, v);
        tour[t] = edges[e];
        v = edges[e].first == v ? edges[e].second : edges[e].first;
    }
    return tour;
}

Here is an example program that solves the TSP for the same graph as I used for the Cheapest-Link Algorithm:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void weighted_edge_connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    weighted_edge_connect(edges, 0, 1, 25, &i);
    weighted_edge_connect(edges, 0, 2, 19, &i);
    weighted_edge_connect(edges, 0, 3, 19, &i);
    weighted_edge_connect(edges, 0, 4, 16, &i);
    weighted_edge_connect(edges, 0, 5, 28, &i);
    weighted_edge_connect(edges, 1, 2, 24, &i);
    weighted_edge_connect(edges, 1, 3, 30, &i);
    weighted_edge_connect(edges, 1, 4, 27, &i);
    weighted_edge_connect(edges, 1, 5, 17, &i);
    weighted_edge_connect(edges, 2, 3, 18, &i);
    weighted_edge_connect(edges, 2, 4, 20, &i);
    weighted_edge_connect(edges, 2, 5, 23, &i);
    weighted_edge_connect(edges, 3, 4, 19, &i);
    weighted_edge_connect(edges, 3, 5, 32, &i);
    weighted_edge_connect(edges, 4, 5, 41, &i);

    tour = nearest_neighbour_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

The output:

(0, 4, 16) (3, 4, 19) (2, 3, 18) (0, 2, 19) (0, 3, 19) (1, 3, 30)

Cheapest-Link Algorithm for TSP in C

A weighted graph for the Traveling Salesman Problem

The Traveling Salesman Problem (TSP) is NP-Complete, but there are a few greedy approximate algorithms that are efficient. One of them is the Cheapest Link Algorithm, which I describe here.

The algorithm works by repeatedly choosing the cheapest link in the graph that:

  1. Doesn’t close the circuit
  2. Doesn’t create a vertex with three edges coming out of it

These cheapest links are added to the tour until it needs one more edge to complete it, at which point condition (1) is removed so the cheapest link that does not create a vertex with three edges will then be added and the tour is complete.

Below is the implementation in C. To prevent closing the circuit early I used the graph cycle detection algorithm I described in an earlier post. To make sure there are no vertices with three edges, I keep track of the degrees of the vertices as the tour is built, and edges that connect vertices with degree 2 are rejected.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

static int compare_weighted_edges(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

static unsigned int cyclic_recursive(const weighted_edge *edges, unsigned int n,
        unsigned int *visited, unsigned int order, unsigned int vertex, 
        unsigned int predecessor)
{
    unsigned int i;
    unsigned int cycle_found = 0;
    visited[vertex] = 1;
    for (i = 0; i < n && !cycle_found; i++) {
        if (edges[i].first == vertex || edges[i].second == vertex) {
            /* Adjacent */
            const unsigned int neighbour = edges[i].first == vertex ?
                    edges[i].second : edges[i].first;
            if (visited[neighbour] == 0) {
                /* Not yet visited */
                cycle_found = cyclic_recursive(edges, n, visited, order, neighbour, vertex);
            }
            else if (neighbour != predecessor) {
                /* Found a cycle */
                cycle_found = 1;
            }
        }
    }
    return cycle_found;
}
 
unsigned int cyclic(const weighted_edge *edges, unsigned int n, unsigned int order)
{
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    unsigned int cycle_found;
    if (visited == NULL) {
        return 0;
    }
    cycle_found  = cyclic_recursive(edges, n, visited, order, 0, 0);
    free(visited);
    return cycle_found;
}

weighted_edge *cheapest_link_tsp(weighted_edge *edges, unsigned int size, unsigned int order)
{
    unsigned int t, e = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    unsigned int *degrees = calloc(order, sizeof(unsigned int));
    if (tour == NULL || degrees == NULL) {
        free(tour);
        free(degrees);
        return NULL;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge),
            (int(*)(const void *, const void *))compare_weighted_edges);
    /* Main algorithm */
    for (t = 0; t < order; t++) {
        unsigned int added = 0;
        while (!added && e < size) {
            if (degrees[edges[e].first] < 2 && degrees[edges[e].second] < 2) {
                tour[t] = edges[e];
                if (t == order - 1 /* It's the last edge */
                    || !cyclic(tour, t + 1, order)) /* It doesn't close the circuit */ 
                { 
                    added = 1;
                    degrees[edges[e].first]++;
                    degrees[edges[e].second]++;
                }
            }
            e++;
        }
        if (!added) {
            /* Edges were not correct */
            free(tour);
            free(degrees);
            return NULL;
        }
    }
    free(degrees);
    return tour;
}

Here is an example program that finds a solution for the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

static void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    connect(edges, 0, 1, 25, &i);
    connect(edges, 0, 2, 19, &i);
    connect(edges, 0, 3, 19, &i);
    connect(edges, 0, 4, 16, &i);
    connect(edges, 0, 5, 28, &i);
    connect(edges, 1, 2, 24, &i);
    connect(edges, 1, 3, 30, &i);
    connect(edges, 1, 4, 27, &i);
    connect(edges, 1, 5, 17, &i);
    connect(edges, 2, 3, 18, &i);
    connect(edges, 2, 4, 20, &i);
    connect(edges, 2, 5, 23, &i);
    connect(edges, 3, 4, 19, &i);
    connect(edges, 3, 5, 32, &i);
    connect(edges, 4, 5, 41, &i);

    tour = cheapest_link_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

Output:

(0, 4, 16) (1, 5, 17) (2, 3, 18) (0, 2, 19) (1, 4, 27) (3, 5, 32)