Tag Archives: Minimum Spanning Tree

Borůvka’s Minimal Spanning Tree (MST) algorithm in C

Borůvka’s MST algorithm is quite similar to Kruskal’s algorithm. Like Kruskal, it begins by making a set of trees that start out as single vertices. It then repeatedly iterates over these trees and finds for each one the cheapest edge that has one endpoint in the tree and the other not. It then adds all of these edges to the MST, and merges the trees they join. Notice that the difference with Kruskal’s algorithm is that while Kruskal finds the cheapest edge with an endpoint in different trees at each iteration, Borůvka finds all of the cheapest edges that have endpoints in different trees. This has the effect of halving the number of trees at each iteration.

So in summary the algorithm is:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Until there is only one tree:
    1. Find the cheapest edge for each tree that has one endpoint in the tree and one outside
    2. Add all of these edges to the MST
    3. Merge all of the affected trees

Below is an implementation in C. As with Kruskal, I used a vertices array to keep track of which tree each vertex is in. I also needed another array links to keep track of which cheapest edges had been found through the loop over the trees. I needed this because the cheapest outgoing edge for one tree is also the cheapest outgoing edge for another one, and I needed to prevent adding the same edge to the MST twice.

The code was a little bit longer than for Prim or Kruskal, so I broke it into three functions as I never write a function that doesn’t fit on the screen.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Find the cheapest edge with one endpoint in tree and one not */
static int cheapest_edge_leaving_tree(const weighted_edge *edges, unsigned int size, 
        const unsigned int *vertices, unsigned int tree)
{
    unsigned int e;
    int cheapest = -1;
    for (e = 0; e < size && cheapest == -1; e++) {
        if ((vertices[edges[e].first] == tree
                    && vertices[edges[e].second] != tree)
                || (vertices[edges[e].first] != tree 
                    && vertices[edges[e].second] == tree))
        {
            cheapest = e;
        }
    }
    return cheapest;
}

/* Merge trees for all of the edges in mst from mst_prev to mst_edges */
static void merge_trees(const weighted_edge *mst, unsigned int mst_prev, unsigned int mst_edges,
       unsigned int *vertices, unsigned int order, unsigned int *trees)
{
    unsigned int e;
    for (e = mst_prev; e < mst_edges; e++) {
        unsigned int v;
        for (v = 0; v < order; v++) {
            if (vertices[v] == mst[e].second) {
                vertices[v] = mst[e].first;
            } 
        }
        (*trees)--;
    }
}

unsigned int boruvka(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int trees = order;
    unsigned int *links = malloc(size * sizeof(unsigned int));
    unsigned int i, cost = 0, mst_edges = 0;
    if (*mst == NULL || vertices == NULL || links == NULL) {
        free(*mst);
        free(vertices);
        free(links);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    while (trees > 1) {
        unsigned int t, mst_prev = mst_edges;
        memset(links, 0, size * sizeof(unsigned int));
        for (t = 0; t < trees ; t++) {
            /* Get the cheapest edge leaving this tree */
            int cheapest = cheapest_edge_leaving_tree(edges, size, vertices, t);
            if (cheapest == -1) {
                /* Graph wasn't connected properly */
                free(*mst);
                *mst = NULL;
                free(vertices);
                free(links);
                return 0;
            }
            /* Add the edge if not there already */
            if (links[cheapest] != 1) {
                (*mst)[mst_edges++] = edges[cheapest];
                links[cheapest] = 1;
                /* Add the cost */
                cost += edges[cheapest].weight;
            }
        }
        /* Merge the trees they join */
        merge_trees(*mst, mst_prev, mst_edges, vertices, order, &trees);
    }
    free(vertices);
    free(links);
    return cost;
}

Here is an example program that finds the MST of the same graph I used for Prim and Kruskal:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = boruvka(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Kruskal’s Minimum Spanning Tree (MST) Algorithm in C

Kruskal’s MST algorithm is a greedy algorithm like Prim’s algorithm but works quite differently. Recall that Prim’s algorithm builds up a single tree by greedily choosing the cheapest edge that has one endpoint inside it and one outside. Kruskal’s algorithm, on the other hand, builds up multiple trees, which start out as single vertices, and greedily chooses the cheapest edge that has endpoints in two different trees. Adding this edge causes the two trees to merge. After adding \(Order(G) – 1\) edges in this way, all of the trees have been merged into a single spanning tree which is the MST.

In summary:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Do \(Order(G)\) – 1 times (or equivalently, until there is only one tree):
    1. Find the cheapest edge with endpoints in different trees
    2. Use it to merge those two trees into a single tree

Below is an implementation in C. I used an array of integers, vertices, to keep track of which tree the vertices are in. When trees are merged by an edge, this array is updated to put the vertices that are in the same tree as the edge’s second endpoint into the tree containing the edge’s first endpoint. This has the effect of merging the trees. When the algorithm finishes, this array contains the same tree number for every vertex, since there is now only one tree.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int kruskal(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with endpoints in different trees */
        unsigned int j;
        int cheapest = -1;
        for (j = 0; j < size && cheapest == -1; j++) {
            if (vertices[edges[j].first] != vertices[edges[j].second]) {
                cheapest = j;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Merge the trees it joins */
        for (j = 0; j < order; j++) {
            if (vertices[j] == edges[cheapest].second) {
                vertices[j] = edges[cheapest].first;
            }
        }
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that constructs a complete weighted graph and then finds the MST:

#include <stdio.h>
#include <stdlib.h>

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = kruskal(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)

Prim’s Minimum Spanning Tree (MST) Algorithm in C

Weighted graph showing a Minimum Spanning Tree (MST)

A minimum spanning tree (MST) on a weighted graph is a spanning tree in which the sum of the weights of the edges is at a minimum. Finding MSTs is important in areas like the design of electrical networks.

Prim’s algorithm is a greedy algorithm for finding an MST on a weighted graph \(G\) . It goes as follows:

  1. Create an empty set \(T,\) for the tree
  2. Choose a starting vertex
  3. While \(T\) contains fewer than \(Order(G) – 1\) edges:
    1. Find the cheapest edge with one endpoint in \(T\) and one endpoint outside it
    2. Add it to \(T\)

Below is an implementation in C. By sorting the edges by weight first, I have made it more efficient to find the next cheapest edge than using an unordered array. It could be made more efficient still by the use of a data structure like a heap.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Compare two weighted edges by weight */
int weighted_edge_compare(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

unsigned int prim(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = calloc(order, sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Choose a starting vertex */
    vertices[0] = 1;
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with one endpoint in the tree and one not */
        unsigned int e;
        int cheapest = -1;
        for (e = 0; e < size && cheapest == -1; e++) {
            if ((vertices[edges[e].first] == 1 && vertices[edges[e].second] == 0)
                || (vertices[edges[e].first] == 0 && vertices[edges[e].second] == 1))
            {
                cheapest = e;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Add the vertices (one will already be there) */
        vertices[edges[cheapest].first] = 1;
        vertices[edges[cheapest].second] = 1;
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that finds an MST on the graph shown at the top:

#include <stdlib.h>
#include <stdio.h>

/* Calculate the nth triangular number T(n) */
unsigned int triangular_number(unsigned int n)
{
    return (n * (n + 1)) / 2;
}

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = prim(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)