Borůvka’s Minimum Spanning Tree (MST) algorithm in C

Borůvka’s MST algorithm is quite similar to Kruskal’s algorithm. Like Kruskal, it begins by making a set of trees that start out as single vertices. It then repeatedly iterates over these trees and finds for each one the cheapest edge that has one endpoint in the tree and the other outside it. It then adds all of these edges to the MST, and merges the trees they join. The difference from Kruskal’s algorithm is that Kruskal adds a single edge at each step, namely the cheapest edge whose endpoints are in different trees, whereas Borůvka adds, in one pass, the cheapest outgoing edge of every tree. Because every tree is merged with at least one other tree in each pass, this has the effect of at least halving the number of trees at each iteration.

So in summary the algorithm is:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Until there is only one tree:
    1. Find the cheapest edge for each tree that has one endpoint in the tree and one outside
    2. Add all of these edges to the MST
    3. Merge all of the affected trees

Below is an implementation in C. As with Kruskal, I used a vertices array to keep track of which tree each vertex is in. I also needed another array, links, to keep track of which cheapest edges had already been found during the loop over the trees. I needed this because the cheapest outgoing edge for one tree can also be the cheapest outgoing edge for another one, and I needed to prevent adding the same edge to the MST twice.

The code was a little bit longer than for Prim or Kruskal, so I broke it into three functions as I never write a function that doesn’t fit on the screen.

#include <stdlib.h>
#include <string.h>

/* A weighted edge between two vertices; both endpoints are treated symmetrically */
typedef struct {
    unsigned int first;  /* one endpoint, used as an index into the per-vertex tree array */
    unsigned int second; /* the other endpoint, used the same way */
    unsigned int weight; /* cost of the edge, summed into the MST total */
} weighted_edge;

/*
 * Return the index of the first edge in edges[0 .. size - 1] that has exactly
 * one endpoint labelled tree in vertices, or -1 if no edge does.
 *
 * The scan stops at the first match, so the result is the cheapest such edge
 * only when the caller has already sorted edges by ascending weight.
 */
static int cheapest_edge_leaving_tree(const weighted_edge *edges, unsigned int size,
        const unsigned int *vertices, unsigned int tree)
{
    unsigned int i;
    for (i = 0; i < size; i++) {
        const int first_inside = vertices[edges[i].first] == tree;
        const int second_inside = vertices[edges[i].second] == tree;
        /* The edge leaves the tree when exactly one endpoint is inside it */
        if (first_inside != second_inside) {
            return (int)i;
        }
    }
    return -1;
}

/*
 * Merge the trees joined by the newly added edges mst[mst_prev .. mst_edges - 1].
 *
 * vertices[v] holds the label of the tree that contains vertex v (order
 * entries). For each edge, every vertex carrying the label of the edge's
 * second endpoint is relabelled with the label of its first endpoint, and
 * *trees is decremented once for every merge actually performed.
 */
static void merge_trees(const weighted_edge *mst, unsigned int mst_prev, unsigned int mst_edges,
       unsigned int *vertices, unsigned int order, unsigned int *trees)
{
    unsigned int e;
    for (e = mst_prev; e < mst_edges; e++) {
        /* Use the endpoints' current tree labels, not their vertex indices:
         * an endpoint may already have been relabelled by an earlier merge
         * in this same batch */
        const unsigned int keep = vertices[mst[e].first];
        const unsigned int drop = vertices[mst[e].second];
        unsigned int v;
        if (keep == drop) {
            /* Endpoints already share a tree; nothing to merge or count */
            continue;
        }
        for (v = 0; v < order; v++) {
            if (vertices[v] == drop) {
                vertices[v] = keep;
            }
        }
        (*trees)--;
    }
}

/*
 * Find a minimum spanning tree of a connected graph with Boruvka's algorithm.
 *
 * edges - the graph's edges (size entries); sorted in place by this function
 * size  - number of entries in edges
 * order - number of vertices; every edge endpoint must be less than order
 * mst   - receives a malloc'd array of the tree's order - 1 edges, which the
 *         caller must free; set to NULL on failure
 *
 * Returns the total weight of the tree. Returns 0 on failure (allocation
 * failure, or the graph is not connected). Because 0 is also the cost of an
 * empty or zero-weight tree, check *mst for NULL to detect failure.
 */
unsigned int boruvka(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    /* Always request at least one element so that order <= 1 cannot underflow
     * order - 1, and success never depends on what malloc(0) returns */
    const size_t mst_capacity = order > 1 ? (size_t)order - 1 : 1;
    unsigned int *vertices = malloc((order ? order : 1) * sizeof *vertices);
    unsigned int *links = malloc((size ? size : 1) * sizeof *links);
    unsigned int trees = order;
    unsigned int i, cost = 0, mst_edges = 0;

    *mst = malloc(mst_capacity * sizeof **mst);
    if (*mst == NULL || vertices == NULL || links == NULL) {
        free(*mst);
        *mst = NULL; /* do not hand a dangling pointer back to the caller */
        free(vertices);
        free(links);
        return 0;
    }
    /* Each vertex starts off in its own tree, labelled by its own index */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge),
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    while (trees > 1) {
        unsigned int t, mst_prev = mst_edges;
        memset(links, 0, size * sizeof *links);
        for (t = 0; t < order; t++) {
            int cheapest;
            /* Tree labels are not contiguous after merging. merge_trees never
             * relabels the vertex whose index is the surviving label, so the
             * live trees are exactly the vertices that still carry their own
             * index as label */
            if (vertices[t] != t) {
                continue;
            }
            /* Get the cheapest edge leaving this tree */
            cheapest = cheapest_edge_leaving_tree(edges, size, vertices, t);
            if (cheapest == -1) {
                /* Graph wasn't connected properly */
                free(*mst);
                *mst = NULL;
                free(vertices);
                free(links);
                return 0;
            }
            /* Add the edge if not there already */
            if (links[cheapest] != 1) {
                (*mst)[mst_edges++] = edges[cheapest];
                links[cheapest] = 1;
                /* Add the cost */
                cost += edges[cheapest].weight;
            }
        }
        /* Merge the trees they join */
        merge_trees(*mst, mst_prev, mst_edges, vertices, order, &trees);
    }
    free(vertices);
    free(links);
    return cost;
}

Here is an example program that finds the MST of the same graph I used for Prim and Kruskal:

#include <stdio.h>
#include <stdlib.h>

/*
 * Example driver: builds a graph on 5 vertices with complete_weighted_graph,
 * runs boruvka on it, and prints the total cost followed by the tree's edges.
 */
int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = boruvka(edges, size, order, &mst);
    /* boruvka leaves mst NULL when it gives up on a disconnected graph;
     * there are no edges to print in that case */
    if (mst == NULL) {
        fprintf(stderr, "No spanning tree could be built\n");
        free(edges);
        return EXIT_FAILURE;
    }
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)