Kruskal’s Minimum Spanning Tree (MST) Algorithm in C

Kruskal’s MST algorithm is a greedy algorithm like Prim’s algorithm but works quite differently. Recall that Prim’s algorithm builds up a single tree by greedily choosing the cheapest edge that has one endpoint inside it and one outside. Kruskal’s algorithm, on the other hand, builds up multiple trees, which start out as single vertices, and greedily chooses the cheapest edge that has endpoints in two different trees. Adding this edge causes the two trees to merge. After adding \(Order(G) – 1\) edges in this way, all of the trees have been merged into a single spanning tree which is the MST.

In summary:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Do \(Order(G)\) – 1 times (or equivalently, until there is only one tree):
    1. Find the cheapest edge with endpoints in different trees
    2. Use it to merge those two trees into a single tree

Below is an implementation in C. I used an array of integers, vertices, to keep track of which tree the vertices are in. When trees are merged by an edge, this array is updated to put the vertices that are in the same tree as the edge’s second endpoint into the tree containing the edge’s first endpoint. This has the effect of merging the trees. When the algorithm finishes, this array contains the same tree number for every vertex, since there is now only one tree.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int kruskal(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int i;
    unsigned int cost = 0;
    if (*mst == NULL || vertices == NULL) {
        free(*mst);
        free(vertices);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    for (i = 0; i < order - 1; i++) {
        /* Find the cheapest edge with endpoints in different trees */
        unsigned int j;
        int cheapest = -1;
        for (j = 0; j < size && cheapest == -1; j++) {
            if (vertices[edges[j].first] != vertices[edges[j].second]) {
                cheapest = j;
            }
        }
        if (cheapest == -1) {
            /* Graph wasn't connected properly */
            free(*mst);
            *mst = NULL;
            free(vertices);
            return 0;
        }
        /* Add the edge */
        (*mst)[i] = edges[cheapest];
        /* Merge the trees it joins */
        for (j = 0; j < order; j++) {
            if (vertices[j] == edges[cheapest].second) {
                vertices[j] = edges[cheapest].first;
            }
        }
        /* Add the cost */
        cost += edges[cheapest].weight;
    }
    free(vertices);
    return cost;
}

Here is an example program that constructs a complete weighted graph and then finds the MST:

#include <stdio.h>
#include <stdlib.h>

/* Construct a complete weighted graph on v vertices */
unsigned int complete_weighted_graph(unsigned int v, weighted_edge **edges)
{
    const unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(weighted_edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                (*edges)[k].weight = k + 1;
                k++;
            }
        }
    }
    return n;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = kruskal(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)