Tag Archives: DFS

Spanning forest of a graph in C

A graph showing a spanning forest

If a graph isn’t connected, there isn’t a spanning tree that covers all of the vertices. We can, however, construct a spanning forest, which is a set of spanning trees, one for each connected component in the graph. This is rather similar to finding connected components, except that in a spanning forest the components are represented by a set of edges, rather than a set of vertices. Any vertices in the graph that are entirely unconnected will not appear in the spanning forest.

To find the spanning forest, all we need to do is to use the depth-first search algorithm for finding a spanning tree repeatedly, starting at each unvisited vertex in turn. Once all vertices that appear in edges have been visited, the spanning forest is complete.

Below is an implementation in C. The function spanning_forest() takes a graph in edge list format, the number of edges (size), the number of vertices (order), and a callback function that is called with each spanning tree found. It reuses the spanning_tree_recursive() function from the spanning tree algorithm to find each spanning tree.

#include <stdlib.h>

/* An undirected edge joining the vertices numbered `first` and `second`. */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/*
 * Callback invoked once per spanning tree. It receives the array of chosen
 * edge indices, the number of entries in that array, the graph's edge list
 * and the number of edges in the graph.
 */
typedef void (*treefn)(const unsigned int *, size_t, const edge *, size_t);

/*
 * Depth-first search step used to grow a spanning tree.
 *
 * Marks `vertex` as visited and, unless `edge` is -1 (the root of the search),
 * appends `edge` (the index of the edge by which the vertex was reached) to
 * `tree`, advancing *len. It then recurses into every unvisited neighbour,
 * scanning the edge list in index order.
 */
void spanning_tree_recursive(const edge *edges, unsigned int size,
        unsigned int order, unsigned int *visited, unsigned int *tree,
        unsigned int vertex, int edge, unsigned int *len)
{
    unsigned int i;

    visited[vertex] = 1;
    if (edge != -1) {
        tree[*len] = (unsigned int)edge;
        *len += 1;
    }

    for (i = 0; i < size; i++) {
        unsigned int other;

        /* Work out the far end of edge i, skipping edges not incident here */
        if (edges[i].first == vertex) {
            other = edges[i].second;
        }
        else if (edges[i].second == vertex) {
            other = edges[i].first;
        }
        else {
            continue;
        }

        if (visited[other]) {
            continue;
        }
        spanning_tree_recursive(edges, size, order, visited, tree,
                other, (int)i, len);
    }
}

/*
 * Find a spanning forest of a graph given as an edge list.
 *
 * edges: the edge list
 * size:  number of edges
 * order: number of vertices
 * fun:   called once for every spanning tree that contains at least one
 *        edge, with (tree edge indices, tree length, edges, size). The index
 *        array is reused between calls and freed before this function
 *        returns, so the callback must copy it if it needs to keep it.
 *
 * Nothing is reported if the working arrays cannot be allocated.
 */
void spanning_forest(const edge *edges, unsigned int size, unsigned int order,
        treefn fun)
{
    unsigned int *visited;
    unsigned int *tree;
    unsigned int len, v;

    /*
     * With fewer than two vertices, or with no edges, no tree can contain an
     * edge, so there is nothing to report. Returning here also stops
     * order - 1 from wrapping round to UINT_MAX when order is 0, and avoids
     * a zero-byte allocation when order is 1.
     */
    if (order < 2 || size == 0) {
        return;
    }

    visited = calloc(order, sizeof *visited);
    /* A tree on at most `order` vertices has at most order - 1 edges */
    tree = malloc((order - 1) * sizeof *tree);
    if (visited == NULL || tree == NULL) {
        free(visited);
        free(tree);
        return;
    }
    for (v = 0; v < order; v++) {
        if (!visited[v]) {
            /* v starts a new component: restart the tree from empty */
            len = 0;
            spanning_tree_recursive(edges, size, order, visited, tree, v, -1, &len);
            if (len > 0) {
                fun(tree, len, edges, size);
            }
        }
    }
    free(visited);
    free(tree);
}

Here is an example program that finds the spanning forest of the graph shown at the top.

#include <stdio.h>
#include <stdlib.h>

/*
 * Connect two vertices: store the edge (first, second) in edges[*pos]
 * and advance *pos to the next free slot.
 */
void edge_connect(edge *edges, unsigned int first, unsigned int second,
        unsigned int *pos)
{
    edge *slot = &edges[*pos];

    slot->first = first;
    slot->second = second;
    *pos += 1;
}

/*
 * Spanning-tree callback: print every edge of the tree as "(first, second) "
 * on a single line, followed by a newline. `size` is not used.
 */
void print(const unsigned int *tree, size_t tree_size, const edge *edges, size_t size)
{
    size_t k = 0;

    (void)size;
    while (k < tree_size) {
        const edge *chosen = &edges[tree[k]];
        printf("(%u, %u) ", chosen->first, chosen->second);
        k++;
    }
    putchar('\n');
}

/* Build the example graph and print its spanning forest, one tree per line. */
int main(void)
{
    /* Endpoints of every edge, in edge-index order */
    static const unsigned int endpoints[][2] = {
        /* Square */
        {0, 1}, {1, 2}, {2, 3}, {3, 0},
        /* Triangle */
        {4, 5}, {5, 6}, {6, 4},
        /* Line */
        {7, 8}
    };
    const unsigned int order = 9; /* Vertices */
    const unsigned int size = sizeof endpoints / sizeof endpoints[0]; /* Edges */
    unsigned int i;
    edge *edges = malloc(size * sizeof(edge));

    if (edges == NULL) {
        return 1;
    }

    for (i = 0; i < size; i++) {
        edges[i].first = endpoints[i][0];
        edges[i].second = endpoints[i][1];
    }

    spanning_forest(edges, size, order, print);

    free(edges);
    return 0;
}

The output:

(0, 1) (1, 2) (2, 3)
(4, 5) (5, 6)
(7, 8)

Spanning tree of a graph in C

A complete graph on 5 vertices showing a spanning tree

In a previous post I showed an algorithm to find all spanning trees in a graph. A simpler problem is just to find a single spanning tree. This can be solved using a depth-first search. We simply need to record, for each vertex we visit, the edge by which we reached it.

Below is an implementation in C. The function spanning_tree() takes a graph in edge list format, the number of edges (size), the number of vertices (order), and the address of a pointer to which to assign the spanning tree. The spanning tree is in the form of an array of edge indices. The function returns the number of edges in this array, which will be order - 1 if the graph is connected. If the graph is not connected, the function will return a spanning tree of the component containing vertex 0, and the returned size will be correspondingly smaller.

#include <stdlib.h>

/* An undirected edge joining the vertices numbered `first` and `second`. */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/*
 * Depth-first search step used to grow a spanning tree.
 *
 * Marks `vertex` as visited and, unless `edge` is -1 (the root of the search),
 * appends `edge` (the index of the edge by which the vertex was reached) to
 * `tree`, advancing *len. It then recurses into every unvisited neighbour,
 * scanning the edge list in index order.
 */
void spanning_tree_recursive(const edge *edges, unsigned int size, 
        unsigned int order, unsigned int *visited, unsigned int *tree,
        unsigned int vertex, int edge, unsigned int *len)
{
    unsigned int e;
    visited[vertex] = 1;
    if (edge != -1) {
        /* Record the edge that brought us to this vertex */
        tree[(*len)++] = (unsigned int)edge;
    }
    for (e = 0; e < size; e++) {
        if (edges[e].first == vertex || edges[e].second == vertex) {
            /* The neighbour is whichever end of the edge is not this vertex */
            unsigned int neighbour = edges[e].first == vertex ?
                edges[e].second : edges[e].first;
            if (!visited[neighbour]) {
                spanning_tree_recursive(edges, size, order, visited, tree, 
                        neighbour, (int)e, len);
            }
        }
    }
}

/*
 * Find a spanning tree of the component containing vertex 0.
 *
 * edges: the edge list
 * size:  number of edges
 * order: number of vertices
 * tree:  receives a newly allocated array of edge indices, which the caller
 *        must free. It is set to NULL if allocation fails or if the graph
 *        has no vertices.
 *
 * Returns the number of edge indices stored in *tree (0 on failure).
 */
unsigned int spanning_tree(const edge *edges, unsigned int size, unsigned int order,
        unsigned int **tree)
{
    unsigned int *visited;
    unsigned int len = 0;
    size_t capacity;

    *tree = NULL;
    if (order == 0) {
        /*
         * There is no vertex 0 to start from, and order - 1 would wrap
         * round to UINT_MAX.
         */
        return 0;
    }

    /*
     * A spanning tree has at most order - 1 edges. Always ask for at least
     * one element: malloc(0) may legitimately return NULL, which would be
     * indistinguishable from an allocation failure.
     */
    capacity = order > 1 ? order - 1 : 1;

    visited = calloc(order, sizeof *visited);
    *tree = malloc(capacity * sizeof **tree);
    if (visited == NULL || *tree == NULL) {
        free(visited);
        free(*tree);
        *tree = NULL;
        return 0;
    }
    spanning_tree_recursive(edges, size, order, visited, *tree, 0, -1, &len);
    free(visited);
    return len;
}

Here is an example program that finds a spanning tree of the complete graph on 5 vertices:

/* Calculate the nth triangular number T(n) = n(n + 1) / 2 */
unsigned int triangular_number(unsigned int n)
{
    /*
     * Exactly one of n and n + 1 is even. Halving that factor before
     * multiplying keeps the intermediate value no larger than the result,
     * so the calculation cannot wrap when T(n) itself is representable.
     */
    if (n % 2 == 0) {
        return (n / 2) * (n + 1);
    }
    return n * ((n + 1) / 2);
}

/*
 * Construct a complete graph on v vertices.
 *
 * On success *edges receives a newly allocated list containing one edge
 * (i, j) for every pair i < j, which the caller must free, and the number
 * of edges is returned. If v is less than 2 (no edges exist) or the
 * allocation fails, *edges is set to NULL and 0 is returned.
 */
unsigned int complete_graph(unsigned int v, edge **edges)
{
    unsigned int n;
    unsigned int i, j, k;

    *edges = NULL;
    if (v < 2) {
        /* No pairs of vertices; also stops v - 1 wrapping round when v is 0 */
        return 0;
    }

    /* A complete graph on v vertices has T(v - 1) = v(v - 1)/2 edges */
    n = triangular_number(v - 1);
    *edges = malloc(n * sizeof **edges);
    if (*edges == NULL) {
        return 0;
    }

    for (i = 0, k = 0; i < v - 1; i++) {
        for (j = i + 1; j < v; j++) {
            (*edges)[k].first = i;
            (*edges)[k].second = j;
            k++;
        }
    }
    return n;
}

/* Build the complete graph on 5 vertices and print one spanning tree of it. */
int main(void)
{
    const unsigned int order = 5; /* Vertices */
    edge *edges = NULL;
    unsigned int *tree = NULL;
    unsigned int size; /* Edges */
    unsigned int tree_size;
    unsigned int i;

    size = complete_graph(order, &edges);
    tree_size = spanning_tree(edges, size, order, &tree);

    i = 0;
    while (i < tree_size) {
        const edge *chosen = &edges[tree[i]];
        printf("(%u, %u) ", chosen->first, chosen->second);
        i++;
    }
    putchar('\n');

    free(tree);
    free(edges);
    return 0;
}

The output:

(0, 1) (1, 2) (2, 3) (3, 4)

Graph cycle detection in C

A cycle in a graph is simply a path whereby one can get from a vertex back to itself. For example, in the graph below there is a cycle (0, 1, 2, 3, 0).
Graph containing a cycle
A graph containing at least one cycle is called a cyclic graph, and a graph without cycles is called an acyclic graph.

Detecting whether a graph is cyclic or acyclic can be easily performed using a Depth First Search (DFS). We simply start at an arbitrary vertex, visit each of its neighbours, then each of the neighbour’s neighbours, and so on. If at any point we find a neighbour that we have visited already, and we haven’t just come from there, then we have detected a cycle.

Here is an implementation in C. Notice that, because it is a DFS, it is very similar to the connected components algorithm I described earlier, which also does a DFS.

#include <stdlib.h>

/* An undirected edge joining the vertices numbered `first` and `second`. */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/*
 * Depth-first search for a cycle, starting at `vertex`.
 *
 * `predecessor` is the vertex from which `vertex` was reached. An edge
 * leading to an already-visited neighbour other than the predecessor closes
 * a cycle. Returns 1 as soon as a cycle is found, otherwise 0.
 */
static unsigned int cyclic_recursive(const edge *edges, unsigned int n, unsigned int *visited,
        unsigned int order, unsigned int vertex, unsigned int predecessor)
{
    unsigned int i;
    unsigned int cycle_found = 0;
    visited[vertex] = 1;
    for (i = 0; i < n && !cycle_found; i++) {
        if (edges[i].first == vertex || edges[i].second == vertex) {
            /* Adjacent */
            const unsigned int neighbour = edges[i].first == vertex ?
                    edges[i].second : edges[i].first;
            if (visited[neighbour] == 0) {
                /* Not yet visited */
                cycle_found = cyclic_recursive(edges, n, visited, order, neighbour, vertex);
            }
            else if (neighbour != predecessor) {
                /* Found a cycle */
                cycle_found = 1;
            }
        }
    }
    return cycle_found;
}

/*
 * Determine whether an undirected graph contains a cycle.
 *
 * edges: the edge list
 * n:     number of edges
 * order: number of vertices
 *
 * Every connected component is searched, not just the one containing
 * vertex 0. Returns 1 if a cycle is found, and 0 if the graph is acyclic,
 * has no vertices, or the working array cannot be allocated.
 */
unsigned int cyclic(const edge *edges, unsigned int n, unsigned int order)
{
    unsigned int *visited;
    unsigned int cycle_found = 0;
    unsigned int v;

    if (order == 0) {
        /* No vertices: nothing to search, and visited[0] would not exist */
        return 0;
    }
    visited = calloc(order, sizeof *visited);
    if (visited == NULL) {
        return 0;
    }
    for (v = 0; v < order && !cycle_found; v++) {
        if (!visited[v]) {
            /*
             * Start a fresh search in each unvisited component. `order` is
             * not a valid vertex number, so passing it as the predecessor
             * means "none": a self-loop on the starting vertex is then
             * reported like a self-loop anywhere else.
             */
            cycle_found = cyclic_recursive(edges, n, visited, order, v, order);
        }
    }
    free(visited);
    return cycle_found;
}

An example program to find out if the graph shown at the top is cyclic or acyclic:

#include <stdio.h>

/* Build the example graph and report whether it contains a cycle. */
int main(void)
{
    /* Endpoints of every edge, in edge-index order */
    static const unsigned int endpoints[][2] = {
        {0, 1}, {1, 2}, {2, 3}, {3, 0}, {3, 4}, {3, 5}
    };
    const unsigned int order = 6; /* Vertices */
    const unsigned int n = sizeof endpoints / sizeof endpoints[0]; /* Edges */
    const char *verdict = "acyclic";
    unsigned int i;
    edge *edges = malloc(n * sizeof(edge));

    if (edges == NULL) {
        return 1;
    }

    for (i = 0; i < n; i++) {
        edges[i].first = endpoints[i][0];
        edges[i].second = endpoints[i][1];
    }

    if (cyclic(edges, n, order)) {
        verdict = "cyclic";
    }
    printf("Graph is %s.\n", verdict);
    free(edges);

    return 0;
}

The output:

Graph is cyclic.

Connected components of a graph in C

A connected component of a graph is a maximal subgraph in which every pair of vertices is joined by a path; there are no edges between the subgraph and the rest of the graph. A connected graph has only one connected component, which is the graph itself, while a disconnected graph has more than one component. For example, the graph shown below has three components, (0, 1, 2, 3), (4, 5, 6), and (7, 8).
Graph with three connected components
The connected components of a graph can be found using a depth-first search (DFS). We start at an arbitrary vertex, and visit every vertex adjacent to it recursively, adding them to the first component. Once this search has finished, we have visited all of the vertices in the first connected component, so we choose another unvisited vertex (if any) and perform the same search starting from it, adding the vertices we find to the second component. This process continues until all vertices have been visited, at which point we know the number of connected components in the graph, and which vertices they contain.

This is an implementation of the connected components algorithm in C. An array is used to store the number of the connected component for each vertex, starting with component 0. The array elements are initialised to -1 so the array is also used to determine which vertices have not yet been visited, as their component number will still be -1.

#include <stdlib.h>

/* An undirected edge joining the vertices numbered `first` and `second`. */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/*
 * Depth-first search that assigns `component` to `vertex` and to every
 * vertex reachable from it that has not been assigned yet (marked -1).
 */
void connected_components_recursive(const edge *edges, unsigned int n, 
        int *components, unsigned int order, unsigned int vertex,
        unsigned int component)
{
    unsigned int i;
    /* Put this vertex in the current component */
    components[vertex] = (int)component; 
    for (i = 0; i < n; i++) {
        if (edges[i].first == vertex || edges[i].second == vertex) {
            /* Adjacent */
            const unsigned int neighbour = edges[i].first == vertex ?
                    edges[i].second : edges[i].first;
            if (components[neighbour] == -1) {
                /* Not yet visited */
                connected_components_recursive(edges, n, components, order, neighbour, component);
            }
        }
    }
}

/*
 * Find the connected components of a graph.
 *
 * edges:      the edge list
 * n:          number of edges
 * order:      number of vertices
 * components: receives a newly allocated array of `order` component numbers
 *             (starting at 0), one per vertex, which the caller must free.
 *             It is set to NULL if the allocation fails.
 *
 * Returns the number of components found (0 on allocation failure).
 */
unsigned int connected_components(const edge *edges, unsigned int n, unsigned int order, 
        int **components)
{
    unsigned int i;
    unsigned int component = 0;
    /*
     * Always ask for at least one element: malloc(0) may legitimately
     * return NULL, which callers would mistake for an allocation failure.
     */
    *components = malloc((order > 0 ? order : 1) * sizeof **components);
    if (*components == NULL) {
        return 0;
    }
    /* -1 marks a vertex that has not been visited yet */
    for (i = 0; i < order; i++) {
        (*components)[i] = -1;
    }
    
    for (i = 0; i < order; i++) {
        if ((*components)[i] == -1) {
            /* Vertex i starts a new component */
            connected_components_recursive(edges, n, *components, order, i, component);
            component++;
        }
    }
    return component;
}

Here is an example program that constructs the graph shown above and then finds its connected components:

#include <stdio.h>
#include <stdlib.h>

/* Print the component number of each of the `order` vertices, one per line. */
static void print_components(int *components, unsigned int order)
{
    unsigned int vertex = 0;

    while (vertex < order) {
        printf("Vertex %u is in component %d\n", vertex, components[vertex]);
        vertex++;
    }
}

/* Build the example graph, find its connected components and print them. */
int main(void)
{
    /* Endpoints of every edge, in edge-index order */
    static const unsigned int endpoints[][2] = {
        /* Square */
        {0, 1}, {1, 2}, {2, 3}, {3, 0},
        /* Triangle */
        {4, 5}, {5, 6}, {6, 4},
        /* Line */
        {7, 8}
    };
    const unsigned int order = 9; /* Vertices */
    const unsigned int n = sizeof endpoints / sizeof endpoints[0]; /* Edges */
    int *components;
    unsigned int c;
    unsigned int i;
    edge *edges = malloc(n * sizeof(edge));

    if (edges == NULL) {
        return 1;
    }

    for (i = 0; i < n; i++) {
        edges[i].first = endpoints[i][0];
        edges[i].second = endpoints[i][1];
    }

    c = connected_components(edges, n, order, &components);
    if (components == NULL) {
        free(edges);
        return 1;
    }

    printf("There are %u components:\n", c);
    print_components(components, order);

    free(components);
    free(edges);

    return 0;
}

The output:

There are 3 components:
Vertex 0 is in component 0
Vertex 1 is in component 0
Vertex 2 is in component 0
Vertex 3 is in component 0
Vertex 4 is in component 1
Vertex 5 is in component 1
Vertex 6 is in component 1
Vertex 7 is in component 2
Vertex 8 is in component 2