Tag Archives: Graph Theory

Spanning forest of a graph in C

A graph showing a spanning forest

If a graph isn’t connected, there isn’t a spanning tree that covers all of the vertices. We can, however, construct a spanning forest, which is a set of spanning trees, one for each connected component in the graph. This is rather similar to finding connected components, except that in a spanning forest the components are represented by a set of edges, rather than a set of vertices. Any vertices in the graph that are entirely unconnected will not appear in the spanning forest.

To find the spanning forest, all we need to do is to use the depth-first search algorithm for finding a spanning tree repeatedly, starting at each unvisited vertex in turn. Once all vertices that appear in edges have been visited, the spanning forest is complete.

Below is an implementation in C. The function spanning_forest() takes a graph in edge list format, the number of edges (size), the number of vertices (order), and a callback function that is called with each spanning tree found. It reuses the spanning_tree_recursive() function from the spanning tree algorithm to find each spanning tree.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

typedef void (*treefn)(const unsigned int *, size_t, const edge *, size_t);

void spanning_tree_recursive(const edge *edges, unsigned int size, 
        unsigned int order, unsigned int *visited, unsigned int *tree,
        unsigned int vertex, int edge, unsigned int *len)
{
    unsigned int e;
    visited[vertex] = 1;
    if (edge != -1) {
        tree[(*len)++] = edge;
    }
    for (e = 0; e < size; e++) {
        if (edges[e].first == vertex || edges[e].second == vertex) {
            unsigned int neighbour = edges[e].first == vertex ?
                edges[e].second : edges[e].first;
            if (!visited[neighbour]) {
                spanning_tree_recursive(edges, size, order, visited, tree, 
                        neighbour, e, len);
            }
        }
    }
}

void spanning_forest(const edge *edges, unsigned int size, unsigned int order,
        treefn fun)
{
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    unsigned int *tree = malloc((order - 1) * sizeof(unsigned int));
    unsigned int len, v;
    if (visited == NULL || tree == NULL) {
        free(visited);
        free(tree);
        return;
    }
    for (v = 0; v < order; v++) {
        if (!visited[v]) {
            len = 0;
            spanning_tree_recursive(edges, size, order, visited, tree, v, -1, &len);
            if (len > 0) {
                fun(tree, len, edges, size);
            }
        }
    }
    free(visited);
    free(tree);
}

Here is an example program that finds the spanning forest of the graph shown at the top.

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

void print(const unsigned int *tree, size_t tree_size, const edge *edges, size_t size)
{
    unsigned int e;
    for (e = 0; e < tree_size; e++) {
        printf("(%u, %u) ", edges[tree[e]].first, edges[tree[e]].second);
    }
    putchar('\n');
}

int main(void)
{
    const unsigned int order = 9; /* Vertices */
    const unsigned int size = 8; /* Edges */
    edge *edges;
    
    edges = malloc(size * sizeof(edge));
    if (edges == NULL) {
        return 1;
    }
 
    /* Square */
    edges[0].first = 0;
    edges[0].second = 1;
    edges[1].first = 1;
    edges[1].second = 2;
    edges[2].first = 2;
    edges[2].second = 3;
    edges[3].first = 3;
    edges[3].second = 0;
 
    /* Triangle */
    edges[4].first = 4;
    edges[4].second = 5;
    edges[5].first = 5;
    edges[5].second = 6;
    edges[6].first = 6;
    edges[6].second = 4;
 
    /* Line */
    edges[7].first = 7;
    edges[7].second = 8;

    spanning_forest(edges, size, order, print);

    free(edges);
    return 0;
}

The output:

(0, 1) (1, 2) (2, 3)
(4, 5) (5, 6)
(7, 8)

Spanning tree of a graph in C

A complete graph on 5 vertices showing a spanning tree

In a previous post I showed an algorithm to find all spanning trees in a graph. A simpler problem is just to find a single spanning tree. This can be solved using a depth-first search. We simply need to record, for each vertex we visit, the edge by which we reached it.

Below is an implementation in C. The function spanning_tree() takes a graph in edge list format, the number of edges (size), the number of vertices (order), and the address of a pointer to which to assign the spanning tree. The spanning tree is in the form of an array of edge indices. The function returns the number of edges in this array, which will be order – 1 if the graph is connected. If the graph is not connected, the function will return a spanning tree of the component containing vertex 0, and the returned size will be correspondingly smaller.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

void spanning_tree_recursive(const edge *edges, unsigned int size, 
        unsigned int order, unsigned int *visited, unsigned int *tree,
        unsigned int vertex, int edge, unsigned int *len)
{
    unsigned int e;
    visited[vertex] = 1;
    if (edge != -1) {
        tree[(*len)++] = edge;
    }
    for (e = 0; e < size; e++) {
        if (edges[e].first == vertex || edges[e].second == vertex) {
            unsigned int neighbour = edges[e].first == vertex ?
                edges[e].second : edges[e].first;
            if (!visited[neighbour]) {
                spanning_tree_recursive(edges, size, order, visited, tree, 
                        neighbour, e, len);
            }
        }
    }
}

unsigned int spanning_tree(const edge *edges, unsigned int size, unsigned int order,
        unsigned int **tree)
{
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    *tree = malloc((order - 1) * sizeof(unsigned int));
    unsigned int len = 0;
    if (visited == NULL || *tree == NULL) {
        free(visited);
        free(*tree);
        *tree = NULL;
        return 0;
    }
    spanning_tree_recursive(edges, size, order, visited, *tree, 0, -1, &len);
    free(visited);
    return len;
}

Here is an example program that finds a spanning tree of the complete graph on 5 vertices:

/* Calculate the nth triangular number T(n) */
unsigned int triangular_number(unsigned int n)
{
    return (n * (n + 1)) / 2;
}

/* Construct a complete graph on v vertices */
unsigned int complete_graph(unsigned int v, edge **edges)
{
    unsigned int n = triangular_number(v - 1);
    unsigned int i, j, k;
    *edges = malloc(n * sizeof(edge));
    if (edges != NULL) {
        for (i = 0, k = 0; i < v - 1; i++) {
            for (j = i + 1; j < v; j++) {
                (*edges)[k].first = i;
                (*edges)[k].second = j;
                k++;
            }
        }
    }
    else {
        n = 0;
    }
    return n;
}

int main(void)
{
    edge *edges;
    const unsigned int order = 5; /* Vertices */
    const unsigned int size = complete_graph(5, &edges); /* Edges */
    unsigned int *tree;
    unsigned int tree_size = spanning_tree(edges, size, order, &tree);
    unsigned int e;
    for (e = 0; e < tree_size; e++) {
        printf("(%u, %u) ", edges[tree[e]].first, edges[tree[e]].second);
    }
    putchar('\n');
    free(edges);
    free(tree);
    return 0;
}

The output:

(0, 1) (1, 2) (2, 3) (3, 4)

Prüfer codes and labeled trees in C

A tree and its Prüfer Code

Arthur Cayley first proved that there are \(n^{n-2}\) labeled trees on \(n\) vertices, and in another proof of the same theorem, Heinz Prüfer showed a bijection between trees on \(n\) vertices and \((n-2)\)-tuples of integers from 1 to \(n – 2\), called Prüfer codes. The proof provides an algorithm that allows us to construct an arbitrary labeled tree from its code.

The algorithm proceeds as follows:

  1. Initialise an array of node degrees for the nodes, which are numbered from 1 to \(n + 2\)
  2. For each occurrence of a node number in the code, increment its degree
  3. Add 1 to all degrees
  4. For each occurrence of a node number in the code:
    1. Find the lowest-numbered node with degree 1
    2. Create an edge between that node and the node in the code
    3. Decrement the degrees of both nodes by 1
  5. There are now 2 nodes left with degree 1, so join them with an edge to complete the tree

Below is an implementation of the algorithm in C. The function prufer_tree() takes a code in the form of an array of unsigned integers and its length, and returns the tree as an array of edges.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

edge *prufer_tree(const unsigned int *code, size_t len)
{
    unsigned int i, j, e = 0;
    const unsigned int n = len + 2; /* Nodes */
    edge *tree = malloc((n - 1) * sizeof(edge));
    unsigned int *degrees = calloc(n + 1, sizeof(unsigned int)); /* 1-based */
    if (tree == NULL || degrees == NULL) {
        free(tree);
        free(degrees);
        return NULL;
    }
    /* Set the degrees from the occurrences in the code */
    for (i = 0; i < len; i++) {
        degrees[ code[i]]++;
    }
    /* Add 1 to them all */
    for (i = 1; i <= n; i++) {
        degrees[i]++;
    }
    /* Add edges to nodes in the code */
    for (i = 0; i < len; i++) {
        /* Find the lowest-numbered node with degree 1 */
        for (j = 1; degrees[j] != 1; j++);
        /* Add the edge */
        tree[e].first = j;
        tree[e].second = code[i];
        degrees[j]--;
        degrees[ code[i]]--;
        e++;
    }
    /* Find the last 2 degree-1 nodes */
    for (i = 1; degrees[i] != 1; i++);
    for (j = i + 1; degrees[j] != 1; j++);
    /* Add the last edge */
    tree[e].first = i;
    tree[e].second = j;
    free(degrees);

    return tree;
}

Here is an example program that constructs the tree in the image at the top:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    unsigned int code[] = {1, 1, 1, 1, 6, 5};
    const size_t len = sizeof(code) / sizeof(unsigned int);
    const unsigned int n = len + 1; /* Edges */
    edge *tree = prufer_tree(code, len);
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u)\n", tree[e].first, tree[e].second);
    }
    free(tree);
    return 0;
}

The output:

(2, 1)
(3, 1)
(4, 1)
(7, 1)
(1, 6)
(6, 5)
(5, 8)

Breadth-First Search of a graph in C

A graph

A breadth-first search (BFS) of a graph visits every neighbour of the current vertex before visiting the neighbours’ neighbours. This means that a breadth-first search extends radially from the starting vertex.

The algorithm is as follows:

  1. Create an empty set for the visited vertices
  2. Create a queue
  3. Put the starting vertex in the visited set
  4. Put the starting vertex in the queue
  5. While the queue is not empty:
    1. Remove a vertex from the queue
    2. Perform any visit actions on it
    3. For each of the vertex’s neighbours:
      • If the neighbour is not in the visited set:
        1. Put the neighbour in the visited set
        2. Add the neighbour to the queue

Here is an implementation in C using a cirque for the queue. The function breadth_first_search() takes a graph in edge list form, the number of edges (size), the number of vertices (order), and a callback function that is called for each vertex visited.

#include <stdlib.h>

#include <cirque.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

typedef void (*searchfn)(unsigned int);

void breadth_first_search(const edge *edges, unsigned int size, unsigned int order, searchfn fun)
{
    unsigned int start = 0;
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    cirque *queue = cirque_create();
    if (!(visited && queue)) {
        free(visited);
        cirque_delete(queue);
        return;
    }
    visited[start] = 1;
    cirque_insert(queue, &start);
    while (cirque_get_count(queue)) {
        unsigned int e;
        unsigned int *current = cirque_remove(queue);
        fun(*current);
        for (e = 0; e < size; e++) {
            if (edges[e].first == *current || edges[e].second == *current) {
                const unsigned int *neighbour = edges[e].first == *current ?
                    &edges[e].second : &edges[e].first;
                if (!visited[*neighbour]) {
                    visited[*neighbour] = 1;
                    cirque_insert(queue, (void*)neighbour);
                }
            }
        }
    }
    free(visited);
    cirque_delete(queue);
}

Here is an example program that constructs the graph shown at the top and then performs a BFS on it, printing the numbers of the vertices as they are visited.

#include <stdio.h>
#include <stdlib.h>



/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

void print(unsigned int vertex)
{
    printf("Visiting vertex %u\n", vertex);
}

int main(void)
{
    const unsigned int size = 12; /* Edges */
    const unsigned int order = 13; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 0, 4, &i);
    edge_connect(edges, 1, 5, &i);
    edge_connect(edges, 1, 6, &i);
    edge_connect(edges, 2, 7, &i);
    edge_connect(edges, 2, 8, &i);
    edge_connect(edges, 3, 9, &i);
    edge_connect(edges, 3, 10, &i);
    edge_connect(edges, 4, 11, &i);
    edge_connect(edges, 4, 12, &i);

    breadth_first_search(edges, size, order, print);

    free(edges);
    return 0;
}

The output:

Visiting vertex 0
Visiting vertex 1
Visiting vertex 2
Visiting vertex 3
Visiting vertex 4
Visiting vertex 5
Visiting vertex 6
Visiting vertex 7
Visiting vertex 8
Visiting vertex 9
Visiting vertex 10
Visiting vertex 11
Visiting vertex 12

Realising a graph degree sequence in C

An example graph

The degree sequence of a graph is the sequence of the degrees of each if its vertices. It is usually written in non-increasing order. For example, the degree sequence of the graph shown above is <4, 2, 2, 2, 2, 1, 1, 1, 1>. It’s easy to find the degree sequence of a graph – count the number of times each vertex occurs as an endpoint in the edge list, and then sort the results. The following C function will return the degree sequence of graph in edge list form:

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

int compare_unsigned_ints_decreasing(const unsigned int *p1, const unsigned int *p2)
{
    if (*p1 > *p2) {
        return -1;
    }
    if (*p1 < *p2) {
        return 1;
    }
    return 0;
}

unsigned int *degree_sequence(const edge *edges, unsigned int size, unsigned int order)
{
    unsigned int *sequence = calloc(order, sizeof(unsigned int));
    if (sequence != NULL) {
        unsigned int e;
        for (e = 0; e < size; e++) {
            sequence[edges[e].first]++;
            sequence[edges[e].second]++;
        }
    }
    qsort(sequence, order, sizeof(unsigned int),
            (int(*)(const void *, const void *))compare_unsigned_ints_decreasing);
    return sequence;
}

The sum of the numbers in a graph’s degree sequence will divide by 2 because every edge has 2 endpoints, but other than that it isn’t easy to tell if a given sequence of integers can be the degree sequence of a graph. A sequence that can be the degree sequence of a graph is called a graphic sequence. One way to find out if a sequence is graphic is to simulate constructing a graph out of it, treating the vertex degrees as a resource to use up by adding edges. This process is called realising the sequence. There is an algorithm by Havel and Hakimi to do this, which goes as follows:

  1. While there are non-0 numbers in the degree sequence:
    1. Connect the first vertex to as many of the following vertices as its degree, decrementing their degrees accordingly
    2. Make the first vertex’s degree 0 – it is now finished with
    3. Sort the vertices in descending order
  2. If at any point you run out of vertices because you need to decrement a 0-degree vertex or there are not enough vertices in the sequence, then the sequence is not graphical

There are many possible graphs for any given graphical degree sequence, so given the degree sequence of a graph the algorithm will not in general reconstruct the same graph. For example, given the degree sequence of the graph at the top, the algorithm will produce this graph:

Another graph with the same degree sequence

Below is an implementation of the Havel-Hakimi algorithm in C. It takes a degree sequence and it length, a callback function to call when each edge would be added, and a void pointer for user data to be passed to the callback. It doesn’t actually construct a graph data structure but instead leaves it to the client to realise edge-addition events in any way it sees fit.

#include <stdlib.h>

typedef struct {
    unsigned int id;
    unsigned int degree;
} vertex;

int compare_vertices_decreasing(const vertex *v1, const vertex *v2)
{
     return compare_unsigned_ints_decreasing(&v1->degree,
             &v2->degree);
}

unsigned int sum(const unsigned int *numbers, size_t len)
{
    unsigned int i, total = 0;
    for (i = 0; i < len; i++) {
        total += numbers[i];
    }
    return total;
}

typedef void(*realisefn)(unsigned int, unsigned int, void *);

unsigned int realise_sequence(const unsigned int *degrees, size_t len, realisefn fun, void *data)
{
    unsigned int i;
    unsigned int failed = 0;
    unsigned int unused_vertices = len;
    vertex *vertices;
    /* Check for even sum of degrees */
    if (sum(degrees, len) % 2 != 0) {
        return 0;
    }
    /* Initialise vertices */
    vertices = malloc(len * sizeof(vertex));
    if (vertices == NULL) {
        return 0;
    }
    for (i = 0; i < len; i++) {
        vertices[i].id = i;
        vertices[i].degree = degrees[i];
    }
    /* Main loop */
    while (unused_vertices > 0 && !failed) {
        /* Sort vertices in decreasing order of degree */
        qsort(vertices, len, sizeof(vertex), 
                (int(*)(const void *, const void *))compare_vertices_decreasing);
        /* Connect the first vertex to as many others as its degree */
        for (i = 1; i <= vertices[0].degree && !failed; i++) {
            if (i < len && vertices[i].degree > 0) {
                vertices[i].degree -= 1;
                /* Call callback */
                fun(vertices[0].id, vertices[i].id, data);
                if (vertices[i].degree == 0) {
                    unused_vertices--;
                }
            }
            else {
                /* Ran out of vertices */            
                failed = 1;
            }
        }
        /* Finished with first vertex */
        vertices[0].degree = 0;
        unused_vertices--;
    }
    free(vertices);
    return !failed;
}

Below is an example program. It constructs the graph shown at the top, gets its degree sequence, and then passes this to the algorithm, along with a callback and data that will construct the realisation of the degree sequence. The degree sequence of the new graph is then retrieved and is shown to be the same as that of the original graph.

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}


/* Structure to hold edge list and keep track of current edge */
typedef struct {
    edge *edges;
    unsigned int i;
} edge_list;

/* Callback function to connect two edges */
void connect(unsigned int v1, unsigned int v2, edge_list *graph)
{
    edge_connect(graph->edges, v1, v2, &graph->i);
}

int main(void)
{
    const unsigned int size = 8; /* Edges */
    const unsigned int order = 9; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int *sequence;
    unsigned int i = 0;
    edge_list graph;

    /* Construct the graph */
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 0, 4, &i);
    edge_connect(edges, 1, 5, &i);
    edge_connect(edges, 2, 6, &i);
    edge_connect(edges, 3, 7, &i);
    edge_connect(edges, 4, 8, &i);

    /* Get the degree sequence */
    sequence = degree_sequence(edges, size, order);
    for (i = 0; i < order; i++) {
        printf("%u ", sequence[i]);
    }
    putchar('\n');
    
    /* Realise the sequence */
    graph.edges = edges;
    graph.i = 0;
    realise_sequence(sequence, order, (realisefn)connect, &graph);

    /* Get the degree sequence again */
    free(sequence);
    sequence = degree_sequence(edges, size, order);
    for (i = 0; i < order; i++) {
        printf("%u ", sequence[i]);
    }
    putchar('\n');

    free(sequence);
    free(edges);
    return 0;

Output:

4 2 2 2 2 1 1 1 1
4 2 2 2 2 1 1 1 1

Topological sort in C

A digraph

A topological sort of a directed graph is an ordering of the vertices such that the starting vertex of all arcs occurs before its ending vertex. Only graphs without cycles can be topologically sorted, and attempting to topologically sort a digraph is one way of finding out if it is a directed acyclic graph (DAG).

There is a simple linear time algorithm for topologically sorting a graph:

  1. Make a collection of all of the vertices that have no incoming arcs (in-degree 0)
  2. Make a collection of all of the arcs
  3. Make an empty collection for the sorted sequence
  4. While the vertices collection is not empty:
    1. Remove the first vertex from the vertices collection
    2. Add it to the sorted sequence
    3. Remove all arcs from it to its neighbours
    4. If any of those neighbours now have no incoming arcs, add them to the vertices collection
  5. If the arcs collection is now empty, the graph is acyclic, and the sorted sequence contains a topological sort

Note that it doesn’t matter what kind of collection the vertices collection is – it can be a set, queue, or stack – the constraint of only adding vertices that have no incoming arcs ensures that the ordering produced will be valid. This means that a graph will in general have several valid topological sorts.

Below is an implementation of the algorithm in C. The input is a graph in arc list form, the number of arcs (size), the number of vertices (order), and the address of a pointer to which to assign the array of sorted vertices. Within the algorithm, the vertices collection is a set implemented as an array of integers used as booleans to indicate whether a given vertex is in the set or not. The function returns a boolean value to indicate whether or not the graph is acyclic.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} arc;

/* Find out if a vertex has no incoming arcs */
static unsigned int is_root(const arc *graph, const unsigned int *arcs, unsigned int size,
        unsigned int v)
{
    unsigned int a, root = 1;
    for (a = 0; a < size && root; a++) {
        root = !arcs[a] || graph[a].second != v;
    }
    return root;
}

/* Get the vertices with no incoming arcs */
static unsigned int get_roots(const arc *graph, const unsigned int *arcs, unsigned int size,
        unsigned int order, unsigned int *vertices)
{
    unsigned int v, vertices_size = 0;
    for (v = 0; v < order; v++) {
        if (is_root(graph, arcs, size, v)) {
            vertices[v] = 1;
            vertices_size++;
        }
    }
    return vertices_size;
}

unsigned int topological_sort(const arc *graph, unsigned int size, unsigned int order, 
        unsigned int **sorted)
{
    unsigned int *vertices = calloc(order, sizeof(unsigned int));
    unsigned int *arcs = malloc(size * sizeof(unsigned int));
    *sorted = malloc(order * sizeof(unsigned int));
    unsigned int v, a, vertices_size, sorted_size = 0,
            arcs_size = size;
    if (!(vertices && arcs && *sorted)) {
        free(vertices);
        free(arcs);
        free(*sorted);
        *sorted = NULL;
        return 0;
    }
    /* All arcs start off in the graph */
    for (a = 0; a < size; a++) {
        arcs[a] = 1;
    }
    /* Get the vertices with no incoming edges */
    vertices_size = get_roots(graph, arcs, size, order, vertices);
    /* Main loop */
    while (vertices_size > 0) {
        /* Get first vertex */
        for (v = 0; vertices[v] != 1; v++);
        /* Remove from vertex set */
        vertices[v] = 0;
        vertices_size--;
        /* Add it to the sorted array */
        (*sorted)[sorted_size++] = v;
        /* Remove all arcs connecting it to its neighbours */
        for (a = 0; a < size; a++) {
            if (arcs[a] && graph[a].first == v) {
                arcs[a] = 0;
                arcs_size--;
                /* Check if neighbour is now a root */
                if (is_root(graph, arcs, size, graph[a].second)) {
                    /* Add it to set of vertices */
                    vertices[graph[a].second] = 1;
                    vertices_size++;
                } 
            }
        }
    }
    free(vertices);
    free(arcs);
    return arcs_size == 0;
}

Here is an example program that topologically sorts the graph shown in the picture at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void arc_connect(arc *arcs, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 8; /* Arcs */
    const unsigned int order = 8; /* Vertices */
    arc *arcs = malloc(size * sizeof(arc));
    unsigned int i = 0;
    unsigned int *sorted;
    unsigned int acyclic;

    arc_connect(arcs, 0, 2, &i);
    arc_connect(arcs, 1, 3, &i);
    arc_connect(arcs, 2, 3, &i);
    arc_connect(arcs, 2, 4, &i);
    arc_connect(arcs, 2, 5, &i);
    arc_connect(arcs, 3, 6, &i);
    arc_connect(arcs, 5, 7, &i);
    arc_connect(arcs, 5, 7, &i);

    acyclic = topological_sort(arcs, size, order, &sorted);
    printf("Graph is acyclic: %u\n", acyclic);
    for (i = 0; i < order; i++) {
        printf("%u ", sorted[i]);
    }
    putchar('\n');

    free(sorted);
    free(arcs);

    return 0;
}

The output:

Graph is acyclic: 1
0 1 2 3 4 5 6 7

Greedy Vertex Cover in C

A graph showing a vertex cover

The Vertex Cover Problem is to find a subset of the vertices of a graph that contains an endpoint of every edge. For example, in the graph shown above, the subset (0, 1) highlighted in red is a vertex cover. The minimisation problem of finding the smallest vertex cover is NP-hard. However a good approximation can be achieved using a greedy algorithm. The algorithm is simply:

  • While there are uncovered edges:
    1. Find the first uncovered edge
    2. Put one of its endpoints in the covering set
    3. Mark all of the edges incident upon that endpoint as covered

Below is an implementation in C. The function vertex_cover() takes a graph in edge list format, its size (number of edges) and order (number of vertices), and the address of a pointer through which to return the covering set as an out parameter. The return value is the size of the covering set.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

unsigned int vertex_cover(const edge *edges, unsigned int size, unsigned int order, 
        unsigned int **cover)
{
    unsigned int uncovered_size = size;
    unsigned int cover_size = 0;
    unsigned int *covered = calloc(size, sizeof(unsigned int));
    *cover = calloc(order, sizeof(unsigned int));
    if (covered == NULL || cover == NULL) {
        free(covered);
        free(*cover);
        *cover = NULL;
        return 0;
    }
    while (uncovered_size > 0) {
        unsigned int e, v;
        /* Find the first edge that hasn't been covered */
        for (e = 0; e < size && covered[e] == 1; e++);
        /* Add its first endpoint to the covering set */
        v = edges[e].first;
        (*cover)[v] = 1;
        cover_size++;
        /* Mark all uncovered edges containing its first endpoint as covered */
        for (e = 0; e < size; e++) {
            if (!covered[e] && (edges[e].first == v || edges[e].second == v)) {
                covered[e] = 1;
                uncovered_size--;
            }
        }
    }
    free(covered);
    return cover_size;
}

Here is an example program that finds a vertex cover on the graph shown above:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 4; /* Edges */
    const unsigned int order = 5; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 1, 4, &i);
    unsigned int *cover;
    unsigned int c = vertex_cover(edges, size, order, &cover);
    printf("Cover size is %u\n", c);
    printf("Vertices in cover:\n");
    for (i = 0; i < order; i++) {
        if (cover[i]) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(edges);
    free(cover);
    return 0;
}

The output:

Cover size is 2
Vertices in cover:
0 1

Complement of a graph in C

A graph and its complement

Complement of a graph

The complement of a graph is another graph in which all vertices that are adjacent in the original graph are not adjacent, and all vertices that are not adjacent in the original graph are adjacent. The picture above shows a graph and its complement. Notice that vertex 4, which is adjacent to all of the other vertices in the original graph is adjacent to none in the complement graph, while vertices 0 and 1, which have 3 out of a possible 4 neighbours in the original graph have 4 – 3 = 1 neighbours in the complement graph.

Computing the complement of a graph is easy – just change every 0 in the adjacency matrix to a 1 and every 1 to a 0. Below is how it looks in C. I am assuming the graph is a simple graph and so does not have any self-loops (vertices that are adjacent to themselves), so I am ignoring the central diagonal.

void complement(unsigned int *const *adjmat, unsigned int order)
{
    unsigned int i, j;
    for (i = 0; i < order;i++) {
        for (j = 0; j < order; j++) {
            if (i != j) {
                adjmat[i][j] = !adjmat[i][j];
            }
        }
    }
}

Cliques and independent sets

Recall that a clique in a graph is a subset of the vertices in which all of the vertices are adjacent to each other, while an independent set is a subset of the vertices in which none of the vertices are adjacent to each other. This means that cliques in a graph become independent sets in its complement, and vice-versa. For example, in the picture above, the clique (1, 2, 4) shown in red becomes the independent set (1, 2, 4) in the complement graph.

In a previous post I presented a backtracking algorithm for solving the Maximum Clique problem – finding the largest clique in a graph. The dual of this problem is the Maximum Independent Set problem – finding the largest independent set in a graph. Because of the duality of cliques and independent sets through taking the graph’s complement, this means that having a Max Clique function gives us a Max Independent Set function for free – we just need to take the graph’s complement and then solve the Max Clique Problem, as below:

unsigned int maximum_independent_set(unsigned int *const *adjmat, unsigned int order,
        unsigned int **max_set)
{
    unsigned int max_clique_size;
    complement(adjmat, order);
    max_clique_size = maximum_clique((const unsigned int **)adjmat, order, max_set);
    complement(adjmat, order);
    return max_clique_size;
}

Here is an example program that finds a maximum independent set in the second graph shown above:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const unsigned int order = 5; /* Vertices */
    unsigned int r1[] = {0, 0, 1, 0, 0};
    unsigned int r2[] = {0, 0, 0, 1, 0};
    unsigned int r3[] = {1, 0, 0, 1, 0};
    unsigned int r4[] = {0, 1, 1, 0, 0};
    unsigned int r5[] = {0, 0, 0, 0, 0};
    unsigned int *const adjmat[] = {r1, r2, r3, r4, r5};
    unsigned int *max_set;
    unsigned int max_size, i;
    max_size = maximum_independent_set(adjmat, order, &max_set);
    printf("Max independent set size is %u\n", max_size);
    for (i = 0; i < order; i++) {
        if (max_set[i] == 1) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(max_set);
    return 0;
}

The output:

Max independent set size is 3
1 2 4

Traveling Salesman Problem using backtracking in C

I have previously shown the Cheapest-Link, Nearest-Neigbour, and Repetitive-Nearest Neighbour algorithms for the Traveling Salesman Problem. These are all greedy algorithms that give an approximate result. The Traveling Salesman Problem is NP-complete, so an exact algorithm will have exponential running time unless \(P=NP\). However, we can reduce the search space for the problem by using backtracking.

This is an implementation of TSP using backtracking in C. It searches the permutation space of vertices, fixing the start of each tour at vertex 0. This means that the last edge is always the one that connects the second-last edge to vertex 0, so it is not necessary to find this edge by permutation. The function traveling_salesman() takes a graph in the form of a matrix of distances (adjmat), the number of vertices (order), and the address of a pointer to an array of unsigned integers used as an output parameter (best_tour). It returns the cost of the best tour, and assigns an array containing the vertices of the tour in order to *best_tour.

#include <stdlib.h>
#include <string.h>

static void swap(unsigned int *a, unsigned int *b)
{
    unsigned int temp = *a;
    *a = *b;
    *b = temp;
}

static void traveling_salesman_recursive(const unsigned int **adjmat, unsigned int order, 
        unsigned int *best_tour, unsigned int *best_tour_cost, unsigned int *partial_tour,
        unsigned int *partial_tour_cost, unsigned int level)
{
    if (level == order - 1) {
        /* Add last two edges to complete the tour */
        unsigned int tour_cost = *partial_tour_cost 
                + adjmat[partial_tour[order - 2]][partial_tour[order - 1]]
                + adjmat[partial_tour[order - 1]][0];
        if (*best_tour_cost == 0 || tour_cost < *best_tour_cost) {
            /* Best tour so far */
            memcpy(best_tour, partial_tour, order * sizeof(unsigned int));
            *best_tour_cost = tour_cost;
        }
    }
    else {
        unsigned int i;
        for (i = level; i < order; i++) {
            if (*best_tour_cost == 0
                || *partial_tour_cost + adjmat[partial_tour[level - 1]][partial_tour[i]]
                    < *best_tour_cost)
            {  
                /* Next permutation */
                swap(&partial_tour[level], &partial_tour[i]);
                unsigned int cost = adjmat[partial_tour[level - 1]][partial_tour[level]];
                *partial_tour_cost += cost;
                traveling_salesman_recursive(adjmat, order, best_tour, best_tour_cost,
                        partial_tour, partial_tour_cost, level + 1);
                *partial_tour_cost -= cost;
                swap(&partial_tour[level], &partial_tour[i]);
            }
        }
    }
}

unsigned int traveling_salesman(const unsigned int **adjmat, unsigned int order, 
        unsigned int **best_tour)
{
    unsigned int i;
    unsigned int best_tour_cost = 0;
    unsigned int partial_tour_cost = 0;
    unsigned int *partial_tour = malloc(order * sizeof(unsigned int));
    *best_tour = malloc(order * sizeof(unsigned int));
    if (partial_tour == NULL || *best_tour == NULL) {
        free(partial_tour);
        free(*best_tour);
        *best_tour = NULL;
        return 0;
    }        
    for (i = 0; i < order; i++) {
        partial_tour[i] = i;
    }
    traveling_salesman_recursive(adjmat, order, *best_tour, &best_tour_cost, partial_tour, 
            &partial_tour_cost, 1);
    free(partial_tour);
    return best_tour_cost;
}

Here is an example program:

#include <stdlib.h>
#include <stdio.h>

int main(void)
{
    unsigned int r1[] = {0, 5, 7, 9, 10};
    unsigned int r2[] = {4, 0, 11, 3, 7};
    unsigned int r3[] = {3, 1, 0, 4, 5};
    unsigned int r4[] = {6, 5, 7, 0, 11} ;
    unsigned int r5[] = {13, 2, 8, 3, 0} ;
    const size_t order = 5; /* Vertices */
    const unsigned int *adjmat[] = { r1, r2, r3, r4, r5 };
    unsigned int *best_tour;
    unsigned int cost = traveling_salesman(adjmat, order, &best_tour);
    unsigned int i;
    printf("Best tour cost: %u\n", cost);
    printf("Vertices:\n");
    for (i = 0; i < order; i++) {
        printf("%u ", best_tour[i]);
    }
    putchar('\n');
    printf("Edge weights:\n");
    for (i = 0; i < order - 1; i++) {
        printf("%u ", adjmat[best_tour[i]][best_tour[i + 1]]);
    }
    printf("%u\n", adjmat[best_tour[order - 1]][0]);
    free(best_tour);
    return 0;
}

The output:

Best tour cost: 23
Vertices:
0 2 4 1 3
Edge weights:
7 5 2 3 6

Maximum Clique using backtracking in C

A graph showing a maximum clique

A clique in a graph is a subgraph in which all vertices are connected to each other – a complete subgraph. The Maximum Clique Problem is to find the largest clique of a graph. For example, in the graph shown above the maximum clique size is 3, and the vertices (1, 2, 4) are a maximum clique, as are (0, 1, 4) and (0, 3, 4).

The Maximum Clique Problem is NP-complete, but can be solved efficiently using backtracking. The following is a backtracking implementation in C. The function maximum_clique() takes a graph in adjacency matrix form and its order (the number of vertices), and will return a maximum clique and its size.

#include <stdlib.h>
#include <string.h>

static void maximum_clique_recursive(const unsigned int **adjmat, unsigned int order, 
        unsigned int *current_clique, unsigned int *current_clique_size, unsigned int *max_clique,
        unsigned int *max_clique_size, unsigned int level)
{
    unsigned int i, connected;
    if (level == order) {
        /* Found a new max clique */
        memcpy(max_clique, current_clique, order * sizeof(unsigned int));
        *max_clique_size = *current_clique_size;
        return;
    }
    /* Find out if current vertex is connected to all vertices in current clique */
    connected = 1;
    for (i = 0; i < level && connected; i++) {
        if (current_clique[i] && !adjmat[level][i]) {
            connected = 0;
        }
    }

    if (connected) {
        /* Add this vertex to the clique */
        current_clique[level] = 1;
        (*current_clique_size)++;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
        (*current_clique_size)--;
    }
    if (*current_clique_size + order - level > *max_clique_size) {
        /* Still promising */
        current_clique[level] = 0;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
    }
}

unsigned int maximum_clique(const unsigned int **adjmat, unsigned int order, 
        unsigned int **max_clique)
{
    unsigned int max_clique_size = 0;
    unsigned int current_clique_size = 0;
    unsigned int *current_clique = malloc(order * sizeof(unsigned int));
    *max_clique = malloc(order * sizeof(unsigned int));
    if (current_clique == NULL || *max_clique == NULL) {
        free(current_clique);
        free(max_clique);
        return 0;
    }
    maximum_clique_recursive(adjmat, order, current_clique, &current_clique_size, *max_clique, 
            &max_clique_size, 0);
    free(current_clique);
    return max_clique_size;
}

Here is an example program that finds a maximum clique on the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    const unsigned int order = 5; /* Vertices */
    unsigned int r1[] = {0, 1, 0, 1, 1};
    unsigned int r2[] = {1, 0, 1, 0, 1};
    unsigned int r3[] = {0, 1, 0, 0, 1};
    unsigned int r4[] = {1, 0, 0, 0, 1};
    unsigned int r5[] = {1, 1, 1, 1, 0};
    const unsigned int *adjmat[] = {r1, r2, r3, r4, r5};
    unsigned int *max_clique;
    unsigned int max_clique_size = maximum_clique(adjmat, order, &max_clique);
    unsigned int i;
    printf("Max clique size is %u\n", max_clique_size);
    for (i = 0; i < order; i++) {
        if (max_clique[i] == 1) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(max_clique);
    return 0;
}

The output:

Max clique size is 3
1 2 4