Tag Archives: C

Greedy Vertex Cover in C

A graph showing a vertex cover

The Vertex Cover Problem is to find a subset of the vertices of a graph that contains an endpoint of every edge. For example, in the graph shown above, the subset (0, 1) highlighted in red is a vertex cover. The minimisation problem of finding the smallest vertex cover is NP-hard. However a good approximation can be achieved using a greedy algorithm. The algorithm is simply:

  • While there are uncovered edges:
    1. Find the first uncovered edge
    2. Put one of its endpoints in the covering set
    3. Mark all of the edges incident upon that endpoint as covered

Below is an implementation in C. The function vertex_cover() takes a graph in edge list format, its size (number of edges) and order (number of vertices), and the address of a pointer through which to return the covering set as an out parameter. The return value is the size of the covering set.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

unsigned int vertex_cover(const edge *edges, unsigned int size, unsigned int order, 
        unsigned int **cover)
{
    unsigned int uncovered_size = size;
    unsigned int cover_size = 0;
    unsigned int *covered = calloc(size, sizeof(unsigned int));
    *cover = calloc(order, sizeof(unsigned int));
    if (covered == NULL || cover == NULL) {
        free(covered);
        free(*cover);
        *cover = NULL;
        return 0;
    }
    while (uncovered_size > 0) {
        unsigned int e, v;
        /* Find the first edge that hasn't been covered */
        for (e = 0; e < size && covered[e] == 1; e++);
        /* Add its first endpoint to the covering set */
        v = edges[e].first;
        (*cover)[v] = 1;
        cover_size++;
        /* Mark all uncovered edges containing its first endpoint as covered */
        for (e = 0; e < size; e++) {
            if (!covered[e] && (edges[e].first == v || edges[e].second == v)) {
                covered[e] = 1;
                uncovered_size--;
            }
        }
    }
    free(covered);
    return cover_size;
}

Here is an example program that finds a vertex cover on the graph shown above:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void edge_connect(edge *edges, unsigned int first, unsigned int second, 
        unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    (*pos)++;
}

int main(void)
{
    const unsigned int size = 4; /* Edges */
    const unsigned int order = 5; /* Vertices */
    edge *edges = malloc(size * sizeof(edge));
    unsigned int i = 0;
    edge_connect(edges, 0, 1, &i);
    edge_connect(edges, 0, 2, &i);
    edge_connect(edges, 0, 3, &i);
    edge_connect(edges, 1, 4, &i);
    unsigned int *cover;
    unsigned int c = vertex_cover(edges, size, order, &cover);
    printf("Cover size is %u\n", c);
    printf("Vertices in cover:\n");
    for (i = 0; i < order; i++) {
        if (cover[i]) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(edges);
    free(cover);
    return 0;
}

The output:

Cover size is 2
Vertices in cover:
0 1

0-1 Knapsack using backtracking in C

In the 0-1 Knapsack problem, we are given a set of items with individual weights and profits and a container of fixed capacity (the knapsack), and are required to compute a loading of the knapsack with items such that the total profit is maximised. We only have 1 of each item, so there is either 0 or 1 of each item in in the knapsack, hence the 0-1 in the name of the problem.

The 0-1 knapsack problem is NP-hard, but can be solved quite efficiently using backtracking. Below is a backtracking implementation in C. The function knapsack() takes arrays of weights, and profits, their size, the capacity, and the address of a pointer through which the solution array is returned. It returns the profit of the best knapsack. The profit density of each item (weight / profit) is first calculated, and the knapsack is filled in order of decreasing profit density. A bounding function is used to determine the upper bound on the profit achievable with the current knapsack so as to reduce the search space.

#include <stdlib.h>
#include <string.h>

/* A knapsack item */
typedef struct {
    unsigned int id;
    double weight;
    double profit;
    double profit_density;
} item;

/* Compare items by lesser profit density */
static int compare_items(const item *item1, const item *item2)
{
    if (item1->profit_density > item2->profit_density) {
        return -1;
    }
    if (item1->profit_density < item2->profit_density) {
        return 1;
    }
    return 0;
}

/* Bounding function */
static double profit_bound(const item *items, size_t n, double capacity,
        double current_weight, double current_profit,
        unsigned int level)
{
    double remaining_capacity = capacity - current_weight;
    double bound = current_profit;
    unsigned int lvl = level;
    /* Fill in order of decreasing profit density */
    while (lvl < n &&
        items[lvl].weight <= remaining_capacity) 
    {
        remaining_capacity -= items[lvl].weight;
        bound += items[lvl].profit;
        lvl++;
    }
    /* Pretend we can take a fraction of the next object */
    if (lvl < n) {
        bound += items[lvl].profit_density
            * remaining_capacity;
    }
    return bound;
}

static void knapsack_recursive(const item *items, size_t n, double capacity,
        unsigned int *current_knapsack, double *current_weight, double *current_profit,
        unsigned int *max_knapsack, double *max_profit, unsigned int level)
{   
    if (level == n) {
        /* Found a new max knapsack */
        *max_profit = *current_profit;
        memcpy(max_knapsack, current_knapsack, n * sizeof(unsigned int));
        return;
    }
    if (*current_weight + items[level].weight <= capacity)
    {   /* Try adding this item */
        *current_weight += items[level].weight;
        *current_profit += items[level].profit;
        current_knapsack[items[level].id] = 1;
        knapsack_recursive(items, n, capacity, current_knapsack, current_weight, 
                current_profit, max_knapsack, max_profit, level + 1);
        *current_weight -= items[level].weight;
        *current_profit -= items[level].profit;
        current_knapsack[items[level].id] = 0;
    }
    if (profit_bound(items, n, capacity, *current_weight, 
                *current_profit, level + 1) > *max_profit) {
        /* Still promising */
        knapsack_recursive(items, n, capacity, current_knapsack, current_weight, 
                current_profit, max_knapsack, max_profit, level + 1);
    }
}

double knapsack(const double *weights, const double *profits, 
        size_t n, double capacity, unsigned int **max_knapsack)
{
    double current_weight = 0.0;
    double current_profit = 0.0;
    double max_profit = 0.0;
    unsigned int i;
    item *items  = malloc(n * sizeof(item));
    unsigned int *current_knapsack = calloc(n, sizeof(unsigned int));
    *max_knapsack = malloc(n * sizeof(unsigned int));
    if (!(items && current_knapsack && *max_knapsack)) {
        free(items);
        free(current_knapsack);
        free(*max_knapsack);
        *max_knapsack = NULL;
        return 0;
    }
    /* Populate the array of items */
    for (i = 0; i < n; i++) {
        items[i].id = i;
        items[i].weight = weights[i];
        items[i].profit = profits[i];
        items[i].profit_density = profits[i] / weights[i];
    }
    /* Sort into decreasing density order */
    qsort(items, n, sizeof(item),
            (int (*)(const void *, const void *))compare_items);
    knapsack_recursive(items, n, capacity, current_knapsack, &current_weight,
            &current_profit, *max_knapsack, &max_profit, 0);
    free(items);
    free(current_knapsack);
    return max_profit;
}

Here is an example program:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    double weights[] = {3, 5, 2, 1};
    double profits[] = {9, 10, 7, 4};
    const size_t n = sizeof(profits) / sizeof(profits[0]);
    const double capacity = 7;
    unsigned int *max_knapsack;
    double max_profit = knapsack(weights, profits, n, capacity, &max_knapsack);
    unsigned int i;
    printf("Profit: %.2f\n", max_profit);
    printf("Knapsack contains:\n");
    for (i = 0; i < n; i++) {
        if (max_knapsack[i] == 1) {
            printf("Item %u with weight %.2f and profit %.2f\n", i, weights[i], profits[i]); 
        }
    }
    free(max_knapsack);
    return 0;
}

The output:

Profit: 20.00
Knapsack contains:
Item 0 with weight 3.00 and profit 9.00
Item 2 with weight 2.00 and profit 7.00
Item 3 with weight 1.00 and profit 4.00

Complement of a graph in C

A graph and its complement

Complement of a graph

The complement of a graph is another graph in which all vertices that are adjacent in the original graph are not adjacent, and all vertices that are not adjacent in the original graph are adjacent. The picture above shows a graph and its complement. Notice that vertex 4, which is adjacent to all of the other vertices in the original graph is adjacent to none in the complement graph, while vertices 0 and 1, which have 3 out of a possible 4 neighbours in the original graph have 4 – 3 = 1 neighbours in the complement graph.

Computing the complement of a graph is easy – just change every 0 in the adjacency matrix to a 1 and every 1 to a 0. Below is how it looks in C. I am assuming the graph is a simple graph and so does not have any self-loops (vertices that are adjacent to themselves), so I am ignoring the central diagonal.

void complement(unsigned int *const *adjmat, unsigned int order)
{
    unsigned int i, j;
    for (i = 0; i < order;i++) {
        for (j = 0; j < order; j++) {
            if (i != j) {
                adjmat[i][j] = !adjmat[i][j];
            }
        }
    }
}

Cliques and independent sets

Recall that a clique in a graph is a subset of the vertices in which all of the vertices are adjacent to each other, while an independent set is a subset of the vertices in which none of the vertices are adjacent to each other. This means that cliques in a graph become independent sets in its complement, and vice-versa. For example, in the picture above, the clique (1, 2, 4) shown in red becomes the independent set (1, 2, 4) in the complement graph.

In a previous post I presented a backtracking algorithm for solving the Maximum Clique problem – finding the largest clique in a graph. The dual of this problem is the Maximum Independent Set problem – finding the largest independent set in a graph. Because of the duality of cliques and independent sets through taking the graph’s complement, this means that having a Max Clique function gives us a Max Independent Set function for free – we just need to take the graph’s complement and then solve the Max Clique Problem, as below:

unsigned int maximum_independent_set(unsigned int *const *adjmat, unsigned int order,
        unsigned int **max_set)
{
    unsigned int max_clique_size;
    complement(adjmat, order);
    max_clique_size = maximum_clique((const unsigned int **)adjmat, order, max_set);
    complement(adjmat, order);
    return max_clique_size;
}

Here is an example program that finds a maximum independent set in the second graph shown above:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const unsigned int order = 5; /* Vertices */
    unsigned int r1[] = {0, 0, 1, 0, 0};
    unsigned int r2[] = {0, 0, 0, 1, 0};
    unsigned int r3[] = {1, 0, 0, 1, 0};
    unsigned int r4[] = {0, 1, 1, 0, 0};
    unsigned int r5[] = {0, 0, 0, 0, 0};
    unsigned int *const adjmat[] = {r1, r2, r3, r4, r5};
    unsigned int *max_set;
    unsigned int max_size, i;
    max_size = maximum_independent_set(adjmat, order, &max_set);
    printf("Max independent set size is %u\n", max_size);
    for (i = 0; i < order; i++) {
        if (max_set[i] == 1) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(max_set);
    return 0;
}

The output:

Max independent set size is 3
1 2 4

Value-Independent Knapsack Problem using backtracking in C

In another post I described a backtracking algorithm for the Subset-Sum Problem, in which the aim is to find a subset of a set of weights that sums to a given weight. A related problem, which is sometimes called Subset-Sum but is also called Value-Independent Knapsack, is to find a subset of the set of weights that is the maximum weight less than or equal to the given weight, rather than the exact weight.

This problem can be thought of as a 0-1 knapsack problem in which the weights are equal to the values for all items. Like 0-1 knapsack, the problem is NP-hard, but a backtracking algorithm can produce an exact solution quite efficiently. This is a backtracking algorithm for Value Independent Knapsack in C. The function subset_sum() takes an array of weights and its size, a capacity, and an out parameter for the solution array, and returns the weight of the best set found.

static void subset_sum_recursive(const unsigned int *weights, size_t n, unsigned int capacity, 
        unsigned int *current_set, unsigned int *current_weight, unsigned int *max_set, 
        unsigned int *max_weight, unsigned int *remaining_weight, unsigned int level)
{
    if (level == n) {
        /* Found a new best set */
        *max_weight = *current_weight;
        memcpy(max_set, current_set, n * sizeof(unsigned int));
        return;
    }
    *remaining_weight -= weights[level];
    if (*current_weight + weights[level] <= capacity) {
        /* Try solutions that include this item */
        *current_weight += weights[level];
        current_set[level] = 1;
        subset_sum_recursive(weights, n, capacity, current_set, current_weight, 
                max_set, max_weight, remaining_weight, level + 1);
        *current_weight -= weights[level];
        current_set[level] = 0;
    }
    if (*current_weight + *remaining_weight > *max_weight) { 
        /* Still promising */
        subset_sum_recursive(weights, n, capacity, current_set, current_weight, 
                max_set, max_weight, remaining_weight, level + 1);
    }
    *remaining_weight += weights[level];
}

static unsigned int sum(const unsigned int *numbers, size_t len)
{
    unsigned int total = 0;
    unsigned int i;
    for (i = 0; i < len; i++) {
        total += numbers[i];
    }
    return total;
}

static unsigned int subset_sum(const unsigned int *weights, size_t n, unsigned int capacity,
        unsigned int **max_set)
{
    unsigned int current_weight = 0;
    unsigned int max_weight = 0;
    unsigned int remaining_weight = sum(weights, n);
    unsigned int *current_set = calloc(n, sizeof(unsigned int));
    *max_set = malloc(n * sizeof(unsigned int));
    if (current_set == NULL || *max_set == NULL) {
        free(current_set);
        free(*max_set);
        return 0;
    }
    subset_sum_recursive(weights, n, capacity, current_set, &current_weight, *max_set,
           &max_weight, &remaining_weight, 0);
    free(current_set);
    return max_weight;
}

Here is an example program. It prints the maximum weight found and the elements of the set:

int main(void)
{
    unsigned int weights[] = {8, 6, 2, 3};
    const size_t n = sizeof(weights) / sizeof(weights[0]);
    const unsigned int capacity = 12;
    unsigned int *set;
    unsigned int weight = subset_sum(weights, n, capacity, &set);
    unsigned int i;
    printf("%u\n", weight);
    for (i = 0; i < n; i++) {
        if (set[i]) {
            printf("%u ", weights[i]);
        }
    }
    putchar('\n');
    free(set);
    return 0;
}

The output:

11
8 3

Traveling Salesman Problem using backtracking in C

I have previously shown the Cheapest-Link, Nearest-Neigbour, and Repetitive-Nearest Neighbour algorithms for the Traveling Salesman Problem. These are all greedy algorithms that give an approximate result. The Traveling Salesman Problem is NP-complete, so an exact algorithm will have exponential running time unless \(P=NP\). However, we can reduce the search space for the problem by using backtracking.

This is an implementation of TSP using backtracking in C. It searches the permutation space of vertices, fixing the start of each tour at vertex 0. This means that the last edge is always the one that connects the second-last edge to vertex 0, so it is not necessary to find this edge by permutation. The function traveling_salesman() takes a graph in the form of a matrix of distances (adjmat), the number of vertices (order), and the address of a pointer to an array of unsigned integers used as an output parameter (best_tour). It returns the cost of the best tour, and assigns an array containing the vertices of the tour in order to *best_tour.

#include <stdlib.h>
#include <string.h>

static void swap(unsigned int *a, unsigned int *b)
{
    unsigned int temp = *a;
    *a = *b;
    *b = temp;
}

static void traveling_salesman_recursive(const unsigned int **adjmat, unsigned int order, 
        unsigned int *best_tour, unsigned int *best_tour_cost, unsigned int *partial_tour,
        unsigned int *partial_tour_cost, unsigned int level)
{
    if (level == order - 1) {
        /* Add last two edges to complete the tour */
        unsigned int tour_cost = *partial_tour_cost 
                + adjmat[partial_tour[order - 2]][partial_tour[order - 1]]
                + adjmat[partial_tour[order - 1]][0];
        if (*best_tour_cost == 0 || tour_cost < *best_tour_cost) {
            /* Best tour so far */
            memcpy(best_tour, partial_tour, order * sizeof(unsigned int));
            *best_tour_cost = tour_cost;
        }
    }
    else {
        unsigned int i;
        for (i = level; i < order; i++) {
            if (*best_tour_cost == 0
                || *partial_tour_cost + adjmat[partial_tour[level - 1]][partial_tour[i]]
                    < *best_tour_cost)
            {  
                /* Next permutation */
                swap(&partial_tour[level], &partial_tour[i]);
                unsigned int cost = adjmat[partial_tour[level - 1]][partial_tour[level]];
                *partial_tour_cost += cost;
                traveling_salesman_recursive(adjmat, order, best_tour, best_tour_cost,
                        partial_tour, partial_tour_cost, level + 1);
                *partial_tour_cost -= cost;
                swap(&partial_tour[level], &partial_tour[i]);
            }
        }
    }
}

unsigned int traveling_salesman(const unsigned int **adjmat, unsigned int order, 
        unsigned int **best_tour)
{
    unsigned int i;
    unsigned int best_tour_cost = 0;
    unsigned int partial_tour_cost = 0;
    unsigned int *partial_tour = malloc(order * sizeof(unsigned int));
    *best_tour = malloc(order * sizeof(unsigned int));
    if (partial_tour == NULL || *best_tour == NULL) {
        free(partial_tour);
        free(*best_tour);
        *best_tour = NULL;
        return 0;
    }        
    for (i = 0; i < order; i++) {
        partial_tour[i] = i;
    }
    traveling_salesman_recursive(adjmat, order, *best_tour, &best_tour_cost, partial_tour, 
            &partial_tour_cost, 1);
    free(partial_tour);
    return best_tour_cost;
}

Here is an example program:

#include <stdlib.h>
#include <stdio.h>

int main(void)
{
    unsigned int r1[] = {0, 5, 7, 9, 10};
    unsigned int r2[] = {4, 0, 11, 3, 7};
    unsigned int r3[] = {3, 1, 0, 4, 5};
    unsigned int r4[] = {6, 5, 7, 0, 11} ;
    unsigned int r5[] = {13, 2, 8, 3, 0} ;
    const size_t order = 5; /* Vertices */
    const unsigned int *adjmat[] = { r1, r2, r3, r4, r5 };
    unsigned int *best_tour;
    unsigned int cost = traveling_salesman(adjmat, order, &best_tour);
    unsigned int i;
    printf("Best tour cost: %u\n", cost);
    printf("Vertices:\n");
    for (i = 0; i < order; i++) {
        printf("%u ", best_tour[i]);
    }
    putchar('\n');
    printf("Edge weights:\n");
    for (i = 0; i < order - 1; i++) {
        printf("%u ", adjmat[best_tour[i]][best_tour[i + 1]]);
    }
    printf("%u\n", adjmat[best_tour[order - 1]][0]);
    free(best_tour);
    return 0;
}

The output:

Best tour cost: 23
Vertices:
0 2 4 1 3
Edge weights:
7 5 2 3 6

Maximum Clique using backtracking in C

A graph showing a maximum clique

A clique in a graph is a subgraph in which all vertices are connected to each other – a complete subgraph. The Maximum Clique Problem is to find the largest clique of a graph. For example, in the graph shown above the maximum clique size is 3, and the vertices (1, 2, 4) are a maximum clique, as are (0, 1, 4) and (0, 3, 4).

The Maximum Clique Problem is NP-complete, but can be solved efficiently using backtracking. The following is a backtracking implementation in C. The function maximum_clique() takes a graph in adjacency matrix form and its order (the number of vertices), and will return a maximum clique and its size.

#include <stdlib.h>
#include <string.h>

static void maximum_clique_recursive(const unsigned int **adjmat, unsigned int order, 
        unsigned int *current_clique, unsigned int *current_clique_size, unsigned int *max_clique,
        unsigned int *max_clique_size, unsigned int level)
{
    unsigned int i, connected;
    if (level == order) {
        /* Found a new max clique */
        memcpy(max_clique, current_clique, order * sizeof(unsigned int));
        *max_clique_size = *current_clique_size;
        return;
    }
    /* Find out if current vertex is connected to all vertices in current clique */
    connected = 1;
    for (i = 0; i < level && connected; i++) {
        if (current_clique[i] && !adjmat[level][i]) {
            connected = 0;
        }
    }

    if (connected) {
        /* Add this vertex to the clique */
        current_clique[level] = 1;
        (*current_clique_size)++;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
        (*current_clique_size)--;
    }
    if (*current_clique_size + order - level > *max_clique_size) {
        /* Still promising */
        current_clique[level] = 0;
        maximum_clique_recursive(adjmat, order, current_clique, current_clique_size, max_clique, 
                max_clique_size, level + 1);
    }
}

unsigned int maximum_clique(const unsigned int **adjmat, unsigned int order, 
        unsigned int **max_clique)
{
    unsigned int max_clique_size = 0;
    unsigned int current_clique_size = 0;
    unsigned int *current_clique = malloc(order * sizeof(unsigned int));
    *max_clique = malloc(order * sizeof(unsigned int));
    if (current_clique == NULL || *max_clique == NULL) {
        free(current_clique);
        free(max_clique);
        return 0;
    }
    maximum_clique_recursive(adjmat, order, current_clique, &current_clique_size, *max_clique, 
            &max_clique_size, 0);
    free(current_clique);
    return max_clique_size;
}

Here is an example program that finds a maximum clique on the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    const unsigned int order = 5; /* Vertices */
    unsigned int r1[] = {0, 1, 0, 1, 1};
    unsigned int r2[] = {1, 0, 1, 0, 1};
    unsigned int r3[] = {0, 1, 0, 0, 1};
    unsigned int r4[] = {1, 0, 0, 0, 1};
    unsigned int r5[] = {1, 1, 1, 1, 0};
    const unsigned int *adjmat[] = {r1, r2, r3, r4, r5};
    unsigned int *max_clique;
    unsigned int max_clique_size = maximum_clique(adjmat, order, &max_clique);
    unsigned int i;
    printf("Max clique size is %u\n", max_clique_size);
    for (i = 0; i < order; i++) {
        if (max_clique[i] == 1) {
            printf("%u ", i);
        }
    }
    putchar('\n');
    free(max_clique);
    return 0;
}

The output:

Max clique size is 3
1 2 4

Bellman-Ford algorithm in C

Weighted graph showing all distances from vertex 0

The Bellman-Ford algorithm finds the shortest path between a single vertex and all other vertices in a graph. This is the same problem that Dijkstra’s algorithm solves, but unlike Dijkstra, the Bellman-Ford algorithm can handle graphs with negative edge weights.

One consequence of negative weights is that a graph can contain a negative cycle, and if this is the case, the shortest path to all vertices reachable from the cycle is negative infinity, because one could traverse the cycle any number of times and keep reducing the path length indefinitely. The Bellman Ford algorithm detects this by performing an extra iteration over the graph, and if this would result in a distance decreasing, it knows that there is a negative cycle.

Below is an implementation in C. It takes a set of weighted arcs, the number of arcs (size), the number of vertices (order), and a starting vertex number. It returns an array of integers containing the distance to every vertex. If the graph contains a negative cycle, it returns NULL.

#include <stdlib.h>
#include <limits.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    int weight;
} weighted_arc;

int *bellman_ford(const weighted_arc *arcs, unsigned int size, unsigned int order, 
        unsigned int vertex)
{
    unsigned int i;
    int *distances = malloc(order * sizeof(unsigned int));
    unsigned int a, v, negative_cycle = 0;
    if (distances == NULL) {
        return NULL;
    }
    /* All distances start infinite */
    for (i = 0; i < order; i++) {
        distances[i] = INT_MAX;
    }
    /* Distance to starting vertex is 0 */
    distances[vertex] = 0;
    /* Relax edges repeatedly */
    for (v = 0; v < order; v++) {
        for (a = 0; a < size; a++) {
            const unsigned int first = arcs[a].first;
            const unsigned int second = arcs[a].second;
            const int weight = arcs[a].weight;
            if (distances[first] != INT_MAX
                && distances[first] + weight < distances[second])
            {
                distances[second] = distances[first] + weight;
            } 
        }
    }
    /* Check for negative weight cycle */
    for (a = 0; a < size && !negative_cycle; a++) {
        const unsigned int first = arcs[a].first;
        const unsigned int second = arcs[a].second;
        const int weight = arcs[a].weight;
        negative_cycle = distances[first] + weight < distances[second];
    }
    if (negative_cycle) {
        free(distances);
        distances = NULL;
    }

    return distances;
}

Here is an example program that computes the distances for the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void weighted_arc_connect(weighted_arc *arcs, unsigned int first, unsigned int second, 
        int weight, unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    arcs[*pos].weight = weight;
    (*pos)++;
}

int main(void)
{
    unsigned int size = 9; /* Arcs */
    unsigned int order = 6; /* Vertices */
    unsigned int i = 0;
    int *distances;

    weighted_arc *arcs = malloc(size * sizeof(weighted_arc));
    weighted_arc_connect(arcs, 0, 1, 5, &i);
    weighted_arc_connect(arcs, 0, 3, -2, &i);
    weighted_arc_connect(arcs, 1, 2, 1, &i);
    weighted_arc_connect(arcs, 2, 3, 2, &i);
    weighted_arc_connect(arcs, 2, 4, 7, &i);
    weighted_arc_connect(arcs, 2, 5, 3, &i);
    weighted_arc_connect(arcs, 3, 1, 2, &i);
    weighted_arc_connect(arcs, 4, 3, 3, &i);
    weighted_arc_connect(arcs, 4, 5, 10, &i);

    distances = bellman_ford(arcs, size, order, 0);
    if (distances == NULL) {
        printf("Graph contains a negative cycle or ran out of memory.\n");
        return 1;
    }

    for (i = 0; i < order; i++) {
        printf("%u: %d\n", i, distances[i]);
    }

    free(arcs);
    free(distances);
    return 0;
}

The output:

0: 0
1: 0
2: 1
3: -2
4: 8
5: 4

Related

Floyd-Warshall algorithm in C

A weighted directed graph

The Floyd-Warshall algorithm calculates the distances between all pairs of vertices in a weighted graph. It is a dynamic programming algorithm very similar to Gauss-Jordan elimination.

Below is an implementation in C. The function takes an array of directed arcs, the size of the graph (number of arcs), and its order (number of vertices). It returns a dynamically allocated 2-dimensional array that is the distance table for all pairs of vertices.

One thing to notice about the implementation is that when you use INT_MAX to represent infinity, you need to be very careful not to do arithmetic on it that will cause it to roll over and become a small number. This is the reason for the checks for INT_MAX in the main calculation.

#include <stdlib.h>
#include <limits.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    int weight;
} weighted_arc;

int **floyd_warshall(const weighted_arc *arcs, unsigned int size, unsigned int order)
{
    unsigned int i, j, k;
    int **distances = malloc(order * sizeof(int *));
    /* Initialise the distance table */
    for (i = 0; i < order; i++) {
        distances[i] = malloc(order * sizeof(int));
        for (j = 0; j < order; j++) {
            if (i == j) {
                distances[i][j] = 0;
            }
            else {
                distances[i][j] = INT_MAX;
            }
        }
    }
    /* Add the distance for each arc */
    for (i = 0; i < size; i++) {
        distances[arcs[i].first][arcs[i].second] = arcs[i].weight;
    }
    /* Calculate the rest of the distances */
    for (i = 0; i < order; i++) {
        for (j = 0; j < order; j++) {
            for (k = 0; k < order; k++) {
                const int djk = distances[j][k];
                const int dji = distances[j][i];
                const int dik = distances[i][k];
                if (dji != INT_MAX && dik != INT_MAX 
                        && djk > dji + dik)
                {
                    distances[j][k] = dji + dik;
                }
            }
        }
    }
    return distances;
}

Here is an example program that calculates the distance table for the graph shown at the top of the page:

#include <stdio.h>
#include <stdlib.h>

/* Connect two arcs */
void weighted_arc_connect(weighted_arc *arcs, unsigned int first, unsigned int second, 
        int weight, unsigned int *pos)
{
    arcs[*pos].first = first;
    arcs[*pos].second = second;
    arcs[*pos].weight = weight;
    (*pos)++;
}

/* Print a distance table */
void print_distances(int **distances, unsigned int order)
{
    unsigned int i, j;
    for (i = 0; i < order; i++) {
        for (j = 0; j < order; j++) {
            printf("%d ", distances[i][j]);
        }
        putchar('\n');
    }
}

int main(void)
{
    unsigned int size = 5; /* Arcs */
    unsigned int order = 4; /* Vertices */
    unsigned int i = 0;
    int **distances;
    weighted_arc *arcs = malloc(size * sizeof(weighted_arc));
    weighted_arc_connect(arcs, 0, 2, -2, &i);
    weighted_arc_connect(arcs, 1, 0, 4, &i);
    weighted_arc_connect(arcs, 1, 2, 3, &i);
    weighted_arc_connect(arcs, 2, 3, 2, &i);
    weighted_arc_connect(arcs, 3, 1, -1, &i);
    distances = floyd_warshall(arcs, size, order);
    print_distances(distances, order);
    free(arcs);
    for (i = 0; i < order; i++) {
        free(distances[i]);
    }
    free(distances);
    return 0;
}

The output:

0 -1 -2 0
4 0 2 4
5 1 0 2
3 -1 1 0

Related

Dijkstra’s Shortest Paths algorithm in C

A weighted graph

Dijksta’s algorithm finds the shortest path in a weighted graph from a single vertex to one or all other vertices. It is a greedy breadth-first search (BFS) algorithm which works as follows:

  1. Create a table of distances to all vertices
  2. Set the distance to 0 for the starting vertex, and infinity for the others
  3. Make a list of unvisited vertices, which starts off containing all vertices
  4. Set the current vertex to be the starting vertex
  5. While the unvisited list is not empty:
    1. For each neighbour of the current vertex:
      • If the distance to the current vertex + the distance to the neighbour < that the stored distance to the neighbour:
        • Update the stored distance to the neighbour to be the distance to the current vertex + the distance to the neighbour
    2. Remove the current vertex from the unvisited list
    3. Find the nearest unvisited vertex (i.e., the one with the lowest distance in the distances table)
    4. Make it the new current vertex

Below is an implemention in C. The function dijkstra(), takes an array of edges, the number of edges (size) and the number of vertices (order), and returns an array containing the distance to each vertex. For the unvisited list I used an array of integers which I set to 0 or 1 depending on whether the corresponding vertex was unvisited or visited respectively.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

unsigned int *dijkstra(const weighted_edge *edges, unsigned int size, unsigned int order,
        unsigned int vertex)
{
    unsigned int i;
    unsigned int *distances = malloc(order * sizeof(unsigned int));
    unsigned int *unvisited = malloc(order * sizeof(unsigned int));
    unsigned int unvisited_count = order;
    unsigned int current = vertex;
    if (distances == NULL || unvisited == NULL) {
        free(distances);
        free(unvisited);
        return NULL;
    }
    /* All distances start infinite and all vertices start unvisited */
    for (i = 0; i < order; i++) {
        distances[i] = UINT_MAX;
        unvisited[i] = 1;
    }
    /* Distance to starting vertex is 0 */
    distances[vertex] = 0;
    while (unvisited_count > 0) {
        /* Update the distances to all neighbours */
        unsigned int e, v;
        unsigned int min_distance;
        for (e = 0; e < size; e++) {
            if (edges[e].first == current || edges[e].second == current) {
                const unsigned int neighbour = edges[e].first == current ?
                    edges[e].second : edges[e].first;
                const unsigned int distance = distances[current] + edges[e].weight;
                if (distance < distances[neighbour]) {
                    distances[neighbour] = distance;
                }
            }
        }
        /* Finished with this vertex */
        unvisited[current] = 0;
        unvisited_count--;
        /* Find the nearest unvisited vertex */
        min_distance = 0;
        for (v = 0; v < order; v++) {
            if (unvisited[v] == 1 && (min_distance == 0 || distances[v] < min_distance)) {
                min_distance = distances[v];
                current = v;
            }
        }
    }
    free(unvisited);
    return distances;
}

Here is an example program that finds all of the distances from vertex 0 in the graph pictured above:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const unsigned int size = 9; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    unsigned int *distances;
    unsigned int i = 0;

    weighted_edge_connect(edges, 0, 1, 2, &i);
    weighted_edge_connect(edges, 0, 2, 4, &i);
    weighted_edge_connect(edges, 1, 2, 1, &i);
    weighted_edge_connect(edges, 1, 3, 4, &i);
    weighted_edge_connect(edges, 1, 4, 2, &i);
    weighted_edge_connect(edges, 2, 4, 3, &i);
    weighted_edge_connect(edges, 3, 4, 3, &i);
    weighted_edge_connect(edges, 3, 5, 2, &i);
    weighted_edge_connect(edges, 4, 5, 2, &i);

    distances = dijkstra(edges, size, order, 0);

    for (i = 0; i < order; i++) {
        printf("%u: %u\n", i, distances[i]);
    }

    free(distances);
    free(edges);

    return 0;
}

The output:

0: 0
1: 2
2: 3
3: 6
4: 4
5: 6

Related

Borůvka’s Minimal Spanning Tree (MST) algorithm in C

Borůvka’s MST algorithm is quite similar to Kruskal’s algorithm. Like Kruskal, it begins by making a set of trees that start out as single vertices. It then repeatedly iterates over these trees and finds for each one the cheapest edge that has one endpoint in the tree and the other not. It then adds all of these edges to the MST, and merges the trees they join. Notice that the difference with Kruskal’s algorithm is that while Kruskal finds the cheapest edge with an endpoint in different trees at each iteration, Borůvka finds all of the cheapest edges that have endpoints in different trees. This has the effect of halving the number of trees at each iteration.

So in summary the algorithm is:

  1. Start with \(Order(G)\) trees, each one a single vertex
  2. Until there is only one tree:
    1. Find the cheapest edge for each tree that has one endpoint in the tree and one outside
    2. Add all of these edges to the MST
    3. Merge all of the affected trees

Below is an implementation in C. As with Kruskal, I used a vertices array to keep track of which tree each vertex is in. I also needed another array links to keep track of which cheapest edges had been found through the loop over the trees. I needed this because the cheapest outgoing edge for one tree is also the cheapest outgoing edge for another one, and I needed to prevent adding the same edge to the MST twice.

The code was a little bit longer than for Prim or Kruskal, so I broke it into three functions as I never write a function that doesn’t fit on the screen.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Find the cheapest edge with one endpoint in tree and one not */
static int cheapest_edge_leaving_tree(const weighted_edge *edges, unsigned int size, 
        const unsigned int *vertices, unsigned int tree)
{
    unsigned int e;
    int cheapest = -1;
    for (e = 0; e < size && cheapest == -1; e++) {
        if ((vertices[edges[e].first] == tree
                    && vertices[edges[e].second] != tree)
                || (vertices[edges[e].first] != tree 
                    && vertices[edges[e].second] == tree))
        {
            cheapest = e;
        }
    }
    return cheapest;
}

/* Merge trees for all of the edges in mst from mst_prev to mst_edges */
static void merge_trees(const weighted_edge *mst, unsigned int mst_prev, unsigned int mst_edges,
       unsigned int *vertices, unsigned int order, unsigned int *trees)
{
    unsigned int e;
    for (e = mst_prev; e < mst_edges; e++) {
        unsigned int v;
        for (v = 0; v < order; v++) {
            if (vertices[v] == mst[e].second) {
                vertices[v] = mst[e].first;
            } 
        }
        (*trees)--;
    }
}

unsigned int boruvka(weighted_edge *edges, unsigned int size, unsigned int order,
        weighted_edge **mst)
{
    *mst = malloc((order - 1) * sizeof(weighted_edge));
    unsigned int *vertices = malloc(order * sizeof(unsigned int));
    unsigned int trees = order;
    unsigned int *links = malloc(size * sizeof(unsigned int));
    unsigned int i, cost = 0, mst_edges = 0;
    if (*mst == NULL || vertices == NULL || links == NULL) {
        free(*mst);
        free(vertices);
        free(links);
        return 0;
    }
    /* Each vertex starts off in its own tree */
    for (i = 0; i < order; i++) {
        vertices[i] = i;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge), 
            (int(*)(const void *, const void *))weighted_edge_compare);
    /* Main loop */
    while (trees > 1) {
        unsigned int t, mst_prev = mst_edges;
        memset(links, 0, size * sizeof(unsigned int));
        for (t = 0; t < trees ; t++) {
            /* Get the cheapest edge leaving this tree */
            int cheapest = cheapest_edge_leaving_tree(edges, size, vertices, t);
            if (cheapest == -1) {
                /* Graph wasn't connected properly */
                free(*mst);
                *mst = NULL;
                free(vertices);
                free(links);
                return 0;
            }
            /* Add the edge if not there already */
            if (links[cheapest] != 1) {
                (*mst)[mst_edges++] = edges[cheapest];
                links[cheapest] = 1;
                /* Add the cost */
                cost += edges[cheapest].weight;
            }
        }
        /* Merge the trees they join */
        merge_trees(*mst, mst_prev, mst_edges, vertices, order, &trees);
    }
    free(vertices);
    free(links);
    return cost;
}

Here is an example program that finds the MST of the same graph I used for Prim and Kruskal:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    weighted_edge *edges;
    const unsigned int order = 5;
    const unsigned int size = complete_weighted_graph(order, &edges);
    weighted_edge *mst;
    unsigned int cost = boruvka(edges, size, order, &mst);
    printf("Cost is %u\n", cost);
    print_edges(mst, order - 1);
    free(mst);
    free(edges);
    return 0;
}

The output:

Cost is 10
(0, 1, 1) (0, 2, 2) (0, 3, 3) (0, 4, 4)