Tag Archives: NP-Complete

Nearest Neighbour Algorithm for TSP in C

The Nearest Neighbour Algorithm is the simplest greedy approximate algorithm for the TSP.

The steps are:

  1. Pick a starting vertex
  2. Go to its nearest neighbour in the graph
  3. Repeat, only going to unvisited vertices
  4. When all vertices have been visited, stop

Here it is in C:

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

/* Check if the tour already contains an edge */
static unsigned int tour_contains(const weighted_edge *tour, unsigned int t, 
        const weighted_edge *edge)
{
    unsigned int contains = 0;
    unsigned int i;
    for (i = 0; i < t && !contains; i++) {
        contains = tour[i].first == edge->first 
            && tour[i].second == edge->second;
    }
    return contains;
}

/* Find the edge to v's nearest neighbour not in the tour already */
static unsigned int nearest_neighbour_edge(const weighted_edge *edges, unsigned int size, 
        const weighted_edge *tour, unsigned int t, unsigned int v)
{
    unsigned int min_distance = 0;
    unsigned int nearest_neighbour;
    unsigned int i;
    for (i = 0; i < size; i++) {
        if ((edges[i].first == v || edges[i].second == v)
                && (min_distance == 0 || edges[i].weight < min_distance)
                && !tour_contains(tour, t, &edges[i]))
        {
            min_distance = edges[i].weight;
            nearest_neighbour = i;
        }
    }
    return nearest_neighbour;
}

weighted_edge *nearest_neighbour_tsp(const weighted_edge *edges, unsigned int size,
        unsigned int order)
{
    unsigned int t, v = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    if (tour == NULL) {
        return NULL;
    }
    for (t = 0; t < order; t++) {
        unsigned int e = nearest_neighbour_edge(edges, size, tour, t, v);
        tour[t] = edges[e];
        v = edges[e].first == v ? edges[e].second : edges[e].first;
    }
    return tour;
}

Here is an example program that solves the TSP for the same graph as I used for the Cheapest-Link Algorithm:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void weighted_edge_connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    weighted_edge_connect(edges, 0, 1, 25, &i);
    weighted_edge_connect(edges, 0, 2, 19, &i);
    weighted_edge_connect(edges, 0, 3, 19, &i);
    weighted_edge_connect(edges, 0, 4, 16, &i);
    weighted_edge_connect(edges, 0, 5, 28, &i);
    weighted_edge_connect(edges, 1, 2, 24, &i);
    weighted_edge_connect(edges, 1, 3, 30, &i);
    weighted_edge_connect(edges, 1, 4, 27, &i);
    weighted_edge_connect(edges, 1, 5, 17, &i);
    weighted_edge_connect(edges, 2, 3, 18, &i);
    weighted_edge_connect(edges, 2, 4, 20, &i);
    weighted_edge_connect(edges, 2, 5, 23, &i);
    weighted_edge_connect(edges, 3, 4, 19, &i);
    weighted_edge_connect(edges, 3, 5, 32, &i);
    weighted_edge_connect(edges, 4, 5, 41, &i);

    tour = nearest_neighbour_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

The output:

(0, 4, 16) (3, 4, 19) (2, 3, 18) (0, 2, 19) (0, 3, 19) (1, 3, 30)

Cheapest-Link Algorithm for TSP in C

A weighted graph for the Traveling Salesman Problem

The Traveling Salesman Problem (TSP) is NP-Complete, but there are a few greedy approximate algorithms that are efficient. One of them is the Cheapest Link Algorithm, which I describe here.

The algorithm works by repeatedly choosing the cheapest link in the graph that:

  1. Doesn’t close the circuit
  2. Doesn’t create a vertex with three edges coming out of it

These cheapest links are added to the tour until it needs one more edge to complete it, at which point condition (1) is removed so the cheapest link that does not create a vertex with three edges will then be added and the tour is complete.

Below is the implementation in C. To prevent closing the circuit early I used the graph cycle detection algorithm I described in an earlier post. To make sure there are no vertices with three edges, I keep track of the degrees of the vertices as the tour is built, and edges that connect vertices with degree 2 are rejected.

#include <stdlib.h>

typedef struct {
    unsigned int first;
    unsigned int second;
    unsigned int weight;
} weighted_edge;

static int compare_weighted_edges(const weighted_edge *edge1, const weighted_edge *edge2)
{
    return edge1->weight - edge2->weight;
}

static unsigned int cyclic_recursive(const weighted_edge *edges, unsigned int n,
        unsigned int *visited, unsigned int order, unsigned int vertex, 
        unsigned int predecessor)
{
    unsigned int i;
    unsigned int cycle_found = 0;
    visited[vertex] = 1;
    for (i = 0; i < n && !cycle_found; i++) {
        if (edges[i].first == vertex || edges[i].second == vertex) {
            /* Adjacent */
            const unsigned int neighbour = edges[i].first == vertex ?
                    edges[i].second : edges[i].first;
            if (visited[neighbour] == 0) {
                /* Not yet visited */
                cycle_found = cyclic_recursive(edges, n, visited, order, neighbour, vertex);
            }
            else if (neighbour != predecessor) {
                /* Found a cycle */
                cycle_found = 1;
            }
        }
    }
    return cycle_found;
}
 
unsigned int cyclic(const weighted_edge *edges, unsigned int n, unsigned int order)
{
    unsigned int *visited = calloc(order, sizeof(unsigned int));
    unsigned int cycle_found;
    if (visited == NULL) {
        return 0;
    }
    cycle_found  = cyclic_recursive(edges, n, visited, order, 0, 0);
    free(visited);
    return cycle_found;
}

weighted_edge *cheapest_link_tsp(weighted_edge *edges, unsigned int size, unsigned int order)
{
    unsigned int t, e = 0;
    weighted_edge *tour = malloc(order * sizeof(weighted_edge));
    unsigned int *degrees = calloc(order, sizeof(unsigned int));
    if (tour == NULL || degrees == NULL) {
        free(tour);
        free(degrees);
        return NULL;
    }
    /* Sort the edges by weight */
    qsort(edges, size, sizeof(weighted_edge),
            (int(*)(const void *, const void *))compare_weighted_edges);
    /* Main algorithm */
    for (t = 0; t < order; t++) {
        unsigned int added = 0;
        while (!added && e < size) {
            if (degrees[edges[e].first] < 2 && degrees[edges[e].second] < 2) {
                tour[t] = edges[e];
                if (t == order - 1 /* It's the last edge */
                    || !cyclic(tour, t + 1, order)) /* It doesn't close the circuit */ 
                { 
                    added = 1;
                    degrees[edges[e].first]++;
                    degrees[edges[e].second]++;
                }
            }
            e++;
        }
        if (!added) {
            /* Edges were not correct */
            free(tour);
            free(degrees);
            return NULL;
        }
    }
    free(degrees);
    return tour;
}

Here is an example program that finds a solution for the graph shown at the top:

#include <stdio.h>
#include <stdlib.h>

/* Connect two edges */
void connect(weighted_edge *edges, unsigned int first, unsigned int second, 
        unsigned int weight, unsigned int *pos)
{
    edges[*pos].first = first;
    edges[*pos].second = second;
    edges[*pos].weight = weight;
    (*pos)++;
}

static void print_edges(const weighted_edge *edges, unsigned int n)
{
    unsigned int e;
    for (e = 0; e < n; e++) {
        printf("(%u, %u, %u) ", edges[e].first, edges[e].second, edges[e].weight);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int i = 0;
    const unsigned int size = 15; /* Edges */
    const unsigned int order = 6; /* Vertices */
    weighted_edge *edges = malloc(size * sizeof(weighted_edge));
    weighted_edge *tour;

    connect(edges, 0, 1, 25, &i);
    connect(edges, 0, 2, 19, &i);
    connect(edges, 0, 3, 19, &i);
    connect(edges, 0, 4, 16, &i);
    connect(edges, 0, 5, 28, &i);
    connect(edges, 1, 2, 24, &i);
    connect(edges, 1, 3, 30, &i);
    connect(edges, 1, 4, 27, &i);
    connect(edges, 1, 5, 17, &i);
    connect(edges, 2, 3, 18, &i);
    connect(edges, 2, 4, 20, &i);
    connect(edges, 2, 5, 23, &i);
    connect(edges, 3, 4, 19, &i);
    connect(edges, 3, 5, 32, &i);
    connect(edges, 4, 5, 41, &i);

    tour = cheapest_link_tsp(edges, size, order);
    print_edges(tour, order);
    
    free(tour);
    free(edges);
    return 0;
}

Output:

(0, 4, 16) (1, 5, 17) (2, 3, 18) (0, 2, 19) (1, 4, 27) (3, 5, 32)

Greedy Maximum Independent Set in C

An independent set of a graph is some subset of the vertices where no vertex in the subset is connected to another vertex in the subset. For example, in the graph shown below, the set of vertices (0, 2, 4) is an independent set, as is (1, 3, 5).
A wheel graph on 7 vertices showing an independent set
The Maximimum Independent Set (MIS) problem is to find an independent set with the greatest cardinality in a graph. The problem is NP-complete, but a greedy algorithm gives a good approximation.

The algorithm is:

  1. 1. Start with the set of vertices of the graph, \(V\) and an empty set for the MIS, \(S\)
  2. While \(V \ne \emptyset\):
    1. Find a vertex of minimum degree \(v \in V\)
    2. Add it to \(S\)
    3. Remove it and its neighbours from \(V\)

The following is an implementation in C;

#include <stdlib.h>

/* Graph edge */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/* State a vertex can be in */
typedef enum {
    IN_GRAPH = 0,
    IN_MIS = 1,
    DISCARDED = 2
} vertex_state;

/* Vertex info */
typedef struct {
    vertex_state state;
    unsigned int degree;
} vertex_info;

/* Create and initialise the data structure for vertices */
static vertex_info *create_vertices(const edge *edges, unsigned int size, 
        unsigned int order)
{
    unsigned int i;
    vertex_info *vertices = malloc(order * sizeof(vertex_info));
    if (vertices == NULL) {
        return 0;
    }
    for (i = 0; i < order; i++) {
        vertices[i].state = IN_GRAPH;
        vertices[i].degree = 0;
    }
    for (i = 0; i < size; i++) {
        vertices[edges[i].first].degree++;
        vertices[edges[i].second].degree++;
    }
    return vertices;
}

/* Find a vertex with the lowest degree of any in the graph */
static int min_degree_vertex(const vertex_info *vertices, unsigned int order)
{
    unsigned int min_degree = 0;
    int min_vertex = -1;
    unsigned int i;
    for (i = 0; i < order; i++) {
        if (vertices[i].state == IN_GRAPH 
                && (min_degree == 0 
                    || vertices[i].degree < min_degree))
        {
            min_degree = vertices[i].degree;
            min_vertex = i;
        }
    }
    return min_vertex;
}

/* Move a vertex from the graph, adjusting the degrees of its neighbours */
static void move_vertex(vertex_info *vertices, unsigned int order, const edge *edges,
        unsigned int size, unsigned int v, vertex_state state)
{
    unsigned int e;
    /* Remove vertex */
    vertices[v].state = state;
    /* Reduce the degrees of its neighbours */
    for (e = 0; e < size; e++) {
        if (edges[e].first == v
            && vertices[edges[e].second].state == IN_GRAPH) 
        {
                vertices[edges[e].second].degree--;
        }
        else if (edges[e].second == v
            && vertices[edges[e].first].state == IN_GRAPH) 
        {
            vertices[edges[e].first].degree--;
        }
    }
}

/* Create the MIS array from the vertices structure */
static unsigned int *create_mis(const vertex_info *vertices, unsigned int order,
        unsigned int m)
{
    unsigned int *mis = malloc(m * sizeof(unsigned int));
    if (mis) {
        unsigned int v, m = 0;
        for (v = 0; v < order; v++) {
            if (vertices[v].state == IN_MIS) {
                mis[m++] = v;
            }
        }
    }
    return mis;
}

unsigned int maximum_independent_set(const edge *edges, unsigned int size,
        unsigned int order, unsigned int **mis)
{
    unsigned int m = 0;
    vertex_info *vertices;
    unsigned int finished = 0;
    unsigned int i;

    /* Create vertices structure */
    vertices = create_vertices(edges, size, order);

    /* Main algorithm */
    while (!finished) {
        /* Find a vertex of minimum degree */
        int v = min_degree_vertex(vertices, order);
        if (v != -1) {
            /* Put it in the independent set */
            move_vertex(vertices, order, edges, size, v, IN_MIS);
            m++;
            /* Remove its neighbours from the graph */
            for (i = 0; i < size; i++) {
                if (edges[i].first == v
                        && vertices[edges[i].second].state == IN_GRAPH)
                {
                    move_vertex(vertices, order, edges, size, 
                            edges[i].second, DISCARDED);
                }
                else if (edges[i].second == v
                        && vertices[edges[i].first].state == IN_GRAPH)
                {
                    move_vertex(vertices, order, edges, size,
                            edges[i].first, DISCARDED);
                }
            }
        }
        else {
            finished = 1;
        } 
    }

    /* Create and populate MIS array */
    *mis = create_mis(vertices, order, m);
    if (*mis == NULL) {
        m = 0;
    }

    free(vertices);

    return m;
}

An example program to find an MIS in the graph shown above:

#include <stdio.h>
#include <stdlib.h>

/* Create a wheel graph of order n */
unsigned int wheel_graph(unsigned int n, edge **edges)
{
    const unsigned int size = 2 * n - 2;
    *edges = malloc(size * sizeof(edge));
    if (*edges != NULL) {
        /* Create the rim */
        unsigned int i;
        for (i = 0; i < n - 2; i++) {
            (*edges)[i].first = i;
            (*edges)[i].second = i + 1;
        }
        (*edges)[i].first = n - 2;
        (*edges)[i].second = 0;
        /* Create the spokes */
        for (i = 0; i < n - 1; i++) {
            (*edges)[n + i - 1].first = i;
            (*edges)[n + i - 1].second = n - 1;
        }
    }
    return size;
}

static void print_mis(const unsigned int *mis, size_t m)
{
    unsigned int i;
    for (i = 0; i < m; i++) {
        printf("%u ", mis[i]);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int order = 7; /* Vertices */
    unsigned int *mis;
    edge *edges;
    unsigned int size = wheel_graph(order, &edges);
    unsigned m = maximum_independent_set(edges, size, order, &mis);
    print_mis(mis, m);
    free(mis);
    free(edges);
    return 0;
}

The output:

0 2 4

Partial Latin square completion using backtracking in C

The partial Latin square completion problem is to take a partially-filled Latin square and to determine if it can be completed successfully. For example, below is a 1-based order 5 partial Latin square:

\(
\left( \begin{array}{ccc}
\cdot & 2 & \cdot & \cdot & \cdot \\
\cdot & \cdot & \cdot & \cdot & 3 \\
\cdot & \cdot & 5 & \cdot & \cdot \\
4 & \cdot & \cdot & \cdot & \cdot \\
\cdot & \cdot & \cdot & 2 & \cdot \end{array} \right)
\)

 

The problem is to find out if it can be completed, which in this case it can, to this square:

\(
\left( \begin{array}{ccc}
1 & 2 & 3 & 4 & 5 \\
2 & 1 & 4 & 5 & 3 \\
3 & 4 & 5 & 1 & 2 \\
4 & 5 & 2 & 3 & 1 \\
5 & 3 & 1 & 2 & 4 \end{array} \right)
\)

 

The partial Latin square completion problem is NP-complete, but can be solved using backtracking.

Here is an implementation in C. It takes a partial Latin square in the form of a 1-dimensional array, with 0s for the unfilled cells, and a callback function which is called with the solution, if found.

#include <stdlib.h>

static void get_row_column(unsigned int cell, unsigned int *row, unsigned int *column, size_t size)
{
    *row = cell / size;
    *column =  cell % size;
}

static unsigned int get_value(const unsigned int *square, size_t size, unsigned int row,
        unsigned int column)
{
    return square[row * size + column];
}

static unsigned int unique_in_column(const unsigned int *square, size_t size, unsigned int cell)
{
    unsigned int row;
    unsigned int column;
    get_row_column(cell, &row, &column, size);
    unsigned int r;
    unsigned int unique = 1;
    for (r = 0; r < size && unique; r++) {
        unique = r == row || get_value(square, size, r, column) != square[cell];
    } 
    return unique;
}

static unsigned int unique_in_row(const unsigned int *square, size_t size, unsigned int cell)
{
    unsigned int row;
    unsigned int column;
    get_row_column(cell, &row, &column, size);
    unsigned int c;
    unsigned int unique = 1;
    for (c = 0; c < size && unique; c++) {
        unique = c == column || get_value(square, size, row, c) != square[cell];
    } 
    return unique;
}

static unsigned int promising(const unsigned int *square, size_t size, unsigned int cell)
{
    return square[cell] == 0
        || (unique_in_row(square, size, cell)
                && unique_in_column(square, size, cell));
}

static unsigned int get_next_cell(const unsigned int *square, size_t size, unsigned int *cell)
{
    unsigned int found = 0;
    unsigned int i;
    for (i = *cell; i < size * size && !found; i++) {
        if (square[i] == 0) {
            found = 1;
            *cell = i;
        }
    }
    return found;
}

static void latin_square_completion_recursive(unsigned int *square, size_t size, unsigned int cell,
        unsigned int *succeeded, latin_square_completionfn fun)
{
    if (!get_next_cell(square, size, &cell)) {
        *succeeded = 1;
        fun(square, size);
    }
    else {
        unsigned int i;
        for (i = 1; i <= size && !*succeeded; i++) {
            unsigned int temp = square[cell];
            square[cell] = i;
            if (promising(square, size, cell)) {
                latin_square_completion_recursive(square, size, cell + 1, succeeded, fun);
            }
            square[cell] = temp;
        }
    }
}

void latin_square_completion(unsigned int *square, size_t size,
        latin_square_completionfn fun)
{
    unsigned int cell = 0;
    unsigned int succeeded = 0;
    latin_square_completion_recursive(square, size, cell, &succeeded, fun);
}

Here is a program to complete the partial Latin square shown at the beginning:

#include <stdio.h>

static void print(const unsigned int *square, size_t size)
{
    unsigned int i;
    for (i = 0; i < size * size; i++) {
        printf("%d ", square[i]);
        if (i % size == size - 1) {
            printf("\n");
        }
    }
    printf("\n");
}

int main(void)
{
    unsigned int square[] = {0, 2, 0, 0, 0,
                             0, 0, 0, 0, 3,
                             0, 0, 5, 0, 0,
                             4, 0, 0, 0, 0,
                             0, 0, 0, 2, 0};
    latin_square_completion(square, 5, print);
    return 0;
}

The output:

1 2 3 4 5
2 1 4 5 3
3 4 5 1 2
4 5 2 3 1
5 3 1 2 4

Hamiltonian circuits using backtracking in C

A Hamiltonian circuit of a graph is a tour that visits every vertex once, and ends at its starting vertex. Finding out if a graph has a Hamiltonian circuit is an NP-complete problem.

This is a backtracking algorithm to find all of the Hamiltonian circuits in a graph. The input is an adjacency matrix, and it calls a user-specified callback with an array containing the order of vertices for each Hamiltonian circuit it finds.

The first vertex for all circuits is fixed at 0, and the last vertex is limited to being less than the second vertex. These are normalisation conditions that prevent duplicates that are cyclic permutations or reversals respectively.

#include <stdlib.h>

static unsigned int circuit_contains(const unsigned int *circuit, unsigned int c, unsigned int v)
{
    unsigned int i;
    unsigned int contains = 0;
    for (i = 0; i < c && !contains; i++) {
        contains = circuit[i] == v;
    }
    return contains;
}

typedef void (*circuitfn)(const unsigned int *, size_t);

static void hamiltonian_circuits_recursive(unsigned int **adjmat, size_t n, unsigned int *circuit,
       unsigned int c, circuitfn fun)
{
    if (c == n) {
        /* Found a circuit */
        fun(circuit, n);
    }
    else {
        unsigned int v;
        for (v = 1; v < n; v++) {
            if (!circuit_contains(circuit, c, v) /* Vertex is not in the circuit already */
                && adjmat[circuit[ c - 1]][v] == 1 /* Vertex is adjacent to the previous vertex */
                && (c < n - 1 || (adjmat[0][v] == 1 /* Last vertex is adjacent to the first */
                    && v < circuit[1]))) /* Last vertex is less than the second */
            {
                circuit[ c] = v;
                hamiltonian_circuits_recursive(adjmat, n, circuit, c + 1, fun);
            }
        }
    }
} 

void hamiltonian_circuits(unsigned int **adjmat, size_t n, circuitfn fun)
{
    unsigned int *circuit;
    circuit = malloc(n * sizeof(unsigned int));
    if (circuit == NULL) {
        return;
    }
    circuit[0] = 0;
    hamiltonian_circuits_recursive(adjmat, n, circuit, 1, fun);
    free(circuit);
}

This is an example program that finds all Hamiltonian circuits on the complete graph of order 5. With the constraints on cyclic permutations and direction mentioned above, there are \((n – 1)!/2\) Hamiltonian circuits on a complete graph of order \(n\).

#include <stdio.h>
#include <stdlib.h>

static void print_circuit(const unsigned int *circuit, size_t len)
{
    unsigned int v;
    for (v = 0; v < len; v++) {
        printf("%d ", circuit[v]);
    }
    putchar('\n');
}

int main(void)
{
    unsigned int **adjmat;
    const size_t n = 5;
    unsigned int i, j;

    /* Create a complete graph on 5 vertices */
    adjmat = malloc(n * sizeof(unsigned int *));
    for (i = 0; i < n; i++) {
        adjmat[i] = malloc(n * sizeof(unsigned int));
        for (j = 0; j < n; j++) {
            adjmat[i][j] = 1;
        }
    }

    hamiltonian_circuits(adjmat, n, print_circuit);
    for (i = 0; i < n; i++) {
        free(adjmat[i]);
    }
    free(adjmat);

    return 0;
}

Here is the output. Notice there are \((5 – 1)!/2 = 12\) circuits:

0 2 3 4 1
0 2 4 3 1
0 3 1 4 2
0 3 2 4 1
0 3 4 1 2
0 3 4 2 1
0 4 1 2 3
0 4 1 3 2
0 4 2 1 3
0 4 2 3 1
0 4 3 1 2
0 4 3 2 1

Greedy Set Cover in Python

In the Set Cover problem, we are given a universal set \(U\) and a family of subsets \(S_1, \ldots, S_k \subseteq U\). A set cover is a collection of subsets \(C\) from \(S_1, \ldots, S_k\) whose union is the universal set \(U\). We would like to minimise \(\left\vert{C}\right\vert\).

The problem of finding the optimum \(C\) is NP-Complete, but a greedy algorithm can give an \(O(log_e n)\) approximation to optimal solution.

The greedy algorithm selects the set \(S_i\) containing the largest number of uncovered points at each step, until all of the points have been covered.

Below is an implementation in Python:

def set_cover(universe, subsets):
    """Find a family of subsets that covers the universal set"""
    elements = set(e for s in subsets for e in s)
    # Check the subsets cover the universe
    if elements != universe:
        return None
    covered = set()
    cover = []
    # Greedily add the subsets with the most uncovered points
    while covered != elements:
        subset = max(subsets, key=lambda s: len(s - covered))
        cover.append(subset)
        covered |= subset

    return cover

def main():
    universe = set(range(1, 11))
    subsets = [set([1, 2, 3, 8, 9, 10]),
        set([1, 2, 3, 4, 5]),
        set([4, 5, 7]),
        set([5, 6, 7]),
        set([6, 7, 8, 9, 10])]
    cover = set_cover(universe, subsets)
    print(cover)

if __name__ == '__main__':
    main()

Output:

[set([1, 2, 3, 8, 9, 10]), set([4, 5, 7]), set([5, 6, 7])]

Unbounded Knapsack using dynamic programming in C

The unbounded knapsack problem is to try to fill a knapsack of a given capacity with items of given weight and value in such a way as to maximise the value of the knapsack. There is no limit on the number of instances of each item, hence the “unbounded” label. The decision problem of whether the knapsack can be filled to greater than or equal to a value is NP-complete. The maximisation problem can be solved in pseudo-polynomial time using dynamic programming.

The algorithm works by filling an array of 0, …, the capacity of the knapsack from the bottom up with the value most valuable knapsack for each capacity. In order to work out at each capacity what the most profitable knapsack is, it considers each item in turn and finds out what the value of the knapsack would be if this was the last item added. This value is the value of the earlier knapsack that is lighter by the weight of the current item, plus the value of the current item. Since the knapsack values are calculated from lower to higher capacities, the value of this earlier, lighter knapsack can always be found without needing to recompute it. Once each of the items has been considered as the most recent addition, the one that gives rise to the most valuable knapsack is added, and this value is recorded in the current cell of the array. The algorithm then proceeds to the next cell of the array. When the algorithm reaches the top of the array, and fills that cell in, it has found the most valuable knapsack for the specified capacity.

There is an implicit link between each knapsack in the array and the previous lighter knapsack from which it was derived by adding an item. These links form multiple chains through the array. The chain that begins at the top cell is the one that traces the path of additions that have led to the most profitable knapsack. In order to realise this link, the array cells contain pointers to the earlier knapsack from which they were constructed, and these pointers are set as the algorithm progresses. Once the array is full, the values of the items can be retrieved by following this linked list of pointers.

struct knapsack {
    unsigned int profit;
    struct knapsack *prev;
};
typedef struct knapsack knapsack;

/* Find the minimum weight item with this profit */
int min_weight_item(unsigned int profit, const unsigned int *weights,
        const unsigned int *profits, size_t len)
{
    int item = -1;
    unsigned int i;
    for (i = 0; i < len; i++) {
        if (profits[i] == profit) {
            if (item == -1 || weights[i] < weights[item]) {
                item = i;
            }
        }
    }
    return item;
}

unsigned int unbounded_knapsack(unsigned int capacity, unsigned int *weights,
        unsigned int *profits, unsigned int *counts, size_t len)
{
    knapsack *z = malloc((capacity + 1) * sizeof(knapsack));
    unsigned int c, i;
    unsigned int solution, profit;
    z[0].profit = 0;
    z[0].prev = NULL;
    knapsack *current;
    /* Fill in the array */
    for (c = 1; c <= capacity; c++) {
        z.profit = z.profit;
        z.prev = &(z);
        for (i = 0; i < len; i++) {
            if (weights[i] <= c) {
                /* prev is the best knapsack without adding this item */
                knapsack *prev = z + (c - weights[i]);
                if (prev->profit + profits[i] > z.profit) {
                    z.profit = prev->profit + profits[i];
                    z.prev = prev;
                }
            }
        }
    }
    /* Read back the best solution */
    for (profit = z[capacity].profit, current = z[capacity].prev;
            current != NULL;
            profit = current->profit, current = current->prev) {
        counts[min_weight_item(profit - current->profit, weights, profits, len)]++;
        
    }
    solution = z[capacity].profit;
    free(z);
    return solution;
}

An example program:

static void print_knapsack(const unsigned int *counts, const unsigned int *profits, size_t len)
{
	unsigned int i;
    for (i = 0; i < len; i++) {
        if (counts[i] > 0) {
            printf("%d x %d\n", counts[i], profits[i]);
        }
    }
}

int main(void)
{
    unsigned int weights[] = {4, 3, 5, 7, 11};
    unsigned int profits[] = {5, 3, 6, 2, 7};
    unsigned int counts[5] = {0};
    const size_t len = sizeof(weights) / sizeof(unsigned int);
    const unsigned int capacity = 17; 
    printf("The maximum profit is %u\n", 
            unbounded_knapsack(capacity, weights, profits, counts, len));
    print_knapsack(counts, profits, len);
    return 0;
}

Output:

The maximum profit is 21
3 x 5
1 x 6

Subset-Sum with backtracking in C

The subset-sum problem is to find a subset of a set of integers that sums to a given value. The decision problem of finding out if such a subset exists is NP-complete. One way of solving the problem is to use backtracking. This is a backtracking solution in C that finds all of the subsets that sum to the target value.

typedef void(*subset_sumfn)(const unsigned int *, size_t);

static unsigned int promising(int i, size_t len, unsigned int weight, unsigned int total, 
        unsigned int target, const unsigned int *weights)
{
    return (weight + total >= target) && (weight == target || weight + weights[i + 1] <= target);
}

static unsigned int sum(const unsigned int *weights, size_t len)
{
    unsigned int total = 0;
    unsigned int i;
    for (i = 0; i < len; i++) {
        total += weights[i];
    }
    return total;
}

static void subset_sum_recursive(const unsigned int *weights, size_t len, unsigned int target, 
        int i, unsigned int weight, unsigned int total, unsigned int *include, subset_sumfn fun)
{
    if (promising(i, len, weight, total, target, weights)) {
        if (weight == target) {
            fun(include, i + 1);
        }
        else if (i < (int)len - 1){
            include[i + 1] = 1;
            subset_sum_recursive(weights, len, target, i + 1, weight + weights[i + 1], 
                   total - weights[i + 1], include, fun);
            include[i + 1] = 0;
            subset_sum_recursive(weights, len, target, i + 1, weight,
                    total - weights[i + 1], include, fun);
        }
    }
}

void subset_sum(const unsigned int *weights, size_t len, unsigned int target, subset_sumfn fun)
{
    const unsigned int total = sum(weights, len);
    unsigned int *include = calloc(len, sizeof(unsigned int));
    if (include == NULL) {
        return;
    }
    subset_sum_recursive(weights, len, target, -1, 0, total, include, fun);
    free(include);
}

int main(void)
{
    unsigned int weights[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    const unsigned int len = sizeof(weights) / sizeof(unsigned int);
    const unsigned int target = 7;
    subset_sum(weights, len, target, print_vector);
    return 0;
}

The output is in the form of binary strings showing which elements of the original set are in the subset, so for example the first binary string corresponds to 1 + 2 + 4 = 7.

1 1 0 1
1 0 0 0 0 1
0 1 0 0 1
0 0 1 1
0 0 0 0 0 0 1

Vertex colouring with backtracking in C

The vertex colouring problem is to colour the vertices of a graph in such a way that no two adjacent vertices are the same colour. The decision problem of whether a graph can be coloured in m or fewer colours is NP-complete.

Although the problem is intractable, a bactracking algorithm can be used to reduce the number of colourings that need to be tried. The algorithm works by colouring the vertices one at a time, and backtracking if at any point it becomes impossible to colour the next vertex differently from all of its neighbours.

typedef void(*vertex_colouringfn)(const unsigned int *, size_t);

static unsigned int promising(int i, const unsigned int **adjmat, const unsigned int *colours)
{
    int j;
    unsigned int ok = 1;

    for (j = 0; j < i && ok; j++) {
        if (adjmat[i][j] && colours[i] == colours[j]) {
            /* Adjacent vertex is the same colour */
            ok = 0;
        }
    }
    return ok;
}

static void vertex_colouring_recursive(int i, const unsigned int **adjmat, size_t len,
        unsigned int *colours, unsigned int m, vertex_colouringfn fun)
{
    unsigned int colour;

    if (promising(i, adjmat, colours)) {
        if (i == len - 1) {
            /* Coloured every vertex successfully */
            fun(colours, len);
        }
        else if (i < (int)len - 1) {
            /* Try every colour for the next vertex */
            for (colour = 0; colour < m; colour++) {
                colours[i + 1] = colour;
                vertex_colouring_recursive(i + 1, adjmat, len, colours, m, fun);
            }
        }
    }
}

void vertex_colouring(const unsigned int **adjmat, size_t len, unsigned int m, 
        vertex_colouringfn fun)
{
    unsigned int *colours = calloc(len, sizeof(unsigned int));
    if (colours == NULL) {
        return;
    }
    vertex_colouring_recursive(-1, adjmat, len, colours, m, fun);
    free(colours);
}

This is an example program with a small graph and m = 3:

static void print_colouring(const unsigned int *colours, size_t len)
{
    unsigned int i;
    for (i = 0; i < len; i++) {
        printf("%d ", colours[i]);
    }
    printf("\n");
}

int main(void)
{
    unsigned int v0[] = {0, 1, 1, 1, 0};
    unsigned int v1[] = {1, 0, 1, 0, 1};
    unsigned int v2[] = {1, 1, 0, 1, 1};
    unsigned int v3[] = {1, 0, 1, 0, 1};
    unsigned int v4[] = {0, 1, 1, 1, 0};
    const unsigned int *adjmat[] = {v0, v1, v2, v3, v4};
    const size_t len = sizeof(adjmat) / sizeof(unsigned int*);
    const unsigned int m = 3; /* Number of colours */
    vertex_colouring(adjmat, len, m, print_colouring);
    return 0;
}

xkcd 287

General solutions get you a 50% tip

I’m not sure what this problem is called – I’m going to call it "multicombination sum" – but I don’t doubt that it is NP-complete, as it’s a variety of knapsack problem in which the values of the items are the same as their weight.

Below are three methods of solving it: a brute force method, using backtracking, and using dynamic programming.

Brute force method

The brute force method is just to construct all of the possible orders that might total $15.05.
The combinatorial algorithm we want is combinations with duplicates, or multicombinations.
Since 3 of the most expensive appetizer – SAMPLER PLATE – exceeds the target, and 7 of the cheapest appetizer – MIXED FRUIT – equals the target (so that’s one of the solutions), we want to iterate over all multicombinations with k ranging from 3 to 7.

unsigned int multiset_sum(const unsigned int *multiset, const unsigned int *values, unsigned int k)
{
    unsigned int i;
    unsigned int sum = 0;
    for (i = 0; i < k; i++) {
        sum += values[multiset[i]];
    }
    return sum;
}

typedef void (*multiset1fn)(const unsigned int *, unsigned int);

void multicombination_sum(const unsigned int *items, size_t len, unsigned int target,
        multiset1fn fun)
{
    unsigned int first = target / array_max(items, len);
    unsigned int last = target / array_min(items, len);
    unsigned int *multiset = calloc(last + 1, sizeof(unsigned int));
    unsigned int k;
    if (!multiset) {
        return;
    }
    for (k = first; k <= last; k++) {
        do {
            if (multiset_sum(multiset, items, k) == target) {
                fun(multiset, k);
            }
        } while (next_multicombination(multiset, len, k));
    }
    free(multiset);
}

void order_print(const unsigned int *numbers, unsigned int k)
{
	const char *appetizers[] = {"MIXED FRUIT", "FRENCH FRIES", "SIDE SALAD", 
        "HOT WINGS", "MOZARELLA STICKS", "SAMPLER PLATE"};
    unsigned int i, item, count;
    for (i = 0; i < k; i++) {
        if (i == 0 || numbers[i] != item) {
            if (i > 0) {
                printf("%s (x%d) ", appetizers[item], count);
            }
            count = 1;
            item = numbers[i];
        }
        else {
            count++;
        }
    }
    printf("%s (x%d)\n", appetizers[item], count);
}

int main(void)
{
    unsigned int prices[] = {215, 275, 335, 355, 420, 580};
	const size_t n = sizeof(prices) / sizeof(prices[0]);
    const unsigned int target = 1505;
    multicombination_sum(prices, n, target, (multiset1fn)order_print);

	return 0;
}

Output:

MIXED FRUIT (x1) HOT WINGS (x2) SAMPLER PLATE (x1)
MIXED FRUIT (x7)

This took 1709 iterations to come up with the answer.

Backtracking

We can drastically reduce the search space by building the orders up one item at a time, and backtracking if the target is exceeded.


typedef void(*multiset2fn)(unsigned int *, const unsigned int *, size_t); 

static void multicombination_sum_recursive(int i, unsigned int *combination, 
       const unsigned int *items, size_t len, unsigned int target, multiset2fn fun,
       unsigned int sum)
{
    if (i == (int)len - 1) {
        if (sum == target) {
            fun(combination, items, i);
        }
    }
    else  {
        unsigned int quantity;
        unsigned int max_quantity = (target - sum) / items[i + 1];
        for (quantity = 0; quantity  <= max_quantity; quantity++) {
            combination[i + 1] = quantity;
            multicombination_sum_recursive(i + 1, combination, items, len, target, fun, 
                    sum + quantity * items[i + 1]);
        }
    }
}

void multicombination_sum(const unsigned int *items, size_t len, unsigned int target,
        multiset2fn fun)
{
    unsigned int *combination = malloc(len * sizeof(unsigned int));
    multicombination_sum_recursive(-1, combination, items, len, target, fun, 0);
    free(combination);
}

void order_print(const unsigned int *combination, const unsigned int *items, size_t len)
{
    const char *appetizers[] = {"MIXED FRUIT", "FRENCH FRIES", "SIDE SALAD", 
        "HOT WINGS", "MOZARELLA STICKS", "SAMPLER PLATE"};
    unsigned int i;
    for (i = 0; i <= len; i++) {
        if (combination[i] > 0) {
            printf("%s (x%d) ", appetizers[i], combination[i]);
        }
    }
    putchar('\n');
}

int main(void)
{
    unsigned int prices[] = {215, 275, 335, 355, 420, 580};
    const unsigned int len = sizeof(prices) / sizeof(unsigned int);
    const unsigned int target = 1505;
    multicombination_sum(prices, len, target, (multiset2fn)order_print);
    return 0;
}

Output:

MIXED FRUIT (x1) HOT WINGS (x2) SAMPLER PLATE (x1)
MIXED FRUIT (x7)

This took just 211 iterations.

Dynamic Programming

As I said at the beginning, this problem is a special case of the knapsack problem in which the values of the items are the same as their weight. This means that we can use the classic dynamic programming algorithm for knapsack to solve it. The algorithm works by calculating the most profitable knapsack for each capacity up to the target capacity.

Here is an implementation of the algorithm:

struct knapsack {
    unsigned int profit;
    struct knapsack *prev;
};
typedef struct knapsack knapsack;

/* Find the minimum weight item with this profit */
int min_weight_item(unsigned int profit, const unsigned int *weights, const unsigned int *profits,
        size_t len)
{
    int item = -1;
    unsigned int i;
    for (i = 0; i < len; i++) {
        if (profits[i] == profit) {
            if (item == -1 || weights[i] < weights[item]) {
                item = i;
            }
        }
    }
    return item;
}

unsigned int unbounded_knapsack(unsigned int capacity, unsigned int *weights, 
       unsigned int *profits, unsigned int *counts, size_t len)
{
    knapsack *z = malloc((capacity + 1) * sizeof(knapsack));
    unsigned int c, i;
    unsigned int solution, profit;
    z[0].profit = 0;
    z[0].prev = NULL;
    knapsack *current;
    /* Fill in the array */
    for (c = 1; c <= capacity; c++) {
        z.profit = z.profit;
        z.prev = &(z);
        for (i = 0; i < len; i++) {
            if (weights[i] <= c) {
                knapsack *prev = z + (c - weights[i]);
                if (prev->profit + profits[i] > z.profit) {
                    z.profit = prev->profit + profits[i];
                    z.prev = prev;
                }
            }
        }
    }
    /* Read back the best solution */
    for (profit = z[capacity].profit, current = z[capacity].prev;
            current != NULL;
            profit = current->profit, current = current->prev) {
        counts[min_weight_item(profit - current->profit, weights, profits, len)]++;
        
    }
    solution = z[capacity].profit;
    free(z);
    return solution;
}

We just need to use this algorithm and pass the prices of the menu items for both weights and profits:

int main(void)
{
    unsigned int values[] = {215, 275, 335, 355, 420, 580};
    const size_t len = sizeof(values) / sizeof(values[0]);
    unsigned int counts[6] = {0};
    const unsigned int target = 1505;
    unbounded_knapsack(target, values, values, counts, len);
    order_print(counts, len);
    return 0;
}

This produces one of the solutions:

MIXED FRUIT (x7)

We could modify the algorithm to produce all of them.