Tag Archives: Combinatorial Algorithms

Combinations of a multiset

These are the combinations of k elements chosen from a set that can contain duplicates (a multiset).

For example, given the multiset {0, 1, 1, 2, 2, 2, 3}, the 4-combinations are:

[0, 1, 1, 2]
[0, 1, 1, 3]
[0, 1, 2, 2]
[0, 1, 2, 3]
[0, 2, 2, 2]
[0, 2, 2, 3]
[1, 1, 2, 2]
[1, 1, 2, 3]
[1, 2, 2, 2]
[1, 2, 2, 3]
[2, 2, 2, 3]

This algorithm produces the combinations in lexicographic order, with the elements in each combination in increasing order.

Begin with the first combination, which is the first k elements of the multiset ([0, 1, 1, 2] in the example above), and then at each step:

  1. Find the rightmost element that is less than the maximum value it can have (which is the element in the multiset that is the same distance from the right).
  2. Replace it with the first multiset element greater than it.
  3. Replace the remainder of the combination with the elements that follow the replacement in the multiset.
unsigned int next_multiset_combination(const unsigned int *multiset, unsigned int *ar, size_t n, unsigned int k)
{
	unsigned int finished = 0;
	unsigned int changed = 0;
	unsigned int i;

	for (i = k - 1; !finished && !changed; i--) {
		if (ar[i] < multiset[i + (n - k)]) {
			/* Find the successor */
			unsigned int j;
			for (j = 0; multiset[j] <= ar[i]; j++);
			/* Replace this element with it */
			ar[i] = multiset[j];
			if (i < k - 1) {
				/* Make the elements after it the same as this part of the multiset */
				unsigned int l;
				for (l = i + 1, j = j + 1; l < k; l++, j++) {
					ar[l] = multiset[j];
				}
			}
			changed = 1;
		}
		finished = i == 0;
	}
	if (!changed) {
		/* Reset to first combination */
		for (i = 0; i < k; i++) {
			ar[i] = multiset[i];
		}
	}
	return changed;
}

Here is an example program:

#include <stdio.h>

#include <multiset-combination.h>

static void print_array(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('[', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(']', fptr);
}

int main(void)
{
	unsigned int multiset[] = {0, 1, 1, 2, 2, 2, 3};
	unsigned int n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 1, 1, 2};
	const unsigned int k = sizeof(numbers) / sizeof(unsigned int);

	do {
		print_array(numbers, k, stdout);
		putchar('\n');
	} while (next_multiset_combination(multiset, numbers, n, k));

	return 0;
}

Here is a second example that prints the elements of the multisets:

#include <stdio.h>

#include <multiset-combination.h>

static void print_set(const unsigned int *ar, size_t len, const void **elements, 
		const char *brackets, printfn print, FILE *fptr)
{
	unsigned int i;
	fputc(brackets[0], fptr);
	for (i = 0; i < len; i++) {
		print(elements[ar[i]], fptr);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(brackets[1], fptr); 
}

int main(void)
{
	unsigned int multiset[] = {0, 1, 1, 2, 2, 2, 3};
	char *elements[] = {"a", "b", "c", "d"};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 1, 1, 2};
	const size_t k = sizeof(numbers) / sizeof(unsigned int);

	do {
		print_set(numbers, k, (void*)elements, "[]", (printfn)fputs, stdout);
		putchar('\n');
	} while (next_multiset_combination(multiset, numbers, n, k));

	return 0;
}

Multicombinations

A multicombination is a combination that can contain duplicates (i.e., the combination is a multiset).

Note that the source set does not contain duplicates. For combinations of a set that contains duplicates, see combinations of a multiset.

These are the 3-multicombinations of the 4-set {0, 1, 2, 3}:

[0, 0, 0]
[0, 0, 1]
[0, 0, 2]
[0, 0, 3]
[0, 1, 1]
[0, 1, 2]
[0, 1, 3]
[0, 2, 2]
[0, 2, 3]
[0, 3, 3]
[1, 1, 1]
[1, 1, 2]
[1, 1, 3]
[1, 2, 2]
[1, 2, 3]
[1, 3, 3]
[2, 2, 2]
[2, 2, 3]
[2, 3, 3]
[3, 3, 3]

This algorithm produces the combinations in lexicographic order, with the elements in each combination in increasing order.

To find the next multicombination containing k elements from a set containing n elements, begin with the multicombination containing k zeroes, then at each step:

  1. Find the rightmost element that is less than n – 1
  2. Increment it.
  3. Make the elements after it the same.
unsigned int next_multicombination(unsigned int *ar, size_t n, unsigned int k)
{
	unsigned int changed = 0;
	int i;

	for (i = k - 1; i >= 0 && !changed; i--) {
		if (ar[i] < n - 1) {
			/* Increment this element */
			ar[i]++;
			if (i < k - 1) {
				/* Make the elements after it the same */
				unsigned int j;
				for (j = i + 1; j < k; j++) {
					ar[j] = ar[j - 1];
				}
			}
			changed = 1;
		}
	}
	if (!changed) {
		/* Reset to first combination */
		for (i = 0; i < k; i++) {
			ar[i] = 0;
		}
	}
	return changed;
}

Example program to produce the output above:

void print_array(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('[', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(']', fptr);
}

int main(void)
{
	unsigned int numbers[3] = {0};
	const size_t k = sizeof(numbers) / sizeof(numbers[0]);
	unsigned int n = 4;

	do {
		print_array(numbers, k, stdout);
		putchar('\n');
	} while (next_multicombination(numbers, n, k));

	return 0;
}

Combinations

Combinations, or k-combinations, are the unordered sets of k elements chosen from a set of size n.

For example, there are 10 3-combinations of the 5-set {0, 1, 2, 3, 4}:

{0, 1, 2}
{0, 1, 3}
{0, 1, 4}
{0, 2, 3}
{0, 2, 4}
{0, 3, 4}
{1, 2, 3}
{1, 2, 4}
{1, 3, 4}
{2, 3, 4}

The set of all combinations 0 ≤ k ≤ n is the power set.

Combinations that can contain duplicates are called multicombinations.

Combinations of a set containing duplicates are combinations of a multiset.

Combinations where order is significant are called k-permutations.

This algorithm produces the combinations in lexicographic order, with the elements in each combination strictly increasing in magnitude.

Begin with the combination containing the the numbers from 0 to k – 1, and at each step:

  1. Find the rightmost element ar[i] that is less than the maximum value it can have (which is (n – 1) – (k – 1) – i)
  2. Increment it
  3. Turn the elements after it into a linear sequence continuing from ar[i]
unsigned int next_combination(unsigned int *ar, size_t n, unsigned int k)
{
	unsigned int finished = 0;
	unsigned int changed = 0;
	unsigned int i;

	if (k > 0) {
		for (i = k - 1; !finished && !changed; i--) {
			if (ar[i] < (n - 1) - (k - 1) + i) {
				/* Increment this element */
				ar[i]++;
				if (i < k - 1) {
					/* Turn the elements after it into a linear sequence */
					unsigned int j;
					for (j = i + 1; j < k; j++) {
						ar[j] = ar[j - 1] + 1;
					}
				}
				changed = 1;
			}
			finished = i == 0;
		}
		if (!changed) {
			/* Reset to first combination */
			for (i = 0; i < k; i++) {
				ar[i] = i;
			}
		}
	}
	return changed;
}

Permutations

The permutations of a set are the ways of arranging its elements.

For example, there are 24 permutations of a set of 4 elements:

[0, 1, 2, 3]
[0, 1, 3, 2]
[0, 2, 1, 3]
[0, 2, 3, 1]
[0, 3, 1, 2]
[0, 3, 2, 1]
[1, 0, 2, 3]
[1, 0, 3, 2]
[1, 2, 0, 3]
[1, 2, 3, 0]
[1, 3, 0, 2]
[1, 3, 2, 0]
[2, 0, 1, 3]
[2, 0, 3, 1]
[2, 1, 0, 3]
[2, 1, 3, 0]
[2, 3, 0, 1]
[2, 3, 1, 0]
[3, 0, 1, 2]
[3, 0, 2, 1]
[3, 1, 0, 2]
[3, 1, 2, 0]
[3, 2, 0, 1]
[3, 2, 1, 0]

To find the next permutation of an array ar:

  1. Find the highest index, i1 such that ar[i1] is the first of a pair of elements in ascending order.
    If there isn’t one, the sequence is the highest permutation, so reverse the whole thing to begin again.

  2. Find the highest index i2, such that i2 > i1 and ar[i2] > ar[i1].
  3. Swap ar[i1] and ar[i2].
  4. The elements from ar[i1 + 1] to the end are now in descending order (a later permutation), so reverse them.
static void swap(unsigned int *ar, unsigned int first, unsigned int second)
{
	unsigned int temp = ar[first];
	ar[first] = ar[second];
	ar[second] = temp;
}

static void reverse(unsigned int *ar, size_t len)
{
	unsigned int i, j;

	for (i = 0, j = len - 1; i < j; i++, j--) {
		swap(ar, i, j);
	}
}

unsigned int next_permutation(unsigned int *ar, size_t len)
{
	unsigned int i1, i2;
	unsigned int result = 0;
	
	/* Find the rightmost element that is the first in a pair in ascending order */
	for (i1 = len - 2, i2 = len - 1; ar[i2] <= ar[i1] && i1 != 0; i1--, i2--);
	if (ar[i2] <= ar[i1]) {
		/* If not found, array is highest permutation */
		reverse(ar, len);
	}
	else {
		/* Find the rightmost element to the right of i1 that is greater than ar[i1] */
		for (i2 = len - 1; i2 > i1 && ar[i2] <= ar[i1]; i2--);
		/* Swap it with the first one */
		swap(ar, i1, i2);
		/* Reverse the remainder */
		reverse(ar + i1 + 1, len - i1 - 1);
		result = 1;
	}
	return result;
}

Cartesian Product

The cartesian product of n sets is the set of ordered n-tuples containing one element from each set.

For example, the cartesian product of the sets {a, b}, {p, q, r} and {w, x, y, z} is:

[a, p, w]
[a, p, x]
[a, p, y]
[a, p, z]
[a, q, w]
[a, q, x]
[a, q, y]
[a, q, z]
[a, r, w]
[a, r, x]
[a, r, y]
[a, r, z]
[b, p, w]
[b, p, x]
[b, p, y]
[b, p, z]
[b, q, w]
[b, q, x]
[b, q, y]
[b, q, z]
[b, r, w]
[b, r, x]
[b, r, y]
[b, r, z]

The algorithm uses an array of n integers. Each number in the array represents the index number within the corresponding set of that element.

The algorithm then simply needs to count upwards from n zeroes, with the cardinality of each set determining when a digit rolls over.

For example, these are the correspondences between the array and the n-tuple for the sets in the example:

[0, 0, 0] = [a, p, w]
[0, 0, 1] = [a, p, x]
[0, 0, 2] = [a, p, y]
[0, 0, 3] = [a, p, z]
[0, 1, 0] = [a, q, w]
[0, 1, 1] = [a, q, x]
[0, 1, 2] = [a, q, y]
[0, 1, 3] = [a, q, z]
[0, 2, 0] = [a, r, w]
[0, 2, 1] = [a, r, x]
[0, 2, 2] = [a, r, y]
[0, 2, 3] = [a, r, z]
[1, 0, 0] = [b, p, w]
[1, 0, 1] = [b, p, x]
[1, 0, 2] = [b, p, y]
[1, 0, 3] = [b, p, z]
[1, 1, 0] = [b, q, w]
[1, 1, 1] = [b, q, x]
[1, 1, 2] = [b, q, y]
[1, 1, 3] = [b, q, z]
[1, 2, 0] = [b, r, w]
[1, 2, 1] = [b, r, x]
[1, 2, 2] = [b, r, y]
[1, 2, 3] = [b, r, z]
unsigned int next_n_tuple(unsigned int *ar, size_t len, const size_t *sizes)
{
	unsigned int changed = 0;
	unsigned int finished = 0;
	unsigned int i;

	for (i = len - 1; !changed && !finished; i--) {
		if (ar[i] < sizes[i] - 1) {
			/* Increment */
			ar[i]++;
			changed = 1;
		}
		else {
			/* Roll over */
			ar[i] = 0;
		}
		finished = i == 0;
	}

	return changed;
}	

Partitions

A partition of a set is a division of it into disjoint subsets whose union is the set itself.

For example, the partitions of the set {a, b, c, d} are:

{{a, b, c, d}}
{{a, b, c}, {d}}
{{a, b, d}, {c}}
{{a, b}, {c, d}}
{{a, b}, {c}, {d}}
{{a, c, d}, {b}}
{{a, c}, {b, d}}
{{a, c}, {b}, {d}}
{{a, d}, {b, c}}
{{a}, {b, c, d}}
{{a}, {b, c}, {d}}
{{a, d}, {b}, {c}}
{{a}, {b, d}, {c}}
{{a}, {b}, {c, d}}
{{a}, {b}, {c}, {d}}

The scheme I am using is as follows: a partition is represented by an array of the same size as the set, and each element in the array is an integer specifying the subset in the partition in which the corresponding element occurs. For example, the array [0, 0, 0, 0] indicates that every element occurs in set 0, the first set, i.e., the partition {{a, b, c, d}}, while the array [0, 1, 2, 3] puts every element in its own set, i.e., the partition {{a}, {b}, {c}, {d}}.

The correspondences for the partitions of a 4-set are as follows:

[0, 0, 0, 0] = {{a, b, c, d}}
[0, 0, 0, 1] = {{a, b, c}, {d}}
[0, 0, 1, 0] = {{a, b, d}, {c}}
[0, 0, 1, 1] = {{a, b}, {c, d}}
[0, 0, 1, 2] = {{a, b}, {c}, {d}}
[0, 1, 0, 0] = {{a, c, d}, {b}}
[0, 1, 0, 1] = {{a, c}, {b, d}}
[0, 1, 0, 2] = {{a, c}, {b}, {d}}
[0, 1, 1, 0] = {{a, d}, {b, c}}
[0, 1, 1, 1] = {{a}, {b, c, d}}
[0, 1, 1, 2] = {{a}, {b, c}, {d}}
[0, 1, 2, 0] = {{a, d}, {b}, {c}}
[0, 1, 2, 1] = {{a}, {b, d}, {c}}
[0, 1, 2, 2] = {{a}, {b}, {c, d}}
[0, 1, 2, 3] = {{a}, {b}, {c}, {d}}

Having established how to represent partitions, the algorithm simply needs to produce the arrays shown above. At each step:

  1. Find the rightmost element that is no more than any element to its left
  2. Increment it
  3. Set all of the elements after it to 0
unsigned int next_partition(unsigned int *ar, size_t len)
{	
	unsigned int result = 0;
	if (ar[len - 1] < len - 1) {
		unsigned int i;
		unsigned int finished = 0;
		unsigned int changed = 0;
		/* Find the rightmost element no more than the other elements */
		for (i = len - 1; !finished && !changed; i--) {
			unsigned int j, max = 0;
			/* Find the highest element to the left of this one */
			for (j = 0; j < i; j++) {
				if (ar[j] > max) {
					max = ar[j];
				}
			}
			if (ar[i] <= max) {
				/* Increment */
				ar[i]++;
				changed = 1;
				/* Set the following elements to 0 */
				for (j = i + 1; j < len; j++) {
					ar[j] = 0;
				}
			}
			finished = i == 1;
		}
		result = 1;
	}

	return result;
}

Subsets using Gray codes

When calculations are being performed on each element of the power set, it can be advantageous if each set differs from its predecessor by as few elements as possible. One can produce the subsets in order such that each subset differs from its predecessor by only one element. The characteristic vectors of sets in this order are called Gray codes.

These are the Gray codes in order for a set of 3 elements:

[0, 0, 0]
[1, 0, 0]
[1, 1, 0]
[0, 1, 0]
[0, 1, 1]
[1, 1, 1]
[1, 0, 1]
[0, 0, 1]

They correspond to the following subsets. Notice how each subset differs from its predecessor by only 1 element:

{}
{a}
{a, b}
{b}
{b, c}
{a, b, c}
{a, c}
{c}

In order to go from one Gray code to its successor, it is necessary to find which single value in the array to change from 1 to 0, or vice-versa.

The rules for finding the index of this element j are based on the number of 1s in the current code, k, as follows:

  • If k is even, then j = 0
  • If k is odd, then j is the index of the first cell that follows the first 1
unsigned int next_subset_gray(unsigned int *ar, size_t n)
{
    /* Find the number of 1s, and the first 1 */
	unsigned int i;
    unsigned int k = 0;
    int first_1 = -1;
    unsigned int finished = 0;
    for (i = 0; i < n; i++) {
        if (ar[i] == 1) {
            k++;
            if (first_1 == -1) {
                first_1 = i;
            }
        }
    }
    /* Flip the relevant digit */
    if (k % 2 == 0) {
        /* First digit */
        ar[0] ^= 1;
    }
    else if (first_1 < n - 1) {
        /* Digit after the first 1 */
        ar[first_1 + 1] ^= 1;
    }
    else {
        /* Last digit */
        ar[n - 1] = 0;
        finished = 1;
    }
    return finished == 0;
}

Subsets

The set of all subsets of a set is called the power set.

For example, the power set of the set {a, b, c} consists of the sets:

{}
{a}
{b}
{c}
{a, b}
{a, c}
{b, c}
{a, b, c}

Note that:

  • The empty set {} is in the power set
  • The set itself is in the power set

The set of all subsets of a particular size, or k-subsets, are combinations.

A subset can be represented as an array of boolean values of the same size as the set, called a characteristic vector. Each boolean value indicates whether the corresponding element in the set is present or absent in the subset.

This gives following correspondences for the set {a, b, c}:

[0, 0, 0] = {}
[1, 0, 0] = {a}
[0, 1, 0] = {b}
[0, 0, 1] = {c}
[1, 1, 0] = {a, b}
[1, 0, 1] = {a, c}
[0, 1, 1] = {b, c}
[1, 1, 1] = {a, b, c}

The algorithm then simply needs to produce the arrays shown above. Observing how they change, one can derive the following algorithm:

  • Find the rightmost 1 with a 0 on its right. If it exists:
    1. Move it one cell to the right
    2. Move all of the 1s on its right, which are packed on the right hand side, to the cells immediately behind it
  • Otherwise, If all of the 1s are on the right hand side and ar[0] is 0, add another 1 and move the other 1s immediately behind it
  • Otherwise, all subsets have been produced, so return the array to its starting arrangement of all 0s
unsigned int next_subset(unsigned int *ar, size_t n)
{
	int i, ones = 0;
	unsigned int found = 0;
	unsigned int result = 1;

	/* Find the rightmost 1 with a 0 on its right, or the number of 1s */
	for (i = n - 2; !found && i >= 0; i--) {
		found = ar[i] == 1 && ar[i + 1] == 0;
		ones += ar[i + 1] == 1;
	}
	if (found) {
		/* Move the 1 right */
		ar[++i] = 0;
		ar[++i] = 1;
        /* Pack the 1s to its right immediately behind it */
        i++;
        for (; i < n; i++, ones--) {
            if (ones > 0) {
                ar[i] = 1;
            }
            else {
                ar[i] = 0;
            }
        }
	}
	else  if (ar[0] == 0) {
		/* Add a new 1 and place them all at the beginning */
		for (i = 0; i < n; i++) {
			if (i < ones + 1) {
				ar[i] = 1;
			}
			else {
				ar[i] = 0;
			}
		}
	}
	else {
		/* Finished, return to the starting arrangement */
		for (i = 0; i < n; i++) {
			ar[i] = 0;
		}
		result = 0;
	}
	return result;
}