Tag Archives: Subsets

Subsets of a multiset

This is the set of all subsets of a multiset, which are themselves multisets, i.e., the power set of a multiset.

For example, the power set of the multiset [a, b, b, c] consists of the sets:

()
(c)
(b)
(b, c)
(b, b)
(b, b, c)
(a)
(a, c)
(a, b)
(a, b, c)
(a, b, b)
(a, b, b, c)

The set of all subsets of a particular size are combinations of a multiset.

The multiset and its subsets are represented as a vector containing, for each element, the count of its occurrences in the multiset. For example, the multiset [a, b, b, c] is represented as [1, 2, 1]. This is similar to the characteristic vector used for power set, but with counts rather than boolean values.

The correspondences for the subsets are then as follows:

[0, 0, 0] = ()
[0, 0, 1] = (c)
[0, 1, 0] = (b)
[0, 1, 1] = (b, c)
[0, 2, 0] = (b, b)
[0, 2, 1] = (b, b, c)
[1, 0, 0] = (a)
[1, 0, 1] = (a, c)
[1, 1, 0] = (a, b)
[1, 1, 1] = (a, b, c)
[1, 2, 0] = (a, b, b)
[1, 2, 1] = (a, b, b, c)

The algorithm can then simply count from [0, 0, 0] to [1, 2, 1], using the values in the source multiset as the upper limit for the value of an element.

Note that this algorithm does not produce the subsets in lexicographic order.

unsigned int next_multiset_subset(const unsigned int *multiset, unsigned int *ar, size_t n)
{
	unsigned int changed = 0;
	int i;

	for (i = n - 1; i >= 0 && !changed; i--) {
		if (ar[i] < multiset[i]) {
			/* Increment */
			ar[i]++;
			changed = 1;
		}
		else {
			/* Roll over */
			ar[i] = 0;
		}
	}
	if (!changed) {
		/* Reset to first combination */
		for (i = 0; i < n; i++) {
			ar[i] = 0;
		}
	}
	return changed;
}

Here is an example program:

#include <stdio.h>

#include <multiset-subset.h>

static void print_list(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('(', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(')', fptr);
}

int main(void)
{
	unsigned int multiset[] = {1, 2, 1};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 0, 0};

	do {
		print_list(numbers, n, stdout);
		putchar('\n');
	} while (next_multiset_subset(multiset, numbers, n));

	return 0;
}

Here is a second example that prints the elements of the multisets:

#include <stdio.h>

#include <multiset-subset.h>

static void print_array(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('[', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(']', fptr);
}


static void print_multiset_subset(const unsigned int *ar, size_t len, 
		const void **elements, printfn print, FILE *fptr)
{
	unsigned int i, started = 0;
	fputc('(', fptr);
	for (i = 0; i < len; i++) {
		unsigned int j;
		for (j = 0; j < ar[i]; j++) {
			if (started) {
				fputs(", ", fptr);
			}
			print(elements[i], fptr);
			started = 1;
		}
	}
	fputc(')', fptr); 
}


int main(void)
{
	unsigned int multiset[] = {1, 2, 1};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	char *elements[] = {"a", "b", "c"};
	unsigned int numbers[] = {0, 0, 0};

	do {
        print_array(numbers, n, stdout);
        printf(" = "); 
		print_multiset_subset(numbers, n, (void*)elements, (printfn)fputs, stdout);
		putchar('\n');
	} while (next_multiset_subset(multiset, numbers, n));

	return 0;
}

Subsets using Gray codes

When calculations are being performed on each element of the power set, it can be advantageous if each set differs from its predecessor by as few elements as possible. One can produce the subsets in order such that each subset differs from its predecessor by only one element. The characteristic vectors of sets in this order are called Gray codes.

These are the Gray codes in order for a set of 3 elements:

[0, 0, 0]
[1, 0, 0]
[1, 1, 0]
[0, 1, 0]
[0, 1, 1]
[1, 1, 1]
[1, 0, 1]
[0, 0, 1]

They correspond to the following subsets. Notice how each subset differs from its predecessor by only 1 element:

{}
{a}
{a, b}
{b}
{b, c}
{a, b, c}
{a, c}
{c}

In order to go from one Gray code to its successor, it is necessary to find which single value in the array to change from 1 to 0, or vice-versa.

The rules for finding the index of this element j are based on the number of 1s in the current code, k, as follows:

  • If k is even, then j = 0
  • If k is odd, then j is the index of the first cell that follows the first 1
unsigned int next_subset_gray(unsigned int *ar, size_t n)
{
    /* Find the number of 1s, and the first 1 */
	unsigned int i;
    unsigned int k = 0;
    int first_1 = -1;
    unsigned int finished = 0;
    for (i = 0; i < n; i++) {
        if (ar[i] == 1) {
            k++;
            if (first_1 == -1) {
                first_1 = i;
            }
        }
    }
    /* Flip the relevant digit */
    if (k % 2 == 0) {
        /* First digit */
        ar[0] ^= 1;
    }
    else if (first_1 < n - 1) {
        /* Digit after the first 1 */
        ar[first_1 + 1] ^= 1;
    }
    else {
        /* Last digit */
        ar[n - 1] = 0;
        finished = 1;
    }
    return finished == 0;
}

Subsets

The set of all subsets of a set is called the power set.

For example, the power set of the set {a, b, c} consists of the sets:

{}
{a}
{b}
{c}
{a, b}
{a, c}
{b, c}
{a, b, c}

Note that:

  • The empty set {} is in the power set
  • The set itself is in the power set

The set of all subsets of a particular size, or k-subsets, are combinations.

A subset can be represented as an array of boolean values of the same size as the set, called a characteristic vector. Each boolean value indicates whether the corresponding element in the set is present or absent in the subset.

This gives following correspondences for the set {a, b, c}:

[0, 0, 0] = {}
[1, 0, 0] = {a}
[0, 1, 0] = {b}
[0, 0, 1] = {c}
[1, 1, 0] = {a, b}
[1, 0, 1] = {a, c}
[0, 1, 1] = {b, c}
[1, 1, 1] = {a, b, c}

The algorithm then simply needs to produce the arrays shown above. Observing how they change, one can derive the following algorithm:

  • Find the rightmost 1 with a 0 on its right. If it exists:
    1. Move it one cell to the right
    2. Move all of the 1s on its right, which are packed on the right hand side, to the cells immediately behind it
  • Otherwise, If all of the 1s are on the right hand side and ar[0] is 0, add another 1 and move the other 1s immediately behind it
  • Otherwise, all subsets have been produced, so return the array to its starting arrangement of all 0s
unsigned int next_subset(unsigned int *ar, size_t n)
{
	int i, ones = 0;
	unsigned int found = 0;
	unsigned int result = 1;

	/* Find the rightmost 1 with a 0 on its right, or the number of 1s */
	for (i = n - 2; !found && i >= 0; i--) {
		found = ar[i] == 1 && ar[i + 1] == 0;
		ones += ar[i + 1] == 1;
	}
	if (found) {
		/* Move the 1 right */
		ar[++i] = 0;
		ar[++i] = 1;
        /* Pack the 1s to its right immediately behind it */
        i++;
        for (; i < n; i++, ones--) {
            if (ones > 0) {
                ar[i] = 1;
            }
            else {
                ar[i] = 0;
            }
        }
	}
	else  if (ar[0] == 0) {
		/* Add a new 1 and place them all at the beginning */
		for (i = 0; i < n; i++) {
			if (i < ones + 1) {
				ar[i] = 1;
			}
			else {
				ar[i] = 0;
			}
		}
	}
	else {
		/* Finished, return to the starting arrangement */
		for (i = 0; i < n; i++) {
			ar[i] = 0;
		}
		result = 0;
	}
	return result;
}