Tag Archives: Multiset

Subsets of a multiset

This is the set of all subsets of a multiset, which are themselves multisets, i.e., the power set of a multiset.

For example, the power set of the multiset [a, b, b, c] consists of the sets:

()
(c)
(b)
(b, c)
(b, b)
(b, b, c)
(a)
(a, c)
(a, b)
(a, b, c)
(a, b, b)
(a, b, b, c)

The set of all subsets of a particular size are combinations of a multiset.

The multiset and its subsets are represented as a vector containing, for each element, the count of its occurrences in the multiset. For example, the multiset [a, b, b, c] is represented as [1, 2, 1]. This is similar to the characteristic vector used for power set, but with counts rather than boolean values.

The correspondences for the subsets are then as follows:

[0, 0, 0] = ()
[0, 0, 1] = (c)
[0, 1, 0] = (b)
[0, 1, 1] = (b, c)
[0, 2, 0] = (b, b)
[0, 2, 1] = (b, b, c)
[1, 0, 0] = (a)
[1, 0, 1] = (a, c)
[1, 1, 0] = (a, b)
[1, 1, 1] = (a, b, c)
[1, 2, 0] = (a, b, b)
[1, 2, 1] = (a, b, b, c)

The algorithm can then simply count from [0, 0, 0] to [1, 2, 1], using the values in the source multiset as the upper limit for the value of an element.

Note that this algorithm does not produce the subsets in lexicographic order.

unsigned int next_multiset_subset(const unsigned int *multiset, unsigned int *ar, size_t n)
{
	unsigned int changed = 0;
	int i;

	for (i = n - 1; i >= 0 && !changed; i--) {
		if (ar[i] < multiset[i]) {
			/* Increment */
			ar[i]++;
			changed = 1;
		}
		else {
			/* Roll over */
			ar[i] = 0;
		}
	}
	if (!changed) {
		/* Reset to first combination */
		for (i = 0; i < n; i++) {
			ar[i] = 0;
		}
	}
	return changed;
}

Here is an example program:

#include <stdio.h>

#include <multiset-subset.h>

static void print_list(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('(', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(')', fptr);
}

int main(void)
{
	unsigned int multiset[] = {1, 2, 1};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 0, 0};

	do {
		print_list(numbers, n, stdout);
		putchar('\n');
	} while (next_multiset_subset(multiset, numbers, n));

	return 0;
}

Here is a second example that prints the elements of the multisets:

#include <stdio.h>

#include <multiset-subset.h>

static void print_array(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('[', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(']', fptr);
}


static void print_multiset_subset(const unsigned int *ar, size_t len, 
		const void **elements, printfn print, FILE *fptr)
{
	unsigned int i, started = 0;
	fputc('(', fptr);
	for (i = 0; i < len; i++) {
		unsigned int j;
		for (j = 0; j < ar[i]; j++) {
			if (started) {
				fputs(", ", fptr);
			}
			print(elements[i], fptr);
			started = 1;
		}
	}
	fputc(')', fptr); 
}


int main(void)
{
	unsigned int multiset[] = {1, 2, 1};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	char *elements[] = {"a", "b", "c"};
	unsigned int numbers[] = {0, 0, 0};

	do {
        print_array(numbers, n, stdout);
        printf(" = "); 
		print_multiset_subset(numbers, n, (void*)elements, (printfn)fputs, stdout);
		putchar('\n');
	} while (next_multiset_subset(multiset, numbers, n));

	return 0;
}

Combinations of a multiset

These are the combinations of k elements chosen from a set that can contain duplicates (a multiset).

For example, given the multiset {0, 1, 1, 2, 2, 2, 3}, the 4-combinations are:

[0, 1, 1, 2]
[0, 1, 1, 3]
[0, 1, 2, 2]
[0, 1, 2, 3]
[0, 2, 2, 2]
[0, 2, 2, 3]
[1, 1, 2, 2]
[1, 1, 2, 3]
[1, 2, 2, 2]
[1, 2, 2, 3]
[2, 2, 2, 3]

This algorithm produces the combinations in lexicographic order, with the elements in each combination in increasing order.

Begin with the first combination, which is the first k elements of the multiset ([0, 1, 1, 2] in the example above), and then at each step:

  1. Find the rightmost element that is less than the maximum value it can have (which is the element in the multiset that is the same distance from the right).
  2. Replace it with the first multiset element greater than it.
  3. Replace the remainder of the combination with the elements that follow the replacement in the multiset.
unsigned int next_multiset_combination(const unsigned int *multiset, unsigned int *ar, size_t n, unsigned int k)
{
	unsigned int finished = 0;
	unsigned int changed = 0;
	unsigned int i;

	for (i = k - 1; !finished && !changed; i--) {
		if (ar[i] < multiset[i + (n - k)]) {
			/* Find the successor */
			unsigned int j;
			for (j = 0; multiset[j] <= ar[i]; j++);
			/* Replace this element with it */
			ar[i] = multiset[j];
			if (i < k - 1) {
				/* Make the elements after it the same as this part of the multiset */
				unsigned int l;
				for (l = i + 1, j = j + 1; l < k; l++, j++) {
					ar[l] = multiset[j];
				}
			}
			changed = 1;
		}
		finished = i == 0;
	}
	if (!changed) {
		/* Reset to first combination */
		for (i = 0; i < k; i++) {
			ar[i] = multiset[i];
		}
	}
	return changed;
}

Here is an example program:

#include <stdio.h>

#include <multiset-combination.h>

static void print_array(const unsigned int *ar, size_t len, FILE *fptr)
{
	unsigned int i;
	fputc('[', fptr);
	for (i = 0; i < len; i++) {
		fprintf(fptr, "%d", ar[i]);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(']', fptr);
}

int main(void)
{
	unsigned int multiset[] = {0, 1, 1, 2, 2, 2, 3};
	unsigned int n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 1, 1, 2};
	const unsigned int k = sizeof(numbers) / sizeof(unsigned int);

	do {
		print_array(numbers, k, stdout);
		putchar('\n');
	} while (next_multiset_combination(multiset, numbers, n, k));

	return 0;
}

Here is a second example that prints the elements of the multisets:

#include <stdio.h>

#include <multiset-combination.h>

static void print_set(const unsigned int *ar, size_t len, const void **elements, 
		const char *brackets, printfn print, FILE *fptr)
{
	unsigned int i;
	fputc(brackets[0], fptr);
	for (i = 0; i < len; i++) {
		print(elements[ar[i]], fptr);
		if (i < len - 1) {
			fputs(", ", fptr);
		}
	}
	fputc(brackets[1], fptr); 
}

int main(void)
{
	unsigned int multiset[] = {0, 1, 1, 2, 2, 2, 3};
	char *elements[] = {"a", "b", "c", "d"};
	const size_t n = sizeof(multiset) / sizeof(unsigned int);
	unsigned int numbers[] = {0, 1, 1, 2};
	const size_t k = sizeof(numbers) / sizeof(unsigned int);

	do {
		print_set(numbers, k, (void*)elements, "[]", (printfn)fputs, stdout);
		putchar('\n');
	} while (next_multiset_combination(multiset, numbers, n, k));

	return 0;
}