# Shunting Yard Algorithm in Python

The Shunting Yard Algorithm is a classic algorithm for parsing mathematical expressions invented by Edsger Dijkstra. Its name comes from the use of a stack to rearrange the operators and operands into the correct order for evaluation, which is rather reminiscent of a railway siding.

Here is a very simple implementation in Python:

def is_number(str):
try:
int(str)
return True
except ValueError:
return False

def is_name(str):
return re.match("\w+", str)

def peek(stack):
return stack[-1] if stack else None

def apply_operator(operators, values):
operator = operators.pop()
right = values.pop()
left = values.pop()
values.append(eval("{0}{1}{2}".format(left, operator, right)))

def greater_precedence(op1, op2):
precedences = {'+' : 0, '-' : 0, '*' : 1, '/' : 1}
return precedences[op1] > precedences[op2]

def evaluate(expression):
tokens = re.findall("[+/*()-]|\d+", expression)
values = []
operators = []
for token in tokens:
if is_number(token):
values.append(int(token))
elif token == '(':
operators.append(token)
elif token == ')':
top = peek(operators)
while top is not None and top != '(':
apply_operator(operators, values)
top = peek(operators)
else:
# Operator
top = peek(operators)
while top is not None and top not in "()" and greater_precedence(top, token):
apply_operator(operators, values)
top = peek(operators)
operators.append(token)
while peek(operators) is not None:
apply_operator(operators, values)

return values


Example program:

def main():
expression = '((20 - 10 ) * (30 - 20) / 10 + 10 ) * 2'
print("Shunting Yard Algorithm: {0}".format(evaluate(expression)))
print("Python: {0}".format(eval(expression)))

if __name__ == '__main__':
main()


Output:

Shunting Yard Algorithm: 40
Python: 40


# Integer partitions in Python

These functions produce integer partitions in reverse lexicographic and lexicographic order respectively.

def revlex_partitions(n):
"""
Generate all integer partitions of n
in reverse lexicographic order
"""
if n == 0:
yield []
return
for p in revlex_partitions(n - 1):
if len(p) == 1 or (len(p) > 1 and p[-1] < p[-2]):
p[-1] += 1
yield p
p[-1] -= 1
p.append(1)
yield p
p.pop()

def lex_partitions(n):
"""
Generate all integer partitions of n
in lexicographic order
"""
if n == 0:
yield []
return
for p in lex_partitions(n - 1):
p.append(1)
yield p
p.pop()
if len(p) == 1 or (len(p) > 1 and p[-1] < p[-2]):
p[-1] += 1
yield p
p[-1] -= 1


Example program:

def main():
for p in revlex_partitions(5):
print(p)
print()
for p in lex_partitions(5):
print(p)

if __name__ == '__main__':
main()


Output:


[4, 1]
[3, 2]
[3, 1, 1]
[2, 2, 1]
[2, 1, 1, 1]
[1, 1, 1, 1, 1]

[1, 1, 1, 1, 1]
[2, 1, 1, 1]
[2, 2, 1]
[3, 1, 1]
[3, 2]
[4, 1]



Reference: David Eppstein, IntegerPartitions.py

# Partition function using dynamic programming in C

A partition of an integer is a way of writing it as a sum of positive integers. For example, partitions of 5 are:

5
4 + 1
3 + 2
3 + 1 + 1
2 + 2 + 1
2 + 1 + 1 + 1
1 + 1 + 1 + 1 + 1

The order of numbers in a partition doesn’t matter. The number of partitions of a number $$n$$ is given by the partition function $$p(n)$$.

The first few values of $$p(n)$$ are 1, 1, 2, 3, 5, 7, 11, 15, 22, 30, 42, 56, 77, 101, 135, 176, 231, 297, 385, 490, 627, 792, 1002, 1255, 1575, 1958, 2436, 3010, 3718, 4565, 5604, … (A000041).

The partition function can be computed using a dynamic programming table-filling algorithm. Here it is in C:

#include <stdlib.h>

unsigned int partition(unsigned int n)
{
unsigned int i, j;
unsigned int result;
unsigned int **table;

if (n == 0) {
return 1;
}

table = malloc((n + 1) * sizeof(unsigned int *));
for (i = 0; i <= n; i++) {
table[i] = malloc((n + 1) * sizeof(unsigned int));
}

for (i = 0;i <= n; i++) {
table[i]=0;
}
for (i = 1; i <= n; i++) {
table[i] = 1;
}

for (i = 1; i <= n; i++) {
for (j = 1; j <= n; j++) {
if (j > i) {
table[i][j] = table[i][j - 1];
}
else {
table[i][j] = table[i][j - 1] + table[i - j][j];
}
}
}

result = table[n][n];
for (i = 0; i <= n; i++) {
free(table[i]);
}
free(table);

return result;
}


Example program:

#include <stdio.h>

int main(void)
{
unsigned int i;
for (i = 0; i < 31; i++) {
printf("%d\n",partition(i));
}

return 0;
}


Output:

1
1
2
3
5
7
11
15
22
30
42
56
77
101
135
176
231
297
385
490
627
792
1002
1255
1575
1958
2436
3010
3718
4565
5604


If you add sorted data to a binary tree using the conventional binary tree insertion algorithm, the tree will end up as a single rightward-leaning vine, and will perform no better than a sorted linked list.

In order to load sorted data and keep the tree perfectly balanced, it is necessary to add the data in the order in which a binary search would visit it. In other words, the middle element first, then the middle of the left sub-array, the middle of the right sub-array and so on.

Here is an implementation of that algorithm in C:

static void binarytree_load_recursive(btnode ** node, void ** list, size_t size)
{
if (size > 0) {
unsigned int left, right, middle = size / 2;
*node = btnode_create(list[middle]);
left = middle;
right = size - middle - 1;
}
}

void binarytree_load(binarytree * tree, void ** list, size_t size)
{
tree->count = size;
}


As an example, I load a binary tree from a sorted array and then check its height with the algorithm I described in Finding the Height of a Binary Tree Recursively. The height is 2, which is the optimum height for a tree with 7 nodes.

int main(void)
{
char *elements[] = {"A", "B", "C", "D","E","F", "G"};
const unsigned int n = sizeof(elements) / sizeof(const char*);
binarytree *tree;

tree = binarytree_create((cmpfn)strcmp);
printf("Height is %u\n", binarytree_height(tree));
binarytree_for_each(tree, (forfn)puts);
binarytree_delete(tree);

return 0;
}


Output:

Height is 2
A
B
C
D
E
F
G


Here is the rest of the binary tree code:

struct btnode {
struct btnode * left;
struct btnode * right;
void * data;
};
typedef struct btnode btnode;

typedef int(*cmpfn)(const void *, const void *);

struct binarytree {
btnode * root;
cmpfn compare;
unsigned int count;
};
typedef struct binarytree binarytree;

static btnode * btnode_create(void * data)
{
btnode * node = malloc(sizeof(btnode));
if (node) {
node->left = NULL;
node->right = NULL;
node->data = data;
}
return node;
}

binarytree * binarytree_create(cmpfn compare)
{
binarytree * tree = malloc(sizeof(binarytree));
if (tree != NULL) {
tree->root = NULL;
tree->compare = compare;
tree->count = 0;
}
return tree;
}

static void binarytree_clear_recursive(btnode * root)
{
if (root != NULL) {
btnode * left = root->left;
btnode * right = root->right;
free(root);
binarytree_clear_recursive(left);
binarytree_clear_recursive(right);
}
}

void binarytree_clear(binarytree * tree)
{
binarytree_clear_recursive(tree->root);
tree->root = NULL;
tree->count = 0;
}

void binarytree_delete(binarytree * tree)
{
if (tree) {
binarytree_clear(tree);
free(tree);
}
}

typedef void(*forfn)(const void *);

static void binarytree_for_each_recursive(const btnode * root, forfn fun)
{
if (root) {
binarytree_for_each_recursive(root->left, fun);
fun(root->data);
binarytree_for_each_recursive(root->right, fun);
}
}

void binarytree_for_each(const binarytree * tree, forfn fun)
{
binarytree_for_each_recursive(tree->root, fun);
}

static void binarytree_load_recursive(btnode ** node, void ** list, size_t size)
{
if (size > 0) {
unsigned int left, right, middle = size / 2;
*node = btnode_create(list[middle]);
left = middle;
right = size - middle - 1;
}
}

void binarytree_load(binarytree * tree, void ** list, size_t size)
{
tree->count = size;
}

static int max(int a, int b)
{
if (a >= b) {
return a;
}
return b;
}

unsigned int binarytree_height_recursive(const btnode *node)
{
unsigned int height = 0;
if (node->left || node->right) {
height = max(node->left ? binarytree_height_recursive(node->left) : 0,
node->right ? binarytree_height_recursive(node->right) : 0) + 1;
}
return height;
}

unsigned int binarytree_height(const binarytree *tree)
{
unsigned int height = 0;
if (tree->root) {
height = binarytree_height_recursive(tree->root);
}
return height;
}


# Shuffle an array in C

This is an implementation in C of the Fisher-Yates algorithm to randomly shuffle an array.

#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Get a random number >= first and <= second */
unsigned int rand_between(unsigned int first, unsigned int second)
{
unsigned int range = second - first;
return rand() % (range + 1) + first;
}

void shuffle(void* array, size_t num, size_t size)
{
unsigned int i;
void *temp = malloc(size);
if (!temp) {
return;
}
for (i = 0; i < num - 1; i++) {
unsigned int j = rand_between(i, num - 1);
if (i != j) {
memcpy(temp, array + i * size, size);
memcpy(array + i * size, array + j * size, size);
memcpy(array + j * size, temp, size);
}
}
free(temp);
}


Example program:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int main(void)
{
char *elements[] = {"a", "b", "c", "d", "e"};
const size_t size = sizeof(elements);
const size_t n = sizeof(elements) / size;
char **shuffled;
unsigned int i;

shuffled = malloc(size * n);
memcpy(shuffled, elements, size * n);
srand(time(NULL));
shuffle(shuffled, n, size);
for (i = 0; i < n; i++) {
printf("%s\n", shuffled[i]);
}
free(shuffled);
return 0;
}


# Greedy Set Cover in Python

In the Set Cover problem, we are given a universal set $$U$$ and a family of subsets $$S_1, \ldots, S_k \subseteq U$$. A set cover is a collection of subsets $$C$$ from $$S_1, \ldots, S_k$$ whose union is the universal set $$U$$. We would like to minimise $$\left\vert{C}\right\vert$$.

The problem of finding the optimum $$C$$ is NP-Complete, but a greedy algorithm can give an $$O(log_e n)$$ approximation to optimal solution.

The greedy algorithm selects the set $$S_i$$ containing the largest number of uncovered points at each step, until all of the points have been covered.

Below is an implementation in Python:

def set_cover(universe, subsets):
"""Find a family of subsets that covers the universal set"""
elements = set(e for s in subsets for e in s)
# Check the subsets cover the universe
if elements != universe:
return None
covered = set()
cover = []
# Greedily add the subsets with the most uncovered points
while covered != elements:
subset = max(subsets, key=lambda s: len(s - covered))
cover.append(subset)
covered |= subset

return cover

def main():
universe = set(range(1, 11))
subsets = [set([1, 2, 3, 8, 9, 10]),
set([1, 2, 3, 4, 5]),
set([4, 5, 7]),
set([5, 6, 7]),
set([6, 7, 8, 9, 10])]
cover = set_cover(universe, subsets)
print(cover)

if __name__ == '__main__':
main()


Output:

[set([1, 2, 3, 8, 9, 10]), set([4, 5, 7]), set([5, 6, 7])]


# Greedy HornSAT in Python

Consider this logic puzzle:

If Mr. Smith has a dog, then Mrs. Brown has a cat.
If Mr. Jones has a dog, then he has a cat too.
If Mr. Smith has a dog and Mr. Jones has a cat, then Mrs. Peacock has a dog.
If Mrs. Brown and Mr. Jones share a pet of the same species, then Mr. Smith has a cat
All the men have dogs.

What can we say about who has what kind of pet?

We can encode the problem as Boolean variables. We write s, j, b, p for Boolean variables that are true if Smith, Jones, Brown, or Peacock respectively have a dog, and S, J, B, P for Boolean variables that are true if they have a cat. The puzzle can then be exressed as the following logical formula:

$$\begin{matrix} (s & \Rightarrow & B) & \land \\ (j & \Rightarrow & J) & \land \\ ((s \land J) & \Rightarrow & p) & \land \\ ((b \land j) & \Rightarrow & S) & \land \\ ((B \land J) & \Rightarrow & S) & \land \\ & & (s) & \land \\ & & (j). & \end{matrix}$$

A formula that is a conjunction of implications in which no variable is complemented is called a Horn formula. To solve the puzzle, we need to find a satisfying truth assignment for the variables in the formula. While the problem of Boolean Satisfiability (SAT) is in general intractable, Boolean satisfiability for Horn clauses – HornSAT – can be solved in linear time using a greedy algorithm.

The algorithm works as follows:

1. Make a set of truth assignments for the variables, starting with them all set to False
2. Make a set W of all of the variables that appear on the right hand side of an empty implication
3. While W is not empty:
1. Remove an arbitrary element v from W
2. Set v to True
3. For each clause c where v appears on the left hand side:
1. Delete v from the left hand side of c
2. If this makes c and empty implication, add the variable on the right hand side of c to W, unless it is already there
4. Return the set of truth assignments

Here is the implementation in Python:

def greedy_hornsat(variables, clauses):
"""Take a list of variables and horn clauses and return a truth assignment
to the variables that satisfies all of them"""
assignment = dict((v, False) for v in variables)
working = set(t for t in clauses if not len(t))
while working:
v = working.pop()
assignment[v] = True
vclauses = [ c for c in clauses if v in c]
for vclause in vclauses:
vclause.remove(v)
if not vclause:

return assignment


An example program using the puzzle above:

def main():
variables = ['s', 'j', 'b', 'p', 'S', 'J', 'B', 'P']
clauses = [(['s'], 'B'),
(['j'], 'J'),
(['s', 'J'], 'p'),
(['b', 'j'], 'S'),
(['B', 'J'], 'S'),
([], 's'),
([], 'j')]
assignment = greedy_hornsat(variables, clauses)
print(assignment)

if __name__ == '__main__':
main()


The output:

{'b': False, 'P': False, 'J': True, 'j': True, 'S': True, 'p': True, 's': True, 'B': True}


So we know that Mr. Jones and Mr. Smith both have a dog and a cat, Mrs. Brown has a cat, and Mrs. Peacock has a dog. Mrs. Brown may also have a dog, and Mrs. Peacock may also have a cat, but we don’t know those.

Reference: Notes for CS 170

# Stable Marriages in C

The stable marriages problem is to take a set of men and an equally sized set of women, and their marriage preferences, and to find the set of marriages that ensures that no two individuals not married to each other both prefer the other to their current match. The problem can be solved by a greedy algorithm as follows:

1. Take the first unengaged man
2. He proposes to his most-preferred woman to whom he has not proposed before, regardless of whether she is already engaged
3. If she is unengaged or prefers him to her current fiance, they become engaged, and the current fiance, if any is dropped
4. Continue until there are no unengaged men (and consequently no unengaged women)

Here it is in C:

/* Check if woman prefers m to p */
unsigned int woman_prefers(unsigned int w, unsigned int m, unsigned int p,
int **women_preferences, size_t n)
{
int *preferences = women_preferences[w];
unsigned int i;
/* Return on the first one found with p winning a tie */
for (i = 0; i < n; i++) {
if (preferences[i] == p) {
return 1;
}
if (preferences[i] == m) {
return 0;
}
}
return 1;
}

void stable_marriages(int **men_preferences, int **women_preferences, int *fiances, size_t n)
{
unsigned int *next_proposals; /* Index in preference list for each man */
unsigned int *free_men;
unsigned int fm = n; /* Number of free men */
unsigned int i;

next_proposals = calloc(n, sizeof(unsigned int));
free_men = malloc(n * sizeof(unsigned int));
if (next_proposals == NULL || free_men == NULL) {
free(next_proposals);
free(free_men);
return;
}
memset(fiances, -1, n * sizeof(int));
for (i = 0; i < n; i++) {
free_men[i] = i;
}

while (fm > 0) {
unsigned int m = free_men;
unsigned int w = next_proposals[m];
/* Find out if w has a partner */
unsigned int p = fiances[w];
if (p == -1) {
/* m and w get engaged */
fiances[w] = m;
free_men = free_men[fm - 1];
fm--;
}
else {
/* Check if w prefers m to p */
if (woman_prefers(w, m, p, women_preferences, n)) {
/* Break off engagement with p */
free_men = p;
/* m and w get engaged */
fiances[w] = m;
}
}
/* Don't propose to w again */
next_proposals[m]++;
}
free(next_proposals);
free(free_men);
}


Example program:

#include <stdio.h>

#include <stable_marriage.h>

int main(void)
{
/* Preferences are ids in preference order */
int m0[] = {0, 1, 2};
int m1[] = {2, 1, 0};
int m2[] = {1, 0, 2};
int w0[] = {0, 2, 1};
int w1[] = {2, 0, 1};
int w2[] = {1, 2, 0};
int *men_preferences[] = {m0, m1, m2};
int *women_preferences[] = {w0, w1, w2};
int marriages;
const size_t n = 3;
unsigned int m;

stable_marriages(men_preferences, women_preferences, marriages, n);
for (m = 0; m < n; m++) {
printf("m%d -- w%d\n", m, marriages[m]);
}
return 0;
}


Output:

m0 -- w1
m1 -- w0
m2 -- w2


# Scheduling to minimise lateness in C

In this problem, we have a single resource that processes jobs one at a time. Each job has a time requirement to complete, and a due time. The goal is to schedule the jobs so as to minimise the maximum lateness, i.e. to minimise the lateness of the latest job. This can be solved using a simple greedy algorithm:

1. Sort the jobs by due time
2. Schedule them in that order

Here is the algorithm implemented in C. It returns the lateness of the latest job, and populates an array with the order of the jobs.

#include <stdlib.h>
#include <string.h>

typedef struct  {
unsigned int time;
unsigned int due;
unsigned int index;
} job;

/* Compare jobs by due time */
static int compare_jobs(const job *job1, const job *job2)
{
if (job1->due < job2->due) {
return -1;
}
if (job1->due == job2->due) {
return 0;
}
return 1;
}

unsigned int minimise_lateness(const unsigned int *times, unsigned int *dues,
unsigned int *schedule, size_t len)
{
unsigned int i;
job *jobs;
unsigned int max_lateness = 0;
unsigned int t = 0;

jobs = malloc(len * sizeof(job));
if (jobs == NULL) {
return 0;
}
for (i = 0; i < len; i++) {
jobs[i].time = times[i];
jobs[i].due = dues[i];
jobs[i].index = i;
}
/* Sort the jobs by due time */
qsort(jobs, len, sizeof(job), (int(*)(const void *, const void *))compare_jobs);
/* Schedule them in that order and find max lateness */
for (i = 0; i < len; i++) {
schedule[i] = jobs[i].index;
t += jobs[i].time;
if (t > jobs[i].due && t - jobs[i].due > max_lateness) {
max_lateness = t - jobs[i].due;
}
}
free(jobs);
return max_lateness;
}


Example program:

#include <stdio.h>
#include <stdlib.h>

static void print_schedule(const unsigned int *schedule, const unsigned int *times,
const unsigned int *dues, size_t len)
{
unsigned int i;
unsigned int t = 0;
for (i = 0; i < len; i++) {
unsigned int finish = t + times[schedule[i]];
printf("Job %u starting at %u and finishing at %u with lateness %u\n", i,
t, finish, finish > dues[schedule[i]] ?
finish - dues[schedule[i]] : 0);
t = finish;
}
}

int main(void)
{
unsigned int times[] = {3, 2, 1, 4, 3, 2};
unsigned int dues[] = {6, 8, 9, 9, 14, 15};
const size_t len = sizeof(times) / sizeof(times);
unsigned int *schedule = malloc(len * sizeof(unsigned int));
unsigned int max_lateness = minimise_lateness(times, dues, schedule, len);
printf("Max lateness is %u\n", max_lateness);
print_schedule(schedule, times, dues, len);
free(schedule);
return 0;
}


Output:

Max lateness is 1
Job 0 starting at 0 and finishing at 3 with lateness 0
Job 1 starting at 3 and finishing at 5 with lateness 0
Job 2 starting at 5 and finishing at 6 with lateness 0
Job 3 starting at 6 and finishing at 10 with lateness 1
Job 4 starting at 10 and finishing at 13 with lateness 0
Job 5 starting at 13 and finishing at 15 with lateness 0


# Interval Scheduling in C

In the interval scheduling problem, we have a set of jobs J, each of which has a start time an end time. The goal is to find the largest subset of jobs that can be scheduled such that they don’t overlap. This can be solved using a greedy algorithm:

1. Sort the set of jobs J by finishing time
2. Begin with an empty set S of scheduled jobs
3. For each job in J, add it to S if it is compatible with all of the other jobs in S

A job is compatible with another if they don’t overlap in time. Since we have already sorted by finish time, a job will be compatible with all of the jobs in S as long as its start time is greater than the finish time of the last job added to S.

typedef struct {
unsigned int start;
unsigned int finish;
unsigned int index;
} job;

/* Compare jobs by finishing time */
int compare_jobs(const job *job1, const job *job2)
{
if (job1->finish < job2->finish) {
return -1;
}
if (job1->finish == job2->finish) {
return 0;
}
return 1;
}

unsigned int interval_scheduling(const unsigned int *starts, const unsigned int *finishes,
unsigned int *schedule, size_t len)
{
unsigned int i;
job *jobs;
job *last;
unsigned int count = 0;

jobs = malloc(len * sizeof(job));
if (jobs == NULL) {
return 0;
}
for (i = 0; i < len; i++) {
jobs[i].start = starts[i];
jobs[i].finish = finishes[i];
jobs[i].index = i;
}
/* Sort the jobs by finishing time */
qsort(jobs, len, sizeof(job), (int(*)(const void *, const void *))compare_jobs);
memset(schedule, 0, len * sizeof(unsigned int));
for (i = 0; i < len; i++) {
/* A job is compatible if its start time >= the last added job's finish time */
if (i == 0 || jobs[i].start >= last->finish) {
schedule[jobs[i].index] = 1;
last = &(jobs[i]);
count++;
}
}
free(jobs);
return count;
}



An example program:

void print_schedule(const unsigned int *schedule, const unsigned int *starts,
const unsigned int *finishes, size_t len)
{
unsigned int i;
for (i = 0; i < len; i++) {
if (schedule[i]) {
printf("Job %u starting at %u and finishing at %u\n", i,
starts[i], finishes[i]);
}
}
}

int main(void)
{
unsigned int starts[] = {0, 1, 3, 4, 5, 6, 8};
unsigned int finishes[] = {6, 4, 5, 8, 7, 9, 10, 11};
const size_t len = sizeof(starts) / sizeof(starts);
unsigned int *schedule = malloc(len * sizeof(unsigned int));
unsigned int jobs = interval_scheduling(starts, finishes, schedule, len);
printf("There are %d jobs:\n", jobs);
print_schedule(schedule, starts, finishes, len);
free(schedule);
return 0;
}


Output:

There are 3 jobs:
Job 1 starting at 1 and finishing at 4
Job 4 starting at 5 and finishing at 7
Job 6 starting at 8 and finishing at 10