Tag Archives: Data Structures

Hash table in C

This is a hash table in C using singly-linked lists for overflow chains.

Here is an example program:

#include <stdio.h>
#include <string.h>

#include <hashtable.h>

int main(void)
{
    hashtable * table;
    const char * result;
    unsigned int e;
    char * elements[] = {"A", "B", "C", "D", "E", "F"};
    const unsigned int n = sizeof(elements) / sizeof(const char*);

    table = hashtable_create(7, (hashtable_cmpfn)strcmp);
    for (e = 0; e < n; e++) {
        hashtable_add(table, elements[e]);
    }
    hashtable_for_each(table, (hashtable_forfn)puts);
    for (e = 0; e < n; e++) {
        result = hashtable_find(table, elements[e]);
        if (result) {
            printf("Found: %s\n", result);
        }
        else {
            printf("Couldn't find %s\n", elements[e]);
        }
    }
    printf("The load factor is %.2f\n", hashtable_get_load_factor(table));
    for (e = 0; e < n; e++) {
        result = hashtable_remove(table, elements[e]);
        if (result) {
            printf("Removed: %s\n", result);
        }
        else {
            printf("Couldn't remove %s\n", elements[e]);
        }
    }
    hashtable_delete(table);

    return 0;
}

The header file:

#ifndef HASHTABLE_H
#define HASHTABLE_H

struct hashnode {
	void * data;
	struct hashnode * next;
};
typedef struct hashnode hashnode;

typedef int (*hashtable_cmpfn)(const void*, const void*);
typedef unsigned long (*hashtable_hashfn)(const void*);
typedef void (*hashtable_forfn)(void *);
typedef void (*hashtable_forfn2)(void *, void *);

struct hashtable {
	unsigned int size;
	hashtable_hashfn hash;
	hashtable_cmpfn compare;
	hashnode **buckets;
	size_t count;
};
typedef struct hashtable hashtable;

hashtable *hashtable_create(size_t size, hashtable_cmpfn compare);
void hashtable_empty(hashtable * table);
void hashtable_delete(hashtable * table);
void *hashtable_add(hashtable * table, void * data);
void *hashtable_find(const hashtable * table, const void * data);
void *hashtable_remove(hashtable * table, const void * data);
float hashtable_get_load_factor(const hashtable * table);
unsigned int hashtable_get_count(const hashtable * table);
unsigned int hashtable_find_count(const hashtable *table);
void hashtable_for_each(const hashtable * table, hashtable_forfn fun);
void hashtable_for_each2(const hashtable * table, hashtable_forfn2 fun, void *data);
void hashtable_set_hashfn(hashtable * table, hashtable_hashfn hash);

#endif /* HASHTABLE_H */

The implementation:

#include <stdlib.h>

#include <hashtable.h>

hashnode * hashnode_create(void * data)
{
	hashnode * node = malloc(sizeof(hashnode));
	if (node) {
		node->data = data;
		node->next = NULL;
	}
	return node;
}

void hashnode_delete(hashnode * node)
{
	free(node);
}

static unsigned long sdbm(const char *str)
{
    unsigned long hash = 0;
    int c;

    while ((c = *str++))
        hash = c + (hash << 6) + (hash << 16) - hash;

    return hash;
}

hashtable * hashtable_create(size_t size, hashtable_cmpfn compare)
{
	hashtable * table = malloc(sizeof (hashtable));
	if (table) {
		table->size = size;
		table->hash = (hashtable_hashfn)sdbm;
		table->compare = compare;
		table->count = 0;
		table->buckets = malloc(size * sizeof(hashnode *));
		if (table->buckets) {
			unsigned int b;
			for (b = 0; b < size; b++) {
				table->buckets[b] = NULL;
			}
		}
		else {
			free(table);
			table = NULL;
		}
	}
	return table;
}

void hashtable_empty(hashtable * table)
{
	unsigned int i;
	hashnode * temp;
	for (i = 0; i < table->size; i++) {
		hashnode * current = table->buckets[i];
		while (current != NULL) {
			temp = current->next;
			hashnode_delete(current);
			current = temp;
		}
        table->buckets[i] = NULL;
	}
	table->count = 0;
}

void hashtable_delete(hashtable * table)
{
	if (table) {
		hashtable_empty(table);
		free(table->buckets);
		free(table);
	}
}

void * hashtable_add(hashtable * table, void * data)
{
	const unsigned int bucket = table->hash(data) % table->size;
	void * found = NULL;

	if (table->buckets[bucket] == NULL) {
		/* An empty bucket */
		table->buckets[bucket] = hashnode_create(data);
	}
	else {
        unsigned int added = 0;
		hashnode * current, * previous = NULL;
		for (current = table->buckets[bucket]; current != NULL && !found && !added; current = current->next) {
			const int result = table->compare(current->data, data);
			if (result == 0) {
				/* Changing an existing entry */
				found = current->data;
				current->data = data;
			}
			else if (result > 0) {
				/* Add before current */
				hashnode * node = hashnode_create(data);
				node->next = current;
				if (previous == NULL) {
					/* Adding at the front */
					table->buckets[bucket] = node;
				}
				else {
					previous->next = node;
				}
                added = 1;
			}
            previous = current;
		}
		if (!found && !added && current == NULL) {
			/* Adding at the end */
			previous->next = hashnode_create(data);
		}
	}
	if (found == NULL) {
		table->count++;
	}

	return found;
}

void * hashtable_find(const hashtable * table, const void * data)
{
	hashnode * current;
	const unsigned int bucket = table->hash(data) % table->size;
	void * found = NULL;
	unsigned int passed = 0;
	for (current = table->buckets[bucket]; current != NULL && !found && !passed; current = current->next) {
		const int result = table->compare(current->data, data);
		if (result == 0) {
			found = current->data;
		}
		else if (result > 0) {
			passed = 1;
		}
	}
	return found;
}

void * hashtable_remove(hashtable * table, const void * data)
{
	hashnode * current, * previous = NULL;
	const unsigned int bucket = table->hash(data) % table->size;
	void * found = NULL;
	unsigned int passed = 0;

	current = table->buckets[bucket];
	while (current != NULL && !found && !passed) {
		const int result = table->compare(current->data, data);
		if (result == 0) {
			found = current->data;
			if (previous == NULL) {
				/* Removing the first entry */
				table->buckets[bucket] = current->next;
			}
			else {
				previous->next = current->next;
			}
			hashnode_delete(current);
			table->count--;
		}
		else if (result > 0) {
			passed = 1;
		}
		else {
			previous = current;
	 		current = current->next;
		}
	}
	return found;
}


float hashtable_get_load_factor(const hashtable * table)
{
	unsigned int touched = 0;
	float loadfactor;
	unsigned int b;
	for (b = 0; b < table->size; b++) {
		if (table->buckets[b] != NULL) {
			touched++;
		}
	}
	loadfactor = (float)touched / (float)table->size;
	return loadfactor;
}

unsigned int hashtable_get_count(const hashtable * table)
{
	return table->count;
}

unsigned int hashtable_find_count(const hashtable *table)
{
	unsigned int b;
	const hashnode *node;
	unsigned int count = 0;
	for (b = 0; b < table->size; b++) {
		for (node = table->buckets[b]; node != NULL; node = node->next) {
			count++;
		}
	}
	return count;
}

void hashtable_for_each(const hashtable * table, hashtable_forfn fun)
{
	unsigned int b;

	for (b = 0; b < table->size; b++) {
		const hashnode *node;
		for (node = table->buckets[b]; node != NULL; node = node->next) {
			fun(node->data);
		}
	}
}


void hashtable_for_each2(const hashtable * table, hashtable_forfn2 fun, void *data)
{
	unsigned int b;

	for (b = 0; b < table->size; b++) {
		const hashnode *node;
		for (node = table->buckets[b]; node != NULL; node = node->next) {
			fun(node->data, data);
		}
	}
}

void hashtable_set_hashfn(hashtable * table, hashtable_hashfn hash)
{
    table->hash = hash;
}

Graph data structures

Contents

Introduction

In this post, I introduce the concept of a graph and describe some ways of representing graphs in C.

Definitions

Graphs, vertices and edges

A graph is a collection of nodes called vertices, and the connections between them, called edges.

Undirected and directed graphs

When the edges in a graph have a direction, the graph is called a directed graph or digraph, and the edges are called directed edges or arcs.
Here, I shall be exclusively concerned with directed graphs, and so when I refer to an edge, I mean a directed edge.
This is not a limitation, since an undirected graph can easily be implemented as a directed graph by adding edges between connected vertices in both directions.

A representation can often be simplified if it is only being used for undirected graphs, and I’ll mention in passing how this can be achieved.

Neighbours and adjacency

A vertex that is the end-point of an edge is called a neighbour of the vertex that is its starting-point.
The first vertex is said to be adjacent to the second.

An example

The following diagram shows a graph with 5 vertices and 7 edges.
The edges between A and D and B and C are pairs that make a bidirectional connection, represented here by a double-headed arrow.

Graph showing 5 vertices labelled A to E

Mathematical definition

More formally, a graph is an ordered pair, G = <V, A>, where V is the set of vertices, and A, the set of arcs, is itself a set of ordered pairs of vertices.

For example, the following expressions describe the graph shown above in set-theoretic language:

V = {A, B, C, D, E}
A = {<A, B>, <A, D>, <B, C>, <C, B>, <D, A>, <D, C>, <D, E>}

Functions

A graph implementation needs a basic set of functions to assemble and modify graphs, and to enumerate vertices, edges and neighbours.

The following functions are provided by each representation.
These are the declarations for the intuitive representation, graph1:

graph1 *graph1_create(void);
Create an empty graph
void graph1_delete(graph1 *graph);
Delete a graph
vertex *graph1_add(graph1 *graph, const char *name, void *data);
Add a vertex to the graph with a name, and optionally some data
vertex *graph1_get_vertex(const graph1 *graph, const char *name);
Retrieve a vertex by name
void *graph1_remove(graph1 *graph, vertex *vertex);
Remove a vertex
void graph1_add_edge(graph1 *graph, vertex *vertex1, vertex *vertex2);
Create a directed edge between vertex1 and vertex2
void graph1_remove_edge(graph1 *graph, vertex *vertex1, vertex *vertex2);
Remove the directed edge from vertex1 to vertex2
unsigned int graph1_get_adjacent(const graph1 *graph, const vertex *vertex1, const vertex *vertex2);
Determine if there is an edge from vertex1 to vertex2
iterator *graph1_get_neighbours(const graph1 *graph, const vertex *vertex);
Get the neighbours of a vertex
iterator *graph1_get_edges(const graph1 *graph);
Get all of the edges in the graph
iterator *graph1_get_vertices(const graph1 *graph);
Get all of the vertices in the graph
unsigned int graph1_get_neighbour_count(const graph1 *graph, const vertex *vertex);
Get the count of neighbours of a vertex
unsigned int graph1_get_edge_count(const graph1 *graph);
Get the count of edges in the graph
unsigned int graph1_get_vertex_count(const graph1 *graph);
Get the count of vertices in the graph

Representation of vertices and edges

Vertices

All of the graph representations use the following definition of a vertex:

typedef struct {
    char *name;
    void *data;
    void *body;
    deletefn del;
} vertex;

Note the body field, which is not of interest to clients, but is used by some representations (Adjacency List and Incidence List) to add per-vertex strucure.

The following functions are provided for working with vertices:

const char *vertex_get_name(const vertex *v);
Get the vertex’s name
void *vertex_get_data(const vertex *v);
Get the data associated with a vertex

Edges

How edges are implemented internally varies with the representation.
In fact, in three representations, Adjacency List, Adjacency Matrix and Incidence Matrix, edges do not exist internally as objects at all.
From the viewpoint of clients however, edges, as enumerated by the iterator returned by the function to retrieve edges, are this structure:

typedef struct {
    vertex *from;
    vertex *to;
} edge;

The following functions are provided for working with edges:

const vertex *edge_get_from(const edge *e);
Get the vertex that is the starting-point of an edge
const vertex * edge_get_to(const edge *e);
Get the vertex that is the end-point of an edge

Example program

The following program constructs the graph shown in the introduction using the intuitive representation, graph1, and then enumerates the vertices, neighbours and edges:

#include <stdio.h>

#include <graph1.h>

int main(void)
{
    graph1 *graph;
    vertex *v;
    vertex *A, *B, *C, *D, *E;
    iterator *vertices, *edges;
    edge *e;

    /* Create a graph */
    graph = graph1_create();

    /* Add vertices */
    A = graph1_add(graph, "A", NULL);
    B = graph1_add(graph, "B", NULL);
    C = graph1_add(graph, "C", NULL);
    D = graph1_add(graph, "D", NULL);
    E = graph1_add(graph, "E", NULL);

    /* Add edges */
    graph1_add_edge(graph, A, B);
    graph1_add_edge(graph, A, D);
    graph1_add_edge(graph, B, C);
    graph1_add_edge(graph, C, B);
    graph1_add_edge(graph, D, A);
    graph1_add_edge(graph, D, C);
    graph1_add_edge(graph, D, E);

    /* Display */
    printf("Vertices (%d) and their neighbours:\n\n", graph1_get_vertex_count(graph));
    vertices = graph1_get_vertices(graph);
    while ((v = iterator_get(vertices))) {
        iterator *neighbours;
        vertex *neighbour;
        unsigned int n = 0;
        printf("%s (%d): ", vertex_get_name(v), graph1_get_neighbour_count(graph, v));
        neighbours = graph1_get_neighbours(graph, v);
        while ((neighbour = iterator_get(neighbours))) {
            printf("%s", vertex_get_name(neighbour));
            if (n < graph1_get_neighbour_count(graph, vertex) - 1) {
                fputs(", ", stdout);
            }
            n++;
        }
        putchar('\n');
        iterator_delete(neighbours);
    }
    putchar('\n');
    iterator_delete(vertices);
    printf("Edges (%d):\n\n", graph1_get_edge_count(graph));
    edges = graph1_get_edges(graph);
    while ((e = iterator_get(edges))) {
        printf("<%s, %s>\n", vertex_get_name(edge_get_from(e)), vertex_get_name(edge_get_to(e)));
    }
    putchar('\n');
    iterator_delete(edges);

    /* Delete */
    graph1_delete(graph);

    return 0;
}

Graph representations

There are essentially 5 ways of representing a graph:

The intuitive representation: graph1

What I call the "intuitive" and can also called the "object-oriented" representation is a direct translation of the mathematical definition of a graph into a data type:

typedef struct {
    set *vertices;
    set *edges;
} graph1;
  • Adding a vertex simply requires adding it to the vertex set.
  • Adding an edge simply requires adding it to the edge set.
  • Removing vertices and edges simply means removing them from the respective sets.
  • To find a vertex’s neighbours, search the edge set for edges having the vertex as the from field.
  • To determine if two vertices are adjacent, search the edge set for an edge having the first vertex as its from field, and the second vertex as its to field.
  • Getting all of the edges is easy; just return an iterator over the edge set.
  • For undirected graphs, each edge would be stored only once, and getting neighbours and adjacency testing would look at both vertices.

    The edge object would not be from and to but simply first and second, i.e., an unordered pair.

  • This is one of the representations where edges exist internally as objects (Incidence List is the other).
  • This is most like a sparse Adjacency Matrix, with the edge set holding those pairs that are adjacent, and non-adjacent pairs being absent.

    Adjacency List: graph2

    The graph is made up of a set of vertices.
    Each vertex contains a set of vertices for its neighbours.

    typedef struct {
        set *vertices;
    } graph2;
    
    
    typedef struct {
        set *neighbours;
    } vertex_body;
    

    For the graph shown in the introduction, the sets of neighbours would look like this:

    A: {B, D}
    B: {C}
    C: {B}
    D: {A, C, E}
    E: {}
    
    • Adding a vertex just means adding it to the vertex set.
    • Adding an edge means adding the end-point of it to the starting vertex’s neighbour set.
    • It is easy to go from a vertex to its neighbours, because the vertex stores them all.

      Just return an iterator over them.

      This makes the graph argument in the function to retrieve neighbours unnecessary in this implementation.

    • Testing for adjacency is easy; just search the first vertex’s neighbours for the second vertex.
    • Getting all edges is more difficult to implement in this representation, because edges don’t exist as objects.

      You need to iterate over the neighbours of each vertex in turn, and construct the edge from the vertex and the neighbour.

    Adjacency Matrix: graph3

    The graph is made up of a set of vertices and a matrix, whose rows and columns are indexed by vertices, and which contains a 1 entry if the vertices are connected.

    typedef struct {
        set    *vertices;
        matrix *edges;
    } graph3;
    

    The adjacency matrix for the graph shown in the introduction would look like this:

    \(
    \begin{array}{c c} &
    \begin{array}{c c c} A & B & C & D & E \\
    \end{array}
    \\
    \begin{array}{c c c}
    A\\
    B\\
    C\\
    D\\
    E\\
    \end{array}
    &
    \left[
    \begin{array}{c c c}
    0 & 1 & 0 & 1 & 0 \\
    0 & 0 & 1 & 0 & 0 \\
    0 & 1 & 0 & 0 & 0 \\
    1 & 0 & 1 & 0 & 1 \\
    0 & 0 & 0 & 0 & 0
    \end{array}
    \right]
    \end{array}
    \)

    • When adding a vertex, add a row and column to the matrix.
    • When removing a vertex, remove its row and column.
      As adding and removing rows and columns is expensive, these make the adjacency matrix unsuitable for graphs in which vertices are frequently added and removed.

    • Adding and removing edges is easy however, and requires no allocation or deallocation of memory, just setting a matrix element.
    • To get neighbours, look along the vertex’s row for 1s.
    • To determine adjacency, look for a 1 at the intersection of the first vertex’s row and the second vertex’s column.
    • To get the edge set, find all of the 1s in the matrix and construct the edges from the corresponding vertices.
    • If the graph is undirected, the matrix will be symmetrical about the main diagonal.
      This means that you can drop half of it, making a triangular matrix.

    • The vertex set needs to be ordered so that the index number of vertices can be looked up, or the matrix needs to be a 2-d map keyed by the vertices themselves.
    • Memory used for edges is a constant |V|2.
      The best use of this is a graph that is nearly complete, i.e., has a lot of edges.

    • The matrix can be sparse; this relates the memory usage more closely to the number of edges.
      It also makes addition and removal of columns easier (no block shifts), but requires renumbering afterwards.

    • You can use booleans or bits in the matrix to save memory.

    Incidence Matrix: graph4

    The graph is made up of a set of vertices and a matrix, as in Adjacency Matrix, but the matrix is vertices × edges, with each column containing two non-zero entries, one for the starting-point vertex and one for the end-point.

    typedef struct {
        set    *vertices;
        matrix *edges;
    } graph4;
    

    The incidence matrix for the graph shown in the introduction looks like this (1 means "from" and 2 means "to"):

    \(
    \begin{array}{c c} &
    \\
    \begin{array}{c c c}
    A\\
    B\\
    C\\
    D\\
    E\\
    \end{array}
    &
    \left[
    \begin{array}{c c c}
    1 & 1 & 0 & 0 & 2 & 0 & 0 \\
    2 & 0 & 1 & 2 & 0 & 0 & 0 \\
    0 & 0 & 2 & 1 & 0 & 2 & 0 \\
    0 & 2 & 0 & 0 & 1 & 1 & 1 \\
    0 & 0 & 0 & 0 & 0 & 0 & 2 \\
    \end{array}
    \right]
    \end{array}
    \)

    • When you add a vertex, you add a row to the matrix.
    • When you add an edge, you add a column to the matrix.
    • When you remove a vertex, you need to remove all of the columns containing the vertex from the matrix.
    • Getting the edges means iterating over the columns and constructing the edges from the two values.
    • To find neighbours, look for 1s in the vertex’s row, and in each such column look for the 2 value, which is the neighbour.
    • To determine adjacency, find a column containing a 1 in the starting-point vertex’s row, and a 2 in the end-point’s row.
    • For an undirected graph, you have one column per edge, and just the value 1 for "connected", so each column contains two 1s.

    Incidence List: graph5

    There is a set of vertices as in Adjacency List, but each vertex stores a list of the edges that it is the starting-point of, rather than neighbours.

    typedef struct {
        set * vertices;
    } graph5;
    
    typedef struct {
        set *edges;
    } vertex_body;
    

    For the graph shown in the introduction, the sets of edges would look like this:

    A: {<A, B>, <A, D>}
    B: {<B, C>}
    C: {<C, B>}
    D: {<D, A>, <D, C>, <D, E>}
    E: {}
    
    • Adding a vertex just means adding it to the vertex set.
    • Adding an edge means adding it to its starting vertex’s edge set.
    • Finding if two vertices are adjacent requires searching the first vertex’s edge set for an edge containing the second vertex as its to field.
    • Getting the neighbours requires retrieving them from the pairs in the set of edges for the vertex.
    • Getting the edge set requires enumerating each of the vertices’ edge sets in turn.
    • You can store the edges in the graph object as well as in each vertex.
  • Boggle in Python

    This is a solver for the game “Boggle”. The aim of the game is to find as many words as possible in a 4-by-4 grid randomly filled with letters. You are allowed to go up, down, left, right, or diagonally, but not use the same letter more than once.

    First, here’s a prefix tree, which is the ideal structure for looking up words one letter at a time:

    class PrefixTree(object):
        def __init__(self, letter=None):
            self.letter = letter
            self.children = {}
            self.stop = False
    
        def add(self, word):
            if len(word):
                letter = word[0]
                word = word[1:]
                if letter not in self.children:
                    self.children[letter] = PrefixTree(letter);
                return self.children[letter].add(word)
            else:
                self.stop = True
                return self
    
        def find_letter(self, letter):
            if letter not in self.children:
                return None
            return self.children[letter]
    
        def __repr__(self):
            return "PrefixTree(letter={0}, stop={1})".format(self.letter, self.stop)
    

    Here’s the code for the game solver itself. It simply searches recursively starting at each cell in the game, and looks up the sequences of letters it builds up in the prefix tree:

    from random import choice
    from prefixtree import PrefixTree
    
    class Boggle(object):
        def __init__(self, board=None):
            self.size = 4
            if board is None:
                self.board = []
                for i in range(0, self.size):
                    self.board.append([])
                    for j in range(0, self.size):
                        self.board[i].append(Boggle.random_letter())
            else:
                self.board = board
    
        @staticmethod
        def random_letter():
            return chr(choice(range(65,91)))
    
        def play(self, tree, found):
            for r in range(0, self.size):
                for c in range(0, self.size):
                    self.search_r(tree, found, r, c)
    
        def search_r(self, tree, found, row, col, path=None, node=None, word=None):
            letter = self.board[row][col]
            if node is None or path is None or word is None:
                node = tree.find_letter(letter)
                path = [(row, col)]
                word = letter
            else:
                node = node.find_letter(letter)
                path.append((row, col))
                word = word + letter
            if node is None:
                return
            elif node.stop:
                found.add(word)
            for r in range(row - 1, row + 2):
                for c in range(col - 1, col + 2):
                    if (r >= 0 and r < self.size
                            and c >= 0 and c < self.size 
                            and not (r == row and c == col) 
                            and (r, c) not in path):
                        self.search_r(tree, found, r, c, path[:], node, word[:])
    
        def __repr__(self):
            return "Boggle(size={0}, board={1})".format(self.size, self.board)
    

    I used this dictionary as a source of words. You may want to use a slightly smaller one.
    Here is the main program:

    def load_tree(tree):
        with open('dictionary.txt') as f:
            for line in f:
                word = line.rstrip().upper()
                tree.add(word)
    
    def main():
        boggle = Boggle()
        tree = PrefixTree()
        load_tree(tree)
        found = set()
        boggle.play(tree, found)
        for word in sorted(found):
            print word
        print boggle
    
    if __name__ == '__main__':
        main()
    

    Finding the minimum depth of a binary tree recursively

    This is the counterpart of finding the height of a binary tree recursively. We start with a depth of 0, and only if a node has 2 children, we add the minimum of their depths.

    static int min(int a, int b)
    {
        if (a <= b) {
            return a;
        }
        return b;
    }
    
    unsigned int binarytree_minimum_depth_recursive(const btnode *node)
    {
        unsigned int depth = 0;
        if (node->left && node->right) {
            depth = min(binarytree_minimum_depth_recursive(node->left), 
                    binarytree_minimum_depth_recursive(node->right)) + 1;
        }
        return depth;
    }
    
    unsigned int binarytree_minimum_depth(const binarytree *tree)
    {
        unsigned int depth = 0;
        if (tree->root) {
            depth = binarytree_minimum_depth_recursive(tree->root);
        }
        return depth;
    }
    

    Related

    Deque in C

    A deque or double-ended queue is a data structure that allows efficient addition and removal at either end. It may also support accessing elements by index, as this implementation does.

    This deque is implemented as a dynamic array of fixed-size arrays. The first array is filled from right to left, and the last one from left to right, as shown in the following diagram:

    Diagram showing the deque implementation

    When an array at either end becomes full, a new one is added, and when one is emptied, it is removed.

    Here is the header file:

    #ifndef DEQUE_H
    #define DEQUE_H
    
    #include <dynarray.h>
    
    typedef struct {
    	dynarray *arrays;
    	unsigned int arraysize;
    	unsigned int front;
    	unsigned int back;
    	unsigned int firstempty;
    	unsigned int lastempty;
    	unsigned int count;
    } deque;
    
    typedef void (*deque_forfn)(void*);
    
    deque * deque_create(void);
    void deque_delete(deque * queue);
    void deque_push_front(deque * queue, void * data);
    void deque_push_back(deque * queue, void * data);
    void * deque_pop_front(deque * queue);
    void * deque_pop_back(deque * queue);
    void * deque_get_at(const deque *queue, unsigned int index);
    void * deque_set_at(deque *queue, unsigned int index, void * data);
    void * deque_peek_front(const deque * queue);
    void * deque_peek_back(const deque * queue);
    void deque_for_each(const deque * queue, deque_forfn fun);
    
    #endif /* DEQUE_H */
    

    The implementation:

    #include <stdlib.h>
    
    #include <deque.h>
    
    deque * deque_create(void)
    {
    	deque *queue = malloc(sizeof(deque));
    	if (queue) {
    		queue->arrays = dynarray_create(0);
    		queue->arraysize = 4;
    		queue->firstempty = 1;
    		queue->lastempty = 1;
    		queue->count = 0;
    		dynarray_add_head(queue->arrays, malloc(queue->arraysize * sizeof(void*)));
    	}
    	return queue;
    }
    
    void deque_delete(deque * queue)
    {
    	if (queue) {
    		dynarray_for_each(queue->arrays, free);
    		dynarray_delete(queue->arrays);
    		free(queue);
    	}
    }
    
    void deque_push_front(deque * queue, void * data)
    {
    	unsigned int index;
    	if (queue->count == 0) {
    		/* Adding the first element */
    		index = queue->arraysize / 2;
    	}
    	else if (queue->firstempty) {
    		/* There is an empty array at the beginning */
    		index = queue->arraysize - 1;
    	}
    	else if (queue->front == 0) {
    		/* Beginning array is full - add another */
    		dynarray_add_head(queue->arrays, malloc(queue->arraysize * sizeof(void*)));
    		index = queue->arraysize - 1;
    	}
    	else {
    		index = queue->front - 1;
    	}
    	((void**)dynarray_get(queue->arrays, 0))[index] = data;
    	queue->front = index;
    	queue->firstempty = 0;
    	if (queue->arrays->count == 1) {
    		queue->lastempty = 0;
    	}
    	queue->count++;
    	if (queue->count == 1) {
    		queue->back = queue->front;
    	}
    }
    
    void deque_push_back(deque * queue, void * data)
    {
    	unsigned int index;
    	if (queue->count == 0) {
    		/* Adding the first element */
    		index = queue->arraysize / 2;
    	}
    	else if (queue->lastempty) {
    		/* There is an empty array at the end */
    		index = 0;
    	}
    	else if (queue->back == queue->arraysize - 1) {
    		/* End array is full - add another */
    		dynarray_add_tail(queue->arrays, malloc(queue->arraysize * sizeof(void*)));
    		index = 0;
    	}
    	else {
    		index = queue->back + 1;
    	}
    	((void**)dynarray_get(queue->arrays, queue->arrays->count - 1))[index] = data;
    	queue->back = index;
    	queue->lastempty = 0;
    	if (queue->arrays->count == 1) {
    		queue->firstempty = 0;
    	}
    	queue->count++;
    	if (queue->count == 1) {
    		queue->front = queue->back;
    	}
    }
    
    void * deque_pop_front(deque * queue)
    {
    	void *data = NULL;
    	if (queue->count) {
    		if (queue->firstempty) {
    			/* There is an empty array at the beginning */
    			free(dynarray_remove_head(queue->arrays));
    			queue->firstempty = 0;
    		}
    		data = ((void**)dynarray_get(queue->arrays, 0))[queue->front];
    		queue->front++;
    		if (queue->front == queue->arraysize) {
    			/* First array is now empty */
    			queue->firstempty = 1;
    			queue->front = 0;
    		}
    		queue->count--;
    		if (queue->count == 1) {
    			queue->back = queue->front;
    		}
    		else if (queue->count == 0 && queue->arrays->count == 2) {
    			/* Both arrays are empty - remove either one */
    			free(dynarray_remove_head(queue->arrays));
    		}
    	}
    	return data;
    }
    
    void * deque_pop_back(deque * queue)
    {
    	void *data = NULL;
    	if (queue->count) {
    		if (queue->lastempty) {
    			/* There is an empty array at the end */
    			free(dynarray_remove_tail(queue->arrays));
    			queue->lastempty = 0;
    		}
    		data = ((void**)dynarray_get(queue->arrays, queue->arrays->count - 1))[queue->back];
    		if (queue->back == 0) {
    			/* Last array is now empty */
    			queue->lastempty = 1;
    			queue->back = queue->arraysize - 1;
    		}
    		else {
    			queue->back--;
    		}
    		queue->count--;
    		if (queue->count == 1) {
    			queue->front = queue->back;
    		}
    		else if (queue->count == 0 && queue->arrays->count == 2) {
    			/* Both arrays are empty - remove either one */
    			free(dynarray_remove_tail(queue->arrays));
    		}
    	}
    	return data;
    }
    
    void * deque_get_at(const deque *queue, unsigned int index)
    {
    	void * data = NULL;
    	if (index < queue->count) {
    		const unsigned int pos = index + queue->front;
    		data = ((void**)dynarray_get(queue->arrays, 
    			pos / queue->arraysize + queue->firstempty))[pos % queue->arraysize];
    	}
    	return data;
    }
    
    void * deque_set_at(deque *queue, unsigned int index, void * data)
    {
    	void * result = NULL;
    	if (index == queue->count) {
    		deque_push_back(queue, data);
    	}
    	else if (index < queue->count) {
    		const unsigned int pos = index + queue->front;
    		result = deque_get_at(queue, index);
    		((void**)dynarray_get(queue->arrays, 
    			pos / queue->arraysize + queue->firstempty))[pos % queue->arraysize] = data;
    	}
    	return result;
    }
    
    void * deque_peek_front(const deque * queue)
    {
    	void *data = NULL;
    	if (queue->count > 0) {
    		data = ((void**)dynarray_get(queue->arrays, queue->firstempty))[queue->front];
    	}
    	return data;
    }
    
    void * deque_peek_back(const deque * queue)
    {
    	void *data = NULL;
    	if (queue->count > 0) {
    		data = ((void**)dynarray_get(queue->arrays, 
    					dynarray_get_count(queue->arrays) - 1 - queue->lastempty))[queue->back];
    	}
    	return data;
    }
    
    void deque_for_each(const deque * queue, deque_forfn fun)
    {
    	unsigned int i;
    	for (i = 0; i < queue->count; i++) {
    		fun(deque_get_at(queue, i));
    	}
    }
    

    Sorting a linked list by turning it into a binary search tree

    Introduction

    A doubly-linked list node has two pointers, previous and next, and a binary tree node also has two pointers, left and right

    This means that one way of sorting a linked list is to turn it into a binary tree and then back into a list again.

    Turning the list into a binary tree

    Turning the list into a tree is just a question of iterating over the list and performing the normal binary tree insertion algorithm. We need to remember to save the current node’s next pointer before we insert it, as it will not be available afterwards.

    typedef int(*cmpfn)(const void*, const void*);
    
    listnode * linkedlist_to_tree(linkedlist * list, cmpfn compare)
    {
        listnode * root = list->head;
        if (root) {
            listnode * node = root->next;
            root->previous = NULL;
            root->next = NULL;
            while (node != NULL) {
                listnode * next = node->next;
                listnode * current = root, * previous = NULL;
                int result;
                while (current != NULL) {
                    previous = current;
                    result = compare(current->data, node->data);
                    if (result > 0) {
                        current = current->previous;
                    }
                    else {
                        current = current->next;
                    }
                }
                if (result > 0) {
                    previous->previous = node;
                }
                else {
                    previous->next = node;
                }
                node->previous = NULL;
                node->next = NULL;
                node = next;
            }
        }
        return root;
    }
    

    Turning the binary tree back into a list

    This involves an inorder traversal of the tree, which is most easily implemented using recursion. We need to look out for the first and last elements; they need to be treated specially as they are the head and tail of the list.

    The first element in a binary tree is the one that we encounter the first time we can go left no further. We find the last element by keeping track of the number of elements encountered so far, and comparing it to the number in the list.

    void linkedlist_from_tree(linkedlist * list, listnode * root,
            listnode ** previous, unsigned int * count)
    {
        if (root) {
            listnode * left = root->previous;
            listnode * right = root->next;
            linkedlist_from_tree(list, left, previous, count);
            if (root->previous == NULL && list->head == NULL) {
                /* We're at the first element */
                list->head = root;
                list->head->previous = NULL;
            }
            else {
                (*previous)->next = root;
                root->previous = *previous;
            }
            if (*count == linkedlist_get_count(list) - 1) {
                /* We're at the last element */
                list->tail = root;
                list->tail->next = NULL;
            }
            *previous = root;
            (*count)++;
            linkedlist_from_tree(list, right, previous, count);
        }
    }
    

    The sort function

    Our linked list sort function now just needs to combine the two operations:

    void linkedlist_sort(linkedlist * list, cmpfn compare)
    {
        unsigned int count = 0;
        listnode * previous;
        listnode * tree = linkedlist_to_tree(list, compare);
        list->head = NULL;
        list->tail = NULL;
        linkedlist_from_tree(list, tree, &previous, &count);
    }
    

    Example program

    int main(void)
    {
        char * elements[] = {"D", "B", "D", "F", "A", "C", "E"};
        const unsigned int n = sizeof(elements) / sizeof(const char *);
        linkedlist * list;
        unsigned int i;
    
        list = linkedlist_create();
        for (i = 0; i < n; i++) {
            linkedlist_add_tail(list, elements[i]);
        }
        linkedlist_sort(list, (cmpfn)strcmp);
        linkedlist_for_each(list, (forfn)puts);
        linkedlist_delete(list);
    
        return 0;
    }
    

    Circular linked list in C

    A circular linked list is a linked list in which the head node’s previous pointer points to the tail node, and the tail node’s next pointer points to the head node. This means that the list only needs to keep track of one pointer for both head and tail.

    A circular linked list containing elements labelled A to F

    Looping over a circular linked list is slightly different from looping over an ordinary linked list because there is no NULL node to detect, and instead you are trying to get back to where you started. There are 2 methods of doing this.

    Method 1: Use a do loop

    void iterate1(const circularlist *list)
    {
        cnode *node = list->head;
        if (node != NULL) {
            do {
                printf("%s\n", (const char*)node->data);
                node = node->next;
            } while (node != list->head);
        }
    }
    
    

    Method 2: Use a counter

    void iterate2(const circularlist *list)
    {
        cnode *node;
        unsigned int i;
        for (i = 0, node = list->head; i < list->count; i++, node = node->next) {
            printf("%s\n", (const char*)node->data);
        }
    }
    

    Here is the header file for the circular linked list

    #ifndef CIRCULARLIST_H
    #define CIRCULARLIST_H
    
    struct cnode {
    	struct cnode * next;
    	struct cnode * previous;
    	void * data;
    };
    typedef struct cnode cnode;
    
    struct circularlist {
    	cnode * head;
    	unsigned int count;
    };
    typedef struct circularlist circularlist;
    
    typedef void (*circularlist_forfn)(void *);
    
    circularlist * circularlist_create(void);
    void circularlist_delete(circularlist * list);
    
    void circularlist_add_head(circularlist * list, void * data);
    void circularlist_add_tail(circularlist * list, void * data);
    void * circularlist_remove_head(circularlist * list);
    void * circularlist_remove_tail(circularlist * list);
    void * circularlist_remove_node(circularlist * list, cnode * node);
    void circularlist_empty(circularlist * list);
    
    void circularlist_insert_before(circularlist * list, cnode * node, void * data);
    void circularlist_insert_after(circularlist * list, cnode * node, void * data);
    
    void circularlist_for_each(const circularlist * list, circularlist_forfn fun);
    unsigned int circularlist_get_count(const circularlist * list);
    
    #endif /* CIRCULARLIST_H */
    

    Here is the implementation

    #include <stdlib.h>
    
    #include <circularlist.h>
    
    static cnode * cnode_create(void * data)
    {
    	cnode * node = malloc(sizeof(cnode));
    	if(node) {
    		node->next = node;
    		node->previous = node;
    		node->data = data;
    	}
    
    	return node;
    }
    
    static void cnode_delete(cnode * node)
    {
    	free(node);
    }
    
    circularlist * circularlist_create(void)
    {
    	circularlist * list = malloc(sizeof(circularlist));
    	if (list) {
    		list->head = NULL;
    		list->count = 0;
    	}
    
    	return list;
    }
    
    
    void circularlist_delete(circularlist * list)
    {
    	if (list) {
    		circularlist_empty(list);
    		free(list);
    	}
    }
    
    void circularlist_add_head(circularlist * list, void * data)
    {
    	circularlist_insert_before(list, list->head, data);
    	list->head = list->head->previous;
    }
    
    void circularlist_add_tail(circularlist * list, void * data)
    {
    	circularlist_insert_before(list, list->head, data);
    }
    
    void * circularlist_remove_head(circularlist * list)
    {
    	return circularlist_remove_node(list, list->head);
    }
    
    void * circularlist_remove_tail(circularlist * list)
    {
    	void * data = NULL;
    	if (list->head) {
    		data = circularlist_remove_node(list, list->head->previous);
    	}
    	return data;
    }
    
    void * circularlist_remove_node(circularlist * list, cnode * node)
    {
    	void * data = NULL;
    	if (list->count > 0) {
    		if (node != node->next) { /* Or, list->count > 1 */
    			node->next->previous = node->previous;
    			node->previous->next = node->next;
    			if (node == list->head)
    				list->head = list->head->next;
    		}
    		else {
    			/* Removing the last element */
    			list->head = NULL;
    		}
    		data = node->data;
    		cnode_delete(node);
    		list->count--;
    	}
    	return data;
    }
    
    void circularlist_empty(circularlist * list)
    {
    	while (circularlist_remove_tail(list) != NULL);
    }
    
    void circularlist_insert_before(circularlist * list, cnode * node, void * data)
    {
    	cnode * newnode = cnode_create(data);
    
    	if (newnode) {
    		if (list->count > 0) {
    			newnode->next = node;
    			newnode->previous = node->previous;
    			newnode->previous->next = newnode;
    			node->previous = newnode;
    		}
    		else {
    			/* Adding the first element */
    			list->head = newnode;
    		}
    		list->count++;
    	}
    }
    
    void circularlist_insert_after(circularlist * list, cnode * node, void * data)
    {
    	circularlist_insert_before(list, node->next, data);
    }
    
    void circularlist_for_each(const circularlist * list, circularlist_forfn fun)
    {
    	cnode * node = list->head;
    
    	if (node != NULL) {
    		do {
    			fun(node->data);
    			node = node->next;
    		} while (node != list->head);
    	}
    }
    
    unsigned int circularlist_get_count(const circularlist *list)
    {
    	return list->count;
    }
    

    Here is a test program

    #include <stdio.h>
    #include <string.h>
    
    #include <circularlist.h>
    
    int main(void)
    {
        char * elements[] = {"A", "B", "C", "D", "E", "F"};
        const unsigned int n = sizeof(elements) / sizeof(const char*);
        circularlist * list;
        unsigned int i;
        cnode * node;
        unsigned int removed = 0, added = 0;
    
        list = circularlist_create();
        if (list) {
            /* Populate the list */
            for (i = 0; i < n; i++) {
                circularlist_add_tail(list, elements[i]);
            }
    
            /* Remove "C" */
            node = list->head;
            do {
                if (strcmp((const char*)node->data, "C") == 0) {
                    circularlist_remove_node(list, node);
                    removed = 1;
                }
                else {
                    node = node->next;
                }
            } while (node != list->head && !removed);
    
            /* Add "X" and "Y" either side of "D" */
            node = list->head;
            do {
                if (strcmp((const char*)node->data, "D") == 0) {
                    circularlist_insert_before(list, node, "X");
                    circularlist_insert_after(list, node, "Y");
                    added = 1;
                }
                else {
                    node = node->next;
                }
            } while (node != list->head && !added);
    
            /* Print */
            printf("Count is %d\n", circularlist_get_count(list));
            circularlist_for_each(list, (circularlist_forfn)puts);
    
            circularlist_delete(list);
        }
        return 0;
    }
    

    Finding the height of a binary tree recursively

    The height of a node in a binary tree is simply the maximum of the height of its left and right subtrees, plus one.

    This lends itself to a simple recursive algorithm for finding the height of a binary tree.

    static int max(int a, int b)
    {
        if (a >= b) {
            return a;
        }
        return b;
    }
    
    unsigned int binarytree_height_recursive(const btnode *node)
    {
        unsigned int height = 0;
        if (node->left || node->right) {
            height = max(node->left ? binarytree_height_recursive(node->left) : 0,
                    node->right ? binarytree_height_recursive(node->right) : 0) + 1;
        }
        return height;
    }
    
    unsigned int binarytree_height(const binarytree *tree)
    {
    	unsigned int height = 0;
    	if (tree->root) {
    		height = binarytree_height_recursive(tree->root);
    	}
    	return height;
    }
    

    Related

    Counting nodes in a binary tree recursively

    Introduction

    The recursive structure of a binary tree makes it easy to count nodes recursively. There are 3 things we can count:

    • The total number of nodes
    • The number of leaf nodes
    • The number of internal nodes

    Counting all nodes

    The number of nodes in a binary tree is the number of nodes in the root’s left subtree, plus the number of nodes in its right subtree, plus one (for the root itself).

    This lends itself to a simple recursive algorithm for counting the nodes in a binary tree.

    unsigned int binarytree_count_recursive(const btnode *root)
    {
        unsigned int count = 1;
        if (root->left != NULL) {
           count += binarytree_count_recursive(root->left);
        }
        if (root->right != NULL) {
            count += binarytree_count_recursive(root->right);
        }
        return count;
    }
    
    unsigned int binarytree_count(const binarytree *tree)
    {
        unsigned int count = 0;
        if (tree->root != NULL) {
            count = binarytree_count_recursive(tree->root);
        }
        return count;
    }
    

    Counting leaf nodes

    This is similar, except that we only return 1 if we are a leaf node. Otherwise, we recurse into the left and right subtrees and return the sum of the leaves in them.

    unsigned int binarytree_count_leaves_recursive(const btnode *root)
    {
        unsigned int count = 0;
        if (root->left == NULL && root->right == NULL) {
            count = 1;
        }
        else {
            if (root->left != NULL) {
    		    count += binarytree_count_leaves_recursive(root->left);
    	    }
            if (root->right != NULL) {
                count += binarytree_count_leaves_recursive(root->right);
            }
        }
    	return count;
    }
    
    unsigned int binarytree_count_leaves(const binarytree *tree)
    {
        unsigned int count = 0;
        if (tree->root != NULL) {
    	    count = binarytree_count_leaves_recursive(tree->root);
        }
        return count;
    }
    

    Counting internal nodes

    This is the counterpart of counting leaves. If we are an internal node, we count 1 for ourselves, then recurse into the left and right subtrees and sum the count of internal nodes in them.

    unsigned int binarytree_count_internal_nodes_recursive(const btnode *root)
    {
        unsigned int count = 0;
        if (root->left != NULL || root->right != NULL) {
            count = 1;
            if (root->left != NULL) {
    		    count += binarytree_count_internal_nodes_recursive(root->left);
    	    }
            if (root->right != NULL) {
                count += binarytree_count_internal_nodes_recursive(root->right);
            }
        }
    	return count;
    }
    
    unsigned int binarytree_count_internal_nodes(const binarytree *tree)
    {
        unsigned int count = 0;
        if (tree->root != NULL) {
    	    count = binarytree_count_internal_nodes_recursive(tree->root);
        }
        return count;
    }
    

    Related