Tag Archives: Dynamic Array

Split a string in C

There are two problems with the standard C strtok function, which is used to split strings:

  • It stores the string being split between calls. This means that while a string is in the process of being split, another string cannot be split in the current thread, or any other thread – i.e., strtok is non-reentrant
  • strtokoverwrites the separators in the string being split with nul characters, so the original string is unusable afterwards. i.e., strtok is destructive

Here’s a function to split a string that has neither of the problems of strok. It takes a pointer to a function from the caller – a callback, which is then passed the start address and length of each token found as the string is split.

typedef void(*split_fn)(const char *, size_t, void *);

void split(const char *str, char sep, split_fn fun, void *data)
{
    unsigned int start = 0, stop;
    for (stop = 0; str[stop]; stop++) {
        if (str[stop] == sep) {
            fun(str + start, stop - start, data);
            start = stop + 1;
        }
    }
    fun(str + start, stop - start, data);
}

Here’s how to use it to print the tokens:

#include <stdio.h>

#include <split.h>

void print(const char *str, size_t len, void *data)
{
    printf("%.*s\n", (int)len, str);
}

int main(void)
{
    char str[] = "first,second,third,fourth";
    split(str, ',', print, NULL);
    return 0;
}
first
second
third
fourth

The third argument to split is a void* pointer, which the caller can use to pass in anything it wants to use within its callback during the splitting process, such as a collection to which to add the tokens.

For example, here’s how to add the tokens to a dynamic array:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <split.h>
#include <dynarray.h>

void add_to_dynarray(const char *str, size_t len, void *data)
{
    dynarray *array = data;
    char *token = calloc(len + 1, 1);
    memcpy(token, str, len);
    dynarray_add_tail(array, token);
}

int main(void)
{
    char str[] = "first,second,third,fourth";
    dynarray *array = dynarray_create(0);
    split(str, ',', add_to_dynarray, array);
    dynarray_for_each(array, (dynarray_forfn)puts);
    dynarray_for_each(array, free);
    dynarray_delete(array);
    return 0;
}

Related

Prefixes, suffixes, and substrings in C

Prefixes

To find all prefixes of a string, just find all substrings starting at the beginning of the string of length from 1 to the length of the string. A good way of returning these to the client is to allow the client to pass in a function pointer, which is called for each prefix found:

typedef void(*substring_fn)(const char *, size_t, void *);

void prefixes(const char *str, size_t len, substring_fn fun, void *data)
{
    unsigned int length;
    for (length = 1; length <= len; length++) {
        fun(str, length, data);
    }
}

The client function is given the beginning of the string and a length, rather than a nul-terminated string. This means that the original string does not need to be modified or copied, which is much more efficient. The client can use the length to do something with the string, like printing it using printf with the * format specifier to specify the length:

#include <stdio.h>
#include <string.h>

#include <substring.h>

void print(const char *str, size_t len, void *data)
{
    printf("%.*s\n", (int)len, str);
}

int main(void)
{
    char str[] = "abracadabra";
    prefixes(str, strlen(str), print, NULL);
    return 0;
}

This produces the following output:

a
ab
abr
abra
abrac
abraca
abracad
abracada
abracadab
abracadabr
abracadabra

The third argument to the client function is a void * pointer that the client can use to pass anything it needs into the function. For example, it might pass in a dynamic array and add the prefixes to that:

#include <stdio.h>
#include <string.h>

#include <substring.h>
#include <dynarray.h>

void add_to_dynarray(const char *str, size_t len, void *data)
{
    dynarray *array = data;
    char *token = calloc(len + 1, 1);
    memcpy(token, str, len);
    dynarray_add_tail(array, token);
}

int main(void)
{
    char str[] = "abracadabra";
    dynarray *array = dynarray_create(0);
    prefixes(str, strlen(str), add_to_dynarray, array);
    dynarray_for_each(array, (dynarray_forfn)puts);
    dynarray_for_each(array, free);
    dynarray_delete(array);
    return 0;
}

Suffixes

To find all suffixes of a string, walk a pointer from the beginning of the string to 1 before the end, passing the client the pointer and the length remaining to the end of the string.

void suffixes(const char *str, size_t len, substring_fn fun, void *data)
{
    unsigned int start;
    for (start = 0; start < len; start++) {
        fun(str + start, len - start, data);
    }
}
abracadabra
bracadabra
racadabra
acadabra
cadabra
adabra
dabra
abra
bra
ra
a

Substrings

A substring is simply a prefix of a suffix, or a suffix of a prefix, so you can find all substrings by finding all suffixes and then finding all of their prefixes, or vice-versa. We can use the function pointer interface above for this by wrapping the client function pointer and data in our own structure and passing that from prefixes to suffixes as the third argument as follows:

typedef struct {
    substring_fn fun;
    void *data;
} substrings_data;

void call_suffixes(const char *str, size_t len, void *data)
{
    substrings_data *sd = data;
    suffixes(str, len, sd->fun, sd->data);
}

void substrings(const char *str, size_t len, substring_fn fun, void *data)
{
    substrings_data sd = {fun, data};
    prefixes(str, len, call_suffixes, &sd);
}

This produces the following output:

a
ab
b
abr
br
r
abra
bra
ra
a
abrac
brac
rac
ac
c
abraca
braca
raca
aca
ca
a
abracad
bracad
racad
acad
cad
ad
d
abracada
bracada
racada
acada
cada
ada
da
a
abracadab
bracadab
racadab
acadab
cadab
adab
dab
ab
b
abracadabr
bracadabr
racadabr
acadabr
cadabr
adabr
dabr
abr
br
r
abracadabra
bracadabra
racadabra
acadabra
cadabra
adabra
dabra
abra
bra
ra
a

Unique substrings

The substrings function will produce all substrings, which can include duplicates, as you can see in the output above. You can use a set data structure like a hash table to find just the unique substrings. Again, we can make good use of the function pointer interface by wrapping the client function and the hashtable in a structure, and then passing that to hashtable‘s own iteration function hashtable_for_each2.

void add_to_hashtable(const char *str, size_t len, void *data)
{
    hashtable *table = data;
    char *buf = malloc(len + 1);
    strncpy(buf, str, len);
    buf[len] = '\0';
    buf = hashtable_add(table, buf);
    if (buf) {
        free(buf);
    }
}

void call_substring_fun(const char *substring, void *data)
{
    substrings_data *sd = data;
    sd->fun(substring, strlen(substring), data);
}

void unique_substrings(const char *str, size_t len, substring_fn fun, void *data)
{
    hashtable *table = hashtable_create(7, (hashtable_cmpfn)strcmp);
    substrings(str, len, add_to_hashtable, table);
    substrings_data sd = {fun, data};
    hashtable_for_each2(table, (hashtable_forfn2)call_substring_fun, &sd);
    hashtable_for_each(table, free);
    hashtable_delete(table);
}

The substrings produced are now unique:

abrac
abraca
abracad
acada
ad
adabr
b
brac
bracada
bracadab
cada
dabr
rac
acadabra
c
ca
abra
abracadabra
br
bracadabra
d
dabra
r
racada
racadabra
acad
adabra
bra
cadabra
da
ra
racad
racadabr
aca
acadabr
bracad
cad
ab
abr
abracada
abracadabr
adab
bracadabr
cadab
racadab
a
abracadab
ac
acadab
ada
braca
cadabr
dab
raca

Related

Dynamic array in C

A dynamic array is one that grows to accommodate new items, overcoming the limitations of fixed-size arrays. New items can be added at the head or tail, or inserted in the middle. Existing elements are moved to accommodate new ones as necessary.

Here is an example program:

#include <stdio.h>

#include <dynarray.h>

int main(void)
{
    dynarray *array = dynarray_create(0); /* No preferred size */
    unsigned int i;
    void * data;
    char *elements[] = {"A", "B", "C", "D", "E", "F"};
    const unsigned int n = sizeof(elements) / sizeof(const char*);

    for (i = 0; i < n; i++) {
        if (i % 2 == 0) {
            dynarray_add_head(array, elements[i]);
        }
        else {
            dynarray_add_tail(array, elements[i]);
        }
    }
        
    dynarray_insert(array, 0, "X");                                /* Same as dynarray_addhead */
    dynarray_insert(array, dynarray_get_count(array) / 2, "Y");    /* Insert in the middle */
    dynarray_insert(array, dynarray_get_count(array), "Z");        /* Same as dynarray_add_tail */

    dynarray_set(array, dynarray_get_count(array) / 2, "P");
    dynarray_set(array, dynarray_get_count(array), "Q");           /* Same as dynarray_add_tail */

    for (i = 0; i < dynarray_get_count(array); i++) {
        printf("%d: %s\n", i, (const char*)dynarray_get(array, i));
    }
    putchar('\n');
    
    for (i = 0; dynarray_get_count(array); i++) {
        const unsigned int action = i % 3;
        if (action == 0) {
            data = dynarray_remove_head(array);
        }
        else if (action == 1) {
            data = dynarray_remove_tail(array);
        }
        else {
            data = dynarray_remove(array, dynarray_get_count(array) / 2);
        }
        printf("Removed: %s\n", (const char*)data);
    }

    dynarray_delete(array);

    return 0;
}

The header file:

#ifndef DYNARRAY_H
#define DYNARRAY_H

struct dynarray {
	void ** buffer;
	unsigned int size;
	unsigned int count;
};

typedef struct dynarray dynarray;

typedef void (*dynarray_forfn)(void *);

dynarray * dynarray_create(unsigned int startsize);
void dynarray_empty(dynarray * array);
void dynarray_delete(dynarray * array);
void dynarray_add_tail(dynarray * array, void * data);
void dynarray_add_head(dynarray * array, void * data);
void * dynarray_remove_tail(dynarray * array);
void * dynarray_remove_head(dynarray * array);
void dynarray_insert(dynarray *array, unsigned int pos, void *data);
void * dynarray_remove(dynarray *array, unsigned int pos);
void * dynarray_get(const dynarray * array, unsigned int pos);
void * dynarray_set(dynarray * array, unsigned int pos, void * data);
void dynarray_for_each(const dynarray * array, dynarray_forfn fun);
unsigned int dynarray_get_count(const dynarray * array);
void dynarray_set_size(dynarray * array, unsigned int size);

#endif /* DYNARRAY_H */

The implementation:

#include <stdlib.h>
#include <string.h> /* For memcpy and memmove */

#include <dynarray.h>

#define START_SIZE 4 /* Initial buffer size if not specified */

dynarray * dynarray_create(unsigned int size)
{
	dynarray * array = malloc(sizeof(dynarray));
	if (array != NULL) {
		if (size) {
			array->buffer = malloc(size * sizeof(void*));
			if (array->buffer) {
				array->size = size;
			}
			else {
				free(array);
				array = NULL;
			}
		}
		else {
			array->buffer = NULL;
			array->size = 0;
		}
		array->count = 0;
	}
	return array;
}

void dynarray_empty(dynarray * array)
{
	array->count = 0;
}

void dynarray_delete(dynarray * array)
{
	if (array) {
		free(array->buffer);
		free(array);
	}
}

void dynarray_add_tail(dynarray * array, void * data)
{
	if (array->count == array->size) {
		/* No more space */
		if (array->buffer != NULL) {
			void **buffer = realloc(array->buffer, array->size * 2 * sizeof(void*));
			array->buffer = buffer;
			array->size *= 2;
		}
		else {
			array->buffer = malloc(START_SIZE * sizeof(void*));
			array->size = START_SIZE;
		}
	}
	if (array->buffer != NULL) {
		array->buffer[array->count] = data;
		array->count++;
	}
}

void dynarray_add_head(dynarray * array, void * data)
{
	if (array->count == array->size) {
		/* No more space */
		if (array->buffer != NULL) {
			void **temp = malloc(array->size * 2 * sizeof(void*));
			if (temp) {
				/* Copy the elements one space to the right */
				memcpy(temp + 1, array->buffer, array->count * sizeof(void*));
				free(array->buffer);
				array->buffer = temp;
				array->size *= 2;
			}
		}
		else {
			array->buffer = malloc(START_SIZE * sizeof(void*));
			if (array->buffer) {
				array->size = START_SIZE;
			}
		}
	}
	else {
        /* Move the elements one space to the right */
		memmove(array->buffer + 1, array->buffer, array->count * sizeof(void*));
	}
	if (array->buffer != NULL) {
		array->buffer[0] = data;
		array->count++;
	}
}

void * dynarray_remove_tail(dynarray * array)
{
	void * data = NULL;
	if (array->count > 0) {
		data = array->buffer[array->count - 1];
		array->count--;
	}
	return data;
}

void * dynarray_remove_head(dynarray * array)
{
	void * data = NULL;
	if (array->count > 0) {
		data = array->buffer[0];
        /* Move the elements one space to the left */
        memmove(array->buffer, array->buffer + 1, (array->count - 1) * sizeof(void*));
		array->count--;
	}
	return data;
}

void dynarray_insert(dynarray *array, unsigned int pos, void *data)
{
	if (pos == 0) {
		dynarray_add_head(array, data);
	}
	else if (pos == array->count) {
		dynarray_add_tail(array, data);
	}
	else if (pos < array->count) {
		unsigned int i;
		if (array->count == array->size) {
			/* Reallocate the buffer and copy, leaving a space */
			void **temp = malloc(array->size * 2 * sizeof(void*));
			if (temp) {
				memcpy(temp, array->buffer, pos * sizeof(void*));
				memcpy(temp + pos + 1, array->buffer + pos, (array->count - pos) * sizeof(void*));
				free(array->buffer);
				array->buffer = temp;
				array->size *= 2;
			}
		}
		else {
			/* Move the elements after to the right */
            memmove(array->buffer + pos + 1, array->buffer + pos,
                    (array->count - pos) * sizeof(void*));
		}
		array->buffer[pos] = data;
		array->count++;
	}
}

void * dynarray_remove(dynarray *array, unsigned int pos)
{
	void *data;
	if (array->count < pos + 1) {
		data = NULL;
	}
	else if (pos == 0) {
		data = dynarray_remove_head(array);
	}
	else if (pos == array->count - 1) {
		data = dynarray_remove_tail(array);
	}
	else {
		unsigned int i;
		data = array->buffer[pos];
		/* Move the following elements left */
        memmove(array->buffer + pos, array->buffer + pos + 1,
                (array->count - pos - 1) * sizeof(void*)); 
		array->count--;
	}
	return data;
}

void * dynarray_get(const dynarray * array, unsigned int pos)
{
	void * data = NULL;
	if (pos < array->count) {
		data = array->buffer[pos];
	}
	return data;
}

void * dynarray_set(dynarray * array, unsigned int pos, void * data)
{
	void * temp = NULL;
	if (pos == array->count) {
		dynarray_add_tail(array, data);
	}
	else if (pos < array->count) {
		temp = array->buffer[pos];
		array->buffer[pos] = data;
	}
	return temp;
}

void dynarray_set_size(dynarray * array, unsigned int size)
{
	
	array->buffer = realloc(array->buffer, size);
	if (array->buffer) {
		array->size = size;
		if (array->size < array->count) {
			array->count = array->size;
		}
	}
	else {
		array->size = 0;
		array->count = 0;
	}
}

void dynarray_for_each(const dynarray * array, dynarray_forfn fun)
{
	unsigned int i;

	for (i = 0; i < array->count; i++) {
		fun(array->buffer[i]);
	}
}

unsigned int dynarray_get_count(const dynarray * array)
{
	return array->count;
}