Tag Archives: Longest Common Substring

Longest Common Substring using dynamic programming in C

The Longest Common Substring problem is similar to Longest Common Subsequence, except that the subsequence must be made of adjacent characters – a substring. The dynamic programming algorithm is also very similar. For this one I have used chains of pointers in the table to retrieve the solution, as I did with Longest Increasing Subsequence,

#include <string.h>
#include <stdlib.h>

struct lcs_entry {
    unsigned int score;
    char letter;
    struct lcs_entry* prev;
};
typedef struct lcs_entry lcs_entry;
 
unsigned int longest_common_substring(const char *str1, const char *str2,
        char **result)
{
    unsigned int i, j;
    unsigned int max = 0;
    lcs_entry *head;
    const size_t m = strlen(str1);
    const size_t n = strlen(str2);
    lcs_entry **lcstab;
   
    /* Allocate the table */
    lcstab = malloc((m + 1) * sizeof(lcs_entry*));
    for (i = 0; i < m + 1; i++) {
        lcstab[i] = malloc((n + 1) * sizeof(lcs_entry));
    }

    /* Calculate scores for each substring and build chains */ 
    for (i = 0; i <= m; i++) {
        for (j = 0; j <= n; j++) {
            if (i == 0 || j == 0) {
                lcstab[i][j].score = 0;
                lcstab[i][j].letter = 0;
                lcstab[i][j].prev = NULL;
            }
            else if (str1[i - 1] == str2[j - 1]) {
                lcstab[i][j].score = lcstab[i - 1][j - 1].score + 1;
                lcstab[i][j].letter = str1[i - 1];
                lcstab[i][j].prev = &lcstab[i - 1][j - 1];
                if (lcstab[i][j].score > max) {
                    max = lcstab[i][j].score;
                    head = &lcstab[i][j];
                }
            }
            else {
                lcstab[i][j].score = 0;
                lcstab[i][j].letter = 0;
                lcstab[i][j].prev = NULL;
            }
        }
    }

    /* Allocate the output array and copy the LC substring */
    *result = malloc(max + 1);
    (*result)[max] = '\0';
    for (i = max - 1; head->prev != NULL; i--) {
        (*result)[i] = head->letter;
        head = head->prev;
    } 
    for (i = 0; i < m + 1; i++) {
        free(lcstab[i]);
    }
    free(lcstab);
    return max;
}

An example program:

int main(void)
{
    char str1[] = "magnetohydrodynamics";
    char str2[] = "hydrogen";
    char *result;
 
    printf("Length of Longest Common Substring is %u\n", 
            longest_common_substring(str1, str2, &result));
    printf("%s\n", result);
    free(result);

    return 0;
}

The output:

Length of Longest Common Substring is 5
hydro