The Longest Common Substring problem is similar to Longest Common Subsequence, except that the subsequence must be made of adjacent characters – a substring. The dynamic programming algorithm is also very similar. For this one I have used chains of pointers in the table to retrieve the solution, as I did with Longest Increasing Subsequence,

#include <string.h> #include <stdlib.h> struct lcs_entry { unsigned int score; char letter; struct lcs_entry* prev; }; typedef struct lcs_entry lcs_entry; unsigned int longest_common_substring(const char *str1, const char *str2, char **result) { unsigned int i, j; unsigned int max = 0; lcs_entry *head; const size_t m = strlen(str1); const size_t n = strlen(str2); lcs_entry **lcstab; /* Allocate the table */ lcstab = malloc((m + 1) * sizeof(lcs_entry*)); for (i = 0; i < m + 1; i++) { lcstab[i] = malloc((n + 1) * sizeof(lcs_entry)); } /* Calculate scores for each substring and build chains */ for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { if (i == 0 || j == 0) { lcstab[i][j].score = 0; lcstab[i][j].letter = 0; lcstab[i][j].prev = NULL; } else if (str1[i - 1] == str2[j - 1]) { lcstab[i][j].score = lcstab[i - 1][j - 1].score + 1; lcstab[i][j].letter = str1[i - 1]; lcstab[i][j].prev = &lcstab[i - 1][j - 1]; if (lcstab[i][j].score > max) { max = lcstab[i][j].score; head = &lcstab[i][j]; } } else { lcstab[i][j].score = 0; lcstab[i][j].letter = 0; lcstab[i][j].prev = NULL; } } } /* Allocate the output array and copy the LC substring */ *result = malloc(max + 1); (*result)[max] = '\0'; for (i = max - 1; head->prev != NULL; i--) { (*result)[i] = head->letter; head = head->prev; } for (i = 0; i < m + 1; i++) { free(lcstab[i]); } free(lcstab); return max; }

An example program:

int main(void) { char str1[] = "magnetohydrodynamics"; char str2[] = "hydrogen"; char *result; printf("Length of Longest Common Substring is %u\n", longest_common_substring(str1, str2, &result)); printf("%s\n", result); free(result); return 0; }

The output:

Length of Longest Common Substring is 5 hydro