This problem is very like Longest Common Subsequence, except that we are looking for the longest subsequence occurring twice in the same string, rather than once in different strings. Once again, this is a dynamic programming solution using chains of pointers in the table to keep track of the subsequences under consideration.

struct lrs_entry { unsigned int score; char letter; struct lrs_entry *prev; }; typedef struct lrs_entry lrs_entry; unsigned int longest_repeated_subsequence(const char *str, char **result) { const size_t len = strlen(str); lrs_entry **lrstab; unsigned int i, j; unsigned int max; lrs_entry *head; /* Allocate the table */ lrstab = malloc((len + 1) * sizeof(lrs_entry *)); for (i = 0; i <= len; i++) { lrstab[i] = malloc((len + 1) * sizeof(lrs_entry)); } /* Calculate the scores and build the chains */ for (i = 0; i <= len; i++) { for (j = 0; j <= len; j++) { if (i == 0 || j == 0) { /* Initialising the first row or column */ lrstab[i][j].score = 0; lrstab[i][j].letter = 0; lrstab[i][j].prev = NULL; } else if (str[i - 1] == str[j - 1] && i != j) { /* Match */ lrstab[i][j].score = lrstab[i - 1][j - 1].score + 1; lrstab[i][j].letter = str[i - 1]; lrstab[i][j].prev = &lrstab[i - 1][j - 1]; } else { /* Mismatch */ if (lrstab[i][j-1].score > lrstab[i-1][j].score) { lrstab[i][j].score = lrstab[i][j-1].score; lrstab[i][j].letter = lrstab[i][j-1].letter; lrstab[i][j].prev = lrstab[i][j-1].prev; } else { lrstab[i][j].score = lrstab[i - 1][j].score; lrstab[i][j].letter = lrstab[i - 1][j].letter; lrstab[i][j].prev = lrstab[i - 1][j].prev; } } } } max = lrstab[len][len].score; /* Allocate the output array and copy the LRS into it */ *result = malloc(max + 1); for (i = max - 1, head = &lrstab[len][len]; head->prev != NULL; head = head->prev, i--) { (*result)[i] = head->letter; } (*result)[max] = '\0'; for (i = 0; i <= len; i++) { free(lrstab[i]); } free(lrstab); return max; }

Example program:

int main() { char str[] = "abcXdYeXZfgYhZijk"; char *result; printf("Longest Repeated Subsequence: %u\n", longest_repeated_subsequence(str, &result)); printf("%s\n", result); free(result); return 0; }

Output:

Longest Repeated Subsequence: 3 XYZ