Пример #1
0
def remove_repeated_long_strings(l, minlen=1000):
    """Collapse long repeated passages of `l` down to a single copy.

    The longest repeated substring is obtained from the `ukkonen` module
    (Ukkonen's algorithm), trimmed so that it starts and ends on a space,
    and every occurrence is replaced by a single space; one copy is then
    appended to the end of the text.  This is repeated for as long as the
    reported substring is longer than `minlen` characters.

    Args:
        l: The text to clean up.
        minlen: Only repeats longer than this many characters (measured
            before trimming to space boundaries) are processed.

    Returns:
        The processed text with leading and trailing whitespace stripped.

    Note:
        Processing stops early and the text is returned as it stands when
        the trimmed candidate
          * is found only once by `str.count`, which counts non-overlapping
            occurrences (i.e. the repeat overlaps its counterpart), or
          * contains no space at all, or nothing but spaces, so there is
            no whole space-delimited passage to remove.
    """
    l = ' ' + l + ' '

    # A substring that occurs twice can be at most len(l) - 1 characters
    # long, so text shorter than minlen + 2 cannot contain a qualifying
    # repeat and is returned without calling into `ukkonen`.
    if len(l) < minlen + 2:
        return l.strip()

    import ukkonen

    s = ukkonen.getLongestRepeatedSubstring(l + '$')
    while len(s) > minlen:
        # Trim the candidate to space boundaries: keep everything from its
        # first space through its last space.
        start = s.find(' ')
        if start == -1:
            # No space inside the repeat; indexing s[0] on the shrinking
            # string used to raise IndexError here.
            return l.strip()
        s = s[start:s.rfind(' ') + 1]

        if not s.strip(' '):
            # Only spaces are left.  Replacing ' ' by ' ' changes nothing,
            # so continuing would loop forever on the same candidate.
            return l.strip()
        if l.count(s) == 1:
            # The repeat overlaps itself; nothing can be removed safely.
            return l.strip()

        l = l.replace(s, ' ')
        l = l + s  # keep exactly one copy, moved to the end of the text
        l = l.replace('  ', ' ')
        s = ukkonen.getLongestRepeatedSubstring(l + '$')
    return l.strip()
Пример #2
0
def check(s):
    """Assert that both longest-repeated-substring implementations agree.

    Runs `get_longest_repeated_substring_brute` on `s` and
    `ukkonen.getLongestRepeatedSubstring` on `s` with a trailing '$'
    appended, and raises `AssertionError` if the two results differ.
    """
    expected = get_longest_repeated_substring_brute(s)
    actual = ukkonen.getLongestRepeatedSubstring(s + '$')
    assert expected == actual