Exemplo n.º 1
0
 def test_basic(self):
     """Check the table produced for the full lowercase alphabet.

     The expected table has 26 rows of 27 columns: row ``k`` holds -1 in
     columns 0..k and the value ``k`` in every later column.
     """
     text = 'abcdefghijklmnopqrstuvwxyz'
     table = []
     for letter in range(26):
         # -1 through column `letter`, then `letter` for the remaining columns.
         row = [-1] * (letter + 1) + [letter] * (26 - letter)
         table.append(row)
     self.assertEqual(table, bad_character_table(text))
Exemplo n.º 2
0
 def test_alternating_string(self):
     """Check the table for 'aabbaa': only the first two rows are non-trivial."""
     text = 'aabbaa'
     row_a = [-1, 0, 1, 1, 1, 4, 5]
     row_b = [-1, -1, -1, 2, 3, 3, 3]
     # The remaining 24 rows are all -1 across the 7 columns.
     untouched = [[-1] * 7 for _ in range(24)]
     table = [row_a, row_b] + untouched
     self.assertEqual(table, bad_character_table(text))
Exemplo n.º 3
0
 def test_repeated_char_string(self):
     """Check the table for 'aaaa': row 0 counts up, all other rows stay -1."""
     text = 'aaaa'
     first_row = list(range(-1, 4))  # [-1, 0, 1, 2, 3]
     table = [first_row] + [[-1] * 5 for _ in range(25)]
     self.assertEqual(table, bad_character_table(text))
Exemplo n.º 4
0
 def test_single_char_string(self):
     """Check the table for each one-letter string 'a'..'z'.

     Every row is [-1, -1] except the row selected by ``alphabet_index``
     for that letter, whose second column is 0.
     """
     for letter in 'abcdefghijklmnopqrstuvwxyz':
         table = [[-1] * 2 for _ in range(26)]
         row = table[alphabet_index(letter)]
         row[1] = 0
         self.assertEqual(table, bad_character_table(letter))
Exemplo n.º 5
0
 def test_empty_string(self):
     """Check that the empty string yields 26 empty rows."""
     no_columns = [list() for _ in range(26)]
     self.assertEqual(no_columns, bad_character_table(''))
Exemplo n.º 6
0
def string_search(P, T):
    """Return the start indices of the occurrences of pattern P in text T.

    The search aligns the end of P with position ``k`` of T and compares
    right to left, as in Boyer-Moore. It additionally keeps a table ``M``
    recording, for each alignment end already tried, how many characters
    were consumed there (``k - h``), and uses it together with ``N`` to
    skip over characters that earlier phases already examined (the
    Apostolico-Giancarlo phase cases).

    Parameters:
        P: the pattern (indexable, sliceable sequence of characters).
        T: the text to search.

    Returns:
        A list of 0-based indices in T where a match of P starts, in
        increasing order. Empty if P is empty or longer than T.

    Relies on ``fundamental_preprocess``, ``bad_character_table``,
    ``good_suffix_table``, ``full_shift_table`` and ``alphabet_index``
    being defined elsewhere in this module.
    """
    if len(P) == 0 or len(T) < len(P):
        return []

    matches = []

    # Preprocessing
    # N is only ever indexed by a pattern position (N[i]) and compared with
    # i+1, so it has to be computed from P, not from T.
    # NOTE(review): presumably N[i] is the length of the longest substring
    # of P ending at i that is also a suffix of P -- confirm against
    # fundamental_preprocess.
    N = fundamental_preprocess(P[::-1])  # P[::-1] reverses P
    N.reverse()
    R = bad_character_table(P)
    L = good_suffix_table(P)
    F = full_shift_table(P)
    M = [-1] * len(T)   # -1 marks "no phase has ended at this text position yet"

    k = len(P) - 1      # Represents alignment of end of P relative to T
    i = len(P) - 1      # Character to compare in P
    h = k               # Character to compare in T
    match = False       # Indicates whether an exact match has been found in this phase
    mismatch = False    # Indicates whether a mismatch has occurred

    while k < len(T):
        if M[h] == -1 or M[h] == 0 or N[i] == 0:    # Phase case 1
            # Nothing can be skipped here: compare the characters directly.
            if T[h] == P[i]:
                if i == 0:  # Case 1a: reached the start of P, full match
                    match = True
                    mismatch = False
                else:       # Case 1b: keep comparing leftwards
                    i -= 1
                    h -= 1
                    match = False
                    mismatch = False
            else:           # Case 1c
                match = False
                mismatch = True
        elif (M[h] < N[i] and M[h] != -1) or (M[h] == N[i] and 0 < N[i] < i+1): # Case 2 & 5
            # Skip M[h] characters in both P and T without comparing them.
            skip = M[h]
            i -= skip
            h -= skip
            match = False
            mismatch = False
        elif M[h] >= N[i] and N[i] == i+1 > 0:  # Phase case 3
            # The remaining prefix P[0..i] is covered: report a match.
            match = True
            mismatch = False
        elif M[h] > N[i] and N[i] < i+1:    # Phase case 4
            # Skip N[i] characters and report a mismatch at the new position.
            # The amount is read once so that i and h move by the same
            # distance and T[h] stays aligned with P[i].
            skip = N[i]
            i -= skip
            h -= skip
            match = False
            mismatch = True
        if match:
            matches.append(k - len(P) + 1)
            M[k] = k - h
            # Shift past this occurrence; a one-character pattern has no F[1].
            k += len(P)-F[1] if len(P) > 1 else 1
            i = len(P) - 1
            h = k
            match = False
            mismatch = False
        if mismatch:
            # Shift suggested by the bad-character table for the text character T[h].
            char_shift = i - R[alphabet_index(T[h])][i]
            if i+1 == len(P):   # Mismatch happened on first attempt
                suffix_shift = 1
            elif L[i+1] == -1:   # Matched suffix does not appear anywhere in P
                suffix_shift = len(P) - F[i+1]
            else:               # Matched suffix appears in P
                suffix_shift = len(P) - L[i+1]
            M[k] = k - h
            k += max(char_shift, suffix_shift)
            i = len(P) - 1
            h = k
            match = False
            mismatch = False
    return matches