def test_find_common_subsequences(self):
        """Check the set of common subsequences for three fixture pairs.

        The first pair is expected to share only the empty subsequence, the
        second a partial set, and the third the eight subsequences listed
        below.
        """
        find = py_common_subseq.find_common_subsequences

        # Only the empty subsequence is expected for this pair.
        self.assertEqual(find(NONE_SHARED_1, NONE_SHARED_2), {''})

        expected_partial = {
            '', 'q', 'w', 'e', 'r',
            'qw', 'qe', 'qr', 'wr', 'er',
            'qwr', 'qer',
        }
        self.assertEqual(
            find(PARTIALLY_SHARED_1, PARTIALLY_SHARED_2), expected_partial)

        expected_all = {'', 'e', 's', 't', 'es', 'et', 'st', 'est'}
        self.assertEqual(find(ALL_SHARED_1, ALL_SHARED_2), expected_all)
    def test_find_common_subsequences(self):
        """Verify find_common_subsequences against three fixture pairs.

        Each case pairs two module-level fixtures with the exact set of
        subsequences the library is expected to return for them.
        """
        cases = (
            (NONE_SHARED_1, NONE_SHARED_2, ('',)),
            (
                PARTIALLY_SHARED_1,
                PARTIALLY_SHARED_2,
                ('', 'qer', 'wr', 'qwr', 'er', 'qr', 'e', 'qw', 'q', 'r',
                 'qe', 'w'),
            ),
            (
                ALL_SHARED_1,
                ALL_SHARED_2,
                ('', 'e', 's', 't', 'es', 'st', 'et', 'est'),
            ),
        )
        for first, second, expected in cases:
            found = py_common_subseq.find_common_subsequences(first, second)
            self.assertEqual(found, set(expected))
示例#3
0
def commonSubstrings(s1, s2, length=3):
    """Return the common subsequences of ``s1`` and ``s2`` of a given length.

    Args:
        s1: First sequence handed to ``commonSubs.find_common_subsequences``.
        s2: Second sequence handed to ``commonSubs.find_common_subsequences``.
        length: Required ``len()`` of each returned subsequence. Defaults to
            3, the value that was previously hard-coded.

    Returns:
        A list of the non-empty common subsequences whose ``len()`` equals
        ``length``, in the reverse of the order in which the library result
        is iterated.
    """
    common = commonSubs.find_common_subsequences(s1, s2)
    # The empty subsequence is never reported, whatever ``length`` is.
    matches = [
        candidate for candidate in common
        if candidate != "" and len(candidate) == length
    ]
    # Every kept item has the same length, so sorting by length would be a
    # stable no-op; only the reversal affects the returned order.
    matches.reverse()
    return matches
示例#4
0
def get_longest_common_subsequence(test_seq, user_seq):
    """Return the longest common subsequence of two integer sequences.

    Both inputs are converted element-wise with ``str`` and passed to
    ``cs.find_common_subsequences`` with ``sep=','``; the winning joined
    string is split and converted back with ``int``.

    Args:
        test_seq: Iterable of values whose ``str`` form parses with ``int``.
        user_seq: Iterable of values whose ``str`` form parses with ``int``.

    Returns:
        A list of ints with the most elements among the common subsequences,
        or ``[]`` when the library reports a single subsequence only.
    """
    subsequences = cs.find_common_subsequences(
        [str(item) for item in test_seq],
        [str(item) for item in user_seq],
        sep=',')

    if len(subsequences) == 1:
        return []

    # Rank by number of elements, not by character count: every element is
    # preceded by one separator (hence the [1:] below), so counting commas
    # counts elements. Ranking with len() would let one long number such as
    # ",1000000" beat a subsequence with more, shorter elements like ",1,2".
    longest = max(subsequences, key=lambda joined: joined.count(','))
    return [int(token) for token in longest.split(',')[1:]]
    def test_seperator(self):
        """Check the subsequences produced with a non-default separator.

        With ``sep=' '`` every element of each expected non-empty
        subsequence is preceded by a single space.
        """
        expected = {
            '', ' q', ' w', ' e', ' r',
            ' q w', ' q e', ' q r', ' w r', ' e r',
            ' q w r', ' q e r',
        }
        actual = py_common_subseq.find_common_subsequences(
            PARTIALLY_SHARED_1, PARTIALLY_SHARED_2, sep=' ')
        self.assertEqual(actual, expected)
示例#6
0
def assess(origWord, writtenWord):
    """Mark each character of ``origWord`` as matched or not in ``writtenWord``.

    The longest string returned by ``find_common_subsequences`` is aligned
    greedily, left to right, against ``origWord``.

    Args:
        origWord: The reference word.
        writtenWord: The word being compared with the reference.

    Returns:
        A list with one ``(character, flag)`` tuple per character of
        ``origWord``; ``flag`` is 1 when the character is consumed from the
        longest common subsequence and 0 otherwise.
    """
    lcs = max(find_common_subsequences(origWord, writtenWord), key=len)
    matched = 0
    retList = []
    # Walk the whole reference word so that characters after the last match
    # (or every character, when nothing matches) are still reported with a
    # 0 flag instead of being silently dropped.
    for char in origWord:
        if matched < len(lcs) and char == lcs[matched]:
            retList.append((char, 1))
            matched += 1
        else:
            retList.append((char, 0))
    return retList
    def test_seperator(self):
        """Verify subsequences joined with a custom separator.

        The call passes ``sep=' '`` and the result must equal the set of
        space-prefixed strings listed below, plus the empty string.
        """
        spaced = (
            '', ' q e r', ' w r', ' q w r', ' e r', ' q r', ' e', ' q w',
            ' q', ' r', ' q e', ' w',
        )
        result = py_common_subseq.find_common_subsequences(
            PARTIALLY_SHARED_1, PARTIALLY_SHARED_2, sep=' ')
        self.assertEqual(result, set(spaced))
示例#8
0
 def lcs(s1, s2):
     """Return the length of the longest common subsequence of s1 and s2.

     The length is the largest ``len()`` among the items returned by
     ``py_common_subseq.find_common_subsequences``.
     """
     candidates = py_common_subseq.find_common_subsequences(s1, s2)
     return max(map(len, candidates))
示例#9
0
# Validate each row of `data` and, for rows with a matching e-mail, store the
# candidate name chosen by findCorrectName; then write the result to CSV.
# NOTE(review): assumes `data` is a pandas DataFrame with the default integer
# index (it exposes to_csv and is addressed by row number) - confirm.
for i, email in enumerate(data['email']):
    nameString = str(data.at[i, 'name'])
    if re.match(string=email, pattern=emailPattern) and len(nameString) > 0:
        # Write through .at: chained assignment (data[col][i] = value) may
        # update a temporary copy instead of `data` itself.
        data.at[i, 'validity'] = 1
        names = nameString.split(';')

        # A single candidate longer than 15 characters is flagged.
        if len(names) < 2 and len(names[0]) > 15:
            data.at[i, 'invalidNames'] = 1

        # One list of common-subsequence lengths per candidate name.
        lcsValues = list()
        for name in names:
            lenCommonSeq = [
                len(seq) for seq in py_common_subseq.find_common_subsequences(
                    name, email)
            ]
            lcsValues.append(lenCommonSeq)

        correctNameIndex = findCorrectName(lcsValues)

        data.at[i, 'CorrectedName'] = names[correctNameIndex]

    # Progress indicator: index of the row just processed.
    print(i)
data.to_csv(fileName + '_Corrected.csv')
# test_seq_1 = 'Singh Rishan'
# test_seq_2 = '*****@*****.**'
# #py_common_subseq.count_common_subsequences(test_seq_1, test_seq_2)
# subSeq = py_common_subseq.find_common_subsequences(test_seq_1, test_seq_2)
        if list[i] == item:
            return i


def get_indices(sequence, subsequence):
    """Return 1-based positions in ``sequence`` for the items of ``subsequence``.

    Items are located from the last to the first with ``list_rindex``; after
    each hit the search continues only in the part of ``sequence`` before
    that hit. The positions are returned in ``subsequence`` order.
    """
    positions = []
    remaining = sequence
    for element in reversed(subsequence):
        hit = list_rindex(remaining, element)
        positions.append(hit + 1)
        # Later (earlier-in-subsequence) items must lie before this hit.
        remaining = remaining[:hit]

    positions.reverse()
    return positions


# Time the computation of the longest common subsequence of two fixed integer
# lists and print it together with its 1-based positions in each list.
start_time = time()

s1 = [1000000, 2000000, 3000000, 4000000, 4000000, 1000000, 2000000, 3000000, 4000000, 1000000, 2000000]
s2 = [3000000, 1000000, 2000000, 4000000, 4000000, 1000000, 3000000, 1000000, 2000000, 3000000, 1000000]

# The library is called on the string form of the elements.
seq_1 = [str(value) for value in s1]
seq_2 = [str(value) for value in s2]

subsequences = py_common_subseq.find_common_subsequences(seq_1, seq_2, sep=',')

# Pick the longest joined string, drop the empty piece before the leading
# separator, and convert the remaining pieces back to integers.
lcs = [int(token) for token in max(subsequences, key=len).split(',')[1:]]

print(lcs)
print(get_indices(s1, lcs))
print(get_indices(s2, lcs))

print(time() - start_time)