def test_find_common_subsequences(self):
    """Check the set of common subsequences returned for three input pairs.

    Covers the NONE_SHARED, PARTIALLY_SHARED and ALL_SHARED fixture pairs.
    """
    find = py_common_subseq.find_common_subsequences

    # The NONE_SHARED pair is expected to yield only the empty subsequence.
    self.assertEqual(find(NONE_SHARED_1, NONE_SHARED_2), {''})

    expected_partial = {
        '', 'q', 'w', 'e', 'r',
        'qw', 'qe', 'qr', 'wr', 'er',
        'qwr', 'qer',
    }
    self.assertEqual(
        find(PARTIALLY_SHARED_1, PARTIALLY_SHARED_2), expected_partial)

    expected_all = {'', 'e', 's', 't', 'es', 'et', 'st', 'est'}
    self.assertEqual(find(ALL_SHARED_1, ALL_SHARED_2), expected_all)
def test_find_common_subsequences(self):
    """Verify find_common_subsequences against three fixture pairs.

    Each case lists the two input sequences and the exact set of common
    subsequences the call is expected to return.
    """
    cases = (
        # This pair is expected to share nothing but the empty subsequence.
        (NONE_SHARED_1, NONE_SHARED_2, {''}),
        (
            PARTIALLY_SHARED_1,
            PARTIALLY_SHARED_2,
            {
                '', 'q', 'w', 'e', 'r',
                'qw', 'qe', 'qr', 'wr', 'er',
                'qwr', 'qer',
            },
        ),
        (
            ALL_SHARED_1,
            ALL_SHARED_2,
            {'', 'e', 's', 't', 'es', 'et', 'st', 'est'},
        ),
    )
    for first, second, expected in cases:
        found = py_common_subseq.find_common_subsequences(first, second)
        self.assertEqual(found, expected)
def commonSubstrings(s1, s2):
    """Return the common subsequences of ``s1`` and ``s2`` that have length 3.

    The result is a list holding the length-3 entries produced by
    ``commonSubs.find_common_subsequences`` in the reverse of the order in
    which that collection is iterated.
    """
    candidates = commonSubs.find_common_subsequences(s1, s2)
    # Every kept entry has length 3, so ordering by length cannot move
    # anything; only the final reversal affects the result order.
    triples = [seq for seq in candidates if len(seq) == 3]
    triples.reverse()
    return triples
def get_longest_common_subsequence(test_seq, user_seq):
    """Return the longest common subsequence of two integer sequences.

    Both inputs are converted element-wise to strings and handed to
    ``cs.find_common_subsequences`` with ``sep=','``; every candidate comes
    back as one string in which each element is preceded by the separator
    (for example ``',1,2'``).

    Args:
        test_seq: Reference sequence of values convertible with ``int``.
        user_seq: Sequence to compare against ``test_seq``.

    Returns:
        A list of ints with the most elements among all common
        subsequences, or ``[]`` when the sequences share no element.
    """
    subsequences = cs.find_common_subsequences(
        [str(item) for item in test_seq],
        [str(item) for item in user_seq],
        sep=',',
    )
    # Rank candidates by how many elements they hold (one separator per
    # element), not by the character length of the joined string: with
    # len() a single wide number such as ',100000' would beat ',1,2'.
    longest = max(subsequences, key=lambda joined: joined.count(','))
    # The leading separator yields an empty first piece, which is dropped.
    # For the empty subsequence '' this leaves an empty list.
    return [int(token) for token in longest.split(',')[1:]]
def test_seperator(self):
    """Check the subsequences produced with a non-default separator.

    With ``sep=' '`` every element of a returned subsequence is expected to
    be preceded by a single space; the empty subsequence stays ``''``.
    """
    expected = {
        '',
        ' q', ' w', ' e', ' r',
        ' q w', ' q e', ' q r', ' w r', ' e r',
        ' q w r', ' q e r',
    }
    actual = py_common_subseq.find_common_subsequences(
        PARTIALLY_SHARED_1, PARTIALLY_SHARED_2, sep=' ')
    self.assertEqual(actual, expected)
def assess(origWord, writtenWord):
    """Mark each character of ``origWord`` as matched or unmatched.

    The longest common subsequence (LCS) of the two words is aligned
    greedily, left to right, against ``origWord``.

    Args:
        origWord: The reference word.
        writtenWord: The word to compare against the reference.

    Returns:
        A list with one ``(character, flag)`` tuple per character of
        ``origWord``, in order; ``flag`` is 1 when the character is part of
        the aligned LCS and 0 otherwise.
    """
    lcs = max(find_common_subsequences(origWord, writtenWord), key=len)
    matched = 0
    marks = []
    # Walk the whole original word. Stopping as soon as the LCS is used up
    # would silently drop the trailing unmatched characters from the
    # result instead of flagging them with 0.
    for char in origWord:
        if matched < len(lcs) and char == lcs[matched]:
            marks.append((char, 1))
            matched += 1
        else:
            marks.append((char, 0))
    return marks
def test_seperator(self):
    """Test subsequence generation with a custom separator.

    Calls find_common_subsequences with ``sep=' '`` and compares the result
    with the expected set, in which each element of a subsequence carries a
    leading space.
    """
    find = py_common_subseq.find_common_subsequences
    singles = {' q', ' w', ' e', ' r'}
    pairs = {' q w', ' q e', ' q r', ' w r', ' e r'}
    triples = {' q w r', ' q e r'}
    # The empty subsequence is always part of the expected result.
    expected = {''} | singles | pairs | triples

    self.assertEqual(
        find(PARTIALLY_SHARED_1, PARTIALLY_SHARED_2, sep=' '), expected)
def lcs(s1, s2):
    """Return the length of the longest common subsequence of s1 and s2.

    The value is the largest ``len`` among everything returned by
    ``py_common_subseq.find_common_subsequences(s1, s2)``.
    """
    candidates = py_common_subseq.find_common_subsequences(s1, s2)
    return max(map(len, candidates))
# For every row: validate the e-mail, then pick the candidate name whose
# common subsequences with the e-mail address score best.
# NOTE(review): nesting was reconstructed from a whitespace-collapsed line;
# confirm the placement of print(i) against the original file.
for i, email in zip(range(len(data['validity'])), data['email']):
    nameString = str(data['name'][i])
    if re.match(emailPattern, email) and nameString:
        data['validity'][i] = 1
        names = nameString.split(';')
        # A single candidate longer than 15 characters is flagged.
        if len(names) == 1 and len(names[0]) > 15:
            data['invalidNames'][i] = 1
        # One list of common-subsequence lengths per candidate name.
        lcsValues = []
        for name in names:
            lenCommonSeq = list(
                map(len,
                    py_common_subseq.find_common_subsequences(name, email)))
            lcsValues.append(lenCommonSeq)
        correctNameIndex = findCorrectName(lcsValues)
        data['CorrectedName'][i] = names[correctNameIndex]
    print(i)

data.to_csv(fileName + '_Corrected.csv')

# test_seq_1 = 'Singh Rishan'
# test_seq_2 = '*****@*****.**'
# #py_common_subseq.count_common_subsequences(test_seq_1, test_seq_2)
# subSeq = py_common_subseq.find_common_subsequences(test_seq_1, test_seq_2)
if list[i] == item: return i def get_indices(sequence, subsequence): indices = [] for i in reversed(range(len(subsequence))): list_index = list_rindex(sequence, subsequence[i]) indices.append(list_index + 1) sequence = sequence[:list_index] return list(reversed(indices)) start_time = time() s1 = [1000000, 2000000, 3000000, 4000000, 4000000, 1000000, 2000000, 3000000, 4000000, 1000000, 2000000] seq_1 = list(map(str, s1)) s2 = [3000000, 1000000, 2000000, 4000000, 4000000, 1000000, 3000000, 1000000, 2000000, 3000000, 1000000] seq_2 = list(map(str, s2)) subsequences = py_common_subseq.find_common_subsequences(seq_1, seq_2, sep=',') lcs = list(map(int, max(subsequences, key=len).split(',')[1:])) print(lcs) print(get_indices(s1, lcs)) print(get_indices(s2, lcs)) print(time() - start_time)