def get_similarity(self, a, b):
    """Return the longest-common-subsequence length of ``a`` and ``b``.

    Each input is split into single-character items, stored in a NumPy
    ``U1`` array and reinterpreted as 32-bit unsigned integers, because
    ``mlpy.lcs_std`` works on integer sequences.

    Args:
        a: First sequence of characters (e.g. a string).
        b: Second sequence of characters.

    Returns:
        The length reported by ``mlpy.lcs_std``.
    """
    codes_a = np.array(list(a), dtype='U1').view(np.uint32)
    codes_b = np.array(list(b), dtype='U1').view(np.uint32)
    # lcs_std returns (length, path); the alignment path is not needed here.
    lcs_length, _ = mlpy.lcs_std(codes_a, codes_b)
    return lcs_length
def longest(ovv, cand):
    """Return a longest-common length for ``ovv`` and ``cand``.

    Both strings are ASCII-encoded (dropping characters that cannot be
    encoded), lower-cased and mapped through the module-level ``char_map``
    so that ``mlpy.lcs_std`` can compute the LCS length.

    If anything in that path raises, the inputs and the error are printed
    and the function falls back to ``difflib``.
    NOTE: the fallback measures the longest *contiguous* matching block,
    not a subsequence, so the two paths can disagree.

    Args:
        ovv: First string.
        cand: Second string.

    Returns:
        The LCS length from ``mlpy``, or the size of the longest matching
        block from ``difflib`` when the primary path fails.
    """
    try:
        ovv_codes = []
        for ch in ovv.encode('ascii', "ignore").lower():
            ovv_codes.append(char_map[ch])
        cand_codes = []
        for ch in cand.encode('ascii', "ignore").lower():
            cand_codes.append(char_map[ch])
        return mlpy.lcs_std(ovv_codes, cand_codes)[0]
    except Exception as e:
        # Best-effort: report the failure and degrade to the stdlib matcher.
        print(ovv, cand, e)
        matcher = difflib.SequenceMatcher(None, ovv, cand)
        block = matcher.find_longest_match(0, len(ovv), 0, len(cand))
        return block.size
def caculLCS():
    """Compute and record the LCS length for every unordered pair of label files.

    The file paths come from ``GetLabelFile()``. Each path is converted to a
    sequence with ``label2number`` and named after component 4 of the path
    when split on backslashes (assumes Windows-style paths with at least
    five components -- TODO confirm against the data layout). For every pair
    ``i < j`` the row ``[name_i, name_j, length]`` is passed to
    ``Write_LCS_Ans``, in the same pair order as a nested ``i``/``j`` loop.

    Every file is converted exactly once up front rather than once per
    pair, so a malformed path now fails before any row is written.
    """
    labelPath = GetLabelFile()
    if len(labelPath) < 2:
        # No pairs to compare; do not touch the files at all.
        return

    # Convert each file once instead of re-running label2number for both
    # members of every pair (O(n) conversions instead of O(n^2)).
    entries = [(label2number(path), path.split('\\')[4]) for path in labelPath]

    for i, (seqi, namei) in enumerate(entries):
        for seqj, namej in entries[i + 1:]:
            # lcs_std returns (length, path); only the length is recorded.
            length, _ = mlpy.lcs_std(seqi, seqj)
            Write_LCS_Ans([namei, namej, length])
def main():
    """Print subsequences of the input aligned with 1..n ascending and descending.

    Reads the file named by ``sys.argv[1]``: the first line holds ``n`` and
    the second line holds whitespace-separated integers. The integers are
    aligned with ``1..n`` and with ``n..1`` using ``mlpy.lcs_std``; the
    matched input elements are printed, one space-separated line each
    (presumably the longest increasing / decreasing subsequences when the
    input is a permutation of ``1..n`` -- confirm with the task statement).

    Output is ``n`` on the first line, then the two subsequences. Written so
    it runs unchanged on Python 2 and Python 3.
    """
    with open(sys.argv[1]) as handle:
        # Split once and reuse; the original split the content twice.
        lines = handle.read().split('\n')

    n = int(lines[0])
    # A real list: the values are indexed below, which a Python 3 ``map``
    # iterator does not support.
    input_list = [int(token) for token in lines[1].split()]
    print(n)

    x_incre = list(range(1, n + 1))
    x_decre = x_incre[::-1]

    # lcs_std returns (length, (path_x, path_y)); [1][1] selects the indices
    # into the second argument, i.e. positions in input_list.
    incre_path = mlpy.lcs_std(x_incre, input_list)[1][1]
    decre_path = mlpy.lcs_std(x_decre, input_list)[1][1]

    incre_sub_list = [input_list[i] for i in incre_path]
    decre_sub_list = [input_list[i] for i in decre_path]

    print(' '.join(str(value) for value in incre_sub_list))
    print(' '.join(str(value) for value in decre_sub_list))
def caculLCS():
    """Compute and record the LCS length for every unordered pair of label files.

    The file paths come from ``GetLabelFile()``. Each path is converted to a
    sequence with ``label2number`` and named after component 4 of the path
    when split on backslashes (assumes Windows-style paths with at least
    five components -- TODO confirm against the data layout). For every pair
    ``i < j`` the row ``[name_i, name_j, length]`` is passed to
    ``Write_LCS_Ans``, in the same pair order as a nested ``i``/``j`` loop.

    Every file is converted exactly once up front rather than once per
    pair, so a malformed path now fails before any row is written.
    """
    labelPath = GetLabelFile()
    if len(labelPath) < 2:
        # No pairs to compare; do not touch the files at all.
        return

    # Convert each file once instead of re-running label2number for both
    # members of every pair (O(n) conversions instead of O(n^2)).
    entries = [(label2number(path), path.split('\\')[4]) for path in labelPath]

    for i, (seqi, namei) in enumerate(entries):
        for seqj, namej in entries[i + 1:]:
            # lcs_std returns (length, path); only the length is recorded.
            length, _ = mlpy.lcs_std(seqi, seqj)
            Write_LCS_Ans([namei, namej, length])
def commonSubseq(x, y, table):
    """Return the longest common subsequence of ``x`` and ``y`` as a string.

    Args:
        x: First sequence; its elements must be keys of ``table`` and
            strings (they are joined into the result).
        y: Second sequence; its elements must be keys of ``table``.
        table: Mapping from element to the integer code handed to
            ``mlpy.lcs_std``.

    Returns:
        The elements of ``x`` that take part in the LCS, concatenated in
        order.

    Raises:
        KeyError: If an element of ``x`` or ``y`` is missing from ``table``.
    """
    items = list(x)  # materialise so matched positions can be indexed
    codes_x = [table[item] for item in items]
    codes_y = [table[item] for item in y]

    _, path = mlpy.lcs_std(codes_x, codes_y)

    # path[0] holds the matched positions in the first sequence. Read the
    # elements straight from x instead of scanning the whole table for keys
    # with a matching code: that scan was O(len(table)) per match and
    # appended every key sharing a code when the table is not one-to-one.
    return ''.join(items[i] for i in path[0])
def LCS_dist(self, xx, X):
    """Return an LCS-based distance between one test and one training sequence.

    The normalised similarity ``length / sqrt(len(test) * len(train))`` is
    computed from ``mlpy.lcs_std`` and inverted so that more similar
    sequences get a smaller value.
    Formula taken from section 4.1.2 in paper:
    Anomaly Detection for Discrete Sequences: A Survey.

    Args:
        xx: Testing instance; ``xx[0]`` is the index into the test set.
        X: Training instance (passed one by one); ``X[0]`` is the index
            into the training set.

    Returns:
        ``1 / similarity`` as a float, or the sentinel ``9999.0`` when the
        similarity is zero or either sequence is empty.
    """
    import mlpy

    i, j = int(xx[0]), int(X[0])  # i indexes the test set, j the training set
    test_seq = self.__Xtest[i]
    train_seq = self.__Xtrain[j]

    norm = np.sqrt(len(test_seq) * len(train_seq))
    if norm == 0:
        # An empty sequence shares nothing with anything; without this guard
        # the ratio below would be 0/0 and a NaN would be returned.
        return 9999.0

    length, _ = mlpy.lcs_std(test_seq, train_seq)
    similarity = float(length) / norm
    if similarity != 0:
        return float(1 / float(similarity))
    return 9999.0
def string_lcs(self, x, y):
    """Run ``mlpy.lcs_std`` on two strings.

    Each character is converted with ``ord`` so the sequences are integer
    lists.

    Args:
        x: First string.
        y: Second string.

    Returns:
        Whatever ``mlpy.lcs_std`` returns for the two integer lists.
    """
    codes_x = list(map(ord, x))
    codes_y = list(map(ord, y))
    return mlpy.lcs_std(codes_x, codes_y)
# Find acronyms in ``allwords`` and guess their expansions from nearby words.
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# source line -- confirm loop boundaries against the original file.

# Pass 1: a word counts as an acronym when it is all upper-case and longer
# than two characters; keep its alphanumeric characters and its position.
for index, word in enumerate(allwords):
    if word.isupper() and len(word) > 2:
        acronym_list.append(''.join(e for e in word if e.isalnum()))
        acronym_indices.append(index)

x = PrettyTable(["Acronym", "Definition"])

# Pass 2: align each acronym's letters with the first letters of the
# surrounding words.
for index, acronym in enumerate(acronym_list):
    # Code points of the upper-cased acronym letters.
    acronym_split = []
    for c in acronym:
        acronym_split.append(ord(c.upper()))
    # Words from 10 before to 10 after the acronym; minString / maxString
    # presumably clamp the bounds to the list -- verify their definitions.
    word_window = [ word for word in allwords[minString(acronym_indices[index] - 10):maxString(acronym_indices[index] + 10, len(allwords))] ]
    # First letters are NOT upper-cased, so only words that start with a
    # capital letter can match the (upper-case) acronym letters.
    letter_window = [ord(word[0]) for word in word_window]
    length, path = mlpy.lcs_std(letter_window, acronym_split)
    printacronym = ''.join(chr(c).upper() for c in acronym_split)
    printfull = []  # overwritten below before use
    # Matched positions within letter_window (the first lcs_std argument).
    word_path = path[0]
    print(word_path)
    # Take every word from the first to the last matched position, inclusive.
    # NOTE(review): word_path[0] raises IndexError if the path is empty
    # (no letter matched) -- confirm whether that can happen here.
    printfull = [word for word in word_window[word_path[0]:word_path[-1] + 1]]
    printfull = ' '.join(printfull)
    x.add_row([printacronym, printfull])

print(x)
#print acronym_list
#print acronym_indices