Пример #1
0
    def get_similarity(self, a, b):
        """Return the LCS length that ``mlpy.lcs_std`` reports for ``a`` and ``b``.

        Each input is broken into single characters, and those characters are
        reinterpreted as unsigned 32-bit integers so that ``mlpy.lcs_std`` can
        compare them as numeric sequences.
        """
        # An earlier variant delegated to self.lcs(a, b); it stays disabled.

        def to_codepoints(text):
            # A 'U1' array stores one 4-byte character per element, so viewing
            # the same buffer as uint32 gives one integer per character.
            chars = np.array(list(text), dtype='U1')
            return chars.view(np.uint32)

        codes_a = to_codepoints(a)
        codes_b = to_codepoints(b)
        lcs_length, _ = mlpy.lcs_std(codes_a, codes_b)
        return lcs_length
Пример #2
0
    def get_similarity(self, a, b):
        """Return the longest-common-subsequence length of ``a`` and ``b``.

        Both inputs are converted to arrays of single characters and then
        viewed as unsigned 32-bit integers, which is the numeric form passed
        to ``mlpy.lcs_std``.  Only the length part of its result is returned.
        """
        # A previous implementation called self.lcs(a, b) instead.
        encoded = []
        for text in (a, b):
            # One 'U1' element occupies four bytes, so the uint32 view maps
            # each character to a single integer.
            as_chars = np.array(list(text), dtype='U1')
            encoded.append(as_chars.view(np.uint32))

        lcs_length, _ = mlpy.lcs_std(encoded[0], encoded[1])
        return lcs_length
Пример #3
0
def longest(ovv, cand):
    """Return a common-sequence length for ``ovv`` and ``cand``.

    The primary path ASCII-encodes and lower-cases both inputs, maps every
    symbol through ``char_map`` and returns the LCS length computed by
    ``mlpy.lcs_std``.  If anything in that path raises, the inputs and the
    exception are printed and the size of the longest contiguous match found
    by ``difflib.SequenceMatcher`` is returned instead.
    """
    def to_codes(text):
        # Characters that cannot be ASCII-encoded are dropped ("ignore").
        ascii_text = text.encode('ascii', "ignore").lower()
        return [char_map[symbol] for symbol in ascii_text]

    try:
        ovv_codes = to_codes(ovv)
        cand_codes = to_codes(cand)
        return mlpy.lcs_std(ovv_codes, cand_codes)[0]
    except Exception as e:
        print(ovv,cand,e)

    # Best-effort fallback: longest contiguous block shared by the raw inputs.
    matcher = difflib.SequenceMatcher(None, ovv, cand)
    block = matcher.find_longest_match(0, len(ovv), 0, len(cand))
    return block[2]
Пример #4
0
def caculLCS():
    """Write the LCS length for every pair of label files.

    The paths come from ``GetLabelFile()``.  Each path is converted to a
    sequence with ``label2number`` and every pair (i < j, in list order) is
    compared with ``mlpy.lcs_std``.  One record ``[name_i, name_j, length]``
    per pair is handed to ``Write_LCS_Ans``.
    """
    from itertools import combinations

    labelPath = GetLabelFile()
    if len(labelPath) < 2:
        # Nothing to compare; do not parse a lone file for no reason.
        return

    # Convert each file exactly once instead of once per pair it takes part
    # in.  The name is the fifth backslash-separated component of the path.
    entries = [(label2number(path), path.split('\\')[4]) for path in labelPath]

    # combinations() yields pairs in the same (i, j) order as nested loops.
    for (seqi, namei), (seqj, namej) in combinations(entries, 2):
        length, _ = mlpy.lcs_std(seqi, seqj)
        Write_LCS_Ans([namei, namej, length])
Пример #5
0
def main():
    """Print increasing and decreasing subsequences of the input list.

    The file named by ``sys.argv[1]`` holds ``n`` on its first line and
    whitespace-separated integers on its second line.  The list is aligned
    with ``1..n`` and with ``n..1`` using ``mlpy.lcs_std``; the matched input
    values are printed on one line each, after a line containing ``n``.
    Presumably the input is a permutation of ``1..n`` (not checked here), in
    which case the two lines are a longest increasing and a longest
    decreasing subsequence.
    """
    with open(sys.argv[1]) as handle:
        lines = handle.read().split('\n')
    n = int(lines[0])
    # Build a real list: the values are indexed below, which a lazy ``map``
    # object on Python 3 would not support.
    input_list = [int(token) for token in lines[1].split()]
    print(n)

    x_incre = list(range(1, n + 1))
    x_decre = x_incre[::-1]

    # Per the mlpy documentation, lcs_std returns (length, path) and path is
    # a pair of index arrays; the second one indexes the second sequence.
    incre_path = mlpy.lcs_std(x_incre, input_list)[1][1]
    decre_path = mlpy.lcs_std(x_decre, input_list)[1][1]

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(' '.join(str(input_list[i]) for i in incre_path))
    print(' '.join(str(input_list[j]) for j in decre_path))
    '''
def caculLCS():
    """Compute and record the pairwise LCS length of all label files.

    ``GetLabelFile()`` supplies the paths.  ``label2number`` turns each path
    into a sequence, ``mlpy.lcs_std`` compares every pair (i < j, in list
    order), and ``Write_LCS_Ans`` receives ``[name_i, name_j, length]`` for
    each pair.
    """
    from itertools import combinations

    labelPath = GetLabelFile()
    if len(labelPath) < 2:
        # With fewer than two files there is no pair to compare.
        return

    # Each file is converted once up front rather than again for every pair.
    # The reported name is the fifth backslash-separated path component.
    sequences = [label2number(path) for path in labelPath]
    names = [path.split('\\')[4] for path in labelPath]

    # Index pairs come out in the same order nested i < j loops would give.
    for i, j in combinations(range(len(labelPath)), 2):
        length, _ = mlpy.lcs_std(sequences[i], sequences[j])
        Write_LCS_Ans([names[i], names[j], length])
Пример #7
0
def commonSubseq(x, y, table):
    """Return the longest common subsequence of ``x`` and ``y`` as a string.

    ``table`` maps every symbol of ``x`` and ``y`` to the value handed to
    ``mlpy.lcs_std``.  The result joins the symbols of ``x`` found at the
    matched positions.

    Raises ``KeyError`` if a symbol of ``x`` or ``y`` is missing from
    ``table``.
    """
    symbols = list(x)
    str1 = [table[symbol] for symbol in symbols]
    str2 = [table[symbol] for symbol in y]
    _, path = mlpy.lcs_std(str1, str2)
    # path[0] indexes into the first sequence, so the matched symbol is
    # simply symbols[i].  Searching the table in reverse for the encoded
    # value would emit every key sharing that value, and scan the whole
    # table once per match.
    return ''.join(symbols[i] for i in path[0])
Пример #8
0
    def LCS_dist(self, xx, X):
        """Return the inverse normalised LCS similarity of two stored sequences.

        ``xx`` is the testing instance and ``X`` the training instance
        (passed one by one); only their first element is used, as an index
        into ``self.__Xtest`` and ``self.__Xtrain`` respectively.

        The similarity is ``length / sqrt(len(test) * len(train))``, the
        formula from section 4.1.2 of "Anomaly Detection for Discrete
        Sequences: A Survey".  Its reciprocal is returned, or ``9999.0`` when
        the similarity is zero or either sequence is empty.
        """
        no_overlap = 9999.0

        # extract indices, i for testing, j for training
        i, j = int(xx[0]), int(X[0])
        test_seq = self.__Xtest[i]
        train_seq = self.__Xtrain[j]

        # An empty sequence would make the normaliser zero; 0/0 gives NaN,
        # which would slip past the zero check below and be returned.
        if len(test_seq) == 0 or len(train_seq) == 0:
            return no_overlap

        import mlpy

        length, _ = mlpy.lcs_std(test_seq, train_seq)
        dist_lcs = float(length) / np.sqrt(len(test_seq) * len(train_seq))

        if dist_lcs == 0:
            return no_overlap
        return float(1 / float(dist_lcs))
Пример #9
0
 def string_lcs(self, x, y):
     """Run ``mlpy.lcs_std`` on the code points of ``x`` and ``y``.

     Every item of each input is converted with ``ord``; the result of
     ``mlpy.lcs_std`` on the two integer lists is returned unchanged.
     """
     codes_x = list(map(ord, x))
     codes_y = list(map(ord, y))
     return mlpy.lcs_std(codes_x, codes_y)
# Collect candidate acronyms: all-uppercase words longer than two characters.
# The stored text keeps only alphanumeric characters; the stored index is the
# word's position in allwords.
for index, word in enumerate(allwords):
    if len(word) <= 2 or not word.isupper():
        continue
    cleaned = ''.join(ch for ch in word if ch.isalnum())
    acronym_list.append(cleaned)
    acronym_indices.append(index)

# Build a two-column table pairing each acronym with the run of nearby words
# whose initial letters align with the acronym's letters.
x = PrettyTable(["Acronym", "Definition"])
for index, acronym in enumerate(acronym_list):
    # Code points of the upper-cased acronym letters.
    acronym_split = [ord(c.upper()) for c in acronym]

    # Words around the acronym (offsets -10 / +10).  minString and maxString
    # are defined elsewhere; presumably they clamp the bounds - TODO confirm.
    position = acronym_indices[index]
    word_window = list(
        allwords[minString(position - 10):maxString(position + 10,
                                                    len(allwords))])
    letter_window = [ord(word[0]) for word in word_window]

    length, path = mlpy.lcs_std(letter_window, acronym_split)
    printacronym = ''.join(chr(c).upper() for c in acronym_split)

    # path[0] indexes into letter_window, i.e. into word_window.
    word_path = path[0]
    print(word_path)
    if len(word_path) > 0:
        # Definition = every word from the first to the last matched one.
        printfull = ' '.join(word_window[word_path[0]:word_path[-1] + 1])
    else:
        # No initial letter in the window matched; indexing word_path would
        # raise IndexError, so list the acronym with an empty definition.
        printfull = ''
    x.add_row([printacronym, printfull])
print(x)

#print acronym_list
#print acronym_indices