def test_lcs_kosa_koza(self):
    """Check that 'kosa' vs 'koza' yields the single subsequence k, o, a."""
    expected = [['k', 'o', 'a']]
    actual = lcs.lcs('kosa', 'koza')
    self.assertEqual(actual, expected)
def test_lcs_abcd_acbd(self):
    """Check that 'abcd' vs 'acbd' yields both subsequences, in this order."""
    expected = [
        ['a', 'c', 'd'],
        ['a', 'b', 'd'],
    ]
    actual = lcs.lcs('abcd', 'acbd')
    self.assertEqual(actual, expected)
def test_lcs_empty_acbd(self):
    """Check that an empty first sequence yields one empty subsequence."""
    expected = [[]]
    actual = lcs.lcs('', 'acbd')
    self.assertEqual(actual, expected)
def solve2(cls, Str, len1, len2):
    """Repeatedly cut the match reported by ``lcs`` out of both strings.

    ``Str`` is a two-element mutable sequence of strings and is modified in
    place.  ``lcs`` is expected to return ``(length, start_in_first,
    start_in_second)``.  Each round removes the matched span from both
    strings, leaving a ``'$'`` marker where the cut joins two characters that
    are not already separated by one.  Rounds stop once a match is not longer
    than the minimum match length (4); a match shorter than 4 is not counted.

    ``cls``, ``len1`` and ``len2`` are not used.

    Returns the total length of all removed matches.
    """
    min_match = 4

    def cut(text, start, length):
        # Drop text[start:start + length]; insert '$' unless the cut reaches
        # the end of the string or the next character already is a marker.
        stop = start + length
        if stop < len(text) and text[stop] != '$':
            return text[0:start] + "$" + text[stop:]
        return text[0:start] + text[stop:]

    total = 0
    while True:
        k, s, t = lcs(Str[0], Str[1])
        if k < min_match:
            break
        Str[0] = cut(Str[0], s, k)
        Str[1] = cut(Str[1], t, k)
        total += k
        if k <= min_match:
            break
    return total
def lcs_call(self, widget):
    """Run ``lcs`` on the two text entries and show the result in a dialog.

    ``widget`` is accepted but not used.
    """
    left_text = self.txt_grpa.get_text()
    right_text = self.txt_grpb.get_text()
    outcome = lcs(left_text, right_text)

    dialog = DialogWindow(self, outcome)
    dialog.run()
    dialog.destroy()
def main(filepath):
    """Report how many distinct dataset entries contain each frequent pattern.

    Reads the ``"dataset"`` list from the JSON file at ``filepath``, computes
    ``lcs.lcs`` for every pair of distinct entries, keeps the common
    subsequences of length >= 3, feeds their counts to ``suffixtree2.ST`` and
    finally prints, for each pattern returned by ``print_tree``, the number of
    distinct entries for which ``check(entry, pattern)`` is true.
    """
    import itertools

    # Close the file deterministically instead of leaking the handle.
    with open(filepath) as handle:
        dataset = json.load(handle)["dataset"]

    original = {}
    for d in dataset:
        original[d] = original.get(d, 0) + 1

    # A real list: dict views cannot be indexed or paired on Python 3.
    keys = list(original)

    sequences = {}
    for first, second in itertools.combinations(keys, 2):
        for l in lcs.lcs(first, second):
            if len(l) < 3:
                continue
            str_result = "".join(l)
            sequences[str_result] = sequences.get(str_result, 0) + 1

    h = suffixtree2.ST(sequences)
    alldata = h.print_tree(h.get(), 3)

    result = {}
    for k, v in sorted(alldata.items(), key=lambda x: x[1], reverse=True):
        # `== True` kept on purpose: check() may return non-bool values.
        result[k] = sum(1 for s in keys if check(s, k) == True)

    for k, v in sorted(result.items(), key=lambda x: x[1], reverse=True):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(k + " was included in " + str(v) + " data.")
def compare_value(value1, value2):
    """Return a distance in [0.0, 1.0] between two values based on their LCS.

    * both ``None``            -> 0.0 (treated as equal)
    * exactly one ``None``     -> 1.0 (treated as completely different)
    * both empty               -> 0.0 (nothing differs; avoids dividing by 0)
    * otherwise                -> ``1 - len(lcs) / max(len(value1), len(value2))``
    """
    if value1 is None and value2 is None:
        return 0.0
    if value1 is None or value2 is None:
        return 1.0

    longest = max(len(value1), len(value2))
    if longest == 0:
        # Two empty sequences: the original expression raised
        # ZeroDivisionError here.
        return 0.0

    common = len(lcs.lcs(lcs.path(value1, value2)))
    return 1.0 - (float(common) / longest)
def alignchildren(t1, t2, M, E, w, x):
    """Align the children of ``w`` with those of its partner ``x``.

    See figure 9 in reference.

    Children whose partner (per the matching ``M``) sits under the other
    node are collected on both sides; the pairs on the LCS path are marked
    in order, and every remaining matched pair produces a move recorded in
    ``E`` and applied to ``t1``.  ``t2`` is not used.
    """
    for node in w.elements():
        node.inorder = False
    for node in x.elements():
        node.inorder = False

    def equal(a, b):
        return (a, b) in M

    s1 = [
        child for child in w.elements()
        if child in M.left and M.left[child].parent == x
    ]
    s2 = [
        child for child in x.elements()
        if child in M.right and M.right[child].parent == w
    ]

    S = [(s1[i], s2[j]) for i, j in lcs.lcs(lcs.path(s1, s2, equal))]
    for a, b in S:
        a.inorder = True
        b.inorder = True

    for a in s1:
        for b in s2:
            if (a, b) not in M or (a, b) in S:
                continue
            k = findpos(M, b)
            E.move(a.path(), w.path(), k)
            t1.move(a.path(), w.path(), str(k))
            a.inorder = True
            b.inorder = True
def alignchildren(t1, t2, M, E, w, x):
    """Put the matched children of ``w`` and ``x`` into the same order.

    See figure 9 in reference.

    Pairs lying on the LCS path of the two child lists are flagged as in
    order.  Any other pair present in the matching ``M`` triggers a move that
    is both logged in ``E`` and performed on ``t1``.  ``t2`` is not used.
    """
    for element in w.elements():
        element.inorder = False
    for element in x.elements():
        element.inorder = False

    def equal(a, b):
        return (a, b) in M

    s1 = []
    for child in w.elements():
        if child in M.left and M.left[child].parent == x:
            s1.append(child)
    s2 = []
    for child in x.elements():
        if child in M.right and M.right[child].parent == w:
            s2.append(child)

    S = []
    for left_idx, right_idx in lcs.lcs(lcs.path(s1, s2, equal)):
        S.append((s1[left_idx], s2[right_idx]))
    for a, b in S:
        a.inorder = True
        b.inorder = True

    for a in s1:
        for b in s2:
            pair = (a, b)
            if pair in M and pair not in S:
                k = findpos(M, b)
                E.move(a.path(), w.path(), k)
                t1.move(a.path(), w.path(), str(k))
                a.inorder = True
                b.inorder = True
def sorted_active_runnable(self, query, hwnds):
    """Return the tasks for ``hwnds`` ordered by relevance to ``query``.

    Holds ``self.mutex`` for the whole call.  With an empty query the tasks
    are ordered by ``usetime``, most recent first.  Otherwise each task is
    ranked by ``task.query.distance_to(title) * 10 ** len(query)``; for the
    leading run of tasks whose score equals the first task's score, the
    length of ``lcs(query, title)`` is subtracted as a tie-breaker.

    Returns an empty list when ``hwnds`` is empty (the original raised
    IndexError in that case when ``query`` was non-empty).
    """
    with QMutexLocker(self.mutex):
        # update query and collect active ones
        self._refresh_tasks(hwnds, query)
        active_tasks = [self.tasks[h] for h in hwnds]
        if not active_tasks:
            return []

        # sort by last use
        if not query:
            return sorted(active_tasks, key=lambda t: t.usetime, reverse=True)

        titles = [task.fullname.lower() for task in active_tasks]
        scale = 10 ** len(query)
        ds = [
            task.query.distance_to(title) * scale
            for task, title in zip(active_tasks, titles)
        ]

        # Only the leading run equal to the first score gets the LCS bonus.
        best = ds[0]
        for i, title in enumerate(titles):
            if ds[i] != best:
                break
            ds[i] -= len(lcs(query, title))

        # sorted() is stable, so equal scores keep their incoming order.
        order = sorted(range(len(active_tasks)), key=lambda i: ds[i])
        return [active_tasks[i] for i in order]
def test_lcs(self):
    """Check both the LCS length and the recovered LCS for a fixed pair."""
    x, y = 'shirish', 'sigdyal'

    array, length = length_lcs(x, y, len(x), len(y))
    self.assertEqual(length, 2)

    self.assertListEqual(lcs(x, y, array, length), ['s', 'i'])
def image_fisher_featurize(im_key, gmm_sift, gmm_lcs, pca_sift, pca_lcs):
    """Download one image and build its stacked SIFT + LCS Fisher vector.

    The image is fetched from the ``pictureweb`` bucket to ``/tmp/img.jpg``
    and read twice: flattened for ``sift`` and unflattened for ``lcs``.  Both
    descriptor sets are projected with the transposed PCA matrices before
    being passed to ``fisher.fisher_vector_features``.

    Returns ``(features, elapsed_seconds)``.
    """
    started = time.time()
    local_path = "/tmp/img.jpg"

    bucket = boto3.resource('s3').Bucket("pictureweb")
    bucket.download_file(im_key, local_path)

    flat_image = scipy.misc.imread(local_path, flatten=True)
    sift_descs = sift(flat_image).dot(pca_sift.T)
    sift_features = fisher.fisher_vector_features(
        sift_descs.astype('float32'), *gmm_sift)

    full_image = scipy.misc.imread(local_path)
    lcs_descs = lcs(full_image).reshape(-1, 96).dot(pca_lcs.T)
    lcs_features = fisher.fisher_vector_features(
        lcs_descs.astype('float32'), *gmm_lcs)

    out_features = np.hstack((sift_features, lcs_features)).T
    finished = time.time()
    return out_features, finished - started
def solve2(Str, len1, len2):
    """Sum the lengths of the matches repeatedly cut out of two strings.

    Works on local copies of ``Str[0]`` and ``Str[1]``; ``Str`` itself is
    left untouched.  ``lcs`` is expected to return ``(length,
    start_in_first, start_in_second)``.  Each round removes the matched span
    from both working strings, leaving a ``'$'`` marker where the cut joins
    two characters not already separated by one.  Rounds stop once a match
    is not longer than the minimum match length (4); a match shorter than 4
    is not counted.

    Fix: the search now runs on the working copies.  The original searched
    the unmodified ``Str`` every round, so any match longer than 4 was found
    again and again and the loop never terminated.  The unused per-round
    ``dp`` allocation (2 x 65536 ints) is gone as well.

    ``len1`` and ``len2`` are not used.  Prints the number of rounds and a
    timing placeholder (always 0) before returning the total.
    """
    MML = 4

    def excise(code, start, length):
        # Drop code[start:start + length]; insert '$' unless the cut reaches
        # the end of the string or the next character already is a marker.
        stop = start + length
        if stop < len(code) and code[stop] != '$':
            return code[0:start] + "$" + code[stop:]
        return code[0:start] + code[stop:]

    # Strings are immutable, so plain rebinding is a sufficient "copy".
    code0 = Str[0]
    code1 = Str[1]

    ans = 0
    whileTime = 0
    td = 0
    while True:
        whileTime += 1
        k, s, t = lcs(code0, code1)
        if k < MML:
            break
        code0 = excise(code0, s, k)
        code1 = excise(code1, t, k)
        ans += k
        if k <= MML:
            break

    # Single-argument print() behaves the same on Python 2 and 3.
    print(whileTime)
    print(td)
    return ans
def solve2(Str, len1, len2):
    """Sum the lengths of the matches repeatedly cut out of two strings.

    Works on local copies of ``Str[0]`` and ``Str[1]``; ``Str`` itself is
    left untouched.  ``lcs`` is expected to return ``(length,
    start_in_first, start_in_second)``.  Each round removes the matched span
    from both working strings, leaving a ``'$'`` marker where the cut joins
    two characters not already separated by one.  Rounds stop once a match
    is not longer than the minimum match length (4); a match shorter than 4
    is not counted.

    Fix: the search now runs on the working copies.  The original searched
    the unmodified ``Str`` every round, so any match longer than 4 was found
    again and again and the loop never terminated.  The unused per-round
    ``dp`` allocation (2 x 65536 ints) is gone as well.

    ``len1`` and ``len2`` are not used.  Prints the number of rounds and a
    timing placeholder (always 0) before returning the total.
    """
    MML = 4

    def excise(code, start, length):
        # Drop code[start:start + length]; insert '$' unless the cut reaches
        # the end of the string or the next character already is a marker.
        stop = start + length
        if stop < len(code) and code[stop] != '$':
            return code[0:start] + "$" + code[stop:]
        return code[0:start] + code[stop:]

    # Strings are immutable, so plain rebinding is a sufficient "copy".
    code0 = Str[0]
    code1 = Str[1]

    ans = 0
    whileTime = 0
    td = 0
    while True:
        whileTime += 1
        k, s, t = lcs(code0, code1)
        if k < MML:
            break
        code0 = excise(code0, s, k)
        code1 = excise(code1, t, k)
        ans += k
        if k <= MML:
            break

    # Single-argument print() behaves the same on Python 2 and 3.
    print(whileTime)
    print(td)
    return ans
def solve2(Str, len1, len2):
    """Repeatedly cut the match reported by ``lcs`` out of both strings.

    ``Str`` is a two-element mutable sequence of strings and is modified in
    place.  ``lcs`` is expected to return ``(length, start_in_first,
    start_in_second)``.  Each round removes the matched span from both
    strings, leaving a ``'$'`` marker where the cut joins two characters that
    are not already separated by one.  Rounds stop once a match is not longer
    than the minimum match length (4); a match shorter than 4 is not counted.

    ``len1`` and ``len2`` are not used.

    Returns the total length of all removed matches.
    """
    min_match = 4

    def cut(text, start, length):
        # Drop text[start:start + length]; insert '$' unless the cut reaches
        # the end of the string or the next character already is a marker.
        stop = start + length
        if stop < len(text) and text[stop] != '$':
            return text[0:start] + "$" + text[stop:]
        return text[0:start] + text[stop:]

    total = 0
    while True:
        k, s, t = lcs(Str[0], Str[1])
        if k < min_match:
            break
        Str[0] = cut(Str[0], s, k)
        Str[1] = cut(Str[1], t, k)
        total += k
        if k <= min_match:
            break
    return total
def solve2(Str, len1, len2):
    """Strip successive ``lcs`` matches from ``Str[0]`` and ``Str[1]``.

    ``Str`` is updated in place.  ``lcs`` is expected to return ``(length,
    start_in_first, start_in_second)``.  A removed span is replaced by a
    ``'$'`` marker unless it reached the end of the string or the following
    character is already a marker.  The process ends after the first match
    whose length is 4 or less; a match shorter than 4 is ignored.

    ``len1`` and ``len2`` are not used.

    Returns the summed length of the matches that were removed.
    """
    threshold = 4
    removed = 0
    keep_going = True
    while keep_going:
        k, s, t = lcs(Str[0], Str[1])
        if k < threshold:
            break

        for slot, begin in ((0, s), (1, t)):
            text = Str[slot]
            end = begin + k
            needs_marker = end < len(text) and text[end] != '$'
            if needs_marker:
                Str[slot] = text[0:begin] + "$" + text[end:]
            else:
                Str[slot] = text[0:begin] + text[end:]

        removed += k
        keep_going = k > threshold
    return removed
def calculate_lcs(img_keys, out_matrix, block_idx, descs_per_img=16):
    """Sample LCS descriptors from each image and store them as one block.

    Every key in ``img_keys`` is downloaded from the ``pictureweb`` bucket to
    ``/tmp/img.jpg``, converted to float32 and passed to ``lcs.lcs``.
    ``descs_per_img`` descriptor rows are drawn per image with
    ``np.random.choice`` (seeded with ``block_idx``), and the stacked sample
    is written with ``out_matrix.put_block(..., block_idx, 0)``.

    Returns the elapsed wall-clock time in seconds.
    """
    import time

    sampled = []
    bucket_source = boto3.resource('s3')
    started = time.time()
    np.random.seed(block_idx)

    for im_key in img_keys:
        bucket_source.Bucket("pictureweb").download_file(im_key, "/tmp/img.jpg")
        image = scipy.misc.imread("/tmp/img.jpg").astype('float32')
        descs = lcs.lcs(image).reshape(-1, LCS_DESC_LENGTH)
        rows = np.random.choice(descs.shape[0], descs_per_img)
        sampled.append(descs[rows, :])

    out_matrix.put_block(np.vstack(sampled), block_idx, 0)
    finished = time.time()
    return finished - started
def _match(nodes1, nodes2, M, equal):
    """Add matching node pairs to ``M``, one label at a time.

    For each distinct label, the chains of both node lists are aligned with
    ``lcs.lcs(lcs.path(...))`` and the aligned pairs are added to ``M`` and
    removed from the chains.  The leftovers are then paired greedily: each
    remaining node of the first chain takes the first remaining node of the
    second chain for which ``equal`` holds.
    """
    labels = utils.nub([node.label for node in nodes1 + nodes2])
    for label in labels:
        chain1 = get_chain(nodes1, label)
        chain2 = get_chain(nodes2, label)

        aligned = lcs.lcs(lcs.path(chain1, chain2, equal))
        for i, j in aligned:
            M.add((chain1[i], chain2[j]))
        # Removed back to front -- presumably so that the remaining indices
        # stay valid while popping.
        for i, j in reversed(aligned):
            chain1.pop(i)
            chain2.pop(j)

        for left in chain1:
            for j, right in enumerate(chain2):
                if equal(left, right):
                    M.add((left, right))
                    chain2.pop(j)
                    break
def sorted_active_runnable(self, query, hwnds):
    """Return the tasks for ``hwnds`` ordered by relevance to ``query``.

    Holds ``self.mutex`` for the whole call.  With an empty query the tasks
    are ordered by ``usetime``, most recent first.  Otherwise each task is
    ranked by ``task.query.distance_to(title) * 10 ** len(query)``; for the
    leading run of tasks whose score equals the first task's score, the
    length of ``lcs(query, title)`` is subtracted as a tie-breaker.

    Returns an empty list when ``hwnds`` is empty (the original raised
    IndexError in that case when ``query`` was non-empty).
    """
    with QMutexLocker(self.mutex):
        # update query and collect active ones
        self._refresh_tasks(hwnds, query)
        active_tasks = [self.tasks[h] for h in hwnds]
        if not active_tasks:
            return []

        # sort by last use
        if not query:
            return sorted(active_tasks, key=lambda t: t.usetime, reverse=True)

        titles = [task.fullname.lower() for task in active_tasks]
        scale = 10 ** len(query)
        ds = [
            task.query.distance_to(title) * scale
            for task, title in zip(active_tasks, titles)
        ]

        # Only the leading run equal to the first score gets the LCS bonus.
        best = ds[0]
        for i, title in enumerate(titles):
            if ds[i] != best:
                break
            ds[i] -= len(lcs(query, title))

        # sorted() is stable, so equal scores keep their incoming order.
        order = sorted(range(len(active_tasks)), key=lambda i: ds[i])
        return [active_tasks[i] for i in order]
def similarity(x, y):
    """
    similarity(x, y)

    Measure string similarity between x and y.

    Returns 0 when y is empty or when the shorter/longer length ratio is
    below 0.75.  Otherwise returns:

        0.8 * len(LCS(x, y)) / max(len(x), len(y))
        + 0.2 / DamerauLevenshteinDistance(x, y)

    where a distance of 0 is replaced by 1.

    LCS and Levenshtein are, by trial and error, found to be compensating for
    each others errors. Hence their combination in most cases seems to be one
    of the good solutions.

    Eg: For Abhogi and Behag, LCS gives a high similarity score of 3, which is
    balanced by the Levenshtein's distance of 4, thus penalizing it.
    """
    # beyond this difference of ratio between lengths, they are deemed
    # different terms
    len_thresh = 0.75
    w1 = 0.8
    w2 = 0.2

    if len(y) == 0:
        return 0

    ratio = 1.0 * len(x) / len(y)
    if ratio > 1:
        ratio = 1 / ratio
    if ratio < len_thresh:
        return 0

    subseq = lcs.lcs(x, y)
    dldist = levenshtein.dameraulevenshtein(x, y)
    if dldist == 0:
        dldist = 1

    lcs_term = w1 * (1.0 * len(subseq) / max([len(x), len(y)]))
    dist_term = w2 * 1.0 / dldist
    return lcs_term + dist_term
# Scratch script: fetch one image from S3, load the stored PCA matrix and GMM
# parameters from disk, and compute LCS descriptors for the image.
import io
import time

import boto3
import numpy as np
import scipy.misc

import fisher
import lcs
import pywren

t = time.time()

im_key = "imagenet_train_100k_pixels/8/11857.JPEG"
client = boto3.client('s3')
response = client.get_object(Bucket="pictureweb", Key=im_key)
bio = io.BytesIO(response["Body"].read())

pca_mat_lcs = np.load("pca_mat_lcs.npy")

num_centers = 16
weights = np.load('gmm_lcs_weights_{0}_centers.npy'.format(num_centers))
means = np.load('gmm_lcs_means_{0}_centers.npy'.format(num_centers))
covars = np.load('gmm_lcs_covars_{0}_centers.npy'.format(num_centers))
gmm_lcs = (means, covars, weights)

img = scipy.misc.imread(bio, flatten=True)
pwex = pywren.default_executor()

# One descriptor per row, 96 values each.
z = lcs.lcs(img).reshape(-1, 96)
print flag flag = flag+ 1 machine_id = row[0] isp = row[2] country = row[1] if isp == "": continue if machineList.has_key(machine_id): if isp in machineList[machine_id]: continue else: index = -1 max_sim = -1 for isp_idx in range(0, len(machineList[machine_id])): isp_sub = machineList[machine_id][isp_idx] common_len = lcs.lcs(isp, isp_sub) sim = float(common_len) / max(len(isp), len(isp_sub)) #print sim #print isp_sub, isp if sim > eps and max_sim < sim: index = isp_idx max_sim = sim if index != -1: #update if len(isp) < len(machineList[machine_id][index]): machineList[machine_id][index] = isp comment_sql = UPDATE_SQL.format(newisp = isp, machine_id = machine_id, isp = machineList[machine_id][index]) cursor.execute(comment_sql) conn.commit() else: continue
def normal_lcs(a, b, la=None, lb=None):
    """Return ``2 * lcs(a, b) / (la + lb)`` as a float.

    ``la`` / ``lb`` default to ``len(a)`` / ``len(b)`` when omitted (or
    falsy).  ``lcs`` is expected to return a number.

    Fixes over the original expression:
    * the division is always a true division, so integer operands are no
      longer truncated to 0 or 1 under Python 2;
    * when the combined length is 0 the result is 0.0 instead of a
      ZeroDivisionError.
    """
    if not la:
        la = len(a)
    if not lb:
        lb = len(b)

    total = la + lb
    if total == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    return 2.0 * lcs(a, b) / total
def get_longest_substr(self, first_start, first_end, second_start, second_end):
    """Run ``lcs`` on a window of each sequence and rebase its offsets.

    The windows are ``self.first[first_start:first_end]`` and
    ``self.second[second_start:second_end]``.  ``lcs`` returns three values;
    the first is offset by ``first_start``, the second by ``second_start``
    and the third is passed through unchanged.
    """
    # alpha & beta are the substrings under consideration.
    alpha = self.first[first_start:first_end]
    beta = self.second[second_start:second_end]

    offset_in_alpha, offset_in_beta, length = lcs(alpha, beta)
    return (
        first_start + offset_in_alpha,
        second_start + offset_in_beta,
        length,
    )
def test_lcs(self):
    """Check ``lcs.lcs`` against both fixture lists.

    Each case has the form ``((seq1, seq2), expected)``.
    """
    for case in self.cases:
        arguments, expected = case[0], case[1]
        self.assertEqual(lcs.lcs(arguments[0], arguments[1]), expected)

    for case in self.cases_without_recursive:
        arguments, expected = case[0], case[1]
        self.assertEqual(lcs.lcs(arguments[0], arguments[1]), expected)
import concurrent.futures as fs import numpy as np import lcs from numpy import ascontiguousarray as C_ARRAY def convert_keystone_csv_to_numpy(fname): return np.array([[float(x) for x in y.split(",")] for y in open(fname).read().strip().split("\n")]) if __name__ == "__main__": im = scipy.misc.imread("./ILSVRC2012_val_00000293.JPEG") # convert image to BGR im = im[:, :, ::-1] descs = lcs.lcs(im).reshape(-1, 96) descs_mean = descs[:, :48] descs_std = descs[:, 48:] descs = np.vstack((descs_mean, descs_std)).reshape((-1, 96), order='F').T descs_keystone = C_ARRAY( convert_keystone_csv_to_numpy("./lcs_imagenet.txt")) pca_mat = convert_keystone_csv_to_numpy("./pcaMat_lcs.csv") weights = convert_keystone_csv_to_numpy("./gmmCoefs_lcs.csv") means = convert_keystone_csv_to_numpy("./gmmMeans_lcs.csv").T covars = convert_keystone_csv_to_numpy("./gmmVars_lcs.csv").T descs = pca_mat.dot(descs) pca_keystone = convert_keystone_csv_to_numpy("./pca_keystone_lcs.txt") gmm = (means, covars, weights) fv_keystone = convert_keystone_csv_to_numpy("./fisher_keystone_lcs.txt") fv_features = fisher.fisher_vector_features(
# Quick experiment: print the LCS of two strings and the three lengths.
import lcs

a = "aslkndlkasoihlkn"
b = "asdhklneldbaiubsc"

c = lcs.lcs(a, b)
print(c)
print(len(c))
print(len(a))
print(len(b))

# Note: the Levenshtein distance here should be 12, which is not the same as
# max(len(a), len(b)) - len(lcs).
#
#   a s l k n d l k a s o i h l k n
#   a s d h k l n e l d b a i u b s c
def _select_best(self, articles):
    """Return the article whose title gives the smallest ``lcs`` value.

    Each title is passed through ``_em`` and ``gn`` before being compared
    with ``self.gn``.
    """
    from gn import gn
    from lcs import lcs

    def score(art):
        return lcs(self.gn, gn(_em(art['title'])))

    return min(articles, key=score)
def calcScore(self, qtList):
    """Score this answer candidate against the question templates.

    Reads ``self.sub``, ``self.pre``, ``self.qRaw``, ``self.qType`` and (for
    question type 2) ``self.kbDict``; writes ``self.qType``, ``self.score``
    and, for question type 2, replaces ``self.pre`` with a set of predicates.

    ``qtList`` maps the keys '01'/'02'/'03', '11'/'12'/'13' and '20' to
    iterables of template strings.  Entries of '20' must contain the markers
    ``'|||qS|||'`` and ``' ===>>> '``.

    Returns ``self.score``.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    # Remove the part of the question that matches the subject.
    lcsSub = lcs.lcs(self.sub, self.qRaw)
    subIndex = self.qRaw.index(lcsSub)
    qTemplate = self.qRaw.replace(lcsSub, '')
    if self.pre == '':
        # No predicate: subject-only question.
        self.qType = 2
    else:
        # Remove the part that matches the predicate as well.
        lcsPre = lcs.lcs(self.pre, qTemplate)
        preIndex = qTemplate.index(lcsPre)
        qTemplate = qTemplate.replace(lcsPre, '')
        if preIndex < subIndex:
            # Predicate text precedes the subject text.
            self.qType = 1
    # Type 0: split the template into three pieces around the subject and
    # predicate positions; score each piece by its best Jaro similarity.
    if self.qType == 0:
        qt01 = qTemplate[:subIndex]
        qt02 = qTemplate[subIndex:preIndex]
        qt03 = qTemplate[preIndex:]
        mSqt01 = 0
        mSqt02 = 0
        mSqt03 = 0
        for vQt01 in qtList['01']:
            tmp = Levenshtein.jaro(qt01, vQt01)
            if tmp > mSqt01:
                mSqt01 = tmp
        for vQt02 in qtList['02']:
            tmp = Levenshtein.jaro(qt02, vQt02)
            if tmp > mSqt02:
                mSqt02 = tmp
        for vQt03 in qtList['03']:
            tmp = Levenshtein.jaro(qt03, vQt03)
            if tmp > mSqt03:
                mSqt03 = tmp
        # Average of the three piece scores and the two match scores.
        self.score = (mSqt01 + mSqt02 + mSqt03 + Levenshtein.jaro(
            lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5
    # Type 1: same as type 0 with predicate and subject positions swapped.
    if self.qType == 1:
        qt11 = qTemplate[:preIndex]
        qt12 = qTemplate[preIndex:subIndex]
        qt13 = qTemplate[subIndex:]
        mSqt11 = 0
        mSqt12 = 0
        mSqt13 = 0
        for vQt11 in qtList['11']:
            tmp = Levenshtein.jaro(qt11, vQt11)
            if tmp > mSqt11:
                mSqt11 = tmp
        for vQt12 in qtList['12']:
            tmp = Levenshtein.jaro(qt12, vQt12)
            if tmp > mSqt12:
                mSqt12 = tmp
        for vQt13 in qtList['13']:
            tmp = Levenshtein.jaro(qt13, vQt13)
            if tmp > mSqt13:
                mSqt13 = tmp
        self.score = (mSqt11 + mSqt12 + mSqt13 + Levenshtein.jaro(
            lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5
    # Type 2: choose the predicate(s) from the knowledge base.
    if self.qType == 2:
        qt20 = qTemplate
        qt21 = qTemplate[:subIndex]
        qt22 = qTemplate[subIndex:]
        mSqt20 = 0
        preResult = set()
        for vQt20 in qtList['20']:
            # Entry layout: <before>|||qS|||<after> ===>>> <predicate>
            # (both markers are 8 characters long).
            vQt201 = vQt20[:vQt20.index('|||qS|||')]
            vQt202 = vQt20[vQt20.index('|||qS|||') + 8:vQt20.index(' ===>>> ')]
            vQt20pre = vQt20[vQt20.index(' ===>>> ') + 8:]
            sTmp20 = Levenshtein.jaro(vQt201, qt21) + Levenshtein.jaro(
                vQt202, qt22)
            sTmp20pre = 0
            preTmp = set()
            # Collect every predicate tied for the best Jaro similarity to
            # the template's predicate.
            for kb in self.kbDict:
                for pre in kb:
                    tmp = Levenshtein.jaro(vQt20pre, pre)
                    if tmp > sTmp20pre:
                        sTmp20pre = tmp
                        preTmp = set()
                    if tmp == sTmp20pre:
                        preTmp.add(pre)
            # Four terms: two template halves, predicate and subject match.
            sTmp20 = (sTmp20 + sTmp20pre +
                      Levenshtein.jaro(lcsSub, self.sub)) / 4
            if sTmp20 > mSqt20:
                mSqt20 = sTmp20
                preResult = set()
            if sTmp20 == mSqt20:
                for pre in preTmp:
                    preResult.add(pre)
        self.pre = preResult
        self.score = mSqt20
    return self.score
def test_longest_common_subsequence(self):
    """Check ``lcs`` against a table of ``((first, second), expected)``."""
    cases = [
        (("waaaa", "bbbbasfaaewra"), "aaaa"),
        (("abc", "def"), ""),
        (("123", "01234"), "123"),
        (("abcd", "dcba"), "c"),
    ]
    for (first, second), expected in cases:
        self.assertEqual(lcs(first, second), expected)
def alternative_pal(s):
    """Return ``len(s)`` minus the length of the LCS of ``s`` and its reverse."""
    mirrored = s[::-1]
    common = lcs(s, mirrored)
    return len(s) - len(common)
def test_lcs(self):
    """Check the ``lcs`` value for both fixture pairs (3, then 4)."""
    first_pair = lcs(self.seq1, self.seq2)
    self.assertEqual(first_pair, 3)

    second_pair = lcs(self.seq3, self.seq4)
    self.assertEqual(second_pair, 4)
def evaluate_step_pair(a_tokens, a_tags, b_tokens, b_tags):
    """Return ``lcs`` of the two token tensors converted to lists.

    ``a_tags`` and ``b_tags`` are accepted but not used.
    """
    left = tensor_to_list(a_tokens)
    right = tensor_to_list(b_tokens)
    return lcs(left, right)
# Compute, for 1000 result files, how much of the ground truth was recovered
# (length of the LCS with the ground truth divided by its length), print each
# rate and write average / median / maximum to recover_rate.txt.
#
# Usage: <script> <key-name> <result-tag>
#   argv[1] selects the ground-truth file 'rsa_<key-name>.sp'
#   argv[2] selects the result files 'result-<result-tag>-<i>.txt'
import os
import sys

from numpy import median

from ground_truth import ground_truth
from lcs import lcs
from translate import translate

NUM_RUNS = 1000

rate_list = []
# NOTE: rebinds the imported function name to its result, as before.
ground_truth = ground_truth('rsa_' + sys.argv[1] + '.sp')

for i in range(1, NUM_RUNS + 1):
    inputfile = os.path.join('../RSA/client/results/',
                             'result-' + sys.argv[2] + '-' + str(i) + '.txt')
    recovered = translate(inputfile)
    recover_rate = float(len(lcs(recovered, ground_truth))) / float(
        len(ground_truth))
    rate_list.append(recover_rate)
    print(str(i) + ': Recover rate is %.2f%%' % (recover_rate * 100))

Average = float(sum(rate_list)) / NUM_RUNS
Median = median(rate_list)
Max = max(rate_list)

# The context manager flushes and closes the report; the original left the
# file object open.
with open("recover_rate.txt", "w+") as f:
    f.write('Average recover rate is %.2f%%\n' % (Average * 100))
    f.write('Median recover rate is %.2f%%\n' % (Median * 100))
    f.write('Maximum recover rate is %.2f%%\n' % (Max * 100))
# Smoke check: print the result of lcs for two tiny strings.
from lcs import lcs

# Single-argument print() gives the same output on Python 2 and 3.
print(lcs('aaa', 'a'))
# Interactive driver: read two sequences, then print the LCS length and the
# LCS itself.
from lcs import length_lcs
from lcs import lcs

if __name__ == "__main__":
    x = list(input("\n Enter the first sequence:"))
    y = list(input("\n Enter the second sequence:"))

    m, n = len(x), len(y)
    (array, length) = length_lcs(x, y, m, n)

    print('\n Length of lcs is:', length)
    print('\n LCS is:', lcs(x, y, array, length))
def image_fisher_featurize_sift_lcs(im_keys, out_matrix, bidx, gmm_sift,
                                    pca_sift_mat, gmm_lcs, pca_lcs_mat):
    """Compute normalised SIFT + LCS Fisher vectors for a batch of images.

    For every key in ``im_keys`` the image is fetched from the ``pictureweb``
    bucket, SIFT and LCS descriptors are extracted, projected with the given
    PCA matrices, encoded with ``fisher.fisher_vector_features`` and
    L2-normalised.  The stacked per-descriptor feature matrices are then
    signed-square-root and row-wise L2 normalised, concatenated and written
    with ``out_matrix.put_block(features, bidx, 0)``.

    Changes from the original:
    * each image is downloaded once (it used to be downloaded twice) and the
      S3 client is created once instead of per image;
    * NaN checks that raise ``Exception`` are explicit ``if`` tests rather
      than ``assert`` wrapped in a bare ``except``, so they still run under
      ``python -O`` and no longer mask unrelated errors.

    Returns ``(elapsed_seconds, start_time, end_time)``.

    Raises:
        Exception: if NaNs appear in the LCS descriptors (before or after
            PCA) or in the LCS Fisher vector of an image.
        AssertionError: if SIFT descriptors are negative or NaNs appear
            during the batch normalisation.
    """

    def _raise_if_nan(values, message):
        # Fail with a message naming the offending image.
        if np.any(np.isnan(values)):
            raise Exception(message)

    def _signed_sqrt_l2(features):
        # Signed square root followed by row-wise L2 normalisation.
        assert (np.any(np.isnan(features)) == False)
        signs = np.sign(features)
        features = signs * np.sqrt(np.abs(features))
        feature_norms = np.linalg.norm(features, axis=1)[:, np.newaxis]
        assert (np.any(np.isnan(feature_norms)) == False)
        features /= feature_norms
        assert (np.any(np.isnan(features)) == False)
        return features

    all_sift_features = []
    all_lcs_features = []
    t = time.time()
    client = boto3.client('s3')

    for im_key in im_keys:
        raw = client.get_object(Bucket="pictureweb", Key=im_key)["Body"].read()

        # SIFT branch: flattened image scaled to [0, 1].
        im = scipy.misc.imread(io.BytesIO(raw), flatten=True)
        im = im.astype('float32')
        im /= 255.0
        sift_descs = sift.sift(im)
        assert (np.all(sift_descs >= 0))
        sift_descs = np.sqrt(sift_descs)
        sift_descs = sift_descs.dot(pca_sift_mat)
        sift_features = fisher.fisher_vector_features(
            sift_descs.astype('float32'), *gmm_sift)
        sift_features /= np.linalg.norm(sift_features)

        # LCS branch: unflattened image, decoded from the same bytes.
        im = scipy.misc.imread(io.BytesIO(raw))
        lcs_descs = lcs.lcs(im).reshape(-1, LCS_DESC_LENGTH)
        _raise_if_nan(lcs_descs,
                      "RAISING LCS Error pre pca in {0}".format(im_key))
        lcs_descs = lcs_descs.dot(pca_lcs_mat)
        _raise_if_nan(lcs_descs,
                      "RAISING LCS Error post pca in {0}".format(im_key))
        lcs_features = fisher.fisher_vector_features(
            lcs_descs.astype('float32'), *gmm_lcs)
        lcs_features /= np.linalg.norm(lcs_features)
        _raise_if_nan(lcs_features,
                      "RAISING LCS Fisher Vector Error in {0}".format(im_key))

        all_sift_features.append(sift_features)
        all_lcs_features.append(lcs_features)

    all_sift_features = np.array(all_sift_features)
    all_lcs_features = np.array(all_lcs_features)

    #sqrt normalization
    all_sift_features = _signed_sqrt_l2(all_sift_features)
    all_lcs_features = _signed_sqrt_l2(all_lcs_features)

    features = np.hstack((all_sift_features, all_lcs_features))
    out_matrix.put_block(features, bidx, 0)
    e = time.time()
    return e - t, t, e
def calculate_lcs(words):
    """Return the length of ``lcs.lcs`` over the first 10 items of two words.

    Only ``words[0]`` and ``words[1]`` are used.
    """
    first_prefix = words[0][0:10]
    second_prefix = words[1][0:10]
    return len(lcs.lcs(first_prefix, second_prefix))
# Minimum number of insertions required to make a string a palindrome.
#   Step 1: reverse the string
#   Step 2: apply LCS to the string and its reverse
#   Step 3: result = len(string) - len(lcs)
from lcs import lcs

s = "HELLO"
x = lcs(s, s[::-1])
print(len(s) - x)
def lcs(text1, text2):
    """Return the fraction of ``text1`` not covered by ``seq.lcs``.

    Computed as ``abs(len(text1) - len(seq.lcs(text1, text2))) / len(text1)``.
    An empty ``text1`` yields 0.0 (nothing is left uncovered) instead of the
    ZeroDivisionError raised by the original expression.
    """
    size = len(text1)
    if size == 0:
        # Guard against dividing by zero.
        return 0.0
    value = seq.lcs(text1, text2)
    return abs(float(size - len(value)) / float(size))
def shortestCommonSuperSequence(s1, s2):
    """Print ``len(s1) + len(s2) - lcs(s1, s2)``.

    ``lcs`` is expected to return a number.  Nothing is returned.
    """
    combined = len(s1) + len(s2)
    length = combined - lcs(s1, s2)
    print('length= ', length)
def answerQ(qRaw, lKey, kbDict, qtList, threshold=0, debug=False):
    """Pick the best-scoring answer candidate(s) for a raw question.

    Candidates are gathered from four sources and then scored with
    ``calcScore(qtList)``:

    * ``qtMatchSet``  -- question-template matches (lists '01'-'03' and
      '11'-'13' of ``qtList``);
    * ``maxSubSet``   -- keys sharing the longest LCS with the question;
    * ``maxPreSet``   -- key/predicate pairs with the longest predicate LCS;
    * ``maxSPSet``    -- pairs with the longest combined subject+predicate
      LCS.

    Args:
        qRaw: the question; it is stripped and spaces are removed.
        lKey: iterable of keys, each used to index ``kbDict``.
        kbDict: maps a key to an iterable of collections of predicates.
        qtList: maps template-list names to iterables of template strings.
        threshold: not used.
        debug: when true, the three intermediate sets are returned as well.

    Returns:
        The set of best candidates, or
        ``[bestAnswer, maxSubSet, maxPreSet, maxSPSet]`` when ``debug`` is
        true.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    q = qRaw.strip().replace(' ', '')
    qtType = 0  #0:sub+pre 1:pre+sub 2:sub
    maxSubLen = 0
    maxSubSetTmp = set()
    maxSubSet = set()
    maxPreLen = 0
    maxPreSet = set()
    maxSPLen = 0
    maxSPSet = set()
    result = ''
    lcsSub = ''
    lcstemp = ''
    lcsPre = ''
    subIndex = 0
    scoreSub = 0
    qRemoveSub = ''
    preIndex = 0
    scoreSub = 0
    qRemoveSubPre = ''
    maxScore = 0
    qtMatchSet = set()
    bestAnswer = set()
    # Template pass, subject-before-predicate order.
    # NOTE(review): str.find returns -1 when the template is absent, which
    # also satisfies the `!= 0` tests below -- confirm this is intended.
    for qt01 in qtList['01']:
        if qt01 == '' or q.find(qt01) == 0:
            qR01 = q.replace(qt01, '', 1)
            for qt02 in qtList['02']:
                qFind2 = qR01.find(qt02)
                if qt02 == '' or qFind2 != 0:
                    subCandidate = qR01[:qFind2]
                    qR02 = qR01[qFind2:].replace(qt02, '', 1)
                    for qt03 in qtList['03']:
                        qFind3 = qR02.find(qt03)
                        if qt03 == '' or qFind3 != 0:
                            preCandidate = qR02[:qFind3]
                            if subCandidate in kbDict:
                                for kb in kbDict[subCandidate]:
                                    if preCandidate in kb:
                                        newAnswerCandidate = answerCandidate(
                                            subCandidate, preCandidate, q)
                                        qtMatchSet.add(newAnswerCandidate)
    # Template pass, predicate-before-subject order.
    for qt11 in qtList['11']:
        if qt11 == '' or q.find(qt11) == 0:
            qR11 = q.replace(qt11, '', 1)
            for qt12 in qtList['12']:
                qFind2 = qR11.find(qt12)
                if qt12 == '' or qFind2 != 0:
                    preCandidate = qR11[:qFind2]
                    qR12 = qR11[qFind2:].replace(qt12, '', 1)
                    for qt13 in qtList['13']:
                        qFind3 = qR12.find(qt13)
                        if qt13 == '' or qFind3 != 0:
                            subCandidate = qR12[:qFind3]
                            if subCandidate in kbDict:
                                for kb in kbDict[subCandidate]:
                                    if preCandidate in kb:
                                        newAnswerCandidate = answerCandidate(
                                            subCandidate, preCandidate, q)
                                        qtMatchSet.add(newAnswerCandidate)
    # A disabled earlier variant of the two template passes stood here; it
    # iterated qtList['00'] and qtList['10'], whose entries were 3-part
    # templates, and filled qtMatchSet the same way.

    # First pass over the keys: find the longest subject LCS length.
    for key in lKey:
        lcsSub = lcs.lcs(q, key)
        if lcsSub == '':
            continue
        lcsSubLen = len(lcsSub)
        if maxSubLen < lcsSubLen:
            maxSubSetTmp = set()
            maxSubLen = lcsSubLen
        if maxSubLen == lcsSubLen:
            maxSubSetTmp.add(key)
    maxSPLen = maxSubLen
    # Second pass: build the three candidate sets.
    for key in lKey:
        lcsSub = lcs.lcs(q, key)
        if lcsSub == '':
            continue
        lcsSubLen = len(lcsSub)
        lcsSubIndex = q.index(lcsSub)
        # Question text before and after the subject match.
        qRemoveSub1 = q[:lcsSubIndex]
        qRemoveSub1Len = len(qRemoveSub1)
        qRemoveSub2 = q[lcsSubIndex + lcsSubLen:]
        qRemoveSub2Len = len(qRemoveSub2)
        foundPre = 0
        for kb in kbDict[key]:
            for pre in list(kb):
                preLen = len(pre)
                lcsPre1 = ''
                lcsPre2 = ''
                # Best subject match: keep any predicate that overlaps the
                # rest of the question.
                if maxSubLen == lcsSubLen:
                    lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    if lcsPre1 != '' or lcsPre2 != '':
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        foundPre = 1
                        maxSubSet.add(newAnswerCandidate)
                # Track the longest predicate overlap; the length tests skip
                # lcs calls that cannot beat the current maximum.
                if preLen > maxPreLen:
                    if qRemoveSub1Len > maxPreLen:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxPreLen:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxLcsPre12 = max(len(lcsPre1), len(lcsPre2))
                    if maxLcsPre12 > maxPreLen:
                        maxPreLen = maxLcsPre12
                        maxPreSet = set()
                    if maxLcsPre12 == maxPreLen:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxPreSet.add(newAnswerCandidate)
                # Track the longest combined subject + predicate overlap.
                maxResidual = maxSPLen - lcsSubLen
                if preLen > maxResidual:
                    if qRemoveSub1Len > maxResidual:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxResidual:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxResidual12 = max(len(lcsPre1), len(lcsPre2))
                    if maxResidual12 > maxResidual:
                        maxSPLen = maxResidual12 + lcsSubLen
                        maxResidual = maxSPLen - lcsSubLen
                        maxSPSet = set()
                    if maxResidual12 == maxResidual:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxSPSet.add(newAnswerCandidate)
        # Best subject match without any overlapping predicate: add a
        # subject-only candidate.
        if foundPre == 0 and maxSubLen == lcsSubLen:
            newAnswerCandidate = answerCandidate(key, '', q, 2, 0, kbDict[key])
            maxSubSet.add(newAnswerCandidate)
    # De-duplicate each set by (pre, sub); the sets themselves compare
    # candidate objects, not these two fields.
    maxSubSetCopy = maxSubSet.copy()
    #print('len(maxSubSet) = ' + str(len(maxSubSetCopy)), end = '\r', flush=True)
    maxSubSet = set()
    for aCandidate in maxSubSetCopy:
        aCfound = 0
        for aC in maxSubSet:
            if aC.pre == aCandidate.pre and aC.sub == aCandidate.sub:
                aCfound = 1
                break
        if aCfound == 0:
            maxSubSet.add(aCandidate)
    maxPreSetCopy = maxPreSet.copy()
    #print('len(maxPreSet) = ' + str(len(maxPreSetCopy)), end = '\r', flush=True)
    maxPreSet = set()
    for aCandidate in maxPreSetCopy:
        aCfound = 0
        for aC in maxPreSet:
            if aC.pre == aCandidate.pre and aC.sub == aCandidate.sub:
                aCfound = 1
                break
        if aCfound == 0:
            maxPreSet.add(aCandidate)
    maxSPSetCopy = maxSPSet.copy()
    #print('len(maxSPSet) = ' + str(len(maxSPSetCopy)), end = '\r', flush=True)
    maxSPSet = set()
    for aCandidate in maxSPSetCopy:
        aCfound = 0
        for aC in maxSPSet:
            if aC.pre == aCandidate.pre and aC.sub == aCandidate.sub:
                aCfound = 1
                break
        if aCfound == 0:
            maxSPSet.add(aCandidate)
    # Score every candidate; keep all that tie for the highest score.
    for aCandidate in maxSubSet:
        scoreTmp = aCandidate.calcScore(qtList)
        if scoreTmp > maxScore:
            maxScore = scoreTmp
            bestAnswer = set()
        if scoreTmp == maxScore:
            bestAnswer.add(aCandidate)
    for aCandidate in maxPreSet:
        scoreTmp = aCandidate.calcScore(qtList)
        if scoreTmp > maxScore:
            maxScore = scoreTmp
            bestAnswer = set()
        if scoreTmp == maxScore:
            bestAnswer.add(aCandidate)
    for aCandidate in maxSPSet:
        scoreTmp = aCandidate.calcScore(qtList)
        if scoreTmp > maxScore:
            maxScore = scoreTmp
            bestAnswer = set()
        if scoreTmp == maxScore:
            bestAnswer.add(aCandidate)
    for aCandidate in qtMatchSet:
        scoreTmp = aCandidate.calcScore(qtList)
        if scoreTmp > maxScore:
            maxScore = scoreTmp
            bestAnswer = set()
        if scoreTmp == maxScore:
            bestAnswer.add(aCandidate)
    # De-duplicate the winners by (pre, sub) as well.
    bestAnswerCopy = bestAnswer.copy()
    bestAnswer = set()
    for aCandidate in bestAnswerCopy:
        aCfound = 0
        for aC in bestAnswer:
            if aC.pre == aCandidate.pre and aC.sub == aCandidate.sub:
                aCfound = 1
                break
        if aCfound == 0:
            bestAnswer.add(aCandidate)
    if debug == False:
        return bestAnswer
    else:
        return [bestAnswer, maxSubSet, maxPreSet, maxSPSet]
with tag('div',klass = "bg-danger"): line('i','',klass = 'fa fa-remove pull-right') line('h4',i[3:]) if i[0:3] == 'LCS': with tag('div',klass = "bg-info"): line('i','',klass = 'fa fa-star pull-right') line('h4',i[3:]) return indent(doc.getvalue()) def saveMarkedUpContentToFile(content): with open('output.html', 'w') as f: f.write(content) if __name__ == "__main__" : input1 = get_file_content('input1.txt') input2 = get_file_content('input2.txt') operations = lcs.lcs(input1[0],input2[0]) input1 = ''.join(input1[0]) input2 = ''.join(input2[0]) print operations content = generate_html_code(input1,input2,operations) saveMarkedUpContentToFile(content)