Пример #1
1
    def test_lcs_kosa_koza(self):
        """'kosa' against 'koza' must give exactly one subsequence: k, o, a."""
        expected = [list('koa')]

        actual = lcs.lcs('kosa', 'koza')

        self.assertEqual(actual, expected)
Пример #2
1
    def test_lcs_abcd_acbd(self):
        """'abcd' against 'acbd' must give a-c-d followed by a-b-d, in that order."""
        expected = [list('acd'), list('abd')]

        actual = lcs.lcs('abcd', 'acbd')

        self.assertEqual(actual, expected)
Пример #3
0
    def test_lcs_empty_acbd(self):
        """An empty first sequence must give a list holding one empty subsequence."""
        expected = [[]]

        actual = lcs.lcs('', 'acbd')

        self.assertEqual(actual, expected)
Пример #4
0
    def solve2(cls, Str, len1, len2):
        """Repeatedly cut the run reported by ``lcs`` out of ``Str[0]`` and ``Str[1]``.

        Each round unpacks the result of ``lcs(Str[0], Str[1])`` as
        ``(size, start0, start1)``.  Rounds stop as soon as ``size`` drops
        below 4; a round with ``size`` exactly 4 is still applied and is the
        last one.  ``Str`` is modified in place.

        ``len1`` and ``len2`` are accepted but not used.

        Returns the sum of the sizes that were cut out.
        """
        min_run = 4
        total = 0

        def cut(text, start, size):
            stop = start + size
            # Leave a '$' where the run was, unless the text ends there or a
            # '$' already follows the run.
            if stop < len(text) and text[stop] != '$':
                return text[:start] + "$" + text[stop:]
            return text[:start] + text[stop:]

        while True:
            size, start0, start1 = lcs(Str[0], Str[1])
            if size < min_run:
                break
            Str[0] = cut(Str[0], start0, size)
            Str[1] = cut(Str[1], start1, size)
            total += size
            if size <= min_run:
                break
        return total
Пример #5
0
 def lcs_call(self, widget):
     """Run ``lcs`` on the two text fields and show the result in a dialog.

     ``widget`` is accepted but not used.
     """
     first_text = self.txt_grpa.get_text()
     second_text = self.txt_grpb.get_text()
     outcome = lcs(first_text, second_text)

     popup = DialogWindow(self, outcome)
     popup.run()
     popup.destroy()
Пример #6
0
def main(filepath):
    """Report how many distinct dataset entries contain each frequent fragment.

    Reads the ``"dataset"`` list from the JSON file at ``filepath``, runs
    ``lcs.lcs`` on every pair of distinct entries, and counts each returned
    subsequence of length 3 or more.  Those counts are handed to
    ``suffixtree2.ST``; for every key of ``print_tree(..., 3)`` the number of
    distinct entries accepted by ``check`` is printed, highest count first.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filepath) as handle:
        dataset = json.load(handle)["dataset"]

    original = {}
    for d in dataset:
        original[d] = original.get(d, 0) + 1

    # A real list is needed for indexing/slicing; dict views are not
    # subscriptable on Python 3.
    keys = list(original)

    sequences = {}
    for i, first in enumerate(keys):
        for second in keys[i + 1:]:
            for l in lcs.lcs(first, second):
                if len(l) < 3:
                    continue
                str_result = "".join(l)
                sequences[str_result] = sequences.get(str_result, 0) + 1

    h = suffixtree2.ST(sequences)
    alldata = h.print_tree(h.get(), 3)

    result = {}
    for k, _ in sorted(alldata.items(), key=lambda x: x[1], reverse=True):
        # `== True` is kept on purpose: check() is defined elsewhere and may
        # return values other than booleans.
        result[k] = sum(1 for s in keys if check(s, k) == True)

    for k, v in sorted(result.items(), key=lambda x: x[1], reverse=True):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(k + " was included in " + str(v) + " data.")
Пример #7
0
def compare_value(value1, value2):
    """Return a distance between two values.

    Two ``None`` values give 0.0 and exactly one ``None`` gives 1.0.
    Otherwise the result is 1.0 minus the length of
    ``lcs.lcs(lcs.path(value1, value2))`` divided by the longer input length.
    """
    missing = (value1 is None, value2 is None)
    if all(missing):
        return 0.0
    if any(missing):
        return 1.0

    common = float(len(lcs.lcs(lcs.path(value1, value2))))
    longest = max(len(value1), len(value2))
    return 1.0 - (common / longest)
Пример #8
0
def alignchildren(t1, t2, M, E, w, x):
  """
  Align the children of w and x (see figure 9 in reference).

  Every child of w and x is first marked out of order.  Children whose
  partner under M has the other node as parent are collected, the index
  pairs produced by lcs.lcs(lcs.path(...)) are marked in order, and each
  remaining matched pair is moved to the position given by findpos, with
  the move applied to both E and t1.  t2 is not used.
  """

  for parent in (w, x):
    for node in parent.elements():
      node.inorder = False

  s1 = []
  for child in w.elements():
    if child in M.left and M.left[child].parent == x:
      s1.append(child)

  s2 = []
  for child in x.elements():
    if child in M.right and M.right[child].parent == w:
      s2.append(child)

  def equal(a, b):
    return (a, b) in M

  in_sequence = []
  for i, j in lcs.lcs(lcs.path(s1, s2, equal)):
    in_sequence.append((s1[i], s2[j]))
  for a, b in in_sequence:
    a.inorder = b.inorder = True

  for a in s1:
    for b in s2:
      # Skip pairs that are unmatched or already part of the common sequence.
      if (a, b) not in M or (a, b) in in_sequence:
        continue
      k = findpos(M, b)
      E.move(a.path(), w.path(), k)
      t1.move(a.path(), w.path(), str(k))
      a.inorder = b.inorder = True
Пример #9
0
def alignchildren(t1, t2, M, E, w, x):
    """
    Align the children of w and x (see figure 9 in reference).

    All children start flagged as out of order.  Children matched through M
    to a child of the other node are gathered, pairs on the path returned by
    lcs.lcs(lcs.path(...)) are flagged in order, and every other matched
    pair is moved (on both E and t1) to the position given by findpos.
    t2 is not used.
    """

    for parent in (w, x):
        for node in parent.elements():
            node.inorder = False

    s1 = []
    for child in w.elements():
        if child in M.left and M.left[child].parent == x:
            s1.append(child)

    s2 = []
    for child in x.elements():
        if child in M.right and M.right[child].parent == w:
            s2.append(child)

    def equal(a, b):
        return (a, b) in M

    aligned = []
    for i, j in lcs.lcs(lcs.path(s1, s2, equal)):
        aligned.append((s1[i], s2[j]))
    for a, b in aligned:
        a.inorder = b.inorder = True

    for a in s1:
        for b in s2:
            # Only matched pairs that are not already aligned need a move.
            if (a, b) not in M or (a, b) in aligned:
                continue
            k = findpos(M, b)
            E.move(a.path(), w.path(), k)
            t1.move(a.path(), w.path(), str(k))
            a.inorder = b.inorder = True
Пример #10
0
    def sorted_active_runnable(self, query, hwnds):
        """Return the tasks for ``hwnds`` in display order.

        With an empty query the tasks are ordered by ``usetime``, largest
        first.  Otherwise each task is scored by
        ``task.query.distance_to(lowercased fullname)`` scaled by
        ``10 ** len(query)``, the leading run of tasks whose score equals the
        first task's score is reduced by ``len(lcs(query, title))``, and the
        tasks are returned in ascending score order.  The work is done while
        holding ``self.mutex``.
        """
        with QMutexLocker(self.mutex):
            # Refresh the per-task query state, then pick up the requested tasks.
            self._refresh_tasks(hwnds, query)
            running = [self.tasks[hwnd] for hwnd in hwnds]

            if not query:
                return sorted(running,
                              key=lambda task: task.usetime,
                              reverse=True)

            names = [task.fullname.lower() for task in running]
            scale = 10**len(query)

            scores = []
            for task, name in zip(running, names):
                scores.append(task.query.distance_to(name) * scale)

            head = scores[0]
            for pos in range(len(scores)):
                # Stop at the first score that differs from the first one.
                if not scores[pos] == head:
                    break
                scores[pos] -= len(lcs(query, names[pos]))

            order = sorted(range(len(running)), key=lambda pos: scores[pos])
            return [running[pos] for pos in order]
 def test_lcs(self):
     """'shirish' vs 'sigdyal': expected length is 2 and expected LCS is s, i."""
     first, second = 'shirish', 'sigdyal'
     (table, size) = length_lcs(first, second, len(first), len(second))
     self.assertEqual(size, 2)

     found = lcs(first, second, table, size)
     self.assertListEqual(found, ['s', 'i'])
Пример #12
0
def image_fisher_featurize(im_key, gmm_sift, gmm_lcs, pca_sift, pca_lcs):
    """Compute stacked SIFT and LCS Fisher-vector features for one S3 image.

    The object ``im_key`` is downloaded from bucket "pictureweb" to
    /tmp/img.jpg.  It is read once with ``flatten=True`` for ``sift`` and
    once without for ``lcs``; each descriptor set is multiplied by the
    transpose of its PCA matrix and passed to
    ``fisher.fisher_vector_features`` with the matching GMM tuple.

    Returns ``(features, elapsed_seconds)`` where ``features`` is the
    transposed horizontal stack of the two feature arrays.
    """
    started = time.time()
    local_path = "/tmp/img.jpg"

    bucket = boto3.resource('s3').Bucket("pictureweb")
    bucket.download_file(im_key, local_path)

    flat_im = scipy.misc.imread(local_path, flatten=True)
    sift_descs = sift(flat_im).dot(pca_sift.T)
    sift_features = fisher.fisher_vector_features(
        sift_descs.astype('float32'), *gmm_sift)

    raw_im = scipy.misc.imread(local_path)
    lcs_descs = lcs(raw_im).reshape(-1, 96).dot(pca_lcs.T)
    lcs_features = fisher.fisher_vector_features(
        lcs_descs.astype('float32'), *gmm_lcs)

    stacked = np.hstack((sift_features, lcs_features)).T
    return stacked, time.time() - started
Пример #13
0
Файл: try.py Проект: YLAsce/oj
def solve2(Str, len1, len2):
    """Sum the sizes of the runs repeatedly cut out of ``Str[0]`` and ``Str[1]``.

    Works on local copies, so ``Str`` itself is left untouched.  Each round
    unpacks ``lcs(code0, code1)`` as ``(k, s, t)``.  Rounds stop once ``k``
    drops below 4; a round with ``k`` exactly 4 is still applied and is the
    last one.

    Fix: ``lcs`` is now called on the working copies.  It used to be called
    on the unchanged ``Str[0]``/``Str[1]``, so any match longer than 4 was
    found again on every round and the loop never terminated.

    ``len1`` and ``len2`` are accepted but not used.  The round counter and
    ``td`` (always 0) are still printed, as before.

    Returns the accumulated ``k`` values.
    """
    # Strings are immutable and only the local names are rebound below, so
    # no explicit copy is required to protect the caller's data.
    code0 = Str[0]
    code1 = Str[1]
    ans = 0
    MML = 4
    whileTime = 0
    td = 0

    def cut(text, start, size):
        stop = start + size
        # Leave a '$' where the run was, unless the text ends there or a
        # '$' already follows the run.
        if stop < len(text) and text[stop] != '$':
            return text[:start] + "$" + text[stop:]
        return text[:start] + text[stop:]

    while True:
        whileTime += 1
        k, s, t = lcs(code0, code1)
        if k < MML:
            break
        code0 = cut(code0, s, k)
        code1 = cut(code1, t, k)
        ans += k
        if k <= MML:
            break

    # Single-argument print() produces the same output on Python 2 and 3.
    print(whileTime)
    print(td)
    return ans
Пример #14
0
Файл: try.py Проект: YLAsce/oj
def solve2(Str, len1, len2):
    """Sum the sizes of the runs repeatedly cut out of ``Str[0]`` and ``Str[1]``.

    Operates on local working copies; ``Str`` is not modified.  Every round
    unpacks ``lcs(code0, code1)`` as ``(k, s, t)``.  The loop ends when ``k``
    is below 4; a round with ``k`` equal to 4 is applied and then ends the
    loop.

    Fix: the search now runs on the working copies.  Previously ``lcs`` was
    given the original ``Str[0]``/``Str[1]``, which never change, so a match
    longer than 4 repeated forever.

    ``len1`` and ``len2`` are accepted but not used.  The round counter and
    ``td`` (always 0) are printed before returning, as before.

    Returns the accumulated ``k`` values.
    """
    # Rebinding local names never touches the caller's strings, so a deep
    # copy is unnecessary.
    code0 = Str[0]
    code1 = Str[1]
    ans = 0
    MML = 4
    whileTime = 0
    td = 0

    def cut(text, start, size):
        stop = start + size
        # Insert a '$' in place of the run unless it reached the end of the
        # text or is already followed by '$'.
        if stop < len(text) and text[stop] != '$':
            return text[:start] + "$" + text[stop:]
        return text[:start] + text[stop:]

    while True:
        whileTime += 1
        k, s, t = lcs(code0, code1)
        if k < MML:
            break
        code0 = cut(code0, s, k)
        code1 = cut(code1, t, k)
        ans += k
        if k <= MML:
            break

    # Single-argument print() produces the same output on Python 2 and 3.
    print(whileTime)
    print(td)
    return ans
Пример #15
0
def solve2(Str, len1, len2):
    """Repeatedly cut the run reported by ``lcs`` out of ``Str[0]`` and ``Str[1]``.

    Each round unpacks ``lcs(Str[0], Str[1])`` as ``(size, start0, start1)``.
    The loop stops when ``size`` is below 4; a round with ``size`` exactly 4
    is applied and is the last one.  ``Str`` is modified in place.

    ``len1`` and ``len2`` are accepted but not used.

    Returns the sum of the sizes that were cut out.
    """
    min_run = 4
    total = 0

    def cut(text, start, size):
        stop = start + size
        # Leave a '$' where the run was, unless the text ends there or a
        # '$' already follows the run.
        if stop < len(text) and text[stop] != '$':
            return text[:start] + "$" + text[stop:]
        return text[:start] + text[stop:]

    while True:
        size, start0, start1 = lcs(Str[0], Str[1])
        if size < min_run:
            break
        Str[0] = cut(Str[0], start0, size)
        Str[1] = cut(Str[1], start1, size)
        total += size
        if size <= min_run:
            break
    return total
Пример #16
0
def solve2(Str, len1, len2):
    """Cut matching runs out of ``Str[0]`` and ``Str[1]`` and add up their sizes.

    On every pass ``lcs(Str[0], Str[1])`` is unpacked as
    ``(size, start0, start1)``.  A size below 4 ends the loop immediately;
    a size of exactly 4 is processed and then ends the loop.  Both entries
    of ``Str`` are overwritten in place.

    ``len1`` and ``len2`` are accepted but not used.

    Returns the total of the processed sizes.
    """
    threshold = 4
    removed = 0

    def without_run(text, start, size):
        after = start + size
        head, tail = text[:start], text[after:]
        # A '$' separator is inserted only when something other than '$'
        # follows the run.
        if after < len(text) and text[after] != '$':
            return head + "$" + tail
        return head + tail

    while True:
        size, start0, start1 = lcs(Str[0], Str[1])
        if size < threshold:
            break
        Str[0] = without_run(Str[0], start0, size)
        Str[1] = without_run(Str[1], start1, size)
        removed += size
        if size <= threshold:
            break
    return removed
Пример #17
0
def calculate_lcs(img_keys, out_matrix, block_idx, descs_per_img=16):
    """Sample LCS descriptors from each image and store them as one block.

    For every key the object is downloaded from bucket "pictureweb" to
    /tmp/img.jpg, read, cast to float32 and passed to ``lcs.lcs``.
    ``descs_per_img`` descriptor rows are drawn with ``np.random.choice``
    after seeding NumPy's global RNG with ``block_idx``.  The stacked
    samples are written with ``out_matrix.put_block(..., block_idx, 0)``.

    Returns the elapsed wall-clock time in seconds.
    """
    sampled = []
    s3 = boto3.resource('s3')
    import time
    started = time.time()
    np.random.seed(block_idx)

    for key in img_keys:
        s3.Bucket("pictureweb").download_file(key, "/tmp/img.jpg")
        pixels = scipy.misc.imread("/tmp/img.jpg").astype('float32')
        descs = lcs.lcs(pixels).reshape(-1, LCS_DESC_LENGTH)
        picks = np.random.choice(descs.shape[0], descs_per_img)
        sampled.append(descs[picks, :])

    out_matrix.put_block(np.vstack(sampled), block_idx, 0)
    return time.time() - started
Пример #18
0
def _match(nodes1, nodes2, M, equal):
  """Add matching node pairs to M, one label at a time.

  For each label from utils.nub over both node lists, the two chains from
  get_chain are aligned with lcs.lcs(lcs.path(...)); aligned pairs are
  added to M and removed from the chains.  Each node left in the first
  chain is then paired with the first remaining node of the second chain
  that satisfies equal, and that node is removed from the second chain.
  """
  combined = nodes1 + nodes2
  for label in utils.nub([node.label for node in combined]):

    chain1 = get_chain(nodes1, label)
    chain2 = get_chain(nodes2, label)

    common = lcs.lcs(lcs.path(chain1, chain2, equal))

    for i, j in common:
      M.add((chain1[i], chain2[j]))
    # Pop from the back so earlier indices stay valid.
    for i, j in reversed(common):
      chain1.pop(i)
      chain2.pop(j)

    for left in chain1:
      for j in range(len(chain2)):
        if not equal(left, chain2[j]):
          continue
        M.add((left, chain2[j]))
        chain2.pop(j)
        break
Пример #19
0
def _match(nodes1, nodes2, M, equal):
    """Add matching node pairs to M, one label at a time.

    For each label from utils.nub over both node lists, the chains returned
    by get_chain are aligned with lcs.lcs(lcs.path(...)).  Aligned pairs go
    into M and are dropped from both chains.  Each node left in the first
    chain is then paired with the first remaining node of the second chain
    for which equal returns true, and that node is dropped.
    """
    everything = nodes1 + nodes2
    for label in utils.nub([node.label for node in everything]):

        chain1 = get_chain(nodes1, label)
        chain2 = get_chain(nodes2, label)

        aligned = lcs.lcs(lcs.path(chain1, chain2, equal))

        for i, j in aligned:
            M.add((chain1[i], chain2[j]))
        # Remove from the end first so the remaining indices are unaffected.
        for i, j in reversed(aligned):
            chain1.pop(i)
            chain2.pop(j)

        for left in chain1:
            for j in range(len(chain2)):
                if not equal(left, chain2[j]):
                    continue
                M.add((left, chain2[j]))
                chain2.pop(j)
                break
Пример #20
0
Файл: go.py Проект: Answeror/lit
    def sorted_active_runnable(self, query, hwnds):
        """Return the tasks for ``hwnds`` in display order.

        An empty query orders the tasks by ``usetime``, largest first.
        Otherwise each task gets ``task.query.distance_to(lowercased
        fullname) * 10 ** len(query)`` as its score; the leading run of
        scores equal to the first score is reduced by
        ``len(lcs(query, title))``; tasks are returned by ascending score.
        ``self.mutex`` is held throughout.
        """
        with QMutexLocker(self.mutex):
            # Refresh the per-task query state, then pick up the requested tasks.
            self._refresh_tasks(hwnds, query)
            candidates = [self.tasks[hwnd] for hwnd in hwnds]

            if not query:
                return sorted(candidates, key=lambda task: task.usetime, reverse=True)

            lowered = [task.fullname.lower() for task in candidates]
            factor = 10 ** len(query)

            weights = []
            for task, title in zip(candidates, lowered):
                weights.append(task.query.distance_to(title) * factor)

            first_weight = weights[0]
            for idx in range(len(weights)):
                # Stop at the first weight that differs from the first one.
                if not weights[idx] == first_weight:
                    break
                weights[idx] -= len(lcs(query, lowered[idx]))

            ranking = sorted(range(len(candidates)), key=lambda idx: weights[idx])
            return [candidates[idx] for idx in ranking]
Пример #21
0
def similarity(x, y):
	"""
	similarity(x, y)

	Measure string similarity between x and y.

	Returns 0 when y is empty or when the shorter length divided by the
	longer length is below 0.75. Otherwise returns:
	0.8*(len(LongestCommonSubsequence(x, y)) / max(len(x), len(y))) +
	0.2*1/(DamerauLevenshteinDistance(x, y))
	where a distance of 0 is replaced by 1.

	LCS and Levenshtein were found, by trial and error, to compensate
	for each other's errors, so their combination works well in most
	cases.

	Eg: For Abhogi and Behag, LCS gives a high similarity score of 3,
	which is balanced by the Levenshtein distance of 4, thus
	penalizing it.
	"""

	# Below this length ratio the two terms are deemed different.
	min_length_ratio = 0.75
	lcs_weight = 0.8
	distance_weight = 0.2

	if not len(y):
		return 0

	length_ratio = 1.0*len(x)/len(y)
	if length_ratio > 1:
		length_ratio = 1/length_ratio
	if length_ratio < min_length_ratio:
		return 0

	common = lcs.lcs(x, y)
	distance = levenshtein.dameraulevenshtein(x, y)
	if distance == 0:
		# Avoid dividing by zero for identical strings.
		distance = 1

	overlap = 1.0*len(common)/max(len(x), len(y))
	closeness = distance_weight*1.0/distance
	return lcs_weight*overlap + closeness
Пример #22
0
import numpy as np
import lcs
import time
import io
import boto3
import scipy.misc
import fisher
import pywren

# Start timestamp; it is not read again in the lines visible here.
t = time.time()
im_key = "imagenet_train_100k_pixels/8/11857.JPEG"

# Fetch the image object from the "pictureweb" bucket into an in-memory buffer.
client = boto3.client('s3')
bio = io.BytesIO(client.get_object(Bucket="pictureweb", Key=im_key)["Body"].read())

# Load the PCA matrix and the GMM parameter arrays from .npy files in the
# working directory (file names carry "16_centers").
pca_mat_lcs = np.load("pca_mat_lcs.npy")
weights = np.load('gmm_lcs_weights_{0}_centers.npy'.format(16))
means = np.load('gmm_lcs_means_{0}_centers.npy'.format(16))
covars= np.load('gmm_lcs_covars_{0}_centers.npy'.format(16))
# Tuple order is (means, covars, weights).
gmm_lcs = (means, covars, weights)

# NOTE(review): the image is read with flatten=True before being passed to
# lcs.lcs; other call sites in this collection pass an unflattened image -
# confirm which one is intended.
img = scipy.misc.imread(bio, flatten=True)
pwex = pywren.default_executor()
# Compute the descriptors and view them as rows of 96 values.
z = lcs.lcs(img).reshape(-1, 96)






Пример #23
0
	print flag
	flag = flag+ 1
	machine_id = row[0]
	isp = row[2]
	country = row[1]
	if isp == "":
		continue
	if machineList.has_key(machine_id):
		if isp in machineList[machine_id]:
			continue	
		else:
			index = -1
			max_sim = -1
			for isp_idx in range(0, len(machineList[machine_id])):
				isp_sub = machineList[machine_id][isp_idx]
				common_len = lcs.lcs(isp, isp_sub)
				sim = float(common_len) / max(len(isp), len(isp_sub))
				#print sim
				#print isp_sub, isp
				if sim > eps and max_sim < sim:
					index = isp_idx
					max_sim = sim
			if index != -1:
				#update
				if len(isp) < len(machineList[machine_id][index]):
					   	machineList[machine_id][index] = isp
						comment_sql = UPDATE_SQL.format(newisp = isp, machine_id = machine_id,  isp = machineList[machine_id][index])
						cursor.execute(comment_sql)
						conn.commit()
				else:
					continue
Пример #24
0
def normal_lcs(a, b, la=None, lb=None):
    """Return ``2 * lcs(a, b) / (la + lb)`` as a true (non-truncating) ratio.

    ``la`` and ``lb`` default to ``len(a)`` and ``len(b)``.  Any falsy value
    (``None`` or ``0``) is treated as "not supplied", exactly as before.

    Raises ``ZeroDivisionError`` when both lengths end up being 0.
    """
    if not la:
        la = len(a)
    if not lb:
        lb = len(b)
    # Force float arithmetic: with two integers, Python 2's ``/`` truncates,
    # which would collapse the ratio to an integer.
    return 2.0 * lcs(a, b) / (la + lb)
Пример #25
0
 def get_longest_substr(self, first_start, first_end, second_start, second_end):
     """Run ``lcs`` on the given windows of ``self.first`` and ``self.second``.

     The three values returned by ``lcs`` are unpacked in order; the first
     is offset by ``first_start``, the second by ``second_start`` and the
     third is returned unchanged.
     """
     window_a = self.first[first_start:first_end]
     window_b = self.second[second_start:second_end]
     offset_a, offset_b, size = lcs(window_a, window_b)
     return (first_start + offset_a, second_start + offset_b, size)
Пример #26
0
 def test_lcs(self):
     """Check ``lcs.lcs`` against every entry of both case tables.

     Each case is indexed as ``((first, second), expected)``.
     """
     for table_name in ("cases", "cases_without_recursive"):
         for case in getattr(self, table_name):
             arguments, expected = case[0], case[1]
             self.assertEqual(lcs.lcs(arguments[0], arguments[1]), expected)
Пример #27
0
import concurrent.futures as fs
import numpy as np
import lcs
from numpy import ascontiguousarray as C_ARRAY


def convert_keystone_csv_to_numpy(fname):
    """Load a comma-separated numeric text file as a 2-D float array.

    Leading and trailing whitespace of the whole file is stripped, then each
    remaining line becomes one row and each comma-separated field one float.

    Raises ``ValueError`` (from ``float``) if any field is not numeric,
    which includes blank lines inside the file and an empty file.
    """
    # Use a context manager so the handle is closed instead of leaked.
    with open(fname) as handle:
        lines = handle.read().strip().split("\n")
    return np.array([[float(field) for field in line.split(",")]
                     for line in lines])


if __name__ == "__main__":
    im = scipy.misc.imread("./ILSVRC2012_val_00000293.JPEG")
    # convert image to BGR
    im = im[:, :, ::-1]
    descs = lcs.lcs(im).reshape(-1, 96)
    descs_mean = descs[:, :48]
    descs_std = descs[:, 48:]
    descs = np.vstack((descs_mean, descs_std)).reshape((-1, 96), order='F').T

    descs_keystone = C_ARRAY(
        convert_keystone_csv_to_numpy("./lcs_imagenet.txt"))
    pca_mat = convert_keystone_csv_to_numpy("./pcaMat_lcs.csv")
    weights = convert_keystone_csv_to_numpy("./gmmCoefs_lcs.csv")
    means = convert_keystone_csv_to_numpy("./gmmMeans_lcs.csv").T
    covars = convert_keystone_csv_to_numpy("./gmmVars_lcs.csv").T
    descs = pca_mat.dot(descs)
    pca_keystone = convert_keystone_csv_to_numpy("./pca_keystone_lcs.txt")
    gmm = (means, covars, weights)
    fv_keystone = convert_keystone_csv_to_numpy("./fisher_keystone_lcs.txt")
    fv_features = fisher.fisher_vector_features(
import lcs

# Two sample strings to compare.
a ="aslkndlkasoihlkn"
b ="asdhklneldbaiubsc"


# Compute the common subsequence and print it together with its length.
c = lcs.lcs(a,b)
print(c)
print(len(c))

# Print the input lengths for comparison.
print(len(a))
print(len(b))

# # But the Levenshtein distance should be 12, not ( Max(a,b) - lcs )
# a	s	 	l	k	 	n	d	l	k	a	s	o	i	h	l	k	n
# a	s	d	h	k	l	n	e	l	 	d	b	a	i	u	b	s	c
Пример #29
0
 def _select_best(self, articles):
     """Return the article whose title gives the smallest ``lcs`` value.

     Each title is passed through ``_em`` and ``gn`` and then compared with
     ``self.gn`` using ``lcs``.
     """
     from gn import gn
     from lcs import lcs

     def score(art):
         return lcs(self.gn, gn(_em(art['title'])))

     return min(articles, key=score)
Пример #30
0
    def calcScore(self, qtList):
        """Score this candidate against the templates in ``qtList``.

        ``qtList`` is indexed with the keys '01', '02', '03', '11', '12',
        '13' and '20'.  The score is stored on ``self.score`` and returned.
        In the ``qType == 2`` branch ``self.pre`` is replaced by a set.

        NOTE(review): ``self.qType`` is read before any branch here assigns
        it 0, so it is presumably initialised to 0 elsewhere - confirm.
        """
        # Locate the part shared with self.sub inside the raw question and
        # remove every occurrence of it to get the template text.
        lcsSub = lcs.lcs(self.sub, self.qRaw)
        subIndex = self.qRaw.index(lcsSub)
        qTemplate = self.qRaw.replace(lcsSub, '')
        if self.pre == '':
            # No predicate given: subject-only scoring.
            self.qType = 2
        else:
            # Same treatment for the predicate, on the already reduced text.
            lcsPre = lcs.lcs(self.pre, qTemplate)
            preIndex = qTemplate.index(lcsPre)
            qTemplate = qTemplate.replace(lcsPre, '')
            if preIndex < subIndex:
                self.qType = 1

        if self.qType == 0:
            # Split the template at subIndex and preIndex.
            qt01 = qTemplate[:subIndex]
            qt02 = qTemplate[subIndex:preIndex]
            qt03 = qTemplate[preIndex:]
            mSqt01 = 0
            mSqt02 = 0
            mSqt03 = 0
            # For each piece keep the best Jaro score over its template list.
            for vQt01 in qtList['01']:
                tmp = Levenshtein.jaro(qt01, vQt01)
                if tmp > mSqt01:
                    mSqt01 = tmp
            for vQt02 in qtList['02']:
                tmp = Levenshtein.jaro(qt02, vQt02)
                if tmp > mSqt02:
                    mSqt02 = tmp
            for vQt03 in qtList['03']:
                tmp = Levenshtein.jaro(qt03, vQt03)
                if tmp > mSqt03:
                    mSqt03 = tmp
            # Average of the three piece scores and the two match scores.
            self.score = (mSqt01 + mSqt02 + mSqt03 + Levenshtein.jaro(
                lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5
        if self.qType == 1:
            # Split the template at preIndex and subIndex.
            qt11 = qTemplate[:preIndex]
            qt12 = qTemplate[preIndex:subIndex]
            qt13 = qTemplate[subIndex:]
            mSqt11 = 0
            mSqt12 = 0
            mSqt13 = 0
            for vQt11 in qtList['11']:
                tmp = Levenshtein.jaro(qt11, vQt11)
                if tmp > mSqt11:
                    mSqt11 = tmp
            for vQt12 in qtList['12']:
                tmp = Levenshtein.jaro(qt12, vQt12)
                if tmp > mSqt12:
                    mSqt12 = tmp
            for vQt13 in qtList['13']:
                tmp = Levenshtein.jaro(qt13, vQt13)
                if tmp > mSqt13:
                    mSqt13 = tmp
            self.score = (mSqt11 + mSqt12 + mSqt13 + Levenshtein.jaro(
                lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5

        if self.qType == 2:
            # qt20 is assigned but not read again in this method.
            qt20 = qTemplate
            qt21 = qTemplate[:subIndex]
            qt22 = qTemplate[subIndex:]
            mSqt20 = 0
            preResult = set()
            for vQt20 in qtList['20']:
                # Split the entry at the '|||qS|||' and ' ===>>> ' markers
                # (both markers are 8 characters long).
                vQt201 = vQt20[:vQt20.index('|||qS|||')]
                vQt202 = vQt20[vQt20.index('|||qS|||') +
                               8:vQt20.index(' ===>>> ')]
                vQt20pre = vQt20[vQt20.index(' ===>>> ') + 8:]
                sTmp20 = Levenshtein.jaro(vQt201, qt21) + Levenshtein.jaro(
                    vQt202, qt22)
                sTmp20pre = 0
                preTmp = set()
                # Collect every item of self.kbDict's entries that ties for
                # the best Jaro score against the entry's third part.
                for kb in self.kbDict:
                    for pre in kb:
                        tmp = Levenshtein.jaro(vQt20pre, pre)
                        if tmp > sTmp20pre:
                            sTmp20pre = tmp
                            preTmp = set()
                        if tmp == sTmp20pre:
                            preTmp.add(pre)
                # Average of four terms: two template parts, the best item
                # score and the subject match score.
                sTmp20 = (sTmp20 + sTmp20pre +
                          Levenshtein.jaro(lcsSub, self.sub)) / 4
                if sTmp20 > mSqt20:
                    mSqt20 = sTmp20
                    preResult = set()
                if sTmp20 == mSqt20:
                    for pre in preTmp:
                        preResult.add(pre)
            # Overwrites self.pre with the set of best-scoring items.
            self.pre = preResult
            self.score = mSqt20
        return self.score
Пример #31
0
 def test_longest_common_subsequence(self):
     """Check ``lcs`` against a fixed table of input pairs and expected strings."""
     expectations = [
         (("waaaa", "bbbbasfaaewra"), "aaaa"),
         (("abc", "def"), ""),
         (("123", "01234"), "123"),
         (("abcd", "dcba"), "c"),
     ]
     for (left, right), wanted in expectations:
         self.assertEqual(lcs(left, right), wanted)
Пример #32
0
def alternative_pal(s):
    """Return ``len(s)`` minus the length of ``lcs(s, reversed s)``."""
    mirrored = s[::-1]
    return len(s) - len(lcs(s, mirrored))
Пример #33
0
 def test_lcs(self):
     """``lcs`` must give 3 for seq1/seq2 and 4 for seq3/seq4."""
     for first_name, second_name, expected in (("seq1", "seq2", 3),
                                               ("seq3", "seq4", 4)):
         first = getattr(self, first_name)
         second = getattr(self, second_name)
         self.assertEqual(lcs(first, second), expected)
Пример #34
0
def evaluate_step_pair(a_tokens, a_tags, b_tokens, b_tags):
    """Return ``lcs`` of the two token tensors after ``tensor_to_list``.

    ``a_tags`` and ``b_tags`` are accepted but not used.
    """
    left = tensor_to_list(a_tokens)
    right = tensor_to_list(b_tokens)
    return lcs(left, right)
Пример #35
0
from ground_truth import ground_truth
from translate import translate
from lcs import lcs
from numpy import median

import sys, os

# Recovery rate of each of the 1000 result files, in order.
rate_list = []
# Rebinds the imported ground_truth function to the loaded reference value.
ground_truth = ground_truth('rsa_' + sys.argv[1] + '.sp')

for i in range(1, 1001):
    inputfile = os.path.join('../RSA/client/results/',
                             'result-' + sys.argv[2] + '-' + str(i) + '.txt')
    recovered = translate(inputfile)

    # Share of the reference covered by the common subsequence.
    recover_rate = float(len(lcs(recovered, ground_truth))) / float(
        len(ground_truth))
    rate_list.append(recover_rate)

    print(str(i) + ': Recover rate is %.2f%%' % (recover_rate * 100))

Average = float(sum(rate_list)) / 1000
Median = median(rate_list)
Max = max(rate_list)

# Write the summary inside a context manager so the file is flushed and
# closed even if one of the writes fails.
with open("recover_rate.txt", "w+") as f:
    f.write('Average recover rate is %.2f%%\n' % (Average * 100))
    f.write('Median recover rate is %.2f%%\n' % (Median * 100))
    f.write('Maximum recover rate is %.2f%%\n' % (Max * 100))
Пример #36
0
Файл: try.py Проект: YLAsce/oj
from lcs import lcs
# Smoke check: print whatever lcs returns for 'aaa' and 'a' (Python 2 print statement).
print lcs('aaa', 'a')
from lcs import length_lcs
from lcs import lcs

# Interactive driver: read two sequences, then print the LCS length and the LCS.
if __name__ == "__main__":
    # Each input line is split into a list of single characters.
    x = list(input("\n Enter the first sequence:"))
    y = list(input("\n Enter the second sequence:"))
    m = len(x)
    n = len(y)
    # length_lcs returns a pair; its first item is passed on to lcs below.
    (array, length) = length_lcs(x, y, m, n)
    print('\n Length of lcs is:', length)
    print('\n LCS is:', lcs(x, y, array, length))
Пример #38
0
def _signed_sqrt_l2_normalize(features):
    """Apply a signed square root, then scale every row to unit L2 norm."""
    assert not np.any(np.isnan(features))
    features = np.sign(features) * np.sqrt(np.abs(features))
    row_norms = np.linalg.norm(features, axis=1)[:, np.newaxis]
    assert not np.any(np.isnan(row_norms))
    features /= row_norms
    assert not np.any(np.isnan(features))
    return features


def image_fisher_featurize_sift_lcs(im_keys, out_matrix, bidx, gmm_sift,
                                    pca_sift_mat, gmm_lcs, pca_lcs_mat):
    """Compute SIFT and LCS Fisher-vector features for a batch of S3 images.

    Every key in ``im_keys`` is fetched from bucket "pictureweb".  The image
    is decoded with ``flatten=True`` and scaled by 1/255 for ``sift.sift``,
    and decoded again without flattening for ``lcs.lcs``.  Each descriptor
    set is multiplied by its PCA matrix, turned into a Fisher vector with
    the matching GMM tuple, and divided by its norm.  The per-image vectors
    are stacked, given a signed square root and row-wise L2 normalisation,
    concatenated side by side, and written with
    ``out_matrix.put_block(features, bidx, 0)``.

    Changes from the previous version: each object is downloaded once
    instead of twice, the S3 client is created once, the unused S3 resource
    is gone, and the LCS NaN checks are explicit ``if`` tests rather than
    ``assert`` inside a bare ``except`` (asserts are removed under ``-O``).

    Returns ``(elapsed_seconds, start_time, end_time)``.

    Raises ``Exception`` naming the image key when NaNs appear in the LCS
    descriptors (before or after PCA) or in the LCS Fisher vector.
    """
    all_sift_features = []
    all_lcs_features = []
    t = time.time()
    client = boto3.client('s3')
    for im_key in im_keys:
        # Fetch the bytes once; both decodes below read from this copy.
        raw = client.get_object(Bucket="pictureweb", Key=im_key)["Body"].read()

        im = scipy.misc.imread(io.BytesIO(raw), flatten=True)
        im = im.astype('float32')
        im /= 255.0
        sift_descs = sift.sift(im)
        assert np.all(sift_descs >= 0)
        sift_descs = np.sqrt(sift_descs).dot(pca_sift_mat)
        sift_features = fisher.fisher_vector_features(
            sift_descs.astype('float32'), *gmm_sift)
        sift_features /= np.linalg.norm(sift_features)

        im = scipy.misc.imread(io.BytesIO(raw))
        lcs_descs = lcs.lcs(im).reshape(-1, LCS_DESC_LENGTH)
        if np.any(np.isnan(lcs_descs)):
            raise Exception("RAISING LCS Error pre pca in {0}".format(im_key))

        lcs_descs = lcs_descs.dot(pca_lcs_mat)
        if np.any(np.isnan(lcs_descs)):
            raise Exception("RAISING LCS Error post pca in {0}".format(im_key))

        lcs_features = fisher.fisher_vector_features(
            lcs_descs.astype('float32'), *gmm_lcs)
        lcs_features /= np.linalg.norm(lcs_features)
        if np.any(np.isnan(lcs_features)):
            raise Exception(
                "RAISING LCS Fisher Vector Error in {0}".format(im_key))

        all_sift_features.append(sift_features)
        all_lcs_features.append(lcs_features)

    # sqrt normalization, SIFT first and then LCS
    all_sift_features = _signed_sqrt_l2_normalize(np.array(all_sift_features))
    all_lcs_features = _signed_sqrt_l2_normalize(np.array(all_lcs_features))

    features = np.hstack((all_sift_features, all_lcs_features))
    out_matrix.put_block(features, bidx, 0)
    e = time.time()
    return e - t, t, e
Пример #39
0
def calculate_lcs(words):
  """Return the length of lcs.lcs over the first 10 items of words[0] and words[1]."""
  head_a = words[0][:10]
  head_b = words[1][:10]
  return len(lcs.lcs(head_a, head_b))
Пример #40
0
# Given a string what is the min. no of insertions required to make the string palindrome
# Step 1 : Take reverse of string
# Step 2 : Apply LCS on string,rev of string
# Step 3 : result = len(string) - len(lcs)
from lcs import lcs

s = "HELLO"
# lcs is subtracted from len(s) directly below, so it is presumably the
# LCS length rather than the subsequence itself - confirm against lcs.
x = lcs(s, s[::-1])
print(len(s) - x)
Пример #41
0
def lcs(text1, text2):
    """Return the share of ``text1`` not covered by ``seq.lcs(text1, text2)``.

    Computed as ``abs(len(text1) - len(common)) / len(text1)`` in float
    arithmetic.  Raises ``ZeroDivisionError`` when ``text1`` is empty.
    """
    common = seq.lcs(text1, text2)
    uncovered = float(len(text1) - len(common))
    return abs(uncovered / float(len(text1)))
def shortestCommonSuperSequence(s1, s2):
    """Print ``len(s1) + len(s2) - lcs(s1, s2)`` after the label 'length= '."""
    merged_length = len(s1) + len(s2) - lcs(s1, s2)
    print('length= ', merged_length)
Пример #43
0
def _addTemplateMatches(q, kbDict, qtList, qtKeys, subFirst, matches):
    """Add the knowledge-base hits of one question-template family to matches.

    A family is three lists of template strings (head, middle, tail) stored in
    qtList under the three keys given in qtKeys.  The head must be empty or a
    prefix of q; the text before the middle template and the text before the
    tail template are the two extracted segments.

    Args:
        q: Normalised question string.
        kbDict: Mapping from subject to an iterable of entries supporting ``in``.
        qtList: Mapping from template-list key to a list of template strings.
        qtKeys: Tuple of the three qtList keys (head, middle, tail).
        subFirst: True when the first segment is the subject and the second the
            predicate; False for the opposite order.
        matches: Set that receives the new answerCandidate objects (mutated).
    """
    headKey, middleKey, tailKey = qtKeys
    for head in qtList[headKey]:
        # The head template has to be empty or sit at the very start of q.
        if head != '' and q.find(head) != 0:
            continue
        afterHead = q.replace(head, '', 1)
        for middle in qtList[middleKey]:
            middlePos = afterHead.find(middle)
            # A non-empty middle template at position 0 is rejected.
            # NOTE(review): a template that is absent gives find() == -1, which
            # is accepted and makes the slices below drop/keep the last
            # character - confirm this is intended.
            if middle != '' and middlePos == 0:
                continue
            firstPart = afterHead[:middlePos]
            afterMiddle = afterHead[middlePos:].replace(middle, '', 1)
            for tail in qtList[tailKey]:
                tailPos = afterMiddle.find(tail)
                if tail != '' and tailPos == 0:
                    continue
                secondPart = afterMiddle[:tailPos]
                if subFirst:
                    subCandidate, preCandidate = firstPart, secondPart
                else:
                    subCandidate, preCandidate = secondPart, firstPart
                if subCandidate in kbDict:
                    for kb in kbDict[subCandidate]:
                        if preCandidate in kb:
                            matches.add(
                                answerCandidate(subCandidate, preCandidate, q))


def _dedupeCandidates(candidates):
    """Return a new set holding one candidate per distinct (sub, pre) pair.

    The first candidate met while iterating a copy of the input set is the one
    that is kept for each pair.
    """
    unique = set()
    for aCandidate in candidates.copy():
        duplicate = False
        for aC in unique:
            if aC.pre == aCandidate.pre and aC.sub == aCandidate.sub:
                duplicate = True
                break
        if not duplicate:
            unique.add(aCandidate)
    return unique


def answerQ(qRaw, lKey, kbDict, qtList, threshold=0, debug=False):
    """Pick the best-scoring answer candidates for a raw question string.

    Candidates come from four pools: question-template matches, keys with the
    longest lcs.lcs() match against the question, predicates with the longest
    match against the rest of the question, and the largest combined
    subject + predicate match.  Every pool is reduced to one candidate per
    (sub, pre) pair, scored with ``calcScore(qtList)``, and the candidates
    reaching the highest score (starting from 0) are returned.

    Args:
        qRaw: Question text; it is stripped and all spaces are removed.
        lKey: Iterable of subject keys.  It is iterated twice, and every key
            with a non-empty match is looked up in kbDict.
        kbDict: Mapping from subject key to an iterable of entries; each entry
            is iterated for predicates and tested with ``in``.
        qtList: Mapping holding the template lists '01', '02', '03', '11',
            '12' and '13'; also passed to every candidate's calcScore().
        threshold: Unused; kept for interface compatibility.
        debug: When ``debug == False`` only the best answers are returned.

    Returns:
        The set of best candidates, or the list
        ``[bestAnswer, maxSubSet, maxPreSet, maxSPSet]`` in debug mode.
    """
    q = qRaw.strip().replace(' ', '')

    # Pool 1: exact question-template matches (subject-first, then
    # predicate-first templates), collected into the same set in that order.
    qtMatchSet = set()
    _addTemplateMatches(q, kbDict, qtList, ('01', '02', '03'), True,
                        qtMatchSet)
    _addTemplateMatches(q, kbDict, qtList, ('11', '12', '13'), False,
                        qtMatchSet)

    # First pass: length of the longest match between the question and any key.
    maxSubLen = 0
    for key in lKey:
        lcsSubLen = len(lcs.lcs(q, key))
        if lcsSubLen > maxSubLen:
            maxSubLen = lcsSubLen

    maxSubSet = set()
    maxPreLen = 0
    maxPreSet = set()
    maxSPLen = maxSubLen
    maxSPSet = set()

    # Second pass: build the three LCS-based pools.
    for key in lKey:
        lcsSub = lcs.lcs(q, key)
        if lcsSub == '':
            continue

        lcsSubLen = len(lcsSub)
        isLongestSub = maxSubLen == lcsSubLen

        # Split the question around the matched subject text.
        lcsSubIndex = q.index(lcsSub)
        qRemoveSub1 = q[:lcsSubIndex]
        qRemoveSub1Len = len(qRemoveSub1)
        qRemoveSub2 = q[lcsSubIndex + lcsSubLen:]
        qRemoveSub2Len = len(qRemoveSub2)
        foundPre = False
        for kb in kbDict[key]:
            for pre in list(kb):
                preLen = len(pre)
                lcsPre1 = ''
                lcsPre2 = ''
                # Pool 2: keys with the longest subject match whose predicate
                # shares anything with the remainder of the question.
                if isLongestSub:
                    lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    if lcsPre1 != '' or lcsPre2 != '':
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        foundPre = True
                        maxSubSet.add(newAnswerCandidate)

                # Pool 3: predicates with the longest match so far.  Only
                # strings longer than the current record can beat it.
                if preLen > maxPreLen:
                    if qRemoveSub1Len > maxPreLen:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxPreLen:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxLcsPre12 = max(len(lcsPre1), len(lcsPre2))
                    if maxLcsPre12 > maxPreLen:
                        maxPreLen = maxLcsPre12
                        maxPreSet = set()
                    if maxLcsPre12 == maxPreLen:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxPreSet.add(newAnswerCandidate)

                # Pool 4: largest subject + predicate match.  maxResidual is
                # the predicate length needed to tie the current record.
                maxResidual = maxSPLen - lcsSubLen
                if preLen > maxResidual:
                    if qRemoveSub1Len > maxResidual:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxResidual:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxResidual12 = max(len(lcsPre1), len(lcsPre2))
                    if maxResidual12 > maxResidual:
                        maxSPLen = maxResidual12 + lcsSubLen
                        maxResidual = maxSPLen - lcsSubLen
                        maxSPSet = set()
                    if maxResidual12 == maxResidual:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxSPSet.add(newAnswerCandidate)
        # A longest-match key without any matching predicate still yields a
        # subject-only candidate carrying the whole knowledge-base entry list.
        if not foundPre and isLongestSub:
            newAnswerCandidate = answerCandidate(key, '', q, 2, 0, kbDict[key])
            maxSubSet.add(newAnswerCandidate)

    maxSubSet = _dedupeCandidates(maxSubSet)
    maxPreSet = _dedupeCandidates(maxPreSet)
    maxSPSet = _dedupeCandidates(maxSPSet)

    # Score the pools in a fixed order; a strictly higher score resets the
    # winners, an equal score joins them.
    maxScore = 0
    bestAnswer = set()
    for pool in (maxSubSet, maxPreSet, maxSPSet, qtMatchSet):
        for aCandidate in pool:
            scoreTmp = aCandidate.calcScore(qtList)
            if scoreTmp > maxScore:
                maxScore = scoreTmp
                bestAnswer = set()
            if scoreTmp == maxScore:
                bestAnswer.add(aCandidate)

    bestAnswer = _dedupeCandidates(bestAnswer)

    # Kept as an equality test (not ``not debug``) so values such as None or ''
    # keep selecting the debug return shape exactly as before.
    if debug == False:
        return bestAnswer
    return [bestAnswer, maxSubSet, maxPreSet, maxSPSet]
Пример #44
0
 def _select_best(self, articles):
     """Return the article whose title gives the smallest lcs() value against self.gn.

     Each title is passed through _em() and gn() before being compared.
     NOTE(review): min() is used, so lcs() is presumably a distance here
     (smaller is better) - confirm against the lcs module.
     """
     from gn import gn
     from lcs import lcs

     def title_score(article):
         normalised_title = gn(_em(article['title']))
         return lcs(self.gn, normalised_title)

     return min(articles, key=title_score)
                        with tag('div',klass = "bg-danger"):
                            line('i','',klass = 'fa fa-remove pull-right')
                            line('h4',i[3:])
                    if i[0:3] == 'LCS':
                        with tag('div',klass = "bg-info"):
                            line('i','',klass = 'fa fa-star pull-right')
                            line('h4',i[3:])



    return indent(doc.getvalue())

def saveMarkedUpContentToFile(content):
    """Write content to 'output.html' in the working directory, overwriting it."""
    with open('output.html', 'w') as html_file:
        html_file.write(content)




if __name__ == "__main__":
    # Script entry point: diff the first item of two input files and write the
    # marked-up result to output.html.
    input1 = get_file_content('input1.txt')
    input2 = get_file_content('input2.txt')

    # The diff is computed on the raw first items, before they are joined
    # into plain strings for rendering.
    operations = lcs.lcs(input1[0], input2[0])
    input1 = ''.join(input1[0])
    input2 = ''.join(input2[0])
    # Parenthesised form prints the same on Python 2 and is valid on Python 3,
    # where the bare ``print operations`` statement is a SyntaxError.
    print(operations)
    content = generate_html_code(input1, input2, operations)
    saveMarkedUpContentToFile(content)

Пример #46
0
def compare_value(value1, value2):
  """Return a dissimilarity score between two values.

  Returns 0.0 when both values are None, 1.0 when exactly one is None, and
  otherwise ``1 - len(lcs.lcs(lcs.path(value1, value2))) / max(len(value1),
  len(value2))``.  Two empty values are treated as identical (0.0) instead of
  dividing by zero.
  """
  if value1 is None and value2 is None:
    return 0.0
  if value1 is None or value2 is None:
    return 1.0
  longest = max(len(value1), len(value2))
  if longest == 0:
    # Both values are empty: nothing differs, and the ratio below would
    # otherwise raise ZeroDivisionError.
    return 0.0
  common_length = len(lcs.lcs(lcs.path(value1, value2)))
  return 1.0 - (float(common_length) / longest)