def run_test_cases():
    """Score every word pair in the test-case file and write one result row each.

    Reads ``test_cases_tuples_wiki.tsv`` (UTF-8, tab-separated). The first
    three fields of every line are used: the two words and a similarity
    value, which is copied through after stripping surrounding whitespace.
    Results go to ``test_cases_tuples_results_wiki_3.tsv`` (UTF-8), starting
    with a header row.

    Both words are padded with one leading and one trailing space before
    they are scored, and the padded forms are what is written to the first
    two output columns. ``pos_sim_union``, ``bi_lcs`` and
    ``pos_sim_union_05`` receive the bigram lists of the padded words and
    their results are written as returned; every other column is written as
    ``1 - f(word1, word2)``.

    The zero-based index of each input line is printed as a progress
    indicator.

    Raises:
        IndexError: if an input line has fewer than three tab-separated
            fields.
    """
    filepath = "test_cases_tuples_wiki.tsv"
    header = (
        "word1\tword2\tsimilarity\tpos_sim_union\tbi_lcs\tpos_sim_union_05\tdice\tjcd\tlcs\tldn\tbisim1\t"
        "bisim2\tbisim3\ttrisim1\txdice\txxdice\n")

    # Context managers guarantee the result file is flushed and closed even
    # when a malformed line or a metric raises part-way through the run.
    with codecs.open("test_cases_tuples_results_wiki_3.tsv", "w", "utf-8") as fw:
        fw.write(header)

        # NOTE(review): the returned matrix is not used in this function; the
        # call is kept at its original position in case it has side effects
        # (e.g. populating module state) - confirm and remove if it has none.
        read_bigram_matrix()

        with codecs.open(filepath, encoding='utf-8') as f:
            for i, line in enumerate(f):
                print(i)
                s = line.split("\t")
                s0 = " " + s[0] + " "
                s1 = " " + s[1] + " "
                # Fresh bigram lists are built for every call, exactly as
                # before, so no metric can observe another one's input.
                fields = [
                    s0,
                    s1,
                    s[2].strip(),
                    str(pos_sim_union(list(bigrams(s0)), list(bigrams(s1)))),
                    str(bi_lcs(list(bigrams(s0)), list(bigrams(s1)))),
                    str(pos_sim_union_05(list(bigrams(s0)), list(bigrams(s1)))),
                    str(1 - dice(s0, s1)),
                    str(1 - jcd(s0, s1)),
                    str(1 - lcs(s0, s1)),
                    str(1 - ldn(s0, s1)),
                    str(1 - bisim1(s0, s1)),
                    str(1 - bisim2(s0, s1)),
                    str(1 - bisim3(s0, s1)),
                    str(1 - trisim1(s0, s1)),
                    str(1 - xdice(s0, s1)),
                    str(1 - xxdice(s0, s1)),
                ]
                # Each row ends with a trailing tab before the newline; kept
                # so the output stays byte-compatible with earlier runs.
                fw.write("\t".join(fields) + "\t" + "\n")
示例#2
0
 def test_trisim1(self):
     """Call ``strings.trisim1`` on the fixture pair; passes if nothing raises."""
     trisim1 = strings.trisim1
     trisim1(self.a, self.b)
示例#3
0
 def test_trisim1(self):
     """Check ``strings.trisim1`` on the fixture pair in both modes.

     The ``normalized=True`` call is only exercised (its result is
     discarded); the ``normalized=False`` result must truncate to 8.
     """
     trisim1 = strings.trisim1
     trisim1(self.a, self.b, normalized=True)
     unnormalized = trisim1(self.a, self.b, normalized=False)
     assert int(unnormalized) == 8
示例#4
0
 def test_trisim1(self):
     """Smoke test: ``strings.trisim1`` must accept the fixture pair without raising."""
     score_fn = strings.trisim1
     score_fn(self.a, self.b)
示例#5
0
def test_trisim1(a, b):
    """Check ``strings.trisim1`` on the supplied pair in both modes.

    The ``normalized=True`` call is run only to confirm it works (the
    result is discarded); the ``normalized=False`` result must truncate
    to 8.
    """
    trisim1 = strings.trisim1
    trisim1(a, b, normalized=True)
    unnormalized = trisim1(a, b, normalized=False)
    assert int(unnormalized) == 8