def test_match_size_2():
    """Print the match and candidate sizes for a repeated-character candidate.

    The candidate repeats a single character six times, while the reference
    contains that character only once; both are split with ``n_size = 1``.
    """
    n_size = 1
    candidate = '我我我我我我'
    reference = '中华人民的一份子我'

    candidate_grams = get_ngram(candidate, n_size)
    reference_grams = get_ngram(reference, n_size)
    matched, total = get_match_size(candidate_grams, reference_grams)

    # Emit the two figures in the same order: match size, then candidate size.
    for template, value in (('match size: {}', matched), ('cand size: {}', total)):
        print(template.format(value))
def add_entity_bonus(self, cand, entity_ref):
    """Add a ``beta``-weighted bonus for n-grams shared with the entity references.

    For every order in ``range(self.n_size)`` the candidate and each entry of
    ``entity_ref`` are split into n-grams tagged with the label ``'ENTITY'``,
    and the resulting match size, scaled by ``self.beta``, is added to both
    ``self.match_ngram`` and ``self.candi_ngram``.

    Arguments:
        cand -- candidate answer passed to ``common.get_ngram``
        entity_ref -- iterable of entity references, one n-gram list is built per entry
    """
    for order in range(self.n_size):
        candidate_grams = common.get_ngram(cand, order, label='ENTITY')
        reference_grams = [
            common.get_ngram(entity, order, label='ENTITY')
            for entity in entity_ref
        ]
        matched, _ = common.get_match_size(candidate_grams, reference_grams)
        # NOTE(review): the candidate size returned above is discarded and the
        # same weighted match count goes into both counters -- presumably so
        # the bonus raises numerator and denominator together; confirm.
        bonus = self.beta * matched
        self.match_ngram[order] += bonus
        self.candi_ngram[order] += bonus
def add_yn_bonus(self, cand, ref_list, yn_label, yn_ref):
    """Add an ``alpha``-weighted bonus for n-grams matching under yes/no labels.

    For every order in ``range(self.n_size)`` the candidate is split into
    n-grams tagged with ``yn_label``; the reference at each position of
    ``yn_ref`` is split into n-grams tagged with that position's label. The
    match size, scaled by ``self.alpha``, is added to both
    ``self.match_ngram`` and ``self.candi_ngram``.

    Arguments:
        cand -- candidate answer passed to ``common.get_ngram``
        ref_list -- references, indexed by position in ``yn_ref``
        yn_label -- label attached to the candidate n-grams
        yn_ref -- iterable of labels, one per reference used
    """
    for order in range(self.n_size):
        candidate_grams = common.get_ngram(cand, order, label=yn_label)
        # Index ref_list by the label's position so that a yn_ref longer than
        # ref_list still raises IndexError instead of being silently truncated.
        reference_grams = [
            common.get_ngram(ref_list[position], order, label=ref_label)
            for position, ref_label in enumerate(yn_ref)
        ]
        matched, _ = common.get_match_size(candidate_grams, reference_grams)
        # NOTE(review): the candidate size returned above is discarded and the
        # same weighted match count goes into both counters -- presumably so
        # the bonus raises numerator and denominator together; confirm.
        bonus = self.alpha * matched
        self.match_ngram[order] += bonus
        self.candi_ngram[order] += bonus
def count_ngram(self, cand, ref_list, n_size):
    """Accumulate n-gram match statistics for a candidate against several references.

    The candidate and every reference are split into n-grams of the given
    size, the match size and candidate size are obtained from
    ``common.get_match_size``, and both are added to the running totals kept
    in ``self.match_ngram`` and ``self.candi_ngram`` under the key ``n_size``.

    Arguments:
        cand -- candidate answer
        ref_list -- iterable of reference answers
        n_size -- n-gram size, also used as the key into the counters
    """
    candidate_grams = common.get_ngram(cand, n_size)
    reference_grams = [common.get_ngram(ref, n_size) for ref in ref_list]

    # First time this size is seen: start both counters from zero.
    if n_size not in self.match_ngram:
        self.match_ngram[n_size] = self.candi_ngram[n_size] = 0

    matched, total = common.get_match_size(candidate_grams, reference_grams)
    self.match_ngram[n_size] += matched
    self.candi_ngram[n_size] += total
def count_ngram(self, cand: str, ref: str, n_size: int):
    """Count the overlapping n-grams and store the totals in the dictionaries.

    Arguments:
        cand {str} -- candidate answer
        ref {str} -- reference answer
        n_size {int} -- size of the subsequences (n-grams)
    """
    candidate_grams = common.get_ngram(cand, n_size)
    reference_grams = common.get_ngram(ref, n_size)

    # First time this size is seen: start both counters from zero.
    if n_size not in self.match_ngram:
        self.match_ngram[n_size] = self.candi_ngram[n_size] = 0

    matched, total = common.get_match_size(candidate_grams, reference_grams)
    self.match_ngram[n_size] += matched
    self.candi_ngram[n_size] += total
def test_ngram_2():
    """Print what ``get_ngram`` returns for a seven-character sentence with ``n_size = 1``."""
    ngrams = get_ngram('中华人民共和国', 1)
    print(ngrams)
def test_ngram_1():
    """Print what ``get_ngram`` returns for a five-character sentence with ``n_size = 2``."""
    ngrams = get_ngram('我是孙维松', 2)
    print(ngrams)