def sentence_score_cobalt(self, sentence1, sentence2, alignments, word_level_scores):
    """Score a sentence pair with a Meteor-style weighted f-mean.

    Content words are weighted by ``self.delta`` and function words by
    ``1 - self.delta`` on both the length (denominator) and the match
    (numerator) side; precision/recall are combined with ``self.alpha``.

    :param sentence1: candidate-side tokens (objects with a ``.form`` attribute)
    :param sentence2: reference-side tokens (objects with a ``.form`` attribute)
    :param alignments: ``alignments[0]`` is a list of 1-based (idx1, idx2) links
    :param word_level_scores: per-link objects with ``similarity`` and
        ``penalty_mean`` attributes, parallel to ``alignments[0]``
    :return: the harmonic-style f-mean score (0 when either side has no credit)
    """
    func_words1 = [tok for tok in sentence1 if word_sim.function_word(tok.form)]
    func_words2 = [tok for tok in sentence2 if word_sim.function_word(tok.form)]

    # Weighted sentence lengths: content tokens count delta, function tokens 1-delta.
    weighted_len1 = self.delta * (len(sentence1) - len(func_words1)) \
        + (1.0 - self.delta) * len(func_words1)
    weighted_len2 = self.delta * (len(sentence2) - len(func_words2)) \
        + (1.0 - self.delta) * len(func_words2)

    weighted_matches1 = 0
    weighted_matches2 = 0
    for idx, link in enumerate(alignments[0]):
        # Per-link relatedness, floored so weak alignments still earn minimal credit.
        score = word_level_scores[idx]
        relatedness = max(score.similarity - score.penalty_mean,
                          self.minimal_aligned_relatedness)

        # Alignment indices are 1-based; weight each side by its word class.
        if word_sim.function_word(sentence1[link[0] - 1].form):
            weighted_matches1 += (1 - self.delta) * relatedness
        else:
            weighted_matches1 += self.delta * relatedness

        if word_sim.function_word(sentence2[link[1] - 1].form):
            weighted_matches2 += (1 - self.delta) * relatedness
        else:
            weighted_matches2 += self.delta * relatedness

    # Guard against empty weighted lengths: fall back to the raw match mass.
    precision = weighted_matches1 / weighted_len1 if weighted_len1 != 0 else weighted_matches1
    recall = weighted_matches2 / weighted_len2 if weighted_len2 != 0 else weighted_matches2

    # Short-circuit before the division so a zero precision/recall cannot raise.
    if precision == 0 or recall == 0:
        return 0
    denominator = ((1.0 - self.alpha) / precision) + (self.alpha / recall)
    if denominator == 0:
        return 0
    return 1.0 / denominator
def run(self, cand, ref):
    # Count non-function words inside the "gaps" between aligned token
    # positions (presumably a chunking/fragmentation feature), producing a
    # fixed-width vector of AbstractChunkFeature.chunk_number counts which is
    # stored via AbstractChunkFeature.set_value.
    counted = []
    aligned_tokens_cand = []
    # Collect the second index of each alignment link.
    # NOTE(review): despite the variable name, a[1] may index the *reference*
    # side — the gap loop below reads ref['tokens']; confirm link orientation.
    for a in cand['alignments'][0]:
        aligned_tokens_cand.append(a[1])
    prev = None
    for i in sorted(aligned_tokens_cand):
        # A gap starts when the first aligned position is past 1, or when the
        # current position is neither consecutive to nor equal to the previous.
        if (prev is None and i > 1) or (prev is not None and (i != prev + 1 and i != prev)):
            count = 0
            if prev is None:
                prev = 0
            # Count non-function words in the gap.
            # NOTE(review): the endpoint `i - prev - 1` looks suspicious — for
            # a gap between 1-based positions prev and i one would expect
            # range(prev, i - 1); as written the range shrinks (or empties) as
            # prev grows. TODO confirm against the feature's definition.
            for j in range(prev, i - prev - 1):
                if not word_sim.function_word(ref['tokens'][j]):
                    count += 1
            counted.append(count)
            # Stop once the fixed number of chunk slots is filled.
            if len(counted) == AbstractChunkFeature.chunk_number:
                break
        prev = i
    if prev is None:
        prev = 0
    # Trailing gap after the last aligned position, if slots remain.
    # NOTE(review): this tail counts in cand['tokens'] while the loop above
    # counted in ref['tokens'], and `len(cand['tokens']) - 1` skips the final
    # token — both look inconsistent; verify intent before changing.
    if prev < len(cand['tokens']) and len(counted) < AbstractChunkFeature.chunk_number:
        count = 0
        for j in range(prev, len(cand['tokens']) - 1):
            if not word_sim.function_word(cand['tokens'][j]):
                count += 1
        counted.append(count)
    # Pad the feature vector with zeros up to the fixed width.
    while len(counted) < AbstractChunkFeature.chunk_number:
        counted.append(0)
    AbstractChunkFeature.set_value(self, counted)