def load_X(sent_pairs):
    """Build a feature matrix with one row per sentence pair and one column
    per string-similarity feature. Column order is irrelevant to the learner."""
    features = ["NIST", "BLEU", "Word Error Rate", "Longest common substring", "Levenshtein distance"]
    NIST, BLEU, WER, LCS, LD = 0, 1, 2, 3, 4
    X = np.zeros((len(sent_pairs), len(features)))
    for row, (t1, t2) in enumerate(sent_pairs):
        tok1 = word_tokenize(t1)
        tok2 = word_tokenize(t2)
        # symmetric word error rate: token edit distance normalised by each length
        dist = edit_distance(tok1, tok2)
        X[row, WER] = dist / len(tok1) + dist / len(tok2)
        # NIST can raise ZeroDivisionError on very short / disjoint inputs
        try:
            X[row, NIST] = sentence_nist([tok1], tok2) + sentence_nist([tok2], tok1)
        except ZeroDivisionError:
            X[row, NIST] = 0
        # symmetric BLEU on token lists
        X[row, BLEU] = sentence_bleu([tok1], tok2) + sentence_bleu([tok2], tok1)
        # character-level features on the raw strings
        X[row, LCS] = lcs(t1, t2)
        X[row, LD] = edit_distance(t1, t2)
    return X
def main(sts_data, output_file): """Calculate pearson correlation between semantic similarity scores and string similarity metrics. Data is formatted as in the STS benchmark""" # score_types = ["NIST", "BLEU", "Word Error Rate", "Longest common substring", "Levenshtein distance"] # read the dataset texts = [] labels = [] with open(sts_data, 'r', encoding='utf8') as dd: for line in dd: fields = line.strip().split("\t") labels.append(float(fields[4])) t1 = fields[5].lower() t2 = fields[6].lower() texts.append((t1, t2)) print(f"Found {len(texts)} STS pairs") NIST = [] BLEU = [] WER = [] LCS = [] LD = [] for i, pair in enumerate(texts): t1, t2 = pair t1_token = word_tokenize(t1) t2_token = word_tokenize(t2) # print(f"Sentences: {t1}\t{t2}") # calculate the scores ed = edit_distance(t1_token, t2_token) WER.append(ed / len(t1_token) + ed / len(t2_token)) try: NIST.append( sentence_nist([t1_token], t2_token) + sentence_nist([t2_token], t1_token)) except ZeroDivisionError: NIST.append(0) BLEU.append( sentence_bleu([t1_token], t2_token) + sentence_bleu([t2_token], t1_token)) LCS.append(lcs(t1, t2)) LD.append(edit_distance(t1, t2)) result = dict() result['NIST correlation'] = round(pearsonr(labels, NIST)[0], 3) result['BLEU correlation'] = round(pearsonr(labels, BLEU)[0], 3) result['Word Error Rate correlation'] = round(pearsonr(labels, WER)[0], 3) result['Longest common substring correlation'] = round( pearsonr(labels, LCS)[0], 3) result['Levenshtein distance correlation'] = round( pearsonr(labels, LD)[0], 3) with open(output_file, 'w') as out: out.write(f"Semantic textual similarity for {sts_data}\n") # TODO: write scores. See example output for formatting for metric, corr in result.items(): out.write(f'{metric}: {corr}\n')
def nist_func(x, y):
    "catch the zero dividend and return it as zero"
    try:
        # symmetric NIST: score each side against the other and sum
        return sentence_nist([x], y) + sentence_nist([y], x)
    except ZeroDivisionError:
        return 0
def get_metrics(pred, target):
    """Average BLEU-2/4, METEOR and NIST-2/4 over a batch of utterance pairs.

    :param pred: list of predicted token lists, aligned with *target*
    :param target: list of reference token lists
    :return: (bleu_2, bleu_4, meteor, nist_2, nist_4), each averaged over
        all utterances; utterances where either side is empty contribute 0.

    Fix: the original divided by ``turns`` unconditionally, raising
    ZeroDivisionError for an empty batch; we now return all zeros instead.
    """
    turns = len(target)
    if turns == 0:
        # empty batch: nothing to average
        return 0, 0, 0, 0, 0
    bleu_2 = 0
    bleu_4 = 0
    meteor = 0
    nist_2 = 0
    nist_4 = 0
    for index in range(turns):
        pred_utt = pred[index]
        target_utt = target[index]
        # usable n-gram order is capped by the shorter utterance (max 4)
        min_len = min(len(pred_utt), len(target_utt))
        lens = min(min_len, 4)
        if lens == 0:
            # an empty side scores nothing for this turn
            continue
        if lens >= 4:
            bleu_4_utt = sentence_bleu(
                [target_utt], pred_utt,
                weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=SmoothingFunction().method1)
            nist_4_utt = sentence_nist([target_utt], pred_utt, 4)
        else:
            bleu_4_utt = 0
            nist_4_utt = 0
        if lens >= 2:
            bleu_2_utt = sentence_bleu(
                [target_utt], pred_utt,
                weights=(0.5, 0.5),
                smoothing_function=SmoothingFunction().method1)
            nist_2_utt = sentence_nist([target_utt], pred_utt, 2)
        else:
            bleu_2_utt = 0
            nist_2_utt = 0
        bleu_2 += bleu_2_utt
        bleu_4 += bleu_4_utt
        # meteor_score here expects strings, so re-join the token lists
        meteor += meteor_score([" ".join(target_utt)], " ".join(pred_utt))
        nist_2 += nist_2_utt
        nist_4 += nist_4_utt
    bleu_2 /= turns
    bleu_4 /= turns
    meteor /= turns
    nist_2 /= turns
    nist_4 /= turns
    return bleu_2, bleu_4, meteor, nist_2, nist_4
def nist_2(labels, preds):
    """NIST-2 between the space-joined label and prediction sequences."""
    label = ' '.join(str(item) for item in labels)
    prediction = ' '.join(str(item) for item in preds)
    # NIST-2 needs at least two characters on each side
    if len(label) < 2 or len(prediction) < 2:
        return 0
    return sentence_nist([label], prediction, 2)
def calculate_metrics(data, src_field, trg_field, model, device, dump_path=None, model_name='default', max_len=MAX_LEN):
    """Calculate BLEU and NIST metrics"""
    # NOTE(review): dump_path defaults to None but is concatenated below
    # unconditionally, so calling without a dump_path raises TypeError —
    # confirm all callers pass a path (or change the default to '').
    preds_file = dump_path + model_name + '-baseline-output.txt'
    refs_file = dump_path + model_name + '-devel-conc.txt'
    bleu_scores = []
    nist_scores = []
    bleu_smoother = SmoothingFunction().method4
    refs = []
    preds = []
    # Group sources and references: score each unique source once against
    # all of its references
    src_unique = list(set([tuple(e.src) for e in data.examples]))
    for src in src_unique:
        pred_trg = translate_sentence(src, src_field, trg_field, model,
                                      device, max_len)
        # Remove <eos>
        pred_trg = pred_trg[:-1]
        ref_list = get_ref_list(data, src)
        preds.append(' '.join(pred_trg))
        refs.append(ref_list)
        # Calculate sentence-level scores and save
        bleu_score = sentence_bleu(ref_list, pred_trg,
                                   smoothing_function=bleu_smoother)
        nist_score = sentence_nist(
            ref_list,
            pred_trg,
        )
        bleu_scores.append(bleu_score)
        nist_scores.append(nist_score)
    # Dump all results in official e2e metric script compatible format:
    # references for one source are newline-separated, groups are separated
    # by a blank line
    file_refs = []
    for ref in refs:
        ref_sentences = [' '.join(tokens) for tokens in ref]
        refs_joined = '\n'.join(ref_sentences)
        file_refs.append(refs_joined)
    with open(preds_file, 'w') as pred_f:
        pred_f.write('\n'.join(preds))
    with open(refs_file, 'w') as ref_f:
        ref_f.write('\n\n'.join(file_refs))
    print(f'Writing files for {model_name}')
    print(f'Predictions in {preds_file}')
    print(f'References in {refs_file}')
    # return corpus averages of the per-sentence scores
    return np.mean(bleu_scores), np.mean(nist_scores)
def get_nist(self):
    """Average sentence-level NIST of every hypothesis line in the test
    file against the shared reference set (n-gram order from self.gram)."""
    order = self.gram
    reference = self.get_reference()
    scores = list()
    with open(self.test_data) as test_data:
        for line in test_data:
            hypothesis = nltk.word_tokenize(line)
            scores.append(
                nist_score.sentence_nist(reference, hypothesis, n=order))
    return sum(scores) / len(scores)
def sim_nist(self, hyps, ref):
    """Score each hypothesis against a single reference with sentence NIST.

    :param hyps: list of token lists, one per hypothesis
    :param ref: token list of the reference
    :return: (max score, mean score) over all hypotheses

    Fix: the original's bare ``except:`` swallowed *every* error (including
    typos/NameErrors); only NIST's ZeroDivisionError is expected and is now
    the only thing caught. The docstring also had the parameter roles
    reversed.
    """
    scores = []
    for hyp in hyps:
        try:
            scores.append(sentence_nist([ref], hyp))
        except ZeroDivisionError:
            # degenerate overlap (e.g. too-short hypothesis): score 0
            scores.append(0.0)
    return np.max(scores), np.mean(scores)
def calculate_nist_score(self):
    """Compute the NIST score: sentence-level when the references consist
    of a single sentence, corpus-level otherwise; 0 when no reference."""
    hypo, ref, has_one_sentence = self._p.split_references_hypothesis()
    if ref is None:
        # nothing to score against
        return 0
    if has_one_sentence:
        # one or more references, each a single sentence
        return sentence_nist(ref, hypo)
    # one or more references with more than one sentence
    return corpus_nist(ref, hypo)
def calculate_max_nist(list_references, list_hypothesis):
    """Mean over hypotheses of the best (max) NIST score against each
    hypothesis' own reference set.

    :param list_references: list (per hypothesis) of lists of references
    :param list_hypothesis: list of hypotheses, aligned with the above
    :return: mean of the per-hypothesis maxima

    Fixes two defects of the original:
    * ``sum_bleu += max(...)`` sat *inside* the per-reference loop, so the
      running max was added once per reference instead of once per
      hypothesis, inflating the total;
    * a bare ``except`` hid every error, not just the expected
      ZeroDivisionError from NIST.
    """
    total = 0.0
    for references_items, hypothesis in zip(list_references, list_hypothesis):
        per_ref_scores = []
        for reference in references_items:
            try:
                per_ref_scores.append(
                    nist_score.sentence_nist([reference], hypothesis))
            except ZeroDivisionError:
                # degenerate overlap: this reference contributes 0
                per_ref_scores.append(0.0)
        if per_ref_scores:
            # add the best score once per hypothesis
            total += max(per_ref_scores)
    return total / len(list_hypothesis)
def nist_func(x, y):
    """Sentence NIST of tokenized *y* against tokenized *x*;
    a zero dividend is reported as 0."""
    try:
        ref_tokens = nltk.word_tokenize(x)
        hyp_tokens = nltk.word_tokenize(y)
        return nist_score.sentence_nist([ref_tokens], hyp_tokens)
    except ZeroDivisionError:
        return 0
def calc_nist(self, reference, hypothesis, gram):
    """Sentence-level NIST of *hypothesis* against *reference* with n=gram."""
    score = nist_score.sentence_nist(reference, hypothesis, n=gram)
    return score
# Add smoothed-BLEU, METEOR and NIST-2 columns to every dataframe.
# Each row holds a 'reference' / 'candidate' sentence pair.
for df in dfs:
    bleu_sm = []
    met = []
    nist = []
    for i in range(len(df.index)):
        row = df.loc[i]
        ref_tokens = word_tokenize(row['reference'])
        can_tokens = word_tokenize(row['candidate'])
        # smoothed sentence BLEU on the tokenized pair
        bleu_sm.append(
            sentence_bleu([ref_tokens], can_tokens, smoothing_function=smoother)
        )
        # METEOR takes the raw strings here, not token lists
        met.append(
            nltk.translate.meteor_score.meteor_score([row['reference']], row['candidate'])
        )
        # NIST with n-gram order 2
        nist.append(
            sentence_nist([ref_tokens], can_tokens, 2)
        )
    df['bleu_sm'] = bleu_sm
    df['met'] = met
    df['nist'] = nist

############ Evaluate various models on various feature selections
# Each entry of col_set is one feature combination to evaluate.
col_set = []
col_set.append(['bleu_uni'])
col_set.append(['bleu_sm'])
col_set.append(['met'])
col_set.append(['nist'])
col_set.append(['bleu_uni','bleu_sm','met','nist'])
col_set.append(['cos_s_r'])
col_set.append(['cos_s_c'])
def get_nist_score(candidate, reference, ngrams):
    """NIST score of the space-split candidate against one space-split
    reference, as a plain float."""
    hyp_tokens = candidate.split(' ')
    ref_tokens = [reference.split(' ')]
    return float(sentence_nist(ref_tokens, hyp_tokens, ngrams))
def nist(batch_targets, batch_outputs):
    """Return a dict holding the sentence-level NIST-4 score.

    Fix: the original computed ``score_nist`` and then returned an empty
    dict, silently discarding the result; the score is now stored under
    the 'nist' key. (Key name chosen to match the function — confirm
    against callers.)
    """
    score = {}
    score['nist'] = sentence_nist(batch_targets, batch_outputs, n=4)
    return score
from nltk.translate.nist_score import sentence_nist
import xlrd

# Score reference/candidate pairs read from an Excel sheet with NIST-2.
# (A block of garbled, dead commented-out BLEU code was removed here: it
# contained stray ''' delimiters and an orphan `else:` that could never run.)

# Give the location of the file
loc = ("/Users/ishitagupta/Desktop/kashgari_data.xlsx")

# To open Workbook
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(2)

# column 0 holds the reference text, column 1 the candidate text
i = 0
for i in range(1, 4):
    textreference = str(sheet.cell_value(i, 0))
    textcandidate = str(sheet.cell_value(i, 1))
    print(textreference.split())
    print(textcandidate.split())
    reference = [textreference.split()]
    candidate = textcandidate.split()
    if len(candidate) == 1:
        # single-token candidates cannot be scored with bigrams;
        # treat them as a perfect match (score 1.0 -> printed as 100)
        score = 1.0
        print(round(100 * score))
    else:
        score = str(round(100 * sentence_nist(reference, candidate, 2)))
        print(score)
def compute_score(self, candidate: List[str], references: List[List[str]]) -> Tensor:
    """Sentence-level NIST score of *candidate* against *references*.

    Fix: NLTK's signature is ``sentence_nist(references, hypothesis, n)``;
    the original passed ``(candidate, references)``, i.e. the arguments in
    the opposite order, so the candidate's tokens were treated as the
    reference set.
    """
    score = sentence_nist(references, candidate, n=self.ngram_order)
    return score
def sent_NIST(reference, hypothesis):
    '''
    Same philosophy as with sentence BLEU.
    '''
    score = nist_score.sentence_nist(reference, hypothesis)
    return score
def getNistScore(self, hypSent, refSent):
    """NIST-5 of the whitespace-split hypothesis against the reference."""
    hyp_tokens = hypSent.split()
    ref_tokens = refSent.split()
    return NN.sentence_nist([ref_tokens], hyp_tokens, n=5)
# (fragment of a larger method — enclosing def not visible here)
# BLEU with 3- and 4-gram weight profiles for titles and article bodies;
# the lists are the ';'-split sentence segments prepared by the caller.
etree.SubElement(paraphrase, 'BLUE_w3_titles').text = str(
    sentence_bleu([list_title_1], list_title_2, weights=weights3))
etree.SubElement(paraphrase, 'BLUE_w4_titles').text = str(
    sentence_bleu([list_title_1], list_title_2, weights=weights4))
etree.SubElement(paraphrase, 'BLUE_w1_articles').text = str(
    sentence_bleu([list_text_1], list_text_2, weights=weights1))
etree.SubElement(paraphrase, 'BLUE_w2_articles').text = str(
    sentence_bleu([list_text_1], list_text_2, weights=weights2))
etree.SubElement(paraphrase, 'BLUE_w3_articles').text = str(
    sentence_bleu([list_text_1], list_text_2, weights=weights3))
etree.SubElement(paraphrase, 'BLUE_w4_articles').text = str(
    sentence_bleu([list_text_1], list_text_2, weights=weights4))
# NIST with n-gram order 3, plus the title-vs-article difference
nist_titles = sentence_nist([list_title_1], list_title_2, n=3)
nist_articles = sentence_nist([list_text_1], list_text_2, n=3)
etree.SubElement(paraphrase, 'nist_titles').text = str(nist_titles)
etree.SubElement(paraphrase, 'nist_articles').text = str(nist_articles)
etree.SubElement(paraphrase, 'nist_diff').text = str(nist_titles - nist_articles)
# ROUGE works on plain strings, so undo the ';' segment separators first
title_1_space = title_1.replace(";", " ")
title_2_space = title_2.replace(";", " ")
text_1_space = text_1.replace(";", " ")
text_2_space = text_2.replace(";", " ")
rouge = Rouge()
title_score = rouge.get_scores(title_1_space, title_2_space)[0]
article_score = rouge.get_scores(text_1_space, text_2_space)[0]
def string_sim(sent_pairs):
    """Create a matrix where every row is a pair of sentences and every
    column is a string-similarity feature. Feature (column) order is not
    important to the algorithm."""
    features = [
        "NIST", "BLEU", "Word Error Rate", "Longest common substring",
        "Levenshtein distance"
    ]
    X = np.zeros((len(sent_pairs), len(features)))
    for row, (t1, t2) in enumerate(sent_pairs):
        t1_token = word_tokenize(t1)
        t2_token = word_tokenize(t2)
        # NIST (symmetric); a zero dividend counts as 0
        try:
            nist = (nist_score.sentence_nist([t2_token], t1_token) +
                    nist_score.sentence_nist([t1_token], t2_token))
        except ZeroDivisionError:
            nist = 0
        # BLEU (symmetric)
        bleu = (bleu_score.sentence_bleu([t1_token], t2_token) +
                bleu_score.sentence_bleu([t2_token], t1_token))
        # longest common substring length on the raw strings
        matcher = SequenceMatcher(None, t1, t2)
        match = matcher.find_longest_match(0, len(t1), 0, len(t2))
        # character-level Levenshtein distance
        dist = edit_distance(t1, t2)
        # word error rate: token edit distance normalised by both lengths
        dist_wer = edit_distance(t1_token, t2_token)
        wer = dist_wer / len(t1_token) + dist_wer / len(t2_token)
        X[row] = [nist, bleu, wer, match[2], dist]
    return X
def compute_nist(hypothesis, references):
    """Tokenize with ntok and return the sentence-level NIST score of the
    hypothesis against all references."""
    hyp_tokens = list(ntok.tokenize(hypothesis))
    ref_token_lists = [list(ntok.tokenize(ref)) for ref in references]
    return sentence_nist(ref_token_lists, hyp_tokens)
def __call__(self):
    """Read the processed paraphrases XML, compute a battery of text-pair
    metrics (length diffs, Flesch reading ease, BLEU, NIST, ROUGE, METEOR)
    for every paraphrase pair, and write them to processed/metrics.xml."""
    root = etree.parse(
        r'C:\Users\kiva0319\PycharmProjects\Diploma2020\processed\paraphrases.xml'
    )
    root = root.getroot()
    # NOTE(review): `corpus` is never used afterwards — dead append to input tree?
    corpus = etree.SubElement(root, "corpus")
    result_xml = etree.Element('raw_data')
    result_doc = etree.ElementTree(result_xml)
    corpus_info = etree.SubElement(result_xml, 'head')
    etree.SubElement(corpus_info, 'description').text = "—"
    etree.SubElement(corpus_info, 'date').text = str(date.today())
    articles_list = etree.SubElement(result_xml, 'corpus')
    count = 0
    # root[1] holds the paraphrase records; fields are accessed positionally
    for element in root[1]:
        id = element[0].text
        old_id = element[1].text
        id_1 = element[2].text
        id_2 = element[3].text
        title_1 = element[4].text
        title_2 = element[5].text
        text_1 = element[6].text
        text_2 = element[7].text
        words_title_1 = int(element[8].text)
        words_title_2 = int(element[9].text)
        words_article_1 = int(element[10].text)
        words_article_2 = int(element[11].text)
        num_of_paragraphs_1 = int(element[12].text)
        num_of_paragraphs_2 = int(element[13].text)
        element_paragraphs_1 = element[14].text
        element_paragraphs_2 = element[15].text
        jaccard = element[16].text
        clas = element[17].text
        print(count, id, flush=True)
        # words_max = max(words_max, words_article_1)
        # words_max = max(words_max, words_article_2)
        # chars_max = max(chars_max, len(text_1))
        # chars_max = max(chars_max, len(text_2))
        # continue
        # copy the identifying fields into the output record
        paraphrase = etree.SubElement(articles_list, 'paraphrase')
        etree.SubElement(paraphrase, 'value', name="id").text = id
        etree.SubElement(paraphrase, 'value', name="old_id").text = old_id
        etree.SubElement(paraphrase, 'value', name="id_1").text = id_1
        etree.SubElement(paraphrase, 'value', name="id_2").text = id_2
        etree.SubElement(paraphrase, 'value', name="title_1").text = title_1
        etree.SubElement(paraphrase, 'value', name="title_2").text = title_2
        etree.SubElement(paraphrase, 'value', name="jaccard").text = jaccard
        etree.SubElement(paraphrase, 'value', name="class").text = clas
        # words and paragraphs diff
        etree.SubElement(paraphrase, 'words_title_diff').text = str(
            abs(words_title_1 - words_title_2))
        etree.SubElement(paraphrase, 'words_article_diff').text = str(
            abs(words_article_1 - words_article_2))
        etree.SubElement(paraphrase, 'paragraphs_diff').text = str(
            abs(num_of_paragraphs_1 - num_of_paragraphs_2))
        # flesch_reading_ease (Russian); titles/texts store segments
        # separated by ';', re-joined with spaces for scoring
        textstat.textstat.set_lang("ru")
        etree.SubElement(paraphrase, 'flesch_reading_ease_title_1').text = str(
            textstat.flesch_reading_ease(" ".join(
                title_1.split(";"))))
        # NOTE(review): double underscore in this tag name looks like a typo,
        # but it is a runtime string — left untouched
        etree.SubElement(paraphrase, 'flesch_reading_ease__title_2').text = str(
            textstat.flesch_reading_ease(" ".join(
                title_2.split(";"))))
        etree.SubElement(
            paraphrase, 'flesch_reading_ease_article_1').text = str(
                textstat.flesch_reading_ease(" ".join(text_1.split(";"))) /
                num_of_paragraphs_1)
        etree.SubElement(
            paraphrase, 'flesch_reading_ease_article_2').text = str(
                textstat.flesch_reading_ease(" ".join(text_2.split(";"))) /
                num_of_paragraphs_2)
        # BLUE: four weight profiles (1- to 4-gram mixes), for titles and texts
        weights1 = (1, 0, 0, 0)
        weights2 = (0.5, 0.5, 0, 0)
        weights3 = (0.33, 0.33, 0.33, 0)
        weights4 = (0.25, 0.25, 0.25, 0.25)
        list_title_1 = title_1.split(";")
        list_title_2 = title_2.split(";")
        list_text_1 = text_1.split(";")
        list_text_2 = text_2.split(";")
        etree.SubElement(paraphrase, 'BLUE_w1_titles').text = str(
            sentence_bleu([list_title_1], list_title_2, weights=weights1))
        etree.SubElement(paraphrase, 'BLUE_w2_titles').text = str(
            sentence_bleu([list_title_1], list_title_2, weights=weights2))
        etree.SubElement(paraphrase, 'BLUE_w3_titles').text = str(
            sentence_bleu([list_title_1], list_title_2, weights=weights3))
        etree.SubElement(paraphrase, 'BLUE_w4_titles').text = str(
            sentence_bleu([list_title_1], list_title_2, weights=weights4))
        etree.SubElement(paraphrase, 'BLUE_w1_articles').text = str(
            sentence_bleu([list_text_1], list_text_2, weights=weights1))
        etree.SubElement(paraphrase, 'BLUE_w2_articles').text = str(
            sentence_bleu([list_text_1], list_text_2, weights=weights2))
        etree.SubElement(paraphrase, 'BLUE_w3_articles').text = str(
            sentence_bleu([list_text_1], list_text_2, weights=weights3))
        etree.SubElement(paraphrase, 'BLUE_w4_articles').text = str(
            sentence_bleu([list_text_1], list_text_2, weights=weights4))
        # NIST for n = 1..3; ZeroDivisionError leaves the score at 0
        nist_1_titles = 0
        nist_1_articles = 0
        nist_2_titles = 0
        nist_2_articles = 0
        nist_3_titles = 0
        nist_3_articles = 0
        try:
            nist_1_titles = sentence_nist([list_title_1], list_title_2, n=1)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        try:
            nist_1_articles = sentence_nist([list_text_1], list_text_2, n=1)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        try:
            nist_2_titles = sentence_nist([list_title_1], list_title_2, n=2)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        try:
            nist_2_articles = sentence_nist([list_text_1], list_text_2, n=2)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        try:
            nist_3_titles = sentence_nist([list_title_1], list_title_2, n=3)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        try:
            nist_3_articles = sentence_nist([list_text_1], list_text_2, n=3)
        except ZeroDivisionError:
            print("ZeroDivisionError id =", count)
        etree.SubElement(paraphrase, 'nist_1_titles').text = str(nist_1_titles)
        etree.SubElement(paraphrase, 'nist_1_articles').text = str(nist_1_articles)
        etree.SubElement(paraphrase, 'nist_2_titles').text = str(nist_2_titles)
        etree.SubElement(paraphrase, 'nist_2_articles').text = str(nist_2_articles)
        etree.SubElement(paraphrase, 'nist_3_titles').text = str(nist_3_titles)
        etree.SubElement(paraphrase, 'nist_3_articles').text = str(nist_3_articles)
        etree.SubElement(paraphrase, 'nist_1_diff').text = str(nist_1_titles -
                                                               nist_1_articles)
        etree.SubElement(paraphrase, 'nist_2_diff').text = str(nist_2_titles -
                                                               nist_2_articles)
        etree.SubElement(paraphrase, 'nist_3_diff').text = str(nist_3_titles -
                                                               nist_3_articles)
        # ROUGE works on plain strings, so undo the ';' separators
        title_1_space = title_1.replace(";", " ")
        title_2_space = title_2.replace(";", " ")
        text_1_space = text_1.replace(";", " ")
        text_2_space = text_2.replace(";", " ")
        rouge = Rouge()
        title_score = rouge.get_scores(title_1_space,
                                       title_2_space)[0]
        article_score = rouge.get_scores(text_1_space, text_2_space)[0]
        etree.SubElement(paraphrase, 'rouge-1_titles').text = str(
            title_score['rouge-1']['f'])
        etree.SubElement(paraphrase, 'rouge-2_titles').text = str(
            title_score['rouge-2']['f'])
        etree.SubElement(paraphrase, 'rouge-L_titles').text = str(
            title_score['rouge-l']['f'])
        etree.SubElement(paraphrase, 'rouge-1_articles').text = str(
            article_score['rouge-1']['f'])
        etree.SubElement(paraphrase, 'rouge-2_articles').text = str(
            article_score['rouge-2']['f'])
        etree.SubElement(paraphrase, 'rouge-L_articles').text = str(
            article_score['rouge-l']['f'])
        # METEOR with a Russian stemmer and WikiWordnet synonyms
        stemmer = SnowballStemmer("russian")
        wikiwordnet = WikiWordnet()
        etree.SubElement(paraphrase, 'meteor_title').text = str(
            meteor_score([title_1_space],
                         title_2_space,
                         stemmer=stemmer,
                         wordnet=wikiwordnet))
        etree.SubElement(paraphrase, 'meteor_article').text = str(
            meteor_score([text_1_space],
                         text_2_space,
                         stemmer=stemmer,
                         wordnet=wikiwordnet))
        count += 1
    # NOTE(review): outFile is never closed — consider a `with` block
    outFile = open("processed/metrics.xml", 'wb')
    result_doc.write(outFile,
                     xml_declaration=True,
                     encoding='utf-8',
                     pretty_print=True)
def calc_nist(self, reference, hypothesis, gram=5):
    """Sentence-level NIST of *hypothesis* against *reference* with a
    configurable n-gram order (default 5).

    Fix: leftover debug ``print`` statements (argument types/lengths and the
    raw hypothesis) removed — they spammed stdout on every call.
    """
    return nist_score.sentence_nist(reference, hypothesis, n=gram)
def nist(predict, target, n):
    """NIST-n of *predict* against *target*; 0 when either side is shorter
    than the n-gram order."""
    if min(len(predict), len(target)) < n:
        return 0
    return sentence_nist([target], predict, n)
def nist(df):
    """Add a 'nist' column with the sentence-level NIST score per row.

    Fix: the original comment claimed "tokenization happens inside nist",
    but NLTK's ``sentence_nist`` does NOT tokenize — it iterates whatever it
    is given. Passing the raw reference string made every *character* of the
    reference a separate reference, and the translation was scored
    character-by-character. Split both sides into tokens and wrap the single
    reference in a list, as the API expects.
    """
    df['nist'] = df.apply(
        lambda x: sentence_nist([x['reference'].split()],
                                x['translation'].split()),
        axis=1)
    return df