def main():
    """CLI entry point: plot a BERTScore similarity heatmap for one candidate/reference pair."""
    # file_system sharing avoids "too many open files" with DataLoader workers.
    torch.multiprocessing.set_sharing_strategy("file_system")

    arg_parser = argparse.ArgumentParser("Visualize BERTScore")
    arg_parser.add_argument(
        "--lang",
        type=str,
        default="en",
        help="two-letter abbreviation of the language (e.g., en)",
    )
    arg_parser.add_argument(
        "-m", "--model", default=None, help="BERT model name (default: bert-base-uncased)"
    )
    arg_parser.add_argument(
        "-l",
        "--num_layers",
        type=int,
        default=None,
        help="use first N layer in BERT (default: 8)",
    )
    # NOTE(review): --verbose is accepted but never forwarded anywhere below.
    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", help="increase output verbosity"
    )
    arg_parser.add_argument("-r", "--ref", type=str, required=True, help="reference sentence")
    arg_parser.add_argument("-c", "--cand", type=str, required=True, help="candidate sentence")
    arg_parser.add_argument(
        "-f",
        "--file",
        type=str,
        default="visualize.png",
        help="name of file to save output matrix in",
    )
    arg_parser.add_argument(
        "--rescale_with_baseline",
        action="store_true",
        help="Rescaling the numerical score with precomputed baselines",
    )
    arg_parser.add_argument(
        "--baseline_path", default=None, type=str, help="path of custom baseline csv file"
    )
    opts = arg_parser.parse_args()

    bert_score.plot_example(
        opts.cand,
        opts.ref,
        model_type=opts.model,
        lang=opts.lang,
        num_layers=opts.num_layers,
        fname=opts.file,
        rescale_with_baseline=opts.rescale_with_baseline,
        baseline_path=opts.baseline_path,
    )
def main():
    """Parse CLI options and render a BERTScore similarity matrix to an image file."""
    # file_system sharing sidesteps shared-memory FD limits in worker processes.
    torch.multiprocessing.set_sharing_strategy('file_system')

    cli = argparse.ArgumentParser('Visualize BERTScore')
    cli.add_argument(
        '--lang',
        type=str,
        default='en',
        help='two-letter abbreviation of the language (e.g., en)',
    )
    cli.add_argument('-m', '--model', default=None, help='BERT model name (default: bert-base-uncased)')
    cli.add_argument(
        '-l',
        '--num_layers',
        type=int,
        default=None,
        help='use first N layer in BERT (default: 8)',
    )
    # NOTE(review): --verbose is parsed but not used by the plotting call below.
    cli.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    cli.add_argument('-r', '--ref', type=str, required=True, help='reference sentence')
    cli.add_argument('-c', '--cand', type=str, required=True, help='candidate sentence')
    cli.add_argument(
        '-f',
        '--file',
        type=str,
        default='visualize.png',
        help='name of file to save output matrix in',
    )
    # Dashes in the flag become an underscore attribute: args.rescale_with_baseline.
    cli.add_argument(
        '--rescale-with-baseline',
        action='store_true',
        help='Rescaling the numerical score with precomputed baselines',
    )
    args = cli.parse_args()

    bert_score.plot_example(
        args.cand,
        args.ref,
        model_type=args.model,
        lang=args.lang,
        num_layers=args.num_layers,
        fname=args.file,
        rescale_with_baseline=args.rescale_with_baseline,
    )
def main():
    """CLI entry point (legacy variant): plot a BERTScore example for one sentence pair."""
    torch.multiprocessing.set_sharing_strategy('file_system')

    opt_parser = argparse.ArgumentParser('Calculate BERTScore')
    opt_parser.add_argument(
        '--bert',
        default='bert-base-multilingual-cased',
        choices=bert_score.bert_types,
        help='BERT model name (default: bert-base-uncased)',
    )
    # NOTE(review): no type=int here, so a CLI-supplied value arrives as a string —
    # confirm downstream accepts that before changing it.
    opt_parser.add_argument(
        '-l', '--num_layers', default=8, help='use first N layer in BERT (default: 8)'
    )
    opt_parser.add_argument(
        '-v', '--verbose', action='store_true', help='increase output verbosity'
    )
    opt_parser.add_argument('-r', '--ref', required=True, help='reference sentence')
    opt_parser.add_argument('-c', '--cand', required=True, help='candidate sentence')
    opt_parser.add_argument('-o', '--output_file_name', default='', help='output file name')
    parsed = opt_parser.parse_args()

    bert_score.plot_example(
        parsed.cand,
        parsed.ref,
        verbose=parsed.verbose,
        bert=parsed.bert,
        num_layers=parsed.num_layers,
        fname=parsed.output_file_name,
    )
def word_alignment(sentence1, sentence2, outfile):
    """Compute word alignments between two parallel sentence collections and write
    reordered sentence pairs to *outfile*.

    For each batch: obtain a token-level similarity matrix via ``plot_example``,
    compress it onto the original tokenizations, derive a dependency-based
    reordering per pair, and emit original + reordered token sequences
    (4 lines per pair, blank-line separated).

    Fix: the original used two bare ``except:`` clauses — one merely to probe for
    a ``.shape`` attribute, one that silently swallowed *everything* including
    ``KeyboardInterrupt``/``SystemExit``. Replaced with ``hasattr`` and a
    narrowed ``except Exception``.
    """
    batches = batchify(sentence1, sentence2, batch_size=500)
    for batch in batches:
        b0 = [x.sent for x in batch[0]]
        b1 = [x.sent for x in batch[1]]
        # Token index 0 is excluded — presumably a ROOT/padding token; TODO confirm.
        otokens1 = [[x.tokens[y].word for y in x.tokens if y != 0] for x in batch[0]]
        otokens2 = [[x.tokens[y].word for y in x.tokens if y != 0] for x in batch[1]]
        btokens1, btokens2, sim = plot_example(b0, b1)
        # Map BERT-subword similarities back onto the original tokenization.
        sim = compress_similarity_matrix(otokens1, otokens2, btokens1, btokens2, sim)
        for i in range(len(batch[0])):
            # Skip entries without a usable similarity matrix (e.g. placeholders
            # produced for degenerate pairs).
            try:
                sim_i = sim[i]
            except Exception:
                continue
            if not hasattr(sim_i, 'shape'):
                continue
            # Best-effort per pair: a failure ordering one pair must not abort the run.
            try:
                reordered1, reordered2 = get_sentence_ordering_dep(
                    batch[0][i], batch[1][i], sim_i)
            except Exception:
                continue
            if reordered1 is None or reordered2 is None:
                continue
            r1 = " ".join(reordered1)
            r2 = " ".join(reordered2)
            t1 = " ".join(otokens1[i])
            t2 = " ".join(otokens2[i])
            outfile.write(t1 + '\n')
            outfile.write(t2 + '\n')
            outfile.write(r1 + '\n')
            outfile.write(r2 + '\n')
            outfile.write('\n')
def plot_similarity_matrix(cand, ref, lang):
    """Delegate to ``plot_example`` to visualize the candidate/reference
    token-similarity matrix for the given language."""
    candidate, reference = cand, ref
    plot_example(candidate, reference, lang=lang)
def bert_viz_heatmap(ref, pred):
    """Plot a baseline-rescaled BERTScore heatmap for the first ref/pred entry.

    ``ref`` and ``pred`` are assumed to be sequences whose first element is a
    list of tokens — TODO confirm against callers.
    """
    from bert_score import plot_example

    reference_text = ' '.join(ref[0]).strip()
    predicted_text = ' '.join(pred[0]).strip()
    plot_example(predicted_text, reference_text, lang='en', rescale_with_baseline=True)
# Fix: CUDA_VISIBLE_DEVICES must be exported *before* bert_score (and hence
# torch) is imported — CUDA device visibility is read when torch initializes,
# so setting it afterwards has no effect. The original set it after the import.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from bert_score import score, plot_example
import matplotlib.pyplot as plt

# Load candidate (hypothesis) and reference sentences, one per line.
with open("../data/models/hyps.txt") as f:
    cands = [line.strip() for line in f]
with open("../data/models/refs.txt") as f:
    refs = [line.strip() for line in f]

P, R, F1 = score(cands, refs, lang='en')
print(f"System level F1 score: {F1.mean():.3f}")

# Distribution of per-sentence F1 scores.
plt.hist(F1, bins=20)
plt.show()

# Inspect the first pair in detail and plot its similarity matrix.
print(cands[0])
print(f'P={P[0]:.6f} R={R[0]:.6f} F={F1[0]:.6f}')
plot_example(cands[0], refs[0], lang="en")