Example #1
import argparse

import bert_score
import torch


def main():
    torch.multiprocessing.set_sharing_strategy("file_system")

    parser = argparse.ArgumentParser("Visualize BERTScore")
    parser.add_argument(
        "--lang",
        type=str,
        default="en",
        help="two-letter abbreviation of the language (e.g., en)")
    parser.add_argument("-m",
                        "--model",
                        default=None,
                        help="BERT model name (default: bert-base-uncased)")
    parser.add_argument("-l",
                        "--num_layers",
                        type=int,
                        default=None,
                        help="use first N layer in BERT (default: 8)")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("-r",
                        "--ref",
                        type=str,
                        required=True,
                        help="reference sentence")
    parser.add_argument("-c",
                        "--cand",
                        type=str,
                        required=True,
                        help="candidate sentence")
    parser.add_argument("-f",
                        "--file",
                        type=str,
                        default="visualize.png",
                        help="name of file to save output matrix in")
    parser.add_argument(
        "--rescale_with_baseline",
        action="store_true",
        help="Rescaling the numerical score with precomputed baselines")
    parser.add_argument("--baseline_path",
                        default=None,
                        type=str,
                        help="path of custom baseline csv file")

    args = parser.parse_args()

    bert_score.plot_example(
        args.cand,
        args.ref,
        model_type=args.model,
        lang=args.lang,
        num_layers=args.num_layers,
        fname=args.file,
        rescale_with_baseline=args.rescale_with_baseline,
        baseline_path=args.baseline_path,
    )
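For comparison, plot_example can also be called directly from Python without the CLI wrapper; a minimal sketch with made-up sentences and an example output filename:

import bert_score

# hypothetical candidate/reference pair, purely for illustration
bert_score.plot_example("the cat sat on the mat",
                        "a cat was sitting on the mat",
                        lang="en",
                        fname="similarity.png")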
Example #2
import argparse

import bert_score
import torch


def main():
    torch.multiprocessing.set_sharing_strategy('file_system')

    parser = argparse.ArgumentParser('Visualize BERTScore')
    parser.add_argument(
        '--lang',
        type=str,
        default='en',
        help='two-letter abbreviation of the language (e.g., en)')
    parser.add_argument('-m',
                        '--model',
                        default=None,
                        help='BERT model name (default: bert-base-uncased)')
    parser.add_argument('-l',
                        '--num_layers',
                        type=int,
                        default=None,
                        help='use first N layer in BERT (default: 8)')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='increase output verbosity')
    parser.add_argument('-r',
                        '--ref',
                        type=str,
                        required=True,
                        help='reference sentence')
    parser.add_argument('-c',
                        '--cand',
                        type=str,
                        required=True,
                        help='candidate sentence')
    parser.add_argument('-f',
                        '--file',
                        type=str,
                        default='visualize.png',
                        help='name of file to save output matrix in')
    parser.add_argument(
        '--rescale-with-baseline',
        action='store_true',
        help='Rescaling the numerical score with precomputed baselines')

    args = parser.parse_args()

    bert_score.plot_example(args.cand,
                            args.ref,
                            model_type=args.model,
                            lang=args.lang,
                            num_layers=args.num_layers,
                            fname=args.file,
                            rescale_with_baseline=args.rescale_with_baseline)
Example #3
import argparse

import bert_score
import torch


def main():
    torch.multiprocessing.set_sharing_strategy('file_system')

    parser = argparse.ArgumentParser('Calculate BERTScore')
    parser.add_argument('--bert',
                        default='bert-base-multilingual-cased',
                        choices=bert_score.bert_types,
                        help='BERT model name (default: bert-base-uncased)')
    parser.add_argument('-l',
                        '--num_layers',
                        type=int,
                        default=8,
                        help='use first N layer in BERT (default: 8)')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='increase output verbosity')
    parser.add_argument('-r',
                        '--ref',
                        required=True,
                        help='reference sentence')
    parser.add_argument('-c',
                        '--cand',
                        required=True,
                        help='candidate sentence')
    parser.add_argument('-o',
                        '--output_file_name',
                        default='',
                        help='output file name')

    args = parser.parse_args()

    cand = args.cand
    ref = args.ref
    fname = args.output_file_name
    # note: bert= and bert_score.bert_types reflect an older bert_score API;
    # newer releases use model_type=, as in examples #1 and #2
    bert_score.plot_example(cand,
                            ref,
                            verbose=args.verbose,
                            bert=args.bert,
                            num_layers=args.num_layers,
                            fname=fname)
Example #4
def word_alignment(sentence1, sentence2, outfile):
    # batchify, compress_similarity_matrix and get_sentence_ordering_dep are
    # project-level helpers defined elsewhere; plot_example here is a local
    # variant that returns the two token lists and a similarity matrix.
    batches = batchify(sentence1, sentence2, batch_size=500)
    # output = []
    for batch in batches:
        b0 = [x.sent for x in batch[0]]
        b1 = [x.sent for x in batch[1]]
        otokens1 = [[x.tokens[y].word for y in x.tokens if y != 0]
                    for x in batch[0]]
        otokens2 = [[x.tokens[y].word for y in x.tokens if y != 0]
                    for x in batch[1]]
        # otokens1 = [b.split(' ') for b in b0]
        # otokens2 = [b.split(' ') for b in b1]

        btokens1, btokens2, sim = plot_example(b0, b1)

        sim = compress_similarity_matrix(otokens1, otokens2, btokens1,
                                         btokens2, sim)

        for i in range(len(batch[0])):
            try:
                sim[i].shape
                cont = True
            except Exception:  # no usable similarity matrix for this pair
                cont = False

            if cont:
                try:
                    reordered1, reordered2 = get_sentence_ordering_dep(
                        batch[0][i], batch[1][i], sim[i])
                    if reordered1 is None or reordered2 is None:
                        continue

                    r1 = " ".join(reordered1)
                    r2 = " ".join(reordered2)
                    t1 = " ".join(otokens1[i])
                    t2 = " ".join(otokens2[i])
                    outfile.write(t1 + '\n')
                    outfile.write(t2 + '\n')
                    outfile.write(r1 + '\n')
                    outfile.write(r2 + '\n')
                    outfile.write('\n')

                except Exception:  # skip pairs that fail reordering
                    continue
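The batchify helper is not shown in this example; a minimal sketch of what it might look like, assuming sentence1 and sentence2 are parallel lists of parsed sentences (an assumption, not the original implementation):

def batchify(sents1, sents2, batch_size=500):
    # hypothetical helper: yield aligned chunks of two parallel sentence lists
    for i in range(0, len(sents1), batch_size):
        yield sents1[i:i + batch_size], sents2[i:i + batch_size]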
Example #5
# assuming plot_example is the bert_score helper used in the other examples
from bert_score import plot_example


def plot_similarity_matrix(cand, ref, lang):
    plot_example(cand, ref, lang=lang)
Example #6
def bert_viz_heatmap(ref, pred):
    from bert_score import plot_example
    ref = ' '.join(ref[0]).strip()
    pred = ' '.join(pred[0]).strip()
    plot_example(pred, ref, lang='en', rescale_with_baseline=True)
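bert_viz_heatmap joins ref[0] and pred[0], so it expects nested token lists; a small usage sketch with made-up tokens:

# hypothetical tokenized inputs, one sentence per argument
bert_viz_heatmap([["a", "cat", "was", "sitting", "on", "the", "mat"]],
                 [["the", "cat", "sat", "on", "the", "mat"]])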
Example #7
from bert_score import score, plot_example
import matplotlib.pyplot as plt
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

with open("../data/models/hyps.txt") as f:
    cands = [line.strip() for line in f]

with open("../data/models/refs.txt") as f:
    refs = [line.strip() for line in f]

P, R, F1 = score(cands, refs, lang='en')

print(f"System level F1 score: {F1.mean():.3f}")

plt.hist(F1, bins=20)
plt.show()

print(cands[0])
print(f'P={P[0]:.6f} R={R[0]:.6f} F={F1[0]:.6f}')
plot_example(cands[0], refs[0], lang="en")
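If the baseline-rescaled scores used in examples #1 and #2 are also wanted for the corpus-level run, score accepts the same option; a minimal sketch:

# rescale raw similarities against precomputed baselines for readability
P_r, R_r, F1_r = score(cands, refs, lang="en", rescale_with_baseline=True)
print(f"Rescaled system level F1 score: {F1_r.mean():.3f}")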