def get_scores(enc_sources, enc_target_sents, model, device, tokenizersrc,
               tokenizertrg, search="greedy", n=4):
    """
    Takes a list of encoded sentences and their encoded translations and
    returns corpus-level score objects.
    model is the trained transformer model.
    tokenizersrc / tokenizertrg are the spm sentencepiece vocabularies in the
    form "name".model.
    search is the decoding strategy, either "greedy" or "beam".
    n is the beam width in beam search.
    """
    model.eval()
    sp.Load(tokenizertrg)
    target_str = [sp.DecodeIds(sent.tolist()) for sent in enc_target_sents]
    output_str = []

    if search == "greedy":
        # Translate in chunks of 100 sentences and print rough progress.
        for sents in divide_chunks(enc_sources, 100):
            print((len(output_str) / len(enc_sources)) * 100, end="\r")
            output_str.extend(
                translate_enc_sentences(model, sents, device, tokenizertrg,
                                        max_length=150))
    elif search == "beam":
        # Beam-search decode one sentence at a time.
        for source in enc_sources:
            output_str.append(
                beam_search(source, device, tokenizersrc, tokenizertrg, n))

    bleu = sacrebleu.corpus_bleu(output_str, [target_str])
    chrf = sacrebleu.corpus_chrf(output_str, [target_str])
    ter = sacrebleu.corpus_ter(output_str, [target_str])
    return bleu, chrf, ter
def eval_metric(args, hypos, ref):
    if args.metric == "bleu":
        score = sacrebleu.corpus_bleu(hypos, [ref]).score
    else:
        score = sacrebleu.corpus_ter(hypos, [ref]).score
    return score
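# Illustrative usage sketch (not part of the original code): eval_metric
# expects `hypos` as a flat list of hypothesis strings and `ref` as a parallel
# list of reference strings; the [ref] wrapping inside the function is the
# "list of reference streams" shape sacrebleu requires. The argparse namespace
# below is only a stand-in for the real `args` object.
import argparse

_example_args = argparse.Namespace(metric="bleu")
_example_hypos = ["the cat sat on the mat", "hello world"]
_example_refs = ["the cat sat on a mat", "hello there world"]
print(eval_metric(_example_args, _example_hypos, _example_refs))  # corpus BLEU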
def _compute_bleu_ter(self, batch, user_vec, glob_vec, output, target):
    vocab = self.bert_tok.ids_to_tokens
    batch_data = self.translator.translate_batch(batch, vocab, False)
    translations = self.from_batch(batch_data)

    sys = []
    for trans in translations:
        line = ' '.join(trans[0]).replace(' ##', '').replace('##', '')
        sys.append(line)

    references = []
    for tgt in batch.tgt.squeeze(-1).transpose(0, 1):
        references.append(self._build_target_tokens(tgt))

    refs = []
    for ref in references:
        line = ' '.join(ref[1:]).replace(' ##', '').replace('##', '')
        refs.append(line)

    bleu = sacrebleu.corpus_bleu(sys, [refs], force=True)
    ter = sacrebleu.corpus_ter(sys, [refs])
    bleu_stats = onmt.utils.Statistics(bleu=bleu.score * len(sys), sent=len(sys))
    ter_stats = onmt.utils.Statistics(ter=ter.score * len(sys), sent=len(sys))
    return (bleu_stats, ter_stats)
def compute_metrics(hyp_dec_all, ref_dec_all, use_sacrebleu=True,
                    use_torchtext=True, use_ter=False):
    metrics = {}

    # Sacrebleu
    if use_sacrebleu:
        metrics["sacrebleu_rawcorpusbleu"] = sacrebleu.raw_corpus_bleu(
            hyp_dec_all, [ref_dec_all]).score
        metrics["sacrebleu_bleu"] = sacrebleu.corpus_bleu(
            hyp_dec_all, [ref_dec_all]).score
        metrics["sacrebleu_chrf"] = sacrebleu.corpus_chrf(
            hyp_dec_all, [ref_dec_all]).score
        if use_ter:  # Quite slow
            metrics["sacrebleu_ter"] = sacrebleu.corpus_ter(
                hyp_dec_all, [ref_dec_all]).score

    # Torchtext
    if use_torchtext:
        m_bleu_score = bleu_score([x.split(" ") for x in hyp_dec_all],
                                  [[x.split(" ")] for x in ref_dec_all])
        metrics["torchtext_bleu"] = m_bleu_score * 100

    return metrics
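# Illustrative usage sketch (not part of the original code): both arguments
# are lists of already-detokenized sentence strings, one hypothesis per
# reference. The torchtext branch re-splits on spaces, so whitespace
# tokenization is assumed, and sacrebleu / torchtext's bleu_score are assumed
# to be imported at module level as the function above requires.
_example_hyps = ["the cat sat on the mat", "hello world"]
_example_refs = ["the cat sat on a mat", "hello there world"]
_example_metrics = compute_metrics(_example_hyps, _example_refs, use_ter=True)
# e.g. {"sacrebleu_rawcorpusbleu": ..., "sacrebleu_bleu": ...,
#       "sacrebleu_chrf": ..., "sacrebleu_ter": ..., "torchtext_bleu": ...}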
def compute_metrics(ref, hyp, hyp_order, metric):
    # Read sentences
    refs = []
    hyps = []
    for id in hyp_order:
        for segment in hyp[id]:
            hyps.append(segment)
        try:
            for segment in ref[id]:
                refs.append(segment)
        except KeyError:
            sys.stderr.write('Error: there are no references for document'
                             + ' "' + id + '"\n')
            sys.exit(-1)

    scores = []
    dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    for n in range(len(hyps)):
        if metric == 'bleu':
            try:
                score = sacrebleu.corpus_bleu([hyps[n]], [[refs[n]]])
            except EOFError:
                sys.stderr.write('Error: source and reference have different'
                                 + ' lengths.\n')
                sys.exit(-1)
        elif metric == 'ter':
            try:
                score = sacrebleu.corpus_ter([hyps[n]], [[refs[n]]])
            except EOFError:
                sys.stderr.write('Error: source and reference have different'
                                 + ' lengths.\n')
                sys.exit(-1)
        else:
            hyps_file = save_to_file(hyps[n])
            refs_file = save_to_file(refs[n])
            try:
                process = subprocess.Popen((dir + '/beer_2.0/beer -s '
                                            + hyps_file + ' -r '
                                            + refs_file).split(),
                                           stdout=subprocess.PIPE)
                score, error = process.communicate()
            except FileNotFoundError:
                sys.stderr.write('Error: Beer requirement has not been'
                                 + ' satisfied.\n')
                sys.exit(-1)
            # Delete aux files
            process = subprocess.Popen(('rm ' + hyps_file + ' '
                                        + refs_file).split(),
                                       stdout=subprocess.PIPE)
            output, error = process.communicate()

        if metric == 'beer':
            scores.append([float(score.split()[-1])])
        else:
            scores.append([score.score])

    return scores
def ter(items):
    """Translation Error Rate is an error metric for machine translation that
    measures the number of edits required to change a system output into one
    of the references.
    Source: http://www.cs.umd.edu/~snover/tercom/
    Paper: http://mt-archive.info/AMTA-2006-Snover.pdf

    Lower is better
    """
    refs = list(zip(*items))[0]
    preds = list(zip(*items))[1]
    refs, preds = _sacreformat(refs, preds)
    return sacrebleu.corpus_ter(preds, refs).score
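# Hypothetical usage (not part of the original code): each item in `items` is
# assumed to be a (references, prediction) pair, with _sacreformat reshaping
# them into sacrebleu's (hypotheses, list-of-reference-streams) layout. The
# direct call below shows that layout for a single reference per segment.
import sacrebleu

_preds = ["the cat sat on the mat"]
_refs = [["the cat sat on a mat"]]  # one reference stream, parallel to _preds
print(sacrebleu.corpus_ter(_preds, _refs).score)  # lower is better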
def compute_metrics(ref, hyp, hyp_order):
    # Read sentences
    refs = []
    hyps = []
    for id in hyp_order:
        for segment in hyp[id]:
            hyps.append(segment)
        try:
            for segment in ref[id]:
                refs.append(segment)
        except KeyError:
            sys.stderr.write('Error: there are no references for document'
                             + ' "' + id + '"\n')
            sys.exit(-1)

    # Compute BLEU and TER
    try:
        bleu = sacrebleu.corpus_bleu(hyps, [refs])
        ter = sacrebleu.corpus_ter(hyps, [refs])
    except EOFError:
        sys.stderr.write('Error: source and reference have different'
                         + ' lengths.\n')
        sys.exit(-1)

    # Create aux files for BEER
    dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    hyps_file = save_to_file(hyps)
    refs_file = save_to_file(refs)

    # Compute BEER
    try:
        process = subprocess.Popen((dir + '/beer_2.0/beer -s ' + hyps_file
                                    + ' -r ' + refs_file).split(),
                                   stdout=subprocess.PIPE)
        beer, error = process.communicate()
    except FileNotFoundError:
        sys.stderr.write('Error: Beer requirement has not been satisfied.\n')
        sys.exit(-1)

    # Delete aux files
    process = subprocess.Popen(('rm ' + hyps_file + ' ' + refs_file).split(),
                               stdout=subprocess.PIPE)
    output, error = process.communicate()

    return bleu.score, ter.score, float(beer.split()[-1])
def __call__(self, ref_path, hyp_path):
    with tf.io.gfile.GFile(ref_path) as ref_stream, \
            tf.io.gfile.GFile(hyp_path) as sys_stream:
        ter = sacrebleu.corpus_ter(sys_stream, [ref_stream])
        return ter.score
def __call__(self, ref_path, hyp_path):
    sys_stream = _get_lines(hyp_path)
    ref_stream = _get_lines(ref_path)
    ter = sacrebleu.corpus_ter(sys_stream, [ref_stream])
    return ter.score
def test(
    experiment: str,
    test_set: str,
    cuda: bool,
    seed: int,
    sample: bool,
    top_p: float,
    temperature: float,
    num_beams: int,
    to_json: str,
) -> None:
    """Tests a trained model on its ability to rank candidate answers and
    generate replies.
    """
    logging.disable(logging.WARNING)
    model = PersonaGPT2.from_experiment(experiment)
    seed_everything(seed)

    cuda = cuda and torch.cuda.is_available()
    if cuda:
        model.to("cuda")

    with open(test_set, "r", encoding="utf-8") as f:
        dataset = json.loads(f.read())

    replies, rankings = [], []
    for dialog in tqdm(dataset, desc="Scoring dialogs...", dynamic_ncols=True):
        # 1) Prepares Persona
        persona = dialog["personality"].copy()
        persona_ids = [model.tokenizer.encode(s) for s in persona]

        for utterance in dialog["utterances"]:
            # 2) Saves Ground-Truth
            ground_truth_reply = utterance["candidates"][-1]

            # 3) Prepares History
            history = utterance["history"][-(2 * model.hparams.max_history + 1):]
            history_ids = [model.tokenizer.encode(h) for h in history]

            # 4) Rank Candidates in batch:
            batch = []
            for j, candidate in enumerate(utterance["candidates"]):
                candidate_ids = model.tokenizer.encode(candidate)
                instance = DataModule.build_input(
                    tokenizer=model.tokenizer,
                    persona=persona_ids,
                    history=history_ids,
                    reply=candidate_ids,
                )
                batch.append(instance)

            # from list of dictionaries to dictionary of lists
            batch = {k: [d[k] for d in batch] for k in batch[0]}
            batch = DataModule.pad_dataset(batch)

            if cuda:
                batch = {k: torch.LongTensor(v).cuda() for k, v in batch.items()}
            else:
                batch = {k: torch.LongTensor(v) for k, v in batch.items()}

            mc_logits = model(**batch).mc_logits
            rankings.append({
                "persona": persona,
                "history": history,
                "candidates": utterance["candidates"],
                "ranking": torch.topk(
                    mc_logits, len(utterance["candidates"])).indices.tolist(),
            })

            # 5) Generates Reply
            bot_input = DataModule.build_input(tokenizer=model.tokenizer,
                                               persona=persona_ids,
                                               history=history_ids)
            input_ids = torch.LongTensor([bot_input["input_ids"]])
            token_type_ids = torch.LongTensor([bot_input["token_type_ids"]])
            if cuda:
                input_ids = input_ids.cuda()
                token_type_ids = token_type_ids.cuda()

            # Nucleus Sampling
            if sample:
                history_ids = model.generate(
                    input_ids=input_ids,
                    token_type_ids=token_type_ids,
                    max_length=200,
                    do_sample=True,
                    top_p=top_p,
                    temperature=temperature,
                )
            # Beam Search
            else:
                history_ids = model.generate(
                    input_ids=input_ids,
                    token_type_ids=token_type_ids,
                    max_length=200,
                    num_beams=num_beams,
                    no_repeat_ngram_size=2,
                    early_stopping=True,
                )

            bot_reply_ids = history_ids[:, len(bot_input["input_ids"]):][0]
            bot_reply = model.tokenizer.decode(bot_reply_ids,
                                               skip_special_tokens=True)
            replies.append({
                "persona": persona,
                "history": history,
                "bot": " ".join(wordpunct_tokenize(bot_reply.lower())),
                "human": ground_truth_reply,
            })

    # 6) Runs Ranking Metrics
    hits_1, hits_5, hits_10 = [], [], []
    for ranks in rankings:
        hits_1.append((len(ranks["candidates"]) - 1) in ranks["ranking"][:1])
        hits_5.append((len(ranks["candidates"]) - 1) in ranks["ranking"][:5])
        hits_10.append((len(ranks["candidates"]) - 1) in ranks["ranking"][:10])
    click.secho("Hits@1: {}".format(sum(hits_1) / len(hits_1)), fg="yellow")
    click.secho("Hits@5: {}".format(sum(hits_5) / len(hits_5)), fg="yellow")
    click.secho("Hits@10: {}".format(sum(hits_10) / len(hits_10)), fg="yellow")

    # 7) Runs Generation Metrics
    refs = [[s["human"] for s in replies]]
    sys = [s["bot"] for s in replies]

    bleu = sacrebleu.corpus_bleu(sys, refs, lowercase=True,
                                 tokenize="intl").score
    click.secho(f"BLEU: {bleu}", fg="blue")

    ter = sacrebleu.corpus_ter(sys, refs, no_punct=True).score
    click.secho(f"TER: {ter}", fg="blue")

    # BERTScore returns precision, recall, f1.. we will use F1
    bertscore = float(
        bert_score.score(
            cands=sys,
            refs=refs[0],
            lang="en",
            verbose=False,
            nthreads=4,
        )[2].mean())
    click.secho(f"BERTScore: {bertscore}", fg="blue")

    # 8) Saves results.
    if isinstance(to_json, str):
        data = {
            "results": {
                "BLEU": bleu,
                "TER": ter,
                "BERTScore": bertscore,
                "Hits@1": sum(hits_1) / len(hits_1),
                "Hits@5": sum(hits_5) / len(hits_5),
                "Hits@10": sum(hits_10) / len(hits_10),
            },
            "generation": replies,
            "ranking": rankings,
        }
        with open(to_json, "w") as outfile:
            json.dump(data, outfile, ensure_ascii=False, indent=4)
        click.secho(f"Predictions saved in: {to_json}.", fg="yellow")
def get_ter(in_sent, target_sent):
    ter = sacrebleu.corpus_ter([in_sent], [[target_sent]])
    out = " ".join(map(str, [ter.score, ter.num_edits, ter.ref_length]))
    return out
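# Illustrative usage sketch (not part of the original code): get_ter scores a
# single hypothesis/reference pair by wrapping each string into the
# corpus-level shapes sacrebleu expects, then joins the score, edit count and
# reference length into one space-separated string.
print(get_ter("the cat sat on the mat", "the cat sat on a mat"))
# -> "<score> <num_edits> <ref_length>"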
def compute_ter_score(hyps: Iterable[str], refs: List[Iterable[str]]) -> float:
    result = sacrebleu.corpus_ter(hyps, refs)
    return float(np.round(float(result.score) * 100, 2))
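# Illustrative usage sketch (not part of the original code): `refs` is a list
# of reference streams, each stream being a list of strings parallel to
# `hyps`, so a single-reference corpus is passed as [ref_stream]. The result
# is rounded to two decimals; the * 100 rescaling is how this wrapper chooses
# to report TER and may be redundant with sacrebleu versions that already
# return the score as a percentage.
_hyps = ["the cat sat on the mat", "hello world"]
_refs = [["the cat sat on a mat", "hello there world"]]
print(compute_ter_score(_hyps, _refs))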