Example #1
# Imports assumed from the pytorch_translate test suite; exact module
# paths may differ between versions.
import unittest

import numpy as np
import torch

from pytorch_translate import generate, tasks
from pytorch_translate.test import utils as test_utils


class SmoothedBleuTest(unittest.TestCase):  # hypothetical class name
    def test_smoothed_sentence_bleu(self):
        """
        Test the calculation of the smoothed_sentence_bleu() function.
        Inputs:
            target_tokens: [11, 12, 13, 14, 15]
            hypo_tokens: [11, 12, 14, 15]
            actual precision:
                unigram: 4/4 = 1
                bigram:  2/3 = 0.667
                trigram: 0/2 = 0
                4-gram:  0/1 = 0
            smoothed precision:
                unigram: 4/4    = 1
                bigram:  2/3    = 0.667
                trigram: 0.5/2  = 0.25
                4-gram:  0.25/1 = 0.25
            smoothed geom. mean: (1 * 2/3 * 1/4 * 1/4) ^ (1/4) = 0.4518
            brevity penalty: e ^ (1 - 5/4) = 0.7788
        Desired output:
            0.4518 * 0.7788 = 0.35186
        """
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        target_tokens = torch.IntTensor([11, 12, 13, 14, 15])
        hypo_tokens = torch.IntTensor([11, 12, 14, 15])
        smoothed_bleu = generate.smoothed_sentence_bleu(
            task, target_tokens, hypo_tokens
        )
        np.testing.assert_almost_equal(smoothed_bleu, 0.35186, decimal=5)
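
The arithmetic in the docstring can be checked independently with a few lines of plain Python (standard library only), combining the smoothed n-gram precisions listed above with the brevity penalty:

import math

# Smoothed n-gram precisions from the docstring: 4/4, 2/3, 0.5/2, 0.25/1
precisions = [1.0, 2 / 3, 0.25, 0.25]
geo_mean = math.exp(sum(math.log(p) for p in precisions) / 4)  # ~0.4518
brevity_penalty = math.exp(1 - 5 / 4)  # ref len 5, hypo len 4 -> ~0.7788
print(round(geo_mean * brevity_penalty, 5))  # 0.35186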
Example #2
# pickle, numpy, and torch are standard dependencies; bleu is fairseq's
# BLEU scorer. DummyTask, smoothed_sentence_bleu, evaluate_weights, and
# identify_nonzero_features are defined in this function's source module
# (smoothed_sentence_bleu most likely from pytorch_translate.generate).
import pickle

import numpy as np
import torch
from fairseq import bleu


def random_search(
    scores_info_export_path,
    num_trials,
    report_oracle_bleu=False,
    report_intermediate_results=False,
):
    with open(scores_info_export_path, "rb") as f:
        scores_info = pickle.load(f)

    dummy_task = DummyTask()

    if report_oracle_bleu:
        # Special-token ids assumed by the exported scores; adjust these
        # if your dictionary uses a different layout.
        pad, eos, unk = (0, 1, 2)
        oracle_scorer = bleu.Scorer(pad, eos, unk)

        for example in scores_info:
            # Score every hypothesis against the reference and keep the
            # index of the best one (the "oracle" choice).
            smoothed_bleu = []
            for hypo in example["hypos"]:
                eval_score = smoothed_sentence_bleu(
                    dummy_task,
                    torch.IntTensor(example["target_tokens"]),
                    torch.IntTensor(hypo),
                )
                smoothed_bleu.append(eval_score)
            best_hypo_ind = np.argmax(smoothed_bleu)
            example["best_hypo_ind"] = best_hypo_ind

            oracle_scorer.add(
                torch.IntTensor(example["target_tokens"]),
                torch.IntTensor(example["hypos"][best_hypo_ind]),
            )

        print("oracle BLEU", oracle_scorer.score())

    num_features = scores_info[0]["scores"].shape[1]
    assert all(
        example["scores"].shape[1] == num_features for example in scores_info
    ), "All examples must have the same number of scores!"
    # Baseline: all weight on the first feature, no length penalty.
    feature_weights = np.zeros(num_features)
    feature_weights[0] = 1
    score = evaluate_weights(scores_info, feature_weights, length_penalty=0)
    best_score = score
    best_weights = feature_weights
    best_length_penalty = 0

    nonzero_features = identify_nonzero_features(scores_info)

    for i in range(num_trials):
        # Sample nonnegative weights summing to 1 for the nonzero features
        # (flat Dirichlet) and a length penalty uniform in [0, 1.5).
        feature_weights = np.zeros(num_features)
        random_weights = np.random.dirichlet(np.ones(nonzero_features.size))
        feature_weights[nonzero_features] = random_weights
        length_penalty = 1.5 * np.random.random()

        score = evaluate_weights(scores_info, feature_weights, length_penalty)
        if score > best_score:
            best_score = score
            best_weights = feature_weights
            best_length_penalty = length_penalty

        if report_intermediate_results:
            print(f"\r[{i}]  best: {best_score}", end="", flush=True)

    if report_intermediate_results:
        print()
    print("best BLEU: ", best_score)
    print("best weights: ", best_weights)
    print("best length penalty: ", length_penalty)

    return best_weights, best_length_penalty, best_score
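
A minimal usage sketch, assuming a scores pickle exported upstream (the path and trial count below are hypothetical; each entry in the pickle must be a dict with "target_tokens", "hypos", and a 2-D "scores" array, as the code above expects):

weights, length_penalty, bleu_score = random_search(
    "/tmp/scores_info.pkl",  # hypothetical export path
    num_trials=1000,
    report_oracle_bleu=True,
)

Sampling trial weights from a flat Dirichlet keeps them nonnegative and summing to one, so each trial stays on the probability simplex instead of wandering over an unbounded weight space.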