Пример #1
0
    def get_aer(self, dataset, epoch):
        """Compute the Alignment Error Rate (AER) of the model's best alignments.

        The gold alignments are read from the test answers file when
        ``self.opt.mode == 'test'`` and from the validation file otherwise.
        Predictions are always produced from ``dataset.val_data``.

        Parameters:
            dataset: Dataset object; its ``val_data`` attribute is passed to
                     ``self.get_best_alignments``
            epoch:   The current epoch, forwarded to ``self.get_best_alignments``
        Returns:
            AER score, as returned by ``aer.AERSufficientStatistics.aer()``
        """
        print("Computing AER on validation dataset")

        # Choose the gold file up front so that exactly one file is read;
        # parsing the validation file only to discard it in test mode is
        # wasted I/O and fails needlessly when that file is absent.
        if self.opt.mode == 'test':
            gold_path = "datasets/testing/answers/test.wa.nonullalign"
        else:
            gold_path = "datasets/validation/dev.wa.nonullalign"
        gold_sets = aer.read_naacl_alignments(gold_path)

        metric = aer.AERSufficientStatistics()

        predictions = self.get_best_alignments(dataset.val_data, epoch)

        # NOTE: zip() stops at the shorter input, so a length mismatch between
        # gold and predicted alignments is silently ignored.
        for gold, pred in zip(gold_sets, predictions):
            # Elements 1 and 2 of each predicted alignment form the link that
            # is scored (presumably the two word positions -- TODO confirm
            # against get_best_alignments).
            prediction = {(alignment[1], alignment[2]) for alignment in pred}
            metric.update(sure=gold[0], probable=gold[1], predicted=prediction)

        return metric.aer()
Пример #2
0
def compute_aer(predictions, file_path):
    """
    Computes the Alignment Error Rate, prints it and returns it.

    Parameters:
        predictions: Iterable with one collection of alignments per sentence;
                     elements 1 and 2 of every alignment are used as the
                     predicted link.
        file_path:   Path of the gold alignment file, passed to
                     ``aer.read_naacl_alignments``.
    Returns:
        The value of ``aer.AERSufficientStatistics.aer()`` after all
        sentence pairs have been accumulated.
    """

    gold_sets = aer.read_naacl_alignments(file_path)
    metric = aer.AERSufficientStatistics()

    # NOTE: zip() stops at the shorter input, so surplus gold entries or
    # surplus predictions are silently ignored.
    for gold, alignments in zip(gold_sets, predictions):
        # Use a separate name for the link set instead of rebinding the
        # loop variable.
        links = {(alignment[1], alignment[2]) for alignment in alignments}
        metric.update(sure=gold[0], probable=gold[1], predicted=links)

    # Compute the score once and reuse it for both the report and the result.
    score = metric.aer()
    print(score)
    return score
Пример #3
0
    def aer(self):
        """Return the Alignment Error Rate of the model's Viterbi alignments.

        Gold alignments are read from ``self.path_true``. The sentence pairs
        come from ``read_data(self.english_val, self.french_val)``. For every
        pair the score matrix returned by ``self.viterbi_alignment`` is turned
        into one link per row, starting at row index 1.

        Returns:
            The value of ``aer.AERSufficientStatistics.aer()`` after all
            (gold, predicted) pairs have been accumulated.
        """
        gold_sets = aer.read_naacl_alignments(self.path_true)
        validation_corpus = read_data(self.english_val, self.french_val)

        predicted_links = []
        for english, french in validation_corpus.corpus:
            scores, _ = self.viterbi_alignment(english.s, french.s, split=False)

            sentence_links = set()
            # Row 0 is skipped (presumably a NULL/padding position -- confirm
            # against viterbi_alignment).
            for row in range(1, scores.shape[0]):
                row_scores = scores[row]
                # All column indices that attain the row maximum.
                candidates = np.argwhere(
                    row_scores == np.max(row_scores)).flatten()
                # Break ties by taking the candidate nearest to the row index.
                closest = np.argmin(np.abs(candidates - row))
                # The stored link uses the winning column index plus one.
                sentence_links.add((row, candidates[closest] + 1))
            predicted_links.append(sentence_links)

        # Accumulate statistics over the paired gold / predicted sentences.
        stats = aer.AERSufficientStatistics()
        for gold, predicted in zip(gold_sets, predicted_links):
            sure, probable = gold[0], gold[1]
            stats.update(sure=sure, probable=probable, predicted=predicted)

        return stats.aer()