Example No. 1
  def evaluate(self, data, ref_alignments, batch_size=4, training=False):
    """Evaluate the model on a data set."""

    ref_align = read_naacl_alignments(ref_alignments)

    ref_iterator = iter(ref_align)
    metric = AERSufficientStatistics()
    accuracy_correct = 0
    accuracy_total = 0
    loss_total = 0
    steps = 0.

    for batch_id, batch in enumerate(iterate_minibatches(data, batch_size=batch_size)):
      x, y = prepare_data(batch, self.x_vocabulary, self.y_vocabulary)
      y_len = np.sum(np.sign(y), axis=1, dtype="int64")  # number of non-padding tokens per target sentence

      align, prob, acc_correct, acc_total, loss = self.get_viterbi(x, y, training)
      accuracy_correct += acc_correct
      accuracy_total += acc_total
      loss_total += loss
      steps += 1

      for alignment, N, (sure, probable) in zip(align, y_len, ref_iterator):
        # the evaluation ignores NULL links, so we discard them
        # j is 1-based in the naacl format
        pred = set((aj, j) for j, aj in enumerate(alignment[:N], 1) if aj > 0)
        metric.update(sure=sure, probable=probable, predicted=pred)

    accuracy = accuracy_correct / float(accuracy_total)
    return metric.aer(), accuracy, loss_total/float(steps)
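A minimal usage sketch for the method above; `model`, `dev_data`, and the reference path are illustrative assumptions, not part of the original example:

# hypothetical call, assuming `model` is an instance of the class defining evaluate()
# and `dev_data` is already prepared for iterate_minibatches()/prepare_data()
aer, accuracy, dev_loss = model.evaluate(
    dev_data, 'data/validation/dev.wa.naacl', batch_size=4)
print('AER: {:.4f}  accuracy: {:.4f}  loss: {:.4f}'.format(aer, accuracy, dev_loss))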
Example No. 2
def calculate_aer(predictions):
    """Compute AER for predicted alignments against the dev gold standard."""
    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments('data/validation/dev.wa.nonullalign')

    # 2. Compute AER
    metric = AERSufficientStatistics()
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)
    return metric.aer()
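A minimal usage sketch for the helper above; the prediction format (one set of 1-based (e, f) index links per validation sentence, in the same order as dev.wa.nonullalign) and the concrete links are illustrative assumptions:

# hypothetical predictions: one set of alignment links per sentence pair
predictions = [
    {(1, 1), (2, 2), (3, 3)},
    {(1, 2), (2, 1)},
]
print('AER: {:.4f}'.format(calculate_aer(predictions)))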
Example No. 3
    def calculate_aer(
            self, validation_corpus: List[Tuple[str, str]],
            validation_gold: List[Tuple[Set[Tuple[int, int]], Set[Tuple[int, int]]]]) -> float:
        """Calculate AER on validation corpus using gold standard"""
        predictions = map(self.align, validation_corpus)

        # Compute AER
        metric = AERSufficientStatistics()
        for gold, pred in zip(validation_gold, predictions):
            (sure, probable) = gold
            metric.update(sure=sure, probable=probable, predicted=pred)
        return metric.aer()
Example No. 4
    def get_validation_metrics(self) -> Metrics:

        log_data_probability = 0
        entropy = 0
        predicted_alignments = []

        for sentence in self.data.validation_data:

            log_sentence_probability = 0
            sentence_alignment = []

            alignment = self.get_best_alignment(sentence, False)

            for word_alignment in alignment.word_alignments:
                log_sentence_probability += math.log(
                    word_alignment.probability)

                if word_alignment.english != 0:  # skip NULL (position 0) alignments
                    sentence_alignment.append(
                        (word_alignment.english, word_alignment.french + 1
                         )  # french alignments start from 1
                    )
            entropy += -log_sentence_probability
            log_data_probability += log_sentence_probability
            predicted_alignments.append(set(sentence_alignment))

        data_probability = math.exp(log_data_probability)
        aer = AERSufficientStatistics(self.validation_gold_alignments,
                                      predicted_alignments).aer()
        perplexity = entropy  # note: this is the summed negative log-likelihood rather than a normalised perplexity

        return Metrics(data_probability, aer, perplexity)
Example No. 5
    def calculate_aer(self, eval_alignment_path, test_alignments):
        """Compute the AER of the given predictions against a NAACL gold-standard file."""

        gold_standard = read_naacl_alignments(eval_alignment_path)

        metric = AERSufficientStatistics()

        for gold_alignment, test_alignment in zip(gold_standard,
                                                  test_alignments):
            metric.update(sure=gold_alignment[0],
                          probable=gold_alignment[1],
                          predicted=test_alignment)

        aer = metric.aer()

        self.aer.append(aer)

        print("AER: {}".format(aer))
Example No. 6
def evaluate_model(model,
                   alignment_path,
                   parallel_corpus,
                   predictions_file_path=None):

    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments(alignment_path)

    # pairs are in format (e_w_indx, f_w_indx)

    # 2. Here I have the predictions of my own algorithm
    predictions = []
    sentence_number = 0
    if predictions_file_path:
        write_file = open(predictions_file_path, 'w')
    for (french_sentence,
         english_sentence), (s, _) in zip(parallel_corpus, gold_sets):
        sentence_number += 1
        alignment = model.infer_alignment(french_sentence, english_sentence)
        temp_pred = []
        for i, a in enumerate(alignment):
            # skip null-token alignments
            if a == 0:
                continue
            temp_pred.append((a, i + 1))
            if predictions_file_path:
                # NAACL line format: sentence number, e index, f index, S/P flag
                write_file.write("%04d %d %d %s\n" %
                                 (sentence_number, a, i + 1, "P"))
        predictions.append(set(temp_pred))

    if predictions_file_path:
        write_file.close()
    # 3. Compute AER

    # first we get an object that manages sufficient statistics
    metric = AERSufficientStatistics()
    # then we iterate over the corpus
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)
    # AER
    return metric.aer()
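A minimal usage sketch for evaluate_model; the trained `model`, the `parallel_corpus` list of (french_sentence, english_sentence) pairs, and both paths are illustrative placeholders:

# hypothetical invocation with placeholder objects and paths
aer = evaluate_model(model,
                     'data/validation/dev.wa.nonullalign',
                     parallel_corpus,
                     predictions_file_path='predictions/dev.naacl')
print('validation AER: {:.5f}'.format(aer))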
Example No. 7
write_alignments(model, 'ibm2-uniform.mle.naacl')

model = Model2(data, None, 'random')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-random.mle.naacl')

model = Model2(data, None, 'ibm1')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-ibm1.mle.naacl')

model = BayesianModel2(data, None, 0.1)
model.load_parameters('parameters')
write_alignments(model, 'ibm2.vb.naacl')

model = JumpingModel2(data, None, 'random')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-jumps.mle.naacl')

testing_gold_alignment_pickle = 'pickles/testing_gold_alignments.pickle'
with open(testing_gold_alignment_pickle, 'rb') as file:
    testing_gold_alignments = pickle.load(file)

for file in os.listdir('predictions'):
    if file.endswith('.naacl'):
        predictions = []
        # read_naacl_alignments yields (sure, probable) pairs; use the sure links as predictions
        for prediction in read_naacl_alignments('predictions/{}'.format(file)):
            predictions.append(prediction[0])
        aer = AERSufficientStatistics(testing_gold_alignments,
                                      predictions).aer()
        print('{}: {}'.format(file, round(aer, 5)))