Example #1
  def evaluate(self, data, ref_alignments, batch_size=4, training=False):
    """Evaluate the model on a data set."""

    ref_align = read_naacl_alignments(ref_alignments)

    ref_iterator = iter(ref_align)
    metric = AERSufficientStatistics()
    accuracy_correct = 0
    accuracy_total = 0
    loss_total = 0
    steps = 0.

    for batch_id, batch in enumerate(iterate_minibatches(data, batch_size=batch_size)):
      x, y = prepare_data(batch, self.x_vocabulary, self.y_vocabulary)
      y_len = np.sum(np.sign(y), axis=1, dtype="int64")

      align, prob, acc_correct, acc_total, loss = self.get_viterbi(x, y, training)
      accuracy_correct += acc_correct
      accuracy_total += acc_total
      loss_total += loss
      steps += 1

      for alignment, N, (sure, probable) in zip(align, y_len, ref_iterator):
        # The evaluation ignores NULL links, so we discard them;
        # j is 1-based in the NAACL format.
        pred = set((aj, j) for j, aj in enumerate(alignment[:N], 1) if aj > 0)
        metric.update(sure=sure, probable=probable, predicted=pred)

    accuracy = accuracy_correct / float(accuracy_total)
    return metric.aer(), accuracy, loss_total/float(steps)
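
For reference, the AER that AERSufficientStatistics accumulates is the standard measure of Och and Ney (2000). A minimal self-contained sketch of the formula, assuming alignments are plain sets of (source_position, target_position) pairs as in the snippets on this page:

def alignment_error_rate(sure, probable, predicted):
    """AER = 1 - (|A & S| + |A & P|) / (|A| + |S|), with sure a subset of probable."""
    numerator = len(predicted & sure) + len(predicted & probable)
    return 1.0 - numerator / (len(predicted) + len(sure))

# Toy check: one sure link recovered, one spurious prediction -> AER = 1/3.
print(alignment_error_rate(sure={(1, 1)},
                           probable={(1, 1), (2, 2)},
                           predicted={(1, 1), (3, 2)}))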
Example #2
def calculate_aer(predictions):
    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments('data/validation/dev.wa.nonullalign')

    # 2. Compute AER
    metric = AERSufficientStatistics()
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)
    return metric.aer()
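
A hypothetical call, only to show the expected input shape: predictions must contain one set of 1-based (source, target) links per sentence, in the same order as the sentences in the gold file referenced above:

predictions = [{(1, 1), (2, 2)},   # links for sentence 1
               {(1, 2), (2, 1)}]   # links for sentence 2
print(calculate_aer(predictions))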
Example #3
    def calculate_aer(
            self, validation_corpus: List[Tuple[str, str]],
            validation_gold: List[Tuple[Set[Tuple[int, int]],
                                        Set[Tuple[int, int]]]]) -> float:
        """Calculate AER on validation corpus using gold standard"""
        predictions = map(self.align, validation_corpus)

        # Compute AER
        metric = AERSufficientStatistics()
        for gold, pred in zip(validation_gold, predictions):
            (sure, probable) = gold
            metric.update(sure=sure, probable=probable, predicted=pred)
        return metric.aer()
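
This snippet assumes self.align maps a single (source, target) sentence pair to a set of 1-based links. A hypothetical stand-in that satisfies that contract, here a naive diagonal baseline (not the model's actual aligner):

from typing import Set, Tuple

def align(sentence_pair: Tuple[str, str]) -> Set[Tuple[int, int]]:
    source, target = sentence_pair
    # Naive baseline: link position i in the source to position i in the target.
    n = min(len(source.split()), len(target.split()))
    return {(i, i) for i in range(1, n + 1)}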
Example #4
    def calculate_aer(self, eval_alignment_path, test_alignments):

        gold_standard = read_naacl_alignments(eval_alignment_path)

        metric = AERSufficientStatistics()

        # The singular loop variable avoids shadowing the test_alignments
        # parameter, and unpacking each gold entry makes the link sets explicit.
        for (sure, probable), test_alignment in zip(gold_standard,
                                                    test_alignments):
            metric.update(sure=sure,
                          probable=probable,
                          predicted=test_alignment)

        aer = metric.aer()

        self.aer.append(aer)

        print("AER: {}".format(aer))
Example #5
def evaluate_model(model,
                   alignment_path,
                   parallel_corpus,
                   predictions_file_path=None):

    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments(alignment_path)

    # pairs are in the format (english_word_index, french_word_index)

    # 2. Collect the predictions of our own model
    predictions = []
    sentence_number = 0
    if predictions_file_path:
        write_file = open(predictions_file_path, 'w')
    for (french_sentence,
         english_sentence), _ in zip(parallel_corpus, gold_sets):
        sentence_number += 1
        alignment = model.infer_alignment(french_sentence, english_sentence)
        temp_pred = []
        for i, a in enumerate(alignment):
            # skip null-token alignments
            if a == 0:
                continue
            temp_pred.append((a, i + 1))
            if predictions_file_path:
                write_file.write("%04d %d %d %s\n" %
                                 (sentence_number, a, i + 1, "P"))
        predictions.append(set(temp_pred))

    if predictions_file_path:
        write_file.close()
    # 3. Compute AER

    # first we get an object that manages sufficient statistics
    metric = AERSufficientStatistics()
    # then we iterate over the corpus
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)
    # AER
    return metric.aer()
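
The lines written above follow the NAACL word-alignment format: sentence id, position in language 1, position in language 2, and an S (sure) or P (probable) flag. A minimal parser for that format, sketched under the assumption that read_naacl_alignments does something similar:

from collections import defaultdict

def parse_naacl(path):
    sure, probable = defaultdict(set), defaultdict(set)
    with open(path) as fh:
        for line in fh:
            sid, i, j, flag = line.split()[:4]
            link = (int(i), int(j))
            if flag == "S":
                sure[sid].add(link)
            probable[sid].add(link)  # sure links also count as probable
    # Sentence ids are zero-padded ("%04d" above), so string sort keeps order.
    return [(sure[sid], probable[sid]) for sid in sorted(probable)]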