def get_aer(self, dataset, epoch):
    """Compute the Alignment Error Rate (AER) of the model's best alignments.

    The gold alignments come from the test answers file when
    ``self.opt.mode == 'test'`` and from the validation file otherwise.
    Only the selected file is read.

    Parameters:
        dataset: Object whose ``val_data`` attribute is handed to
            ``self.get_best_alignments``.
        epoch: The current epoch, forwarded to ``self.get_best_alignments``.

    Returns:
        AER score as returned by ``AERSufficientStatistics.aer()``.
    """
    print("Computing AER on validation dataset")

    # Pick the gold file up front so the unused one is never opened/parsed.
    if self.opt.mode == 'test':
        gold_path = "datasets/testing/answers/test.wa.nonullalign"
    else:
        gold_path = "datasets/validation/dev.wa.nonullalign"
    gold_sets = aer.read_naacl_alignments(gold_path)

    metric = aer.AERSufficientStatistics()
    predictions = self.get_best_alignments(dataset.val_data, epoch)

    for gold, pred in zip(gold_sets, predictions):
        # Each predicted alignment is reduced to the pair at positions 1 and 2;
        # position 0 is not used for scoring.
        prediction = {(alignment[1], alignment[2]) for alignment in pred}
        metric.update(sure=gold[0], probable=gold[1], predicted=prediction)

    return metric.aer()
def read_data(n: int = None, lower=False):
    """Load the training, validation and test corpora with gold alignments.

    Parameters:
        n: Forwarded to ``read_tokens`` for the two training files only.
        lower: Forwarded to ``read_tokens`` for every file.

    Returns:
        A 6-tuple ``(training_corpus, validation_corpus, test_corpus,
        validation_gold, test_gold, vocab_target)``. Each corpus is a list
        of ``(.f sentence, .e sentence)`` pairs.
    """
    # --- training ---------------------------------------------------------
    train_f = read_tokens('../data/training/hansards.36.2.f', lower, n)
    train_e = read_tokens('../data/training/hansards.36.2.e', lower, n)
    training_corpus = list(zip(train_f, train_e))

    # The vocabulary is built from the ``.f`` side.
    # NOTE(review): the printed label says "english" — confirm which
    # language the ``.f`` file actually holds.
    vocab_target = sentence_vocab(train_f)
    print(f'vocabulary size english: {len(vocab_target)}')

    # --- validation -------------------------------------------------------
    dev_f = read_tokens('../data/validation/dev.f', lower)
    dev_e = read_tokens('../data/validation/dev.e', lower)
    validation_corpus = list(zip(dev_f, dev_e))
    validation_gold = aer.read_naacl_alignments(
        '../data/validation/dev.wa.nonullalign')

    # --- test -------------------------------------------------------------
    test_f = read_tokens('../data/testing/test/test.f', lower)
    test_e = read_tokens('../data/testing/test/test.e', lower)
    test_corpus = list(zip(test_f, test_e))
    test_gold = aer.read_naacl_alignments(
        '../data/testing/answers/test.wa.nonullalign')

    return (
        training_corpus,
        validation_corpus,
        test_corpus,
        validation_gold,
        test_gold,
        vocab_target,
    )
def evaluate(self, data, ref_alignments, batch_size=4, training=False):
    """Evaluate the model on a data set.

    Parameters:
        data: Data handed to ``iterate_minibatches``.
        ref_alignments: Argument for ``read_naacl_alignments`` giving the
            reference (gold) alignments.
        batch_size: Mini-batch size used while iterating over ``data``.
        training: Forwarded to ``self.get_viterbi``.

    Returns:
        A tuple ``(aer, accuracy, mean_loss)`` where ``accuracy`` is the
        summed correct count over the summed total count and ``mean_loss``
        is the summed loss divided by the number of batches.
    """
    gold_iter = iter(read_naacl_alignments(ref_alignments))
    metric = AERSufficientStatistics()

    n_correct = 0
    n_total = 0
    summed_loss = 0
    n_batches = 0.

    for batch in iterate_minibatches(data, batch_size=batch_size):
        x, y = prepare_data(batch, self.x_vocabulary, self.y_vocabulary)
        # Per-row count of positive entries in y.
        # presumably 0 is the padding id — TODO confirm
        y_len = np.sum(np.sign(y), axis=1, dtype="int64")

        align, _prob, batch_correct, batch_total, batch_loss = self.get_viterbi(
            x, y, training)
        n_correct += batch_correct
        n_total += batch_total
        summed_loss += batch_loss
        n_batches += 1

        # The gold iterator is shared across batches, so references are
        # consumed in step with the predicted sentences.
        for alignment, length, (sure, probable) in zip(align, y_len, gold_iter):
            # The evaluation ignores NULL links, so they are discarded;
            # j is 1-based in the naacl format.
            links = {
                (aj, j)
                for j, aj in enumerate(alignment[:length], 1)
                if aj > 0
            }
            metric.update(sure=sure, probable=probable, predicted=links)

    accuracy = n_correct / float(n_total)
    mean_loss = summed_loss / float(n_batches)
    return metric.aer(), accuracy, mean_loss
def calculate_aer(predictions):
    """Compute the AER of ``predictions`` against the validation gold set.

    Parameters:
        predictions: Iterable of predicted alignments, one entry per
            sentence, in the same order as the gold file. Each entry is
            passed unchanged as ``predicted`` to the metric.

    Returns:
        The value of ``AERSufficientStatistics.aer()`` after all
        (gold, prediction) pairs have been added.
    """
    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments('data/validation/dev.wa.nonullalign')

    # 2. Accumulate sufficient statistics sentence by sentence; zip stops at
    #    the shorter of the two sequences.
    metric = AERSufficientStatistics()
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)

    # 3. Compute AER
    return metric.aer()
def compute_aer(predictions, file_path):
    """Compute, print and return the Alignment Error Rate.

    Parameters:
        predictions: Iterable with one collection of alignments per
            sentence; each alignment is indexable and only its elements at
            positions 1 and 2 are used for scoring.
        file_path: Path of the gold alignment file, passed to
            ``aer.read_naacl_alignments``.

    Returns:
        The AER score (also printed to stdout).
    """
    gold_sets = aer.read_naacl_alignments(file_path)
    metric = aer.AERSufficientStatistics()

    for gold, sentence_alignments in zip(gold_sets, predictions):
        links = {(alignment[1], alignment[2]) for alignment in sentence_alignments}
        metric.update(sure=gold[0], probable=gold[1], predicted=links)

    # Compute the score once and reuse it for both the print and the return.
    score = metric.aer()
    print(score)
    return score
def calculate_aer(self, eval_alignement_path, test_alignments):
    """Score ``test_alignments`` against a gold file and record the AER.

    The score is appended to ``self.aer`` and printed; nothing is returned.

    Parameters:
        eval_alignement_path: Path of the gold alignment file, passed to
            ``read_naacl_alignments``.
        test_alignments: Iterable of predicted alignments, one entry per
            sentence, each passed as ``predicted`` to the metric.
    """
    metric = None
    gold_standard = read_naacl_alignments(eval_alignement_path)
    metric = AERSufficientStatistics()

    for gold, predicted in zip(gold_standard, test_alignments):
        sure, probable = gold[0], gold[1]
        metric.update(sure=sure, probable=probable, predicted=predicted)

    score = metric.aer()
    self.aer.append(score)
    print("AER: {}".format(score))
def evaluate_model(model, alignment_path, parallel_corpus, predictions_file_path=None):
    """Compute the AER of ``model`` on a parallel corpus.

    Parameters:
        model: Object exposing ``infer_alignment(french_sentence,
            english_sentence)``, which returns one aligned position per
            element; a value of 0 denotes an alignment to the NULL token.
        alignment_path: Path of the gold alignment file, passed to
            ``read_naacl_alignments``.
        parallel_corpus: Iterable of ``(french_sentence, english_sentence)``
            pairs, in the same order as the gold alignments.
        predictions_file_path: Optional path; when truthy, every predicted
            link is also written there, one per line, in the form
            ``"%04d %d %d P"`` (sentence number, aligned position, 1-based
            position).

    Returns:
        The AER score as returned by ``AERSufficientStatistics.aer()``.
    """
    # 1. Read in gold alignments
    gold_sets = read_naacl_alignments(alignment_path)

    # 2. Collect the model's predictions; pairs are (e_w_indx, f_w_indx).
    predictions = []
    write_file = open(predictions_file_path, 'w') if predictions_file_path else None
    try:
        # Zipping with the gold sets limits inference to sentences that have
        # a reference alignment; the gold entry itself is not needed here.
        paired = zip(parallel_corpus, gold_sets)
        for sentence_number, (sentence_pair, (_sure, _probable)) in enumerate(paired, 1):
            french_sentence, english_sentence = sentence_pair
            alignment = model.infer_alignment(french_sentence, english_sentence)

            links = set()
            for i, a in enumerate(alignment):
                # skip null-token alignments
                if a == 0:
                    continue
                links.add((a, i + 1))
                if write_file is not None:
                    write_file.write(
                        "%04d %d %d %s\n" % (sentence_number, a, i + 1, "P"))
            predictions.append(links)
    finally:
        # Close the output file even when inference or writing fails.
        if write_file is not None:
            write_file.close()

    # 3. Compute AER from the accumulated sufficient statistics.
    metric = AERSufficientStatistics()
    for gold, pred in zip(gold_sets, predictions):
        metric.update(sure=gold[0], probable=gold[1], predicted=pred)

    return metric.aer()
def aer(self):
    """Compute the AER of the Viterbi alignments on the validation corpus.

    Gold alignments are read from ``self.path_true`` and the corpus is
    loaded with ``read_data(self.english_val, self.french_val)``.

    NOTE(review): inside this body the name ``aer`` must resolve to the
    alignment-metric module, not to this function — confirm this is defined
    as a method of a class.

    Returns:
        The AER score as returned by ``AERSufficientStatistics.aer()``.
    """
    gold_sets = aer.read_naacl_alignments(self.path_true)
    validation_corpus = read_data(self.english_val, self.french_val)

    predictions = []
    for E, F in validation_corpus.corpus:
        values, _ = self.viterbi_alignment(E.s, F.s, split=False)

        sentence_links = set()
        # Row 0 is skipped; links use the row index as-is and the column
        # index shifted by one.
        for row_idx in range(1, values.shape[0]):
            row = values[row_idx]
            # All positions that attain the row maximum ...
            candidates = np.argwhere(row == np.max(row)).flatten()
            # ... with ties resolved in favour of the one closest to row_idx
            # (the first such candidate when equally close).
            distance = np.abs(candidates - row_idx)
            chosen = candidates[np.argmin(distance)]
            sentence_links.add((row_idx, chosen + 1))
        predictions.append(sentence_links)

    metric = aer.AERSufficientStatistics()
    # Accumulate statistics over the corpus, sentence by sentence.
    for gold, pred in zip(gold_sets, predictions):
        sure, probable = gold[0], gold[1]
        metric.update(sure=sure, probable=probable, predicted=pred)

    return metric.aer()
def get_validation_alignments(path):
    """Return the alignments read from the NAACL-format file at ``path``.

    Thin wrapper around ``aer.read_naacl_alignments``.
    """
    return aer.read_naacl_alignments(path)
# Write the predictions of the model configured before this point.
write_alignments(model, 'ibm2-uniform.mle.naacl')

# For each remaining model variant: build it, load its stored parameters
# from 'parameters', and write its alignments to a dedicated .naacl file.
model = Model2(data, None, 'random')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-random.mle.naacl')

model = Model2(data, None, 'ibm1')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-ibm1.mle.naacl')

model = BayesianModel2(data, None, 0.1)
model.load_parameters('parameters')
write_alignments(model, 'ibm2.vb.naacl')

model = JumpingModel2(data, None, 'random')
model.load_parameters('parameters')
write_alignments(model, 'ibm2-jumps.mle.naacl')

# Load the pickled gold alignments for the test set.
# NOTE(review): pickle.load can execute arbitrary code; only safe as long as
# this file is a locally produced cache.
testing_gold_alignment_pickle = 'pickles/testing_gold_alignments.pickle'
with open(testing_gold_alignment_pickle, 'rb') as file:
    testing_gold_alignments = pickle.load(file)

# Score every .naacl file found in the 'predictions' directory.
# Note: `file` is rebound here from the file handle above to a file name.
for file in os.listdir('predictions'):
    if file.endswith('.naacl'):
        predictions = []
        # Keep only the first element of each entry returned by the reader.
        for prediction in read_naacl_alignments('predictions/{}'.format(file)):
            predictions.append(prediction[0])
        # NOTE(review): here AERSufficientStatistics is constructed with
        # (gold, predictions) directly — confirm this matches the class's
        # constructor signature in this project.
        aer = AERSufficientStatistics(testing_gold_alignments, predictions).aer()
        print('{}: {}'.format(file, round(aer, 5)))
def validation_alignments():
    """Return the reference alignments of the validation set.

    Reads the fixed file 'validation/dev.wa.nonullalign' through
    ``aer.read_naacl_alignments``.
    """
    return aer.read_naacl_alignments('validation/dev.wa.nonullalign')
import sys

from aer import read_naacl_alignments
from data import ParallelData, Sentence, Alignment, WordAlignment, Certainty
from model import Model2, JumpingModel2

# NOTE(review): `os` and `pickle` are used below but their imports are not
# visible in this excerpt — confirm they are imported above.

# First command-line argument selects the initialisation type; it is used to
# build the parameter path at the end of this section.
initialisation_type = sys.argv[1]

validation_gold_alignment_pickle = 'pickles/validation_gold_alignments.pickle'
testing_gold_alignment_pickle = 'pickles/testing_gold_alignments.pickle'

# Validation gold alignments: load the pickled cache when it exists,
# otherwise parse the NAACL file and write the cache for the next run.
# NOTE(review): pickle.load can execute arbitrary code; only safe as long as
# these pickles are locally produced caches.
if os.path.isfile(validation_gold_alignment_pickle):
    with open(validation_gold_alignment_pickle, 'rb') as file:
        validation_gold_alignments = pickle.load(file)
else:
    validation_gold_alignments = read_naacl_alignments(
        'data/validation/dev.wa.nonullalign')
    with open(validation_gold_alignment_pickle, 'wb') as file:
        pickle.dump(validation_gold_alignments, file)

# Test gold alignments: same load-or-build-and-cache scheme.
if os.path.isfile(testing_gold_alignment_pickle):
    with open(testing_gold_alignment_pickle, 'rb') as file:
        testing_gold_alignments = pickle.load(file)
else:
    testing_gold_alignments = read_naacl_alignments(
        'data/testing/answers/test.wa.nonullalign')
    with open(testing_gold_alignment_pickle, 'wb') as file:
        pickle.dump(testing_gold_alignments, file)

# loading data
t_model2_parameters_pickle = "parameters/model2-{}/t.pickle".format(
    initialisation_type)