def test_bad_prediction(self):
    """Check the rank obtained after overwriting one prediction.

    The predicted embedding at index 2 is replaced by ``[0.0, 1.0]``; the
    target-based rank of that item is then expected to be 4, which is also
    the ``max_rank`` the ranker is configured with.
    """
    max_rank = 4
    ranker = NearestNeigbourRanker(
        path_to_predictions=self.predictions,
        embedding_extractor=self.static_extractor,
        all_labels=self.labels,
        data_loader=self.data_loader,
        max_rank=max_rank,
        y_label="phrase",
    )
    # Overwrite a single prediction in place before re-computing the ranks.
    ranker._predicted_embeddings[2] = torch.from_numpy(np.array([0.0, 1.0]))
    target_ranks, _, _ = ranker.get_target_based_rank()
    np.testing.assert_equal(target_ranks[2], max_rank)
def test_close_prediction(self):
    """Check the rank obtained for a length-normalised replacement prediction.

    The predicted embedding at index 2 is replaced by ``[0.7030, 0.668]``
    divided by its norm; the target-based rank of that item is expected to
    be 2.
    """
    ranker = NearestNeigbourRanker(
        path_to_predictions=self.predictions,
        embedding_extractor=self.static_extractor,
        all_labels=self.labels,
        data_loader=self.data_loader,
        max_rank=5,
        y_label="phrase",
    )
    raw_vector = torch.from_numpy(np.array([0.7030, 0.668]))
    vector_norm = np.linalg.norm(raw_vector)
    # Store the vector scaled to unit length in place of the original row.
    ranker._predicted_embeddings[2] = raw_vector / vector_norm
    target_ranks, _, _ = ranker.get_target_based_rank()
    np.testing.assert_equal(target_ranks[2], 2)
def test_ranks(self):
    """Replace all predicted embeddings and compare every resulting rank.

    Four hand-written float32 prediction vectors are injected into the
    ranker; the target-based ranks are expected to be ``[3, 4, 3, 6]``.
    """
    expected_ranks = [3, 4, 3, 6]
    injected_predictions = np.array(
        [
            [0.3426, 0.9395],
            [0.7465, 0.6654],
            [0.9788, 0.2049],
            [0.9788, 0.2049],
        ],
        dtype=np.float32,
    )
    ranker = NearestNeigbourRanker(
        path_to_predictions=self.predictions,
        embedding_extractor=self.static_extractor,
        all_labels=self.labels,
        data_loader=self.data_loader,
        max_rank=6,
        y_label="phrase",
    )
    # Swap out the predictions loaded by the ranker for the fixed matrix.
    ranker._predicted_embeddings = injected_predictions
    observed_ranks, _, _ = ranker.get_target_based_rank()
    np.testing.assert_equal(observed_ranks, expected_ranks)
def test_all_ranks(self):
    """Expect rank 1 for all four items when the fixture predictions are used unchanged."""
    ranker = NearestNeigbourRanker(
        path_to_predictions=self.predictions,
        embedding_extractor=self.static_extractor,
        all_labels=self.labels,
        data_loader=self.data_loader,
        max_rank=14,
        y_label="phrase",
    )
    expected_ranks = np.full([4], 1)
    # The ranks are read from the attribute the ranker already holds after construction.
    np.testing.assert_equal(ranker._ranks, expected_ranks)
def test_nearestneighbour_contextualized(self):
    """Rank the stored BERT predictions with the contextualized extractor.

    Uses the predictions file ``embeddings/bert_predictions.npy`` together
    with the contextualized extractor and data loader of the fixture; the
    ranks are expected to be ``[1, 2, 2, 2]``.
    """
    expected_ranks = [1, 2, 2, 2]
    prediction_file = "embeddings/bert_predictions.npy"
    ranker = NearestNeigbourRanker(
        path_to_predictions=prediction_file,
        embedding_extractor=self.contextualized_extractor,
        all_labels=self.attributes,
        data_loader=self.data_loader_contextualized,
        max_rank=6,
        y_label="label",
    )
    np.testing.assert_equal(ranker.ranks, expected_ranks)
def test_precicion_at_rank(self):
    """Check ``precision_at_rank`` for cut-offs 1 and 5 on a fixed rank list.

    For the ten ranks below the expected values are 0.2 at cut-off 1 and
    0.5 at cut-off 5.
    """
    ranks = [1, 4, 5, 2, 6, 7, 8, 10, 15, 1]
    cutoffs = (1, 5)
    expected_values = (0.2, 0.5)
    # Compute both precisions first, then compare them one after the other.
    observed_values = [
        NearestNeigbourRanker.precision_at_rank(cutoff, ranks)
        for cutoff in cutoffs
    ]
    for observed, expected in zip(observed_values, expected_values):
        np.testing.assert_equal(observed, expected)
batch_size=len(data_val), num_workers=0) average_phrases = get_average_phrase(data_loader_val, average=config['average_vectors']) save_predictions(predictions=average_phrases, path=prediction_path_val) labels = extract_all_labels(training_data=config["train_data_path"], validation_data=config["validation_data_path"], test_data=config["test_data_path"], separator=config["data"]["separator"], label=config["data"]["label"]) ranker_attribute_val = NearestNeigbourRanker( path_to_predictions=prediction_path_val, embedding_extractor=feature_extractor, data_loader=data_loader_val, all_labels=labels, y_label="label", max_rank=1000) ranker_attribute_val.save_ranks(evaluation_path_val) save_results(ranker_attribute_val, scores_path_val) if config["eval_on_test"]: data_test = StaticRankingDataset( config["validation_data_path"], config["feature_extractor"]["static"]["pretrained_model"], config["data"]["separator"], config["data"]["modifier"], config["data"]["head"], config["data"]["label"]) data_loader_test = DataLoader(data_val, batch_size=len(data_val), num_workers=0) if config["feature_extractor"]["contextualized_embeddings"] is False:
valid_model.eval() if valid_model: logger.info("generating predictions for validation data...") valid_predictions, valid_loss = predict(valid_loader, valid_model, device) save_predictions(predictions=valid_predictions, path=prediction_path_dev) logger.info("saved predictions to %s" % prediction_path_dev) logger.info("validation loss: %.5f" % (valid_loss)) rank_loader = torch.utils.data.DataLoader( dataset_valid, batch_size=len(dataset_valid), num_workers=0) ranker_attribute = NearestNeigbourRanker( path_to_predictions=prediction_path_dev, embedding_extractor=feature_extractor, data_loader=rank_loader, all_labels=labels, y_label="label", max_rank=1000) ranker_attribute.save_ranks(rank_path_dev) logger.info("result for learned attribute representation") logger.info(ranker_attribute.result) logger.info("quartiles : %s" % str(ranker_attribute.quartiles)) logger.info("precision at rank 1: %.2f; precision at rank 5 %.2f" % (ranker_attribute._map_1, ranker_attribute._map_5)) logger.info("accuracy: %.2f; f1 score: %.2f" % (ranker_attribute.accuracy, ranker_attribute.f1)) logger.info("saved ranks to %s" % rank_path_dev) if config["eval_on_test"]:
def _build_ranker_and_save(predictions_path, ranks_path, rank_loader, labels,
                           embedding_extractor):
    """Create a ranker for one set of predictions and save its ranks to ``ranks_path``."""
    ranker = NearestNeigbourRanker(
        path_to_predictions=predictions_path,
        embedding_extractor=embedding_extractor,
        data_loader=rank_loader,
        all_labels=labels,
        y_label="label",
        max_rank=1000)
    ranker.save_ranks(ranks_path)
    return ranker


def _log_ranker_results(header, ranker):
    """Log the header followed by result, quartiles, precision and accuracy/f1 of ``ranker``."""
    logger.info(header)
    logger.info(ranker.result)
    logger.info("quartiles : %s", str(ranker.quartiles))
    logger.info("precision at rank 1: %.2f; precision at rank 5 %.2f",
                ranker._map_1, ranker._map_5)
    logger.info("accuracy: %.2f; f1 score: %.2f", ranker.accuracy, ranker.f1)


def evaluate(predictions_final_phrase, predictions_att_rep,
             predictions_reconstructed_rep, ranks_final_phrase, ranks_att_rep,
             ranks_reconstructed_rep, dataset, labels, embedding_extractor):
    """
    Load a nearest neighbour ranker for each representation type, save its
    ranks and log the corresponding results.

    The rankers are created (and their ranks saved) in the order attribute,
    reconstructed, final phrase; the results are logged afterwards in the
    same order.

    :param predictions_final_phrase: predictions for the final composed
        phrase (passed to the ranker as ``path_to_predictions``)
    :param predictions_att_rep: predictions for the attribute-specific
        representation (passed to the ranker as ``path_to_predictions``)
    :param predictions_reconstructed_rep: predictions for the reconstructed
        phrase (passed to the ranker as ``path_to_predictions``)
    :param ranks_final_phrase: the path to save the ranks to (for final phrase)
    :param ranks_att_rep: the path to save the ranks to (for the attribute)
    :param ranks_reconstructed_rep: the path to save the ranks to (for
        reconstructed phrase)
    :param dataset: the dataset to evaluate on
    :param labels: all possible labels that can be predicted
    :param embedding_extractor: the feature extractor that corresponds to the
        data
    """
    # One unshuffled batch that contains the whole dataset.
    # NOTE(review): presumably this keeps the items aligned with the rows of
    # the saved predictions - confirm against NearestNeigbourRanker.
    rank_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=len(dataset),
                                              num_workers=0,
                                              shuffle=False)

    ranker_attribute = _build_ranker_and_save(
        predictions_att_rep, ranks_att_rep, rank_loader, labels,
        embedding_extractor)
    ranker_reconstructed = _build_ranker_and_save(
        predictions_reconstructed_rep, ranks_reconstructed_rep, rank_loader,
        labels, embedding_extractor)
    ranker_final_rep = _build_ranker_and_save(
        predictions_final_phrase, ranks_final_phrase, rank_loader, labels,
        embedding_extractor)

    _log_ranker_results("result for learned attribute representation",
                        ranker_attribute)
    _log_ranker_results("result for reconstructed representation",
                        ranker_reconstructed)
    _log_ranker_results("\nresult for final representation",
                        ranker_final_rep)