def test_predict_all_with_novelties(self):
    """Score every triple and verify the training/testing membership flags.

    Checks the result type, the exact column layout (including the
    ``in_training`` and ``in_testing`` flags), the total row count, and that
    the flag sums match the number of training and testing triples.
    """
    training = self.dataset.training
    predictions = get_all_prediction_df(
        model=self.model,
        triples_factory=training,
        testing=self.testing_mapped_triples,
    )
    self.assertIsInstance(predictions, pd.DataFrame)

    expected_columns = [
        'head_id',
        'head_label',
        'relation_id',
        'relation_label',
        'tail_id',
        'tail_label',
        'score',
        'in_training',
        'in_testing',
    ]
    self.assertEqual(expected_columns, list(predictions.columns))

    # Expected row count: number of relations times the squared number of entities.
    num_possible = training.num_relations * self.model.num_entities ** 2
    self.assertEqual(num_possible, len(predictions.index))

    # The membership flags must sum to the sizes of the respective triple sets.
    num_flagged_training = predictions['in_training'].sum()
    self.assertEqual(training.num_triples, num_flagged_training)
    num_flagged_testing = predictions['in_testing'].sum()
    self.assertEqual(self.testing_mapped_triples.shape[0], num_flagged_testing)
def test_predict_all_no_novelties(self):
    """Score every triple with ``add_novelties=False`` and verify the plain layout.

    Checks the result type, that only the id/label/score columns are present,
    and that the row count equals the number of possible triples.
    """
    predictions = get_all_prediction_df(
        model=self.model,
        testing=self.testing_mapped_triples,
        add_novelties=False,
    )
    self.assertIsInstance(predictions, pd.DataFrame)

    expected_columns = [
        'head_id',
        'head_label',
        'relation_id',
        'relation_label',
        'tail_id',
        'tail_label',
        'score',
    ]
    self.assertEqual(expected_columns, list(predictions.columns))

    # Expected row count: number of relations times the squared number of entities.
    factory = self.model.triples_factory
    num_possible = factory.num_relations * self.model.num_entities ** 2
    self.assertEqual(num_possible, len(predictions.index))
def test_predict_all_remove_known(self):
    """Score every triple with ``remove_known=True`` and verify the reduced row count.

    Checks the result type, that only the id/label/score columns are present,
    and that the row count equals the number of possible triples minus the
    number of training and testing triples.
    """
    predictions = get_all_prediction_df(
        model=self.model,
        testing=self.testing_mapped_triples,
        remove_known=True,
    )
    self.assertIsInstance(predictions, pd.DataFrame)

    expected_columns = [
        'head_id',
        'head_label',
        'relation_id',
        'relation_label',
        'tail_id',
        'tail_label',
        'score',
    ]
    self.assertEqual(expected_columns, list(predictions.columns))

    factory = self.model.triples_factory
    num_possible = factory.num_relations * self.model.num_entities ** 2
    num_known = factory.num_triples + self.testing_mapped_triples.shape[0]

    # Guard: if the known triples covered everything, the final check would be trivial.
    self.assertNotEqual(
        num_possible,
        num_known,
        msg='testing and training triples cover all possible triples',
    )
    self.assertEqual(num_possible - num_known, len(predictions.index))
def test_get_all_prediction_df(self):
    """Test consistency of top-k scoring.

    Predictions are requested once per value in ``ks``; each result is then
    reduced to its ``min(ks)`` highest-scoring rows. The reduced frames must
    have the same columns and the same values in every column.
    """
    ks = [5, 10]
    dfs = [
        get_all_prediction_df(
            model=self.instance,
            triples_factory=self.factory,
            batch_size=1,
            k=k,
        ).nlargest(n=min(ks), columns="score").reset_index(drop=True)
        for k in ks
    ]
    # Both frames must expose the same columns before they are compared column by column.
    assert set(dfs[0].columns) == set(dfs[1].columns)
    for column in dfs[0].columns:
        numpy.testing.assert_equal(dfs[0][column].values, dfs[1][column].values)