class TestTriplesFactory(unittest.TestCase):
    """Class for testing triples factories."""

    def setUp(self) -> None:
        """Instantiate test instance."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created and sorted correctly."""
        t = [
            ['e1', 'a.', 'e5'],
            ['e1', 'a', 'e2'],
        ]
        # Use the builtin ``str``: the ``np.str`` alias was removed in NumPy 1.24.
        t = np.array(t, dtype=str)
        factory = TriplesFactory(triples=t, create_inverse_triples=True)
        # Every relation is expected to be directly followed by its inverse.
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)

    def test_automatic_inverse_detection(self):
        """Test if the TriplesFactory detects that the triples contain inverses and creates correct ids."""
        t = [
            ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
            ['e1', 'a', 'e2'],
            ['e1', 'a.', 'e5'],
            ['e4', f'a{INVERSE_SUFFIX}', 'e5'],
        ]
        t = np.array(t, dtype=str)
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)
        # Although inverse creation was not requested, the flag is expected to be switched on.
        self.assertTrue(factory.create_inverse_triples)

    def test_automatic_incomplete_inverse_detection(self):
        """Test if the TriplesFactory detects that the triples contain incomplete inverses and creates correct ids."""
        # Only 'a.' comes with an inverse triple; the inverse of 'a' is absent from the input.
        t = [
            ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
            ['e1', 'a', 'e2'],
            ['e1', 'a.', 'e5'],
        ]
        t = np.array(t, dtype=str)
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)
        self.assertTrue(factory.create_inverse_triples)

    def test_right_sorting(self):
        """Test if the triples and the corresponding inverses are sorted correctly."""
        t = [
            ['e1', 'a', 'e1'],
            ['e1', 'a.', 'e1'],
            ['e1', f'a.{INVERSE_SUFFIX}', 'e1'],
            ['e1', 'a.bc', 'e1'],
            ['e1', f'a.bc{INVERSE_SUFFIX}', 'e1'],
            ['e1', f'a{INVERSE_SUFFIX}', 'e1'],
            ['e1', 'abc', 'e1'],
            ['e1', f'abc{INVERSE_SUFFIX}', 'e1'],
        ]
        t = np.array(t, dtype=str)
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
            'a.bc': 4,
            f'a.bc{INVERSE_SUFFIX}': 5,
            'abc': 6,
            f'abc{INVERSE_SUFFIX}': 7,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)

    def test_id_to_label(self):
        """Test ID-to-label conversion."""
        for label_to_id, id_to_label in [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]:
            # The two mappings must be inverse to each other in both directions.
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = ['head_label', 'relation_label', 'tail_label']
        re_labeled_triples = {tuple(row) for row in df[label_columns].values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ('scores',)

    def test_new_with_restriction(self):
        """Test new_with_restriction()."""
        example_relation_restriction = {
            'economicaid',
            'dependent',
        }
        example_entity_restriction = {
            'brazil',
            'burma',
            'china',
        }
        for inverse_triples in (True, False):
            original_triples_factory = Nations(
                create_inverse_triples=inverse_triples,
            ).training
            for entity_restriction in (None, example_entity_restriction):
                for relation_restriction in (None, example_relation_restriction):
                    # report each combination separately instead of stopping at the first failure
                    with self.subTest(
                        inverse_triples=inverse_triples,
                        entity_restriction=entity_restriction,
                        relation_restriction=relation_restriction,
                    ):
                        # apply restriction
                        restricted_triples_factory = original_triples_factory.new_with_restriction(
                            entities=entity_restriction,
                            relations=relation_restriction,
                        )

                        # check that the triples factory is returned as is, if and only if no restriction is to apply
                        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
                        equal_factory_object = restricted_triples_factory is original_triples_factory
                        assert no_restriction_to_apply == equal_factory_object

                        # check that inverse_triples is correctly carried over
                        assert (
                            original_triples_factory.create_inverse_triples
                            == restricted_triples_factory.create_inverse_triples
                        )

                        # verify that the label-to-ID mapping has not been changed
                        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
                        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

                        # verify that triples have been filtered
                        if entity_restriction is not None:
                            # entities are read from the head (0) and tail (2) columns
                            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                                restricted_triples_factory.triples[:, 2],
                            )
                            assert set(entity_restriction).issuperset(present_entities)

                        if relation_restriction is not None:
                            present_relations = set(restricted_triples_factory.triples[:, 1])
                            exp_relations = set(relation_restriction)
                            if original_triples_factory.create_inverse_triples:
                                # with inverse triples, the inverses of the selected relations are allowed as well
                                exp_relations = exp_relations.union(
                                    map(original_triples_factory.relation_to_inverse.get, exp_relations),
                                )
                            assert exp_relations.issuperset(present_relations)
class TestTriplesFactory(unittest.TestCase):
    """Class for testing triples factories."""

    def setUp(self) -> None:
        """Instantiate test instance."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        t = [
            ["e1", "a.", "e5"],
            ["e1", "a", "e2"],
        ]
        t = np.array(t, dtype=str)
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two input triples plus one inverse triple each
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        t = [
            ["e3", f"a.{INVERSE_SUFFIX}", "e10"],
            ["e1", "a", "e2"],
            ["e1", "a.", "e5"],
        ]
        t = np.array(t, dtype=str)
        for create_inverse_triples in (False, True):
            # intercept the module logger's warning to verify that it is emitted
            with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                factory = TriplesFactory.from_labeled_triples(
                    triples=t,
                    create_inverse_triples=create_inverse_triples,
                )
                # check for warning
                warning.assert_called()
                # check for filtered triples
                assert factory.num_triples == 2
                # check for correct inverse triples flag
                assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test ID-to-label conversion."""
        for label_to_id, id_to_label in [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]:
            # The two mappings must be inverse to each other in both directions.
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = ["head_label", "relation_label", "tail_label"]
        re_labeled_triples = {tuple(row) for row in df[label_columns].values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ("scores",)

    def _test_restriction(
        self,
        original_triples_factory: TriplesFactory,
        entity_restriction: Optional[Collection[str]],
        invert_entity_selection: Optional[bool],
        relation_restriction: Optional[Collection[str]],
        invert_relation_selection: Optional[bool],
    ):
        """Run the actual test for new_with_restriction.

        :param original_triples_factory: The factory to which the restriction is applied.
        :param entity_restriction: Entity labels to restrict to, or None for no entity restriction.
        :param invert_entity_selection: Whether the entity selection is inverted; the caller passes None
            when there is no entity restriction.
        :param relation_restriction: Relation labels to restrict to, or None for no relation restriction.
        :param invert_relation_selection: Whether the relation selection is inverted; the caller passes None
            when there is no relation restriction.
        """
        # apply restriction
        restricted_triples_factory = original_triples_factory.new_with_restriction(
            entities=entity_restriction,
            relations=relation_restriction,
            invert_entity_selection=invert_entity_selection,
            invert_relation_selection=invert_relation_selection,
        )

        # check that the triples factory is returned as is, if and only if no restriction is to apply
        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
        equal_factory_object = restricted_triples_factory is original_triples_factory
        assert no_restriction_to_apply == equal_factory_object

        # check that inverse_triples is correctly carried over
        assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples

        # verify that the label-to-ID mapping has not been changed
        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

        # verify that triples have been filtered
        if entity_restriction is not None:
            # entities are read from the head (0) and tail (2) columns
            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                restricted_triples_factory.triples[:, 2],
            )
            if invert_entity_selection:
                all_entities = set(original_triples_factory.entity_id_to_label.values())
                expected_entities = all_entities.difference(entity_restriction)
            else:
                expected_entities = set(entity_restriction)
            assert expected_entities.issuperset(present_entities)

        if relation_restriction is not None:
            present_relations = set(restricted_triples_factory.triples[:, 1])
            if invert_relation_selection:
                # Mirror the entity branch: without removing the restricted relations, the check would compare
                # against the set of all relations and could never fail.
                # NOTE(review): assumes an inverted selection excludes the given relations -- confirm against
                # new_with_restriction.
                all_relations = set(original_triples_factory.relation_id_to_label.values())
                expected_relations = all_relations.difference(relation_restriction)
            else:
                expected_relations = set(relation_restriction)
            assert expected_relations.issuperset(present_relations)

    def test_new_with_restriction(self):
        """Test new_with_restriction()."""
        example_relation_restriction = {
            "economicaid",
            "dependent",
        }
        example_entity_restriction = {
            "brazil",
            "burma",
            "china",
        }
        # Build the option lists once under distinct names, so that the loop variables below do not rebind the
        # example sets while iterating.
        entity_options = (
            (None, None),
            (example_entity_restriction, False),
            (example_entity_restriction, True),
        )
        relation_options = (
            (None, None),
            (example_relation_restriction, False),
            (example_relation_restriction, True),
        )
        for inverse_triples in (True, False):
            original_triples_factory = Nations(
                create_inverse_triples=inverse_triples,
            ).training
            # Test different combinations of restrictions
            for (
                (entity_restriction, invert_entity_selection),
                (relation_restriction, invert_relation_selection),
            ) in itt.product(entity_options, relation_options):
                with self.subTest(
                    entity_restriction=entity_restriction,
                    invert_entity_selection=invert_entity_selection,
                    relation_restriction=relation_restriction,
                    invert_relation_selection=invert_relation_selection,
                ):
                    self._test_restriction(
                        original_triples_factory=original_triples_factory,
                        entity_restriction=entity_restriction,
                        invert_entity_selection=invert_entity_selection,
                        relation_restriction=relation_restriction,
                        invert_relation_selection=invert_relation_selection,
                    )

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form
        reconstructed_triples = set()
        for hr, row_id in zip(instances.pairs, range(instances.compressed.shape[0])):
            h, r = hr.tolist()
            # the second component of nonzero() is used as the tail IDs of this (head, relation) pair
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            # read the size from the batch itself rather than assuming the requested batch_size
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()

    def test_split_inverse_triples(self):
        """Test whether inverse triples are only created in the training factory."""
        # set create inverse triple to true
        self.factory.create_inverse_triples = True
        # split factory
        train, *others = self.factory.split()
        # check that in *training* inverse triple are to be created
        assert train.create_inverse_triples
        # check that in all other splits no inverse triples are to be created
        assert not any(f.create_inverse_triples for f in others)
class TestTriplesFactory(unittest.TestCase):
    """Class for testing triples factories."""

    def setUp(self) -> None:
        """Instantiate test instance."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        t = [
            ['e1', 'a.', 'e5'],
            ['e1', 'a', 'e2'],
        ]
        # Use the builtin ``str``: the ``np.str`` alias was removed in NumPy 1.24.
        t = np.array(t, dtype=str)
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two input triples plus one inverse triple each
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        t = [
            ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
            ['e1', 'a', 'e2'],
            ['e1', 'a.', 'e5'],
        ]
        t = np.array(t, dtype=str)
        for create_inverse_triples in (False, True):
            # intercept the module logger's warning to verify that it is emitted
            with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                factory = TriplesFactory.from_labeled_triples(
                    triples=t,
                    create_inverse_triples=create_inverse_triples,
                )
                # check for warning
                warning.assert_called()
                # check for filtered triples
                assert factory.num_triples == 2
                # check for correct inverse triples flag
                assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test ID-to-label conversion."""
        for label_to_id, id_to_label in [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]:
            # The two mappings must be inverse to each other in both directions.
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = ['head_label', 'relation_label', 'tail_label']
        re_labeled_triples = {tuple(row) for row in df[label_columns].values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ('scores',)

    def test_new_with_restriction(self):
        """Test new_with_restriction()."""
        example_relation_restriction = {
            'economicaid',
            'dependent',
        }
        example_entity_restriction = {
            'brazil',
            'burma',
            'china',
        }
        for inverse_triples in (True, False):
            original_triples_factory = Nations(
                create_inverse_triples=inverse_triples,
            ).training
            for entity_restriction in (None, example_entity_restriction):
                for relation_restriction in (None, example_relation_restriction):
                    # report each combination separately instead of stopping at the first failure
                    with self.subTest(
                        inverse_triples=inverse_triples,
                        entity_restriction=entity_restriction,
                        relation_restriction=relation_restriction,
                    ):
                        # apply restriction
                        restricted_triples_factory = original_triples_factory.new_with_restriction(
                            entities=entity_restriction,
                            relations=relation_restriction,
                        )

                        # check that the triples factory is returned as is, if and only if no restriction is to apply
                        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
                        equal_factory_object = restricted_triples_factory is original_triples_factory
                        assert no_restriction_to_apply == equal_factory_object

                        # check that inverse_triples is correctly carried over
                        assert (
                            original_triples_factory.create_inverse_triples
                            == restricted_triples_factory.create_inverse_triples
                        )

                        # verify that the label-to-ID mapping has not been changed
                        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
                        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

                        # verify that triples have been filtered
                        if entity_restriction is not None:
                            # entities are read from the head (0) and tail (2) columns
                            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                                restricted_triples_factory.triples[:, 2],
                            )
                            assert set(entity_restriction).issuperset(present_entities)

                        if relation_restriction is not None:
                            present_relations = set(restricted_triples_factory.triples[:, 1])
                            exp_relations = set(relation_restriction)
                            assert exp_relations.issuperset(present_relations)

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form
        reconstructed_triples = set()
        for hr, row_id in zip(instances.pairs, range(instances.compressed.shape[0])):
            h, r = hr.tolist()
            # the second component of nonzero() is used as the tail IDs of this (head, relation) pair
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            # read the size from the batch itself rather than assuming the requested batch_size
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()