Пример #1
0
    def test_create_lcwa_instances(self):
        """Check that LCWA instances reproduce the mapped triples and batch as expected."""
        training = Nations().training
        lcwa = training.create_lcwa_instances()
        assert isinstance(lcwa, LCWAInstances)

        # Rebuild (h, r, t) triples from the compressed form: row ``i`` of
        # ``compressed`` belongs to ``pairs[i]``, and its non-zero column
        # indices are used as the tail ids.
        num_rows = lcwa.compressed.shape[0]
        recovered = set()
        for pair, row_index in zip(lcwa.pairs, range(num_rows)):
            head, relation = pair.tolist()
            _, tail_ids = lcwa.compressed[row_index].nonzero()
            for tail in tail_ids.tolist():
                recovered.add((head, relation, tail))
        expected = set(map(tuple, training.mapped_triples.tolist()))
        assert expected == recovered

        # Iterate the instances through a data loader and validate every batch.
        loader = torch.utils.data.DataLoader(lcwa, batch_size=2)
        for batch in loader:
            assert len(batch) == 2
            assert all(torch.is_tensor(part) for part in batch)
            pairs, targets = batch
            current_size = pairs.shape[0]
            assert pairs.shape == (current_size, 2)
            assert pairs.dtype == torch.long
            assert targets.shape == (current_size, training.num_entities)
            assert targets.dtype == torch.get_default_dtype()
Пример #2
0
class TestSplit(unittest.TestCase):
    """Tests for splitting a triples factory by ratio."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Load the Nations training factory and check its expected size."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Split into two parts with a single ratio and check both sizes."""
        ratio = 0.8
        train_part, test_part = self.triples_factory.split(ratio)
        total = self.triples_factory.num_triples
        # The first part receives the truncated share; the second gets the rest.
        num_train = int(total * ratio)
        self.assertEqual(num_train, train_part.num_triples)
        self.assertEqual(total - num_train, test_part.num_triples)

    def test_split_multi(self):
        """Split into three parts with two ratios and check all sizes."""
        r0, r1 = 0.80, 0.10
        first, second, third = self.triples_factory.split((r0, r1))
        total = self.triples_factory.num_triples
        num_first = int(total * r0)
        num_second = int(total * r1)
        # Whatever is not covered by the two explicit ratios goes to the last part.
        num_third = total - num_first - num_second
        for expected, part in ((num_first, first), (num_second, second), (num_third, third)):
            self.assertEqual(expected, part.num_triples)
Пример #3
0
 def pre_setup_hook(self) -> None:
     """Create the triples factory, sLCWA instances, and a seeded positive batch."""
     factory = Nations().training
     self.triples_factory = factory
     self.training_instances = factory.create_slcwa_instances()

     # Draw ``batch_size`` row indices (with replacement) from a generator
     # seeded with ``self.seed`` and use them to pick the positive triples.
     rng = numpy.random.RandomState(seed=self.seed)
     num_instances = len(self.training_instances)
     sampled_rows = rng.randint(low=0, high=num_instances, size=(self.batch_size, ))
     self.positive_batch = self.training_instances.mapped_triples[sampled_rows]
Пример #4
0
    def test_new_with_restriction(self):
        """Test new_with_restriction().

        Every combination of inverse triples on/off, entity restriction
        present/absent and relation restriction present/absent is exercised.
        """
        example_relation_restriction = {
            'economicaid',
            'dependent',
        }
        example_entity_restriction = {
            'brazil',
            'burma',
            'china',
        }
        for inverse_triples in (True, False):
            original_triples_factory = Nations(
                create_inverse_triples=inverse_triples,
            ).training
            for entity_restriction in (None, example_entity_restriction):
                for relation_restriction in (None, example_relation_restriction):
                    # apply restriction
                    restricted_triples_factory = original_triples_factory.new_with_restriction(
                        entities=entity_restriction,
                        relations=relation_restriction,
                    )
                    # check that the triples factory is returned as is, if and only if no restriction is to apply
                    no_restriction_to_apply = entity_restriction is None and relation_restriction is None
                    # identity comparison: the very same object must be handed back
                    equal_factory_object = restricted_triples_factory is original_triples_factory
                    assert no_restriction_to_apply == equal_factory_object

                    # check that inverse_triples is correctly carried over
                    assert (
                        original_triples_factory.create_inverse_triples
                        == restricted_triples_factory.create_inverse_triples
                    )

                    # verify that the label-to-ID mapping has not been changed
                    assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
                    assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

                    # verify that triples have been filtered: only restricted
                    # entities may appear in the head or tail column
                    if entity_restriction is not None:
                        present_entities = set(
                            restricted_triples_factory.triples[:, 0],
                        ).union(restricted_triples_factory.triples[:, 2])
                        assert set(entity_restriction).issuperset(present_entities)

                    if relation_restriction is not None:
                        present_relations = set(restricted_triples_factory.triples[:, 1])
                        exp_relations = set(relation_restriction)
                        if original_triples_factory.create_inverse_triples:
                            # with inverse triples, the inverse of each allowed
                            # relation is allowed as well
                            exp_relations = exp_relations.union(
                                map(original_triples_factory.relation_to_inverse.get, exp_relations),
                            )
                        assert exp_relations.issuperset(present_relations)
Пример #5
0
 def setUp(self) -> None:
     """Create the triples factory, both negative samplers, and a seeded positive batch."""
     self.batch_size = 16
     self.seed = 42
     self.num_negs_per_pos = 10

     factory = Nations().training
     self.triples_factory = factory
     self.slcwa_instances = factory.create_slcwa_instances()

     # One sampler built with only the triples factory, and one that is
     # additionally given ``num_negs_per_pos``.
     sampler_cls = self.negative_sampling_cls
     self.negative_sampler = sampler_cls(triples_factory=factory)
     self.scaling_negative_sampler = sampler_cls(
         triples_factory=factory,
         num_negs_per_pos=self.num_negs_per_pos,
     )

     # Pick ``batch_size`` row indices from a seeded generator.
     rng = numpy.random.RandomState(seed=self.seed)
     upper = self.slcwa_instances.num_instances
     sampled_rows = rng.randint(low=0, high=upper, size=(self.batch_size,))
     self.positive_batch = self.slcwa_instances.mapped_triples[sampled_rows]
Пример #6
0
 def test_early_stopping(self):
     """Train TransE with an early stopper and verify when training stopped."""
     dataset = Nations()
     training = dataset.training
     model: Model = TransE(triples_factory=training)

     # Automatic memory optimization is switched off during testing.
     rank_evaluator = RankBasedEvaluator(automatic_memory_optimization=False)
     stopper = EarlyStopper(
         model=model,
         evaluator=rank_evaluator,
         training_triples_factory=training,
         evaluation_triples_factory=dataset.validation,
         patience=self.patience,
         relative_delta=self.relative_delta,
         metric='mean_rank',
     )
     loop = SLCWATrainingLoop(
         model=model,
         triples_factory=training,
         optimizer=Adam(params=model.get_grad_params()),
         automatic_memory_optimization=False,
     )
     losses = loop.train(
         triples_factory=training,
         num_epochs=self.max_num_epochs,
         batch_size=self.batch_size,
         stopper=stopper,
         use_tqdm=False,
     )

     # The test treats the number of returned losses as the number of epochs run.
     epochs_run = len(losses)
     self.assertEqual(stopper.number_results, epochs_run // stopper.frequency)
     self.assertEqual(
         self.stop_epoch,
         epochs_run,
         msg='Did not stop early like it should have',
     )
Пример #7
0
    def setUpClass(cls):
        """Prepare a shared dataset and mock model with hand-crafted triples."""
        cls.device = resolve_device('cuda')
        cls.dataset = Nations()
        cls.model = MockModel(triples_factory=cls.dataset.training)

        # The MockModel gives the highest score to the highest entity id, so
        # name the three top-ranked entity ids explicitly.
        best = cls.dataset.num_entities - 1
        second_best = best - 1
        third_best = best - 2

        # The single test triple ranks third for both head and tail prediction.
        cls.dataset.testing.mapped_triples = torch.tensor([[third_best, 0, third_best]])

        # Training triples replace the factory's own triples, since those are
        # used for filtering. They rank first for head and tail prediction of
        # the test triple.
        training_rows = [
            [third_best, 0, best],
            [best, 0, third_best],
        ]
        cls.dataset.training.mapped_triples = torch.tensor(training_rows)

        # Validation triples rank second for head and tail prediction of the
        # test triple.
        validation_rows = [
            [third_best, 0, second_best],
            [second_best, 0, third_best],
        ]
        cls.dataset.validation.mapped_triples = torch.tensor(validation_rows)
Пример #8
0
    def setUp(self):
        """Build the evaluator, model, and hand-crafted triples for filtering tests."""
        self.evaluator = RankBasedEvaluator(
            filtered=True,
            automatic_memory_optimization=False,
        )
        self.triples_factory = Nations().training
        self.model = FixedModel(triples_factory=self.triples_factory)

        # The model is expected to give the highest score to the highest
        # entity id (NOTE(review): the original comment names MockModel while
        # the code builds FixedModel -- confirm it scores the same way).
        best = self.triples_factory.num_entities - 1
        second_best = best - 1
        third_best = best - 2

        # The single test triple ranks third for both head and tail prediction.
        self.test_triples = torch.tensor([[third_best, 0, third_best]])

        # Training triples, intended for filtering: they rank first for head
        # and tail prediction of the test triple.
        training_rows = [
            [third_best, 0, best],
            [best, 0, third_best],
        ]
        self.training_triples = torch.tensor(training_rows)

        # Validation triples rank second for head and tail prediction of the
        # test triple.
        validation_rows = [
            [third_best, 0, second_best],
            [second_best, 0, third_best],
        ]
        self.validation_triples = torch.tensor(validation_rows)
Пример #9
0
def test_sample_negatives():
    """Test for sample_negatives.

    Negatives are sampled for the validation triples, filtered additionally
    by the training triples. For each corrupted side, the dtype and shape of
    the result are checked, and none of the corrupted triples may be a known
    true triple.
    """
    dataset = Nations()
    num_negatives = 2
    evaluation_triples = dataset.validation.mapped_triples
    additional_filter_triples = dataset.training.mapped_triples
    negatives = sample_negatives(
        evaluation_triples=evaluation_triples,
        additional_filter_triples=additional_filter_triples,
        num_entities=dataset.num_entities,
        num_samples=num_negatives,
    )
    head_negatives, tail_negatives = negatives[LABEL_HEAD], negatives[LABEL_TAIL]
    num_triples = evaluation_triples.shape[0]
    true = set(
        map(
            tuple,
            prepare_filter_triples(
                mapped_triples=evaluation_triples,
                additional_filter_triples=additional_filter_triples,
            ).tolist(),
        ),
    )
    # column 0 holds heads, column 2 holds tails; a distinct loop name avoids
    # shadowing the ``negatives`` mapping above
    for column, side_negatives in zip((0, 2), (head_negatives, tail_negatives)):
        assert torch.is_tensor(side_negatives)
        assert side_negatives.dtype == torch.long
        assert side_negatives.shape == (num_triples, num_negatives)
        # check true negatives: copy every evaluation triple once per sample
        # (keeping the integer dtype so ids are compared exactly), then
        # overwrite the corrupted column with the sampled ids
        full_negatives = evaluation_triples.unsqueeze(dim=1).repeat(1, num_negatives, 1)
        full_negatives[:, :, column] = side_negatives
        negative_set = set(map(tuple, full_negatives.view(-1, 3).tolist()))
        assert negative_set.isdisjoint(true)
Пример #10
0
 def setUp(self):
     """Create the dummy evaluator and dummy model used by the structure tests."""
     self.counter = 1337
     self.evaluator = DummyEvaluator(counter=self.counter, filtered=True)
     factory = Nations().training
     self.triples_factory = factory
     self.model = DummyModel(
         triples_factory=factory,
         automatic_memory_optimization=False,
     )
Пример #11
0
class _NegativeSamplingTestCase:
    """Shared test logic for negative samplers.

    Concrete test cases set ``negative_sampling_cls``; the remaining
    attributes are populated in :meth:`setUp`.
    """

    #: The batch size
    batch_size: int
    #: The random seed
    seed: int
    #: The triples factory
    triples_factory: TriplesFactory
    #: The sLCWA instances
    slcwa_instances: SLCWAInstances
    #: Class of negative sampling to test
    negative_sampling_cls: ClassVar[Type[NegativeSampler]]
    #: The negative sampler instance, initialized in setUp
    negative_sampler: NegativeSampler
    #: A positive batch
    positive_batch: torch.LongTensor

    def setUp(self) -> None:
        """Create the triples factory, both negative samplers, and a seeded positive batch."""
        self.batch_size = 16
        self.seed = 42
        self.num_negs_per_pos = 10

        factory = Nations().training
        self.triples_factory = factory
        self.slcwa_instances = factory.create_slcwa_instances()

        # One sampler built with only the triples factory, and one that is
        # additionally given ``num_negs_per_pos``.
        sampler_cls = self.negative_sampling_cls
        self.negative_sampler = sampler_cls(triples_factory=factory)
        self.scaling_negative_sampler = sampler_cls(
            triples_factory=factory,
            num_negs_per_pos=self.num_negs_per_pos,
        )

        # Pick ``batch_size`` row indices from a seeded generator.
        rng = numpy.random.RandomState(seed=self.seed)
        upper = self.slcwa_instances.num_instances
        sampled_rows = rng.randint(low=0, high=upper, size=(self.batch_size,))
        self.positive_batch = self.slcwa_instances.mapped_triples[sampled_rows]

    def test_sample(self) -> None:
        """Sample negatives and check shape, id bounds, and that each row was corrupted."""
        positives = self.positive_batch
        corrupted = self.negative_sampler.sample(positive_batch=positives)

        # check shape
        assert corrupted.shape == positives.shape

        # check bounds per column: heads, relations, tails
        num_entities = self.triples_factory.num_entities
        num_relations = self.triples_factory.num_relations
        for column, upper in ((0, num_entities), (1, num_relations), (2, num_entities)):
            assert _array_check_bounds(corrupted[:, column], low=0, high=upper)

        # Check that all elements got corrupted: every row differs somewhere
        row_changed = (corrupted != positives).any(dim=1)
        assert row_changed.all()

        # Generate scaled negative sample
        scaled = self.scaling_negative_sampler.sample(positive_batch=positives)

        assert scaled.shape[0] == positives.shape[0] * self.num_negs_per_pos
        assert scaled.shape[1] == positives.shape[1]
Пример #12
0
 def setUp(self) -> None:
     """Create the triples factory and a TransE model to serve as the example model."""
     self.batch_size = 16
     self.embedding_dim = 8
     self.factory = Nations().training
     example_model = TransE(self.factory, embedding_dim=self.embedding_dim)
     # ``to_device_`` is called on the fresh model and its return value is stored.
     self.model = example_model.to_device_()
Пример #13
0
 def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
     # Let the parent class prepare the keyword arguments first.
     prepared = super()._pre_instantiation_hook(kwargs=kwargs)
     # The second element of the seeding result is kept as the generator.
     seeding_result = set_random_seed(seed=self.seed)
     self.generator = seeding_result[1]
     # The same factory is passed to the instance and stored on the test case.
     factory = Nations().training
     prepared["triples_factory"] = factory
     self.triples_factory = factory
     return prepared
Пример #14
0
 def setUp(self) -> None:
     """Set the training-loop test parameters and create a temporary directory."""
     self.random_seed = 123
     self.num_epochs = 10
     self.checkpoint_file = "PyKEEN_training_loop_test_checkpoint.pt"
     self.triples_factory = Nations().training
     # Kept on the instance so the directory object stays referenced after setUp.
     self.temporary_directory = tempfile.TemporaryDirectory()
Пример #15
0
 def setUp(self) -> None:
     """Create the triples factory and a graph sampler over it."""
     factory = Nations().training
     self.triples_factory = factory
     self.num_samples = 20
     self.num_epochs = 10
     self.graph_sampler = GraphSampler(
         triples_factory=factory,
         num_samples=self.num_samples,
     )
Пример #16
0
 def setUp(self) -> None:
     """Build a label-based initializer for entities and record its tensor shape."""
     training = Nations().training
     initializer = pykeen.nn.init.LabelBasedInitializer.from_triples_factory(
         triples_factory=training,
         for_entities=True,
     )
     self.initializer = initializer
     self.shape = initializer.tensor.shape
Пример #17
0
 def pre_setup_hook(self) -> None:
     """Create the factory, loss, TransE model, and optimizer before setup runs."""
     factory = Nations().training
     self.triples_factory = factory
     self.loss = self.loss_cls()
     self.model = TransE(
         triples_factory=factory,
         loss=self.loss,
         random_seed=self.random_seed,
     )
     # The optimizer is built over the model's gradient-requiring parameters.
     trainable = self.model.get_grad_params()
     self.optimizer = self.optimizer_cls(trainable)
Пример #18
0
 def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
     # Let the parent class prepare the keyword arguments first.
     prepared = super()._pre_instantiation_hook(kwargs=kwargs)
     # Store the training factory on the test case and pass it to the instance.
     self.factory = Nations().training
     prepared["triples_factory"] = self.factory
     return prepared
Пример #19
0
 def setUp(self) -> None:
     """Create the triples factory and a graph sampler over its mapped triples."""
     factory = Nations().training
     self.triples_factory = factory
     self.batch_size = 20
     self.num_epochs = 10
     sampler_kwargs = dict(
         mapped_triples=factory.mapped_triples,
         batch_size=self.batch_size,
     )
     self.graph_sampler = GraphSampler(**sampler_kwargs)
Пример #20
0
 def setUp(self) -> None:
     """Prepare device, factory, and positive batch, then run the parent setup."""
     device = resolve_device()
     self.device = device
     self.triples_factory = Nations().training
     self.batch_size = 16
     # The first ``batch_size`` triples form the positive batch, moved to the device.
     leading_triples = self.triples_factory.mapped_triples[:self.batch_size, :]
     self.positive_batch = leading_triples.to(device=device)
     # The parent setup runs only after the attributes above are in place.
     super().setUp()
     # move test instance to device
     self.instance = self.instance.to(device)
Пример #21
0
 def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
     # Let the parent class prepare the keyword arguments first.
     prepared = super()._pre_instantiation_hook(kwargs=kwargs)
     self.output_dim = self.input_dim
     factory = Nations().training
     self.factory = factory
     # Transposing the mapped triples yields one row per triple column.
     heads, relations, tails = factory.mapped_triples.t()
     self.source, self.edge_type, self.target = heads, relations, tails
     # Random input features, one row per entity.
     self.x = torch.rand(factory.num_entities, self.input_dim)
     prepared["input_dim"] = self.input_dim
     prepared["num_relations"] = factory.num_relations
     return prepared
Пример #22
0
 def setUpClass(cls):
     """Run a short TransE pipeline on Nations and keep the result for all tests."""
     pipeline_kwargs = dict(
         model='TransE',
         dataset='nations',
         training_kwargs=dict(num_epochs=5),
     )
     cls.result = pipeline(**pipeline_kwargs)
     cls.model = cls.result.model
     # Testing triples are moved to the device the trained model lives on.
     testing_triples = Nations().testing.mapped_triples
     cls.testing_mapped_triples = testing_triples.to(cls.model.device)
Пример #23
0
    def test_remix(self):
        """Check that remixing keeps each part's size but changes its triples."""
        reference = Nations()
        part_names = ('training', 'testing', 'validation')
        for random_state in range(20):
            derived = reference.remix(random_state=random_state)
            for part_name in part_names:
                before = getattr(reference, part_name)
                after = getattr(derived, part_name)
                # Same number of triples in the part ...
                self.assertEqual(before.num_triples, after.num_triples)
                # ... but not element-wise identical triples.
                unchanged = (before.mapped_triples == after.mapped_triples).all()
                self.assertFalse(unchanged)
Пример #24
0
 def setUp(self) -> None:
     """Seed torch, then create the factory, regularizer, and positive batch."""
     self.generator = torch.random.manual_seed(seed=42)
     self.batch_size = 16
     self.triples_factory = Nations().training
     device = resolve_device()
     self.device = device
     # Optional extra keyword arguments default to none when unset.
     extra_kwargs = self.regularizer_kwargs or {}
     self.regularizer = self.regularizer_cls(device=device, **extra_kwargs)
     # The first ``batch_size`` triples form the positive batch, moved to the device.
     leading_triples = self.triples_factory.mapped_triples[:self.batch_size, :]
     self.positive_batch = leading_triples.to(device=device)
Пример #25
0
    def setUp(self) -> None:
        """Seed the random state, then create the factory and the model under test."""
        # Only the middle element of the seeding result is kept.
        _, self.generator, _ = set_random_seed(42)

        self.factory = Nations(create_inverse_triples=self.create_inverse_triples).training
        # Optional extra keyword arguments default to none when unset.
        extra_kwargs = self.model_kwargs or {}
        model = self.model_cls(
            triples_factory=self.factory,
            embedding_dim=self.embedding_dim,
            **extra_kwargs,
        )
        self.model = model.to_device_()
Пример #26
0
 def test_relations_to_sparse_matrices(self):
     """Test :func:`triples_factory_to_sparse_matrices`."""
     factory = Nations().training
     matrices = triples_factory_to_sparse_matrices(factory)
     # Exactly two matrices are returned; both must satisfy the same checks.
     rel, inv = matrices
     for matrix in (rel, inv):
         # check type
         assert isinstance(matrix, scipy.sparse.spmatrix)
         assert matrix.dtype == numpy.int32
         # check shape: one row per relation
         assert matrix.shape[0] == factory.num_relations
         # check 1-hot: the largest entry is one
         assert matrix.max() == 1
Пример #27
0
    def setUp(self) -> None:
        """Create the evaluator under test, a triples factory, and a TransE model."""
        # Settings
        self.batch_size = 8
        self.embedding_dim = 7

        # Initialize evaluator; optional keyword arguments default to none
        evaluator_kwargs = self.evaluator_kwargs or {}
        self.evaluator = self.evaluator_cls(**evaluator_kwargs)

        # Use small test dataset
        self.factory = Nations().training

        # Use small model (untrained)
        self.model = TransE(
            triples_factory=self.factory,
            embedding_dim=self.embedding_dim,
        )
Пример #28
0
 def setUp(self):
     """Create the mock evaluator, mock model, and the early stopper under test."""
     # Automatic memory optimization is switched off for tests.
     self.mock_evaluator = MockEvaluator(
         self.mock_losses,
         automatic_memory_optimization=False,
     )
     dataset = Nations()
     self.model = MockModel(triples_factory=dataset.training)
     stopper_kwargs = dict(
         model=self.model,
         evaluator=self.mock_evaluator,
         evaluation_triples_factory=dataset.validation,
         patience=self.patience,
         relative_delta=self.delta,
         larger_is_better=False,
     )
     self.stopper = EarlyStopper(**stopper_kwargs)
Пример #29
0
 def setUpClass(cls):
     """Run a short, seeded TransE pipeline on Nations and keep the result for all tests."""
     device = resolve_device('cuda')
     cls.device = device
     # Progress bars are disabled for both training and evaluation.
     pipeline_kwargs = dict(
         model='TransE',
         dataset='nations',
         training_kwargs=dict(num_epochs=5, use_tqdm=False),
         evaluation_kwargs=dict(use_tqdm=False),
         device=device,
         random_seed=42,
     )
     cls.result = pipeline(**pipeline_kwargs)
     cls.model = cls.result.model
     # Testing triples are moved to the device the trained model lives on.
     testing_triples = Nations().testing.mapped_triples
     cls.testing_mapped_triples = testing_triples.to(cls.model.device)
Пример #30
0
    def test_metadata(self):
        """Test metadata passing for triples factories.

        Checks the ``path`` metadata entry and the ``repr`` of the original
        factory, of entity- and relation-restricted factories, of a clone
        created without metadata, and of both parts of a split.
        """
        t = Nations().training
        self.assertEqual(NATIONS_TRAIN_PATH, t.metadata['path'])
        self.assertEqual(
            (
                'TriplesFactory(num_entities=14, num_relations=55, num_triples=1592,'
                f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}")'
            ),
            repr(t),
        )

        # restriction by entities keeps the metadata
        entities = ['poland', 'ussr']
        x = t.new_with_restriction(entities=entities)
        entities_ids = t.entities_to_ids(entities=entities)
        self.assertEqual(NATIONS_TRAIN_PATH, x.metadata['path'])
        self.assertEqual(
            (
                'TriplesFactory(num_entities=14, num_relations=55, num_triples=37,'
                f' inverse_triples=False, entity_restriction={repr(entities_ids)}, path="{NATIONS_TRAIN_PATH}")'
            ),
            repr(x),
        )

        # restriction by relations keeps the metadata; the path check targets
        # the relation-restricted factory ``v`` itself
        relations = ['negativebehavior']
        v = t.new_with_restriction(relations=relations)
        relations_ids = t.relations_to_ids(relations=relations)
        self.assertEqual(NATIONS_TRAIN_PATH, v.metadata['path'])
        self.assertEqual(
            (
                'TriplesFactory(num_entities=14, num_relations=55, num_triples=29,'
                f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}", relation_restriction={repr(relations_ids)})'
            ),
            repr(v),
        )

        # cloning with ``keep_metadata=False`` drops the path entry
        w = t.clone_and_exchange_triples(t.triples[0:5], keep_metadata=False)
        self.assertIsInstance(w, TriplesFactory)
        self.assertNotIn('path', w.metadata)
        self.assertEqual(
            'TriplesFactory(num_entities=14, num_relations=55, num_triples=5, inverse_triples=False)',
            repr(w),
        )

        # both parts of a split keep the path entry
        y, z = t.split()
        self.assertEqual(NATIONS_TRAIN_PATH, y.metadata['path'])
        self.assertEqual(NATIONS_TRAIN_PATH, z.metadata['path'])