Example #1
    def test_error_on_nan(self):
        """Test if the correct error is raised for non-finite loss values."""
        model = TransE(triples_factory=self.triples_factory)
        patience = 2

        class NaNTrainingLoop(self.cls):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)
                self.patience = patience

            def _process_batch(self, *args, **kwargs):
                loss = super()._process_batch(*args, **kwargs)
                self.patience -= 1
                if self.patience < 0:
                    return torch.as_tensor([float("nan")], device=loss.device, dtype=torch.float32)
                return loss

        training_loop = NaNTrainingLoop(
            model=model,
            triples_factory=self.triples_factory,
            optimizer=self.optimizer_cls(model.get_grad_params()),
        )
        with self.assertRaises(NonFiniteLossError):
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=patience + 1,
                batch_size=self.batch_size,
            )
Example #2
    def setUp(self) -> None:
        """Set up the test case with a triples factory and TransE as an example model."""
        self.batch_size = 16
        self.embedding_dim = 8
        self.factory = Nations().training
        self.model = TransE(self.factory, embedding_dim=self.embedding_dim).to_device_()
Example #3
    def pre_setup_hook(self) -> None:
        """Prepare case-level variables before the setup() function."""
        self.triples_factory = Nations().training
        self.loss = self.loss_cls()
        self.model = TransE(
            triples_factory=self.triples_factory,
            loss=self.loss,
            random_seed=self.random_seed,
        )
        self.optimizer = self.optimizer_cls(self.model.get_grad_params())
Example #4
class TestBaseModel(unittest.TestCase):
    """Test models are set in the right mode at the right time."""

    batch_size: int
    embedding_dim: int
    factory: TriplesFactory
    model: Model

    def setUp(self) -> None:
        """Set up the test case with a triples factory and TransE as an example model."""
        self.batch_size = 16
        self.embedding_dim = 8
        self.factory = Nations().training
        self.model = TransE(self.factory, embedding_dim=self.embedding_dim).to_device_()

    def _check_scores(self, scores) -> None:
        """Check the scores produced by a forward function."""
        # check for finite values by default
        assert torch.all(torch.isfinite(scores)).item()

    def test_predict_scores_all_heads(self) -> None:
        """Test ``BaseModule.predict_scores_all_heads``."""
        batch = torch.zeros(self.batch_size, 2, dtype=torch.long, device=self.model.device)

        # Set into training mode to check if it is correctly set to evaluation mode.
        self.model.train()

        scores = self.model.predict_h(batch)
        assert scores.shape == (self.batch_size, self.model.num_entities)
        self._check_scores(scores)

        assert not self.model.training

    def test_predict_scores_all_tails(self) -> None:
        """Test ``BaseModule.predict_scores_all_tails``."""
        batch = torch.zeros(self.batch_size, 2, dtype=torch.long, device=self.model.device)

        # Set into training mode to check if it is correctly set to evaluation mode.
        self.model.train()

        scores = self.model.predict_t(batch)
        assert scores.shape == (self.batch_size, self.model.num_entities)
        self._check_scores(scores)

        assert not self.model.training

    def test_predict_scores(self) -> None:
        """Test ``BaseModule.predict_scores``."""
        batch = torch.zeros(self.batch_size, 3, dtype=torch.long, device=self.model.device)

        # Set into training mode to check if it is correctly set to evaluation mode.
        self.model.train()

        scores = self.model.predict_hrt(batch)
        assert scores.shape == (self.batch_size, 1)
        self._check_scores(scores)

        assert not self.model.training
Example #5
    def test_checkpoints(self):
        """Test whether interrupting the given training loop type can be resumed using checkpoints."""
        # Train a model in one shot
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        training_loop = self._with_model(model)
        losses = training_loop.train(
            triples_factory=self.triples_factory,
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            use_tqdm=False,
            use_tqdm_batch=False,
        )

        with tempfile.TemporaryDirectory() as directory:
            name = "checkpoint.pt"

            # Train a model for the first half
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=self.random_seed,
            )
            training_loop = self._with_model(model)
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=int(self.num_epochs // 2),
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

            # Continue training of the first part
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=123,
            )
            training_loop = self._with_model(model)
            losses_2 = training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=self.num_epochs,
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

        self.assertEqual(losses, losses_2)
Example #6
    def setUp(self) -> None:
        """Set up the test case."""
        # Settings
        self.batch_size = 8
        self.embedding_dim = 7

        # Initialize evaluator
        self.evaluator = self.evaluator_cls(**(self.evaluator_kwargs or {}))

        # Use small test dataset
        self.factory = Nations().training

        # Use small model (untrained)
        self.model = TransE(triples_factory=self.factory, embedding_dim=self.embedding_dim)
Example #7
    def test_early_stopping(self):
        """Test early stopping."""
        # Set automatic_memory_optimization to False during testing
        nations = Nations()
        model: Model = TransE(triples_factory=nations.training)
        evaluator = RankBasedEvaluator(automatic_memory_optimization=False)
        stopper = EarlyStopper(
            model=model,
            evaluator=evaluator,
            training_triples_factory=nations.training,
            evaluation_triples_factory=nations.validation,
            patience=self.patience,
            relative_delta=self.relative_delta,
            metric='mean_rank',
        )
        training_loop = SLCWATrainingLoop(
            model=model,
            triples_factory=nations.training,
            optimizer=Adam(params=model.get_grad_params()),
            automatic_memory_optimization=False,
        )
        losses = training_loop.train(
            triples_factory=nations.training,
            num_epochs=self.max_num_epochs,
            batch_size=self.batch_size,
            stopper=stopper,
            use_tqdm=False,
        )
        self.assertEqual(stopper.number_results, len(losses) // stopper.frequency)
        self.assertEqual(self.stop_epoch, len(losses), msg='Did not stop early like it should have')
Example #8
    def test_error_on_nan(self):
        """Test if the correct error is raised for non-finite loss values."""
        model = TransE(triples_factory=self.triples_factory)
        training_loop = NaNTrainingLoop(model=model, patience=2)

        with self.assertRaises(NonFiniteLossError):
            training_loop.train(num_epochs=3, batch_size=self.batch_size)
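
Note that `NaNTrainingLoop` is used here without its definition. A minimal sketch of such a helper, mirroring the inline subclass from Example #1 and assuming it wraps `SLCWATrainingLoop` (the constructor signature is an assumption; this snippet's API takes only `model` and an optimizer created internally):

import torch
from torch.optim import Adam

from pykeen.training import SLCWATrainingLoop


class NaNTrainingLoop(SLCWATrainingLoop):
    """Hypothetical helper loop that yields a NaN loss after ``patience`` batches."""

    def __init__(self, model, patience, **kwargs):
        super().__init__(
            model=model,
            optimizer=Adam(params=model.get_grad_params()),
            **kwargs,
        )
        self.patience = patience

    def _process_batch(self, *args, **kwargs):
        loss = super()._process_batch(*args, **kwargs)
        self.patience -= 1
        if self.patience < 0:
            # Return a non-finite loss so that train() raises NonFiniteLossError.
            return torch.as_tensor([float("nan")], device=loss.device, dtype=torch.float32)
        return loss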
Example #9
    def _test_checkpoints(self, training_loop_type: str):
        """Test whether interrupting the given training loop type can be resumed using checkpoints."""
        training_loop_class = get_training_loop_cls(training_loop_type)

        # Train a model in one shot
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        losses = training_loop.train(
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            use_tqdm=False,
            use_tqdm_batch=False,
        )

        # Train a model for the first half
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        training_loop.train(
            num_epochs=int(self.num_epochs // 2),
            batch_size=self.batch_size,
            checkpoint_name=self.checkpoint_file,
            checkpoint_directory=self.temporary_directory.name,
            checkpoint_frequency=0,
        )

        # Continue training of the first part
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=123,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        losses_2 = training_loop.train(
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            checkpoint_name=self.checkpoint_file,
            checkpoint_directory=self.temporary_directory.name,
            checkpoint_frequency=0,
        )

        self.assertEqual(losses, losses_2)
Example #10
    def test_blacklist_loss_on_slcwa(self):
        """Test that a disallowed (blacklisted) loss raises an error in the sLCWA."""
        model = TransE(
            triples_factory=self.triples_factory,
            loss=CrossEntropyLoss(),
        )
        with self.assertRaises(TrainingApproachLossMismatchError):
            NaNTrainingLoop(model=model, patience=2, automatic_memory_optimization=False)
Example #11
    def test_sub_batching(self):
        """Test if sub-batching works as expected."""
        model = TransE(triples_factory=self.triples_factory)
        training_loop = DummyTrainingLoop(
            model=model,
            sub_batch_size=self.sub_batch_size,
            automatic_memory_optimization=False,
        )
        training_loop.train(num_epochs=1, batch_size=self.batch_size, sub_batch_size=self.sub_batch_size)
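
`DummyTrainingLoop` is likewise not shown. A plausible sketch, assuming it wraps `SLCWATrainingLoop` and asserts in `_process_batch` that each slice respects the configured sub-batch size (the `_process_batch` signature follows PyKEEN's sLCWA loop of this era, but treat it as an assumption):

from torch.optim import Adam

from pykeen.training import SLCWATrainingLoop


class DummyTrainingLoop(SLCWATrainingLoop):
    """Hypothetical helper loop that verifies sub-batching in each step."""

    def __init__(self, model, sub_batch_size, **kwargs):
        super().__init__(
            model=model,
            optimizer=Adam(params=model.get_grad_params()),
            **kwargs,
        )
        self.sub_batch_size = sub_batch_size

    def _process_batch(self, batch, start, stop, label_smoothing=0.0, slice_size=None):
        # Every slice handed to the parent loop must fit into one sub-batch.
        assert (stop - start) <= self.sub_batch_size
        return super()._process_batch(
            batch=batch,
            start=start,
            stop=stop,
            label_smoothing=label_smoothing,
            slice_size=slice_size,
        )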
Example #12
    def test_lcwa_margin_ranking_loss_helper(self):
        """Test if output is correct for the LCWA training loop use case."""
        factory = TriplesFactory.from_labeled_triples(triples=self.triples)

        loss_fn = MarginRankingLoss(
            margin=0,
            reduction='sum',
        )

        model = TransE(
            triples_factory=factory,
            embedding_dim=8,
            preferred_device='cpu',
            loss=loss_fn,
        )

        loop = LCWATrainingLoop(model=model, triples_factory=factory)
        loss = loop._mr_loss_helper(predictions=self.predictions,
                                    labels=self.labels)
        self.assertEqual(14, loss)

        loss_fn = MarginRankingLoss(
            margin=0,
            reduction='mean',
        )

        model = TransE(
            triples_factory=factory,
            embedding_dim=8,
            preferred_device='cpu',
            loss=loss_fn,
        )

        loop = LCWATrainingLoop(model=model, triples_factory=factory)
        loss = loop._mr_loss_helper(predictions=self.predictions,
                                    labels=self.labels)
        self.assertEqual(1, loss)
Example #13
    def _build_model(
        self,
        triples_factory: CoreTriplesFactory,
    ) -> TransE:
        """Build a new TransE model for embedding.

        Parameters
        ----------
        triples_factory: CoreTriplesFactory
            The triples factory to build the model for.
        """
        return TransE(
            triples_factory=triples_factory,
            embedding_dim=self._embedding_size,
            scoring_fct_norm=self._scoring_fct_norm,
        )
Example #14
class TrainingLoopTestCase(unittest_templates.GenericTestCase[TrainingLoop]):
    """A generic test case for training loops."""

    model: Model
    triples_factory: TriplesFactory
    loss_cls: ClassVar[Type[Loss]]
    loss: Loss
    optimizer_cls: ClassVar[Type[Optimizer]] = Adam
    optimizer: Optimizer
    random_seed = 0
    batch_size: int = 128
    sub_batch_size: int = 30
    num_epochs: int = 10

    def pre_setup_hook(self) -> None:
        """Prepare case-level variables before the setup() function."""
        self.triples_factory = Nations().training
        self.loss = self.loss_cls()
        self.model = TransE(triples_factory=self.triples_factory, loss=self.loss, random_seed=self.random_seed)
        self.optimizer = self.optimizer_cls(self.model.get_grad_params())

    def _with_model(self, model: Model) -> TrainingLoop:
        return self.cls(
            model=model,
            triples_factory=self.triples_factory,
            automatic_memory_optimization=False,
            optimizer=self.optimizer_cls(model.get_grad_params()),
        )

    def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
        kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
        kwargs["triples_factory"] = self.triples_factory
        kwargs["automatic_memory_optimization"] = False
        kwargs["optimizer"] = self.optimizer
        kwargs["model"] = self.model
        return kwargs

    def test_train(self):
        """Test training does not error."""
        self.instance.train(
            triples_factory=self.triples_factory,
            num_epochs=1,
        )

    def test_sub_batching(self):
        """Test if sub-batching works as expected."""
        self.instance.train(
            triples_factory=self.triples_factory,
            num_epochs=1,
            batch_size=self.batch_size,
            sub_batch_size=self.sub_batch_size,
        )

    def test_sub_batching_support(self):
        """Test if sub-batching works as expected."""
        model = ConvE(triples_factory=self.triples_factory)
        training_loop = self._with_model(model)

        with self.assertRaises(NotImplementedError):
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=1,
                batch_size=self.batch_size,
                sub_batch_size=self.sub_batch_size,
            )

    def test_error_on_nan(self):
        """Test if the correct error is raised for non-finite loss values."""
        model = TransE(triples_factory=self.triples_factory)
        patience = 2

        class NaNTrainingLoop(self.cls):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)
                self.patience = patience

            def _process_batch(self, *args, **kwargs):
                loss = super()._process_batch(*args, **kwargs)
                self.patience -= 1
                if self.patience < 0:
                    return torch.as_tensor([float("nan")], device=loss.device, dtype=torch.float32)
                return loss

        training_loop = NaNTrainingLoop(
            model=model,
            triples_factory=self.triples_factory,
            optimizer=self.optimizer_cls(model.get_grad_params()),
        )
        with self.assertRaises(NonFiniteLossError):
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=patience + 1,
                batch_size=self.batch_size,
            )

    def test_checkpoints(self):
        """Test whether interrupting the given training loop type can be resumed using checkpoints."""
        # Train a model in one shot
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        training_loop = self._with_model(model)
        losses = training_loop.train(
            triples_factory=self.triples_factory,
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            use_tqdm=False,
            use_tqdm_batch=False,
        )

        with tempfile.TemporaryDirectory() as directory:
            name = "checkpoint.pt"

            # Train a model for the first half
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=self.random_seed,
            )
            training_loop = self._with_model(model)
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=int(self.num_epochs // 2),
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

            # Continue training of the first part
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=123,
            )
            training_loop = self._with_model(model)
            losses_2 = training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=self.num_epochs,
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

        self.assertEqual(losses, losses_2)
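
A concrete training-loop test then only needs to bind the class variables of this template; a minimal sketch, assuming the `unittest_templates` convention that `cls` names the class under test (the subclass below is illustrative, not part of PyKEEN's test suite):

from pykeen.losses import MarginRankingLoss
from pykeen.training import SLCWATrainingLoop


class SLCWATrainingLoopTests(TrainingLoopTestCase):
    """Hypothetical concrete tests for the sLCWA training loop."""

    cls = SLCWATrainingLoop
    loss_cls = MarginRankingLoss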
Example #15
class _AbstractEvaluatorTests:
    """A test case for quickly defining common tests for evaluators models."""

    # The triples factory and model
    factory: TriplesFactory
    model: Model

    #: The evaluator to be tested
    evaluator_cls: ClassVar[Type[Evaluator]]
    evaluator_kwargs: ClassVar[Optional[Mapping[str, Any]]] = None

    # Settings
    batch_size: int
    embedding_dim: int

    #: The evaluator instantiation
    evaluator: Evaluator

    def setUp(self) -> None:
        """Set up the test case."""
        # Settings
        self.batch_size = 8
        self.embedding_dim = 7

        # Initialize evaluator
        self.evaluator = self.evaluator_cls(**(self.evaluator_kwargs or {}))

        # Use small test dataset
        self.factory = Nations().training

        # Use small model (untrained)
        self.model = TransE(triples_factory=self.factory,
                            embedding_dim=self.embedding_dim)

    def _get_input(
        self,
        inverse: bool = False,
    ) -> Tuple[torch.LongTensor, torch.FloatTensor,
               Optional[torch.BoolTensor]]:
        # Get batch
        hrt_batch = self.factory.mapped_triples[:self.batch_size].to(
            self.model.device)

        # Compute scores
        if inverse:
            scores = self.model.score_h(rt_batch=hrt_batch[:, 1:])
        else:
            scores = self.model.score_t(hr_batch=hrt_batch[:, :2])

        # Compute mask only if required
        if self.evaluator.requires_positive_mask:
            # TODO: Re-use filtering code
            triples = self.factory.mapped_triples
            if inverse:
                sel_col, start_col = 0, 1
            else:
                sel_col, start_col = 2, 0
            stop_col = start_col + 2

            # shape: (batch_size, num_triples)
            triple_mask = (triples[None, :, start_col:stop_col] ==
                           hrt_batch[:, None, start_col:stop_col]).all(dim=-1)
            batch_indices, triple_indices = triple_mask.nonzero(as_tuple=True)
            entity_indices = triples[triple_indices, sel_col]

            # shape: (batch_size, num_entities)
            mask = torch.zeros_like(scores, dtype=torch.bool)
            mask[batch_indices, entity_indices] = True
        else:
            mask = None

        return hrt_batch, scores, mask

    def test_process_tail_scores_(self) -> None:
        """Test the evaluator's ``process_tail_scores_()`` function."""
        hrt_batch, scores, mask = self._get_input()
        true_scores = scores[torch.arange(0, hrt_batch.shape[0]),
                             hrt_batch[:, 2]][:, None]
        self.evaluator.process_tail_scores_(
            hrt_batch=hrt_batch,
            true_scores=true_scores,
            scores=scores,
            dense_positive_mask=mask,
        )

    def test_process_head_scores_(self) -> None:
        """Test the evaluator's ``process_head_scores_()`` function."""
        hrt_batch, scores, mask = self._get_input(inverse=True)
        true_scores = scores[torch.arange(0, hrt_batch.shape[0]),
                             hrt_batch[:, 0]][:, None]
        self.evaluator.process_head_scores_(
            hrt_batch=hrt_batch,
            true_scores=true_scores,
            scores=scores,
            dense_positive_mask=mask,
        )

    def test_finalize(self) -> None:
        """Test the evaluator's ``finalize()`` function."""
        # Process one batch
        hrt_batch, scores, mask = self._get_input()
        true_scores = scores[torch.arange(0, hrt_batch.shape[0]),
                             hrt_batch[:, 2]][:, None]
        self.evaluator.process_tail_scores_(
            hrt_batch=hrt_batch,
            true_scores=true_scores,
            scores=scores,
            dense_positive_mask=mask,
        )

        result = self.evaluator.finalize()
        assert isinstance(result, MetricResults)

        self._validate_result(result=result,
                              data={
                                  'batch': hrt_batch,
                                  'scores': scores,
                                  'mask': mask
                              })

    def _validate_result(
        self,
        result: MetricResults,
        data: Dict[str, torch.Tensor],
    ):
        """Validate the finalized metric results; concrete subclasses should override this."""
        logger.warning(f'{self.__class__.__name__} did not overwrite _validate_result.')
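
Concrete evaluator tests fill in `evaluator_cls` and override `_validate_result` to silence the warning above; a minimal sketch, assuming the rank-based evaluator's `finalize()` returns `RankBasedMetricResults` (the subclass is illustrative):

import unittest

from pykeen.evaluation import RankBasedEvaluator, RankBasedMetricResults


class RankBasedEvaluatorTests(_AbstractEvaluatorTests, unittest.TestCase):
    """Hypothetical tests for the rank-based evaluator."""

    evaluator_cls = RankBasedEvaluator

    def _validate_result(self, result, data):
        """Check that finalize() produced rank-based metric results."""
        assert isinstance(result, RankBasedMetricResults)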
Example #16
from pykeen.triples import TriplesFactory
from pykeen.evaluation import RankBasedEvaluator
from pykeen.models import TransE
from torch.optim import Adam
from pykeen.training import SLCWATrainingLoop

n_tokeep = 300
minimum = 500

tf = TriplesFactory.from_path(f'data/rare/rare_{minimum}_{n_tokeep}.csv')
training, testing, validation = tf.split([.8, .1, .1])

# Pick model, optimizer, training approach
model = TransE(triples_factory=training)
optimizer = Adam(params=model.get_grad_params())
training_loop = SLCWATrainingLoop(
    model=model,
    triples_factory=training,
    optimizer=optimizer,
)

# train
training_loop.train(
    triples_factory=training,
    num_epochs=500,
    batch_size=256,
)

# evaluate on the held-out test split
evaluator = RankBasedEvaluator(ks=[50])
mapped_triples = testing.mapped_triples
results = evaluator.evaluate(model=model, mapped_triples=mapped_triples, batch_size=256)
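
Because rank-based metrics are distorted by known positives from other splits, the evaluator can also be run in filtered mode; a minimal sketch, assuming PyKEEN's `filtered` flag and `additional_filter_triples` argument (names may differ across versions, so check your installed release):

evaluator = RankBasedEvaluator(ks=[50], filtered=True)
results = evaluator.evaluate(
    model=model,
    mapped_triples=testing.mapped_triples,
    batch_size=256,
    # Known positives from the other splits are excluded from the ranking.
    additional_filter_triples=[
        training.mapped_triples,
        validation.mapped_triples,
    ],
)
print(results.get_metric('mean_rank'))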