    def test_sampled_softmax_has_greater_loss_in_eval_mode(self):
        sampled_softmax = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=10)

        # embedding: (sequence_length, embedding_dim)
        embedding = torch.rand(100, 12)
        # targets drawn from the first 1000 of the 10000 vocabulary words
        targets = torch.randint(0, 1000, (100,)).long()

        sampled_softmax.train()
        train_loss = sampled_softmax(embedding, targets).item()

        sampled_softmax.eval()
        eval_loss = sampled_softmax(embedding, targets).item()

        # In train mode the normalizer is computed over only num_samples
        # sampled words rather than the full vocabulary, so the train-mode
        # loss underestimates the full softmax loss computed in eval mode.
        assert eval_loss > train_loss
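
    def test_eval_mode_loss_is_deterministic(self):
        # Hedged sketch, not part of the original suite: eval mode computes the
        # full softmax with no sampling, so repeated forward passes over the
        # same inputs should agree exactly. In train mode each forward pass
        # draws a fresh negative sample, so no such guarantee holds there.
        sampled_softmax = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=10)
        sampled_softmax.eval()

        embedding = torch.rand(100, 12)
        targets = torch.randint(0, 1000, (100,)).long()

        first_loss = sampled_softmax(embedding, targets).item()
        second_loss = sampled_softmax(embedding, targets).item()
        assert first_loss == second_loss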

    def test_sampled_equals_unsampled_during_eval(self):
        sampled_softmax = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=40)
        unsampled_softmax = _SoftmaxLoss(num_words=10000, embedding_dim=12)

        sampled_softmax.eval()
        unsampled_softmax.eval()

        # Tie the weights; the two modules store softmax_w in opposite
        # orientations, hence the transpose.
        sampled_softmax.softmax_w.data = unsampled_softmax.softmax_w.data.t()
        sampled_softmax.softmax_b.data = unsampled_softmax.softmax_b.data

        # embedding: (sequence_length, embedding_dim)
        embedding = torch.rand(100, 12)
        targets = torch.randint(0, 1000, (100,)).long()

        full_loss = unsampled_softmax(embedding, targets).item()
        sampled_loss = sampled_softmax(embedding, targets).item()

        # In eval mode the sampled loss falls back to the full softmax, so the
        # two losses should match to numerical precision.
        np.testing.assert_almost_equal(sampled_loss, full_loss)
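
    def test_num_samples_does_not_affect_eval_loss(self):
        # Hedged sketch, an assumption rather than part of the original suite:
        # because eval mode falls back to the full softmax, two modules that
        # share weights but differ only in num_samples should produce the same
        # eval-mode loss.
        small = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=5)
        large = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=500)
        large.softmax_w.data = small.softmax_w.data.clone()
        large.softmax_b.data = small.softmax_b.data.clone()

        small.eval()
        large.eval()

        embedding = torch.rand(100, 12)
        targets = torch.randint(0, 1000, (100,)).long()

        assert small(embedding, targets).item() == large(embedding, targets).item()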