Example No. 1
def test_Dense():
    """Tests probflow.modules.Dense"""

    # Should error w/ int < 1
    with pytest.raises(ValueError):
        dense = Dense(0, 1)
    with pytest.raises(ValueError):
        dense = Dense(5, -1)

    # Create the module
    dense = Dense(5, 1)

    # Test MAP outputs are the same
    x = tf.random.normal([4, 5])
    samples1 = dense(x)
    samples2 = dense(x)
    assert np.all(samples1.numpy() == samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 1

    # Test samples are different
    with Sampling():
        samples1 = dense(x)
        samples2 = dense(x)
    assert np.all(samples1.numpy() != samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 1

    # parameters should return [weights, bias]
    param_list = dense.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 2
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in dense.parameters]
    assert "Dense_weights" in param_names
    assert "Dense_bias" in param_names
    weights = [p for p in dense.parameters if p.name == "Dense_weights"]
    assert weights[0].shape == [5, 1]
    bias = [p for p in dense.parameters if p.name == "Dense_bias"]
    assert bias[0].shape == [1, 1]

    # kl_loss should return sum of KL losses
    kl_loss = dense.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # test Flipout
    with Sampling(flipout=True):
        samples1 = dense(x)
        samples2 = dense(x)
    assert np.all(samples1.numpy() != samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 1
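
Every example on this page uses Sampling as a context manager: outside the block, calling a parameter or module returns its MAP estimate, while inside the block each call draws from the posterior, optionally with n samples at once, with flipout, or with static (reused) samples. As a rough illustration of that pattern only (this is not ProbFlow's actual implementation, and the module-level _sampling dict below is hypothetical), such a context manager could be sketched as:

# Illustrative sketch only: flip module-level sampling settings on enter,
# restore the previous settings on exit.
_sampling = {"on": False, "n": 1, "flipout": False, "static": False}

class Sampling:

    def __init__(self, n=1, flipout=False, static=False):
        self._settings = {"on": True, "n": n, "flipout": flipout, "static": static}

    def __enter__(self):
        self._previous = dict(_sampling)   # remember the current mode
        _sampling.update(self._settings)   # parameters now draw posterior samples

    def __exit__(self, exc_type, exc_value, traceback):
        _sampling.update(self._previous)   # back to MAP estimates

Parameters would then consult these settings inside __call__ to decide whether to return the posterior mean or draw samples.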
Example No. 2
    def epistemic_sample(self, x=None, n=1000, batch_size=None):
        """Draw samples of the model's estimate given x, including only
        epistemic uncertainty (uncertainty due to uncertainty as to the
        model's parameter values)

        TODO: Docs...


        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").
        n : int
            Number of samples to draw from the model per datapoint.
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).


        Returns
        -------
        |ndarray|
            Samples from the predicted mean distribution.  Size
            (num_samples, x.shape[0], ...)
        """
        with Sampling(n=n, flipout=False):
            return self._sample(
                x, lambda x: x.mean(), ed=0, batch_size=batch_size
            )
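
Assuming a ProbFlow model that has already been fit (the model, x_val, and import below are illustrative names, not part of the source), epistemic_sample can be used to summarize per-datapoint credible intervals that reflect parameter uncertainty only:

import numpy as np

samples = model.epistemic_sample(x_val, n=500)   # shape (500, x_val.shape[0], ...)
lower = np.percentile(samples, 2.5, axis=0)      # 95% credible interval bounds
upper = np.percentile(samples, 97.5, axis=0)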
Example No. 3
def train_fn(x_data, y_data):
    self.reset_kl_loss()
    with Sampling(n=n_mc, flipout=flipout):
        self._optimizer.zero_grad()
        elbo_loss = self.elbo_loss(x_data, y_data, n, n_mc)
        elbo_loss.backward()
        self._optimizer.step()
    return elbo_loss
Example No. 4
def train_fn(x_data, y_data):
    self.reset_kl_loss()
    with Sampling(n=n_mc, flipout=flipout):
        with tf.GradientTape() as tape:
            elbo_loss = self.elbo_loss(x_data, y_data, n, n_mc)
        variables = self.trainable_variables
        gradients = tape.gradient(elbo_loss, variables)
        self._optimizer.apply_gradients(zip(gradients, variables))
    return elbo_loss
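
The two train_fn closures above perform the same single stochastic variational update for the two backends: reset the accumulated KL loss, draw n_mc Monte Carlo samples under Sampling, compute the ELBO loss, and take one optimizer step (backward/step in PyTorch, GradientTape/apply_gradients in TensorFlow). A training loop wrapping such a closure might look like the following sketch (epochs and data_generator are hypothetical names):

for epoch in range(epochs):
    for x_batch, y_batch in data_generator:   # yields (features, targets) batches
        loss = train_fn(x_batch, y_batch)     # one stochastic ELBO gradient step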
Example No. 5
def elbo_loss(self, *args):
    self._probflow_model.reset_kl_loss()
    with Sampling(n=1, flipout=False):
        if len(args) == 1:
            elbo_loss = self._probflow_model.elbo_loss(
                None, args[0], n)
        else:
            elbo_loss = self._probflow_model.elbo_loss(
                args[0], args[1], n)
    return elbo_loss
Example No. 6
def test_BatchNormalization():
    """Tests probflow.modules.BatchNormalization"""

    # Create the module
    bn = BatchNormalization(5)

    # Test MAP outputs are the same
    x = torch.randn([4, 5])
    samples1 = bn(x)
    samples2 = bn(x)
    assert np.all(samples1.detach().numpy() == samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 5

    # Samples should actually be the same b/c using deterministic posterior
    with Sampling():
        samples1 = bn(x)
        samples2 = bn(x)
    assert np.all(samples1.detach().numpy() == samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 5

    # parameters should return list of all parameters
    param_list = bn.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 2
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in bn.parameters]
    assert "BatchNormalization_weight" in param_names
    assert "BatchNormalization_bias" in param_names
    param_shapes = [p.shape for p in bn.parameters]
    assert [1, 5] in param_shapes

    # kl_loss should return sum of KL losses
    kl_loss = bn.kl_loss()
    assert isinstance(kl_loss, torch.Tensor)
    assert kl_loss.ndim == 0

    # Test it works w/ dense layer and sequential
    seq = Sequential([
        Dense(5, 10),
        BatchNormalization(10),
        torch.nn.ReLU(),
        Dense(10, 3),
        BatchNormalization(3),
        torch.nn.ReLU(),
        Dense(3, 1),
    ])
    assert len(seq.parameters) == 10
    out = seq(torch.randn([6, 5]))
    assert out.ndim == 2
    assert out.shape[0] == 6
    assert out.shape[1] == 1
Example No. 7
def test_Sequential():
    """Tests probflow.modules.Sequential"""

    # Create the module
    seq = Sequential(
        [Dense(5, 10), tf.nn.relu,
         Dense(10, 3), tf.nn.relu,
         Dense(3, 1)])

    # Steps should be list
    assert isinstance(seq.steps, list)
    assert len(seq.steps) == 5

    # Test MAP outputs are the same
    x = tf.random.normal([4, 5])
    samples1 = seq(x)
    samples2 = seq(x)
    assert np.all(samples1.numpy() == samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 1

    # Test samples are different
    with Sampling():
        samples1 = seq(x)
        samples2 = seq(x)
    assert np.all(samples1.numpy() != samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 4
    assert samples1.shape[1] == 1

    # parameters should return list of all parameters
    param_list = seq.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 6
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in seq.parameters]
    assert "Dense_weights" in param_names
    assert "Dense_bias" in param_names
    param_shapes = [p.shape for p in seq.parameters]
    assert [5, 10] in param_shapes
    assert [1, 10] in param_shapes
    assert [10, 3] in param_shapes
    assert [1, 3] in param_shapes
    assert [3, 1] in param_shapes
    assert [1, 1] in param_shapes

    # kl_loss should return sum of KL losses
    kl_loss = seq.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0
Example No. 8
    def posterior_sample(self, n: int = 1):
        """Sample from the posterior distribution.

        Parameters
        ----------
        n : int > 0
            Number of samples to draw from the posterior distribution.
            Default = 1

        Returns
        -------
        TODO
        """
        if n < 1:
            raise ValueError("n must be positive")
        with Sampling(n=n):
            return to_numpy(self())
Example No. 9
    def posterior_sample(self, n: int = 1):
        """Sample from the posterior distribution.

        Parameters
        ----------
        n : int > 0
            Number of samples to draw from the posterior distribution.
            Default = 1

        Returns
        -------
        |ndarray|
            Samples from the parameter's posterior distribution.  If ``n>1`` of
            size ``(n, self.prior.shape)``.  If ``n==1``, of size
            ``(self.prior.shape)``.
        """
        if n < 1:
            raise ValueError("n must be positive")
        with Sampling(n=n):
            return to_numpy(self())
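
A brief usage sketch, with output sizes as described in the docstring above (the param instance here is hypothetical, but Parameter is the same class exercised in the tests on this page):

param = Parameter()                     # default scalar parameter, as in the tests above
single = param.posterior_sample()       # ndarray of size (1,) -- the posterior shape
many = param.posterior_sample(n=100)    # ndarray of size (100, 1)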
Example No. 10
    def predictive_sample(self, x=None, n=1000):
        """Draw samples from the posterior predictive distribution given x

        TODO: Docs...


        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").
        n : int
            Number of samples to draw from the model per datapoint.


        Returns
        -------
        |ndarray|
            Samples from the predictive distribution.  Size
            (num_samples, x.shape[0], ...)
        """
        with Sampling(n=n, flipout=False):
            return self._sample(x, lambda x: x.sample(), ed=0)
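
For instance (model and x_test are hypothetical names for an already-fit model and held-out features), the posterior predictive samples can be summarized into point predictions and intervals:

import numpy as np

samples = model.predictive_sample(x_test, n=1000)   # shape (1000, x_test.shape[0], ...)
pred = np.median(samples, axis=0)                   # per-datapoint point prediction
lo, hi = np.percentile(samples, [5, 95], axis=0)    # 90% predictive interval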
Example No. 11
def test_Module():
    """Tests the Module abstract base class"""
    class TestModule(Module):
        def __init__(self):
            self.p1 = Parameter(name="TestParam1")
            self.p2 = Parameter(name="TestParam2", shape=[5, 4])

        def __call__(self, x):
            return O.sum(self.p2(), axis=None) + x * self.p1()

    the_module = TestModule()

    # parameters should return a list of all the parameters
    param_list = the_module.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 2
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in param_list]
    assert "TestParam1" in param_names
    assert "TestParam2" in param_names

    # n_parameters property
    nparams = the_module.n_parameters
    assert isinstance(nparams, int)
    assert nparams == 21

    # n_variables property
    nvars = the_module.n_variables
    assert isinstance(nvars, int)
    assert nvars == 42

    # trainable_variables should return list of all variables in the model
    var_list = the_module.trainable_variables
    assert isinstance(var_list, list)
    assert len(var_list) == 4
    assert all(isinstance(v, tf.Variable) for v in var_list)

    # kl_loss should return sum of all the kl losses
    kl_loss = the_module.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # calling a module should return a tensor
    x = tf.random.normal([5])
    sample1 = the_module(x)
    assert isinstance(sample1, tf.Tensor)
    assert sample1.ndim == 1
    assert sample1.shape[0] == 5

    # should be the same when sampling is off
    sample2 = the_module(x)
    assert np.all(sample1.numpy() == sample2.numpy())

    # outputs should be different when sampling is on
    with Sampling(n=1):
        sample1 = the_module(x)
        sample2 = the_module(x)
    assert np.all(sample1.numpy() != sample2.numpy())

    # bayesian_update should update all params in the module
    assert tf.reduce_all(
        the_module.p1.prior.loc != the_module.p1.posterior.loc).numpy()
    assert tf.reduce_all(
        the_module.p2.prior.scale != the_module.p2.posterior.scale).numpy()
    the_module.bayesian_update()
    assert tf.reduce_all(
        the_module.p1.prior.loc == the_module.p1.posterior.loc).numpy()
    assert tf.reduce_all(
        the_module.p2.prior.scale == the_module.p2.posterior.scale).numpy()
Example No. 12
def test_Embedding():
    """Tests probflow.modules.Embedding"""

    # Should error w/ int < 1
    with pytest.raises(ValueError):
        emb = Embedding(0, 1)
    with pytest.raises(ValueError):
        emb = Embedding(5, -1)

    # Should error w/ k and d of different lengths
    with pytest.raises(ValueError):
        emb = Embedding([2, 3], [2, 3, 4])

    # Create the module
    emb = Embedding(10, 5)

    # Check parameters
    assert len(emb.parameters) == 1
    assert emb.parameters[0].name == "Embedding_0"
    assert emb.parameters[0].shape == [10, 5]

    # Embeddings should be DeterministicParameters by default
    assert all(isinstance(e, DeterministicParameter) for e in emb.embeddings)

    # Test MAP outputs are the same
    x = tf.random.uniform([20, 1], minval=0, maxval=9, dtype=tf.dtypes.int32)
    samples1 = emb(x)
    samples2 = emb(x)
    assert np.all(samples1.numpy() == samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 20
    assert samples1.shape[1] == 5

    # Samples should actually be the same b/c using deterministic posterior
    with Sampling(n=1):
        samples1 = emb(x)
        samples2 = emb(x)
    assert np.all(samples1.numpy() == samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 20
    assert samples1.shape[1] == 5

    # kl_loss should return sum of KL losses
    kl_loss = emb.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # Should be able to embed multiple columns by passing list of k and d
    emb = Embedding([10, 20], [5, 4])

    # Check parameters
    assert len(emb.parameters) == 2
    assert emb.parameters[0].name == "Embedding_0"
    assert emb.parameters[0].shape == [10, 5]
    assert emb.parameters[1].name == "Embedding_1"
    assert emb.parameters[1].shape == [20, 4]

    # Test MAP outputs are the same
    x1 = tf.random.uniform([20, 1], minval=0, maxval=9, dtype=tf.dtypes.int32)
    x2 = tf.random.uniform([20, 1], minval=0, maxval=19, dtype=tf.dtypes.int32)
    x = tf.concat([x1, x2], axis=1)
    samples1 = emb(x)
    samples2 = emb(x)
    assert np.all(samples1.numpy() == samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 20
    assert samples1.shape[1] == 9

    # With probabilistic = True, samples should be different
    emb = Embedding(10, 5, probabilistic=True)
    x = tf.random.uniform([20, 1], minval=0, maxval=9, dtype=tf.dtypes.int32)
    with Sampling(n=1):
        samples1 = emb(x)
        samples2 = emb(x)
    assert np.all(samples1.numpy() != samples2.numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 20
    assert samples1.shape[1] == 5
    assert all(not isinstance(e, DeterministicParameter)
               for e in emb.embeddings)
Example No. 13
def test_Module():
    """Tests the Module abstract base class"""
    class TestModule(Module):
        def __init__(self):
            self.p1 = Parameter(name="TestParam1")
            self.p2 = Parameter(name="TestParam2", shape=[5, 4])

        def __call__(self, x):
            return O.sum(self.p2(), axis=None) + x * self.p1()

    the_module = TestModule()

    # parameters should return a list of all the parameters
    param_list = the_module.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 2
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in param_list]
    assert "TestParam1" in param_names
    assert "TestParam2" in param_names

    # n_parameters property
    nparams = the_module.n_parameters
    assert isinstance(nparams, int)
    assert nparams == 21

    # n_variables property
    nvars = the_module.n_variables
    assert isinstance(nvars, int)
    assert nvars == 42

    # trainable_variables should return list of all variables in the model
    var_list = the_module.trainable_variables
    assert isinstance(var_list, list)
    assert len(var_list) == 4
    assert all(isinstance(v, tf.Variable) for v in var_list)

    # kl_loss should return sum of all the kl losses
    kl_loss = the_module.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # calling a module should return a tensor
    x = tf.random.normal([5])
    sample1 = the_module(x)
    assert isinstance(sample1, tf.Tensor)
    assert sample1.ndim == 1
    assert sample1.shape[0] == 5

    # should be the same when sampling is off
    sample2 = the_module(x)
    assert np.all(sample1.numpy() == sample2.numpy())

    # outputs should be different when sampling is on
    with Sampling():
        sample1 = the_module(x)
        sample2 = the_module(x)
    assert np.all(sample1.numpy() != sample2.numpy())

    # A second test module which contains sub-modules
    class TestModule2(Module):
        def __init__(self, shape):
            self.mod = TestModule()
            self.p3 = Parameter(name="TestParam3", shape=shape)

        def __call__(self, x):
            return self.mod(x) + O.sum(self.p3(), axis=None)

    the_module = TestModule2([3, 2])

    # parameters should return a list of all the parameters
    param_list = the_module.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 3
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in param_list]
    assert "TestParam1" in param_names
    assert "TestParam2" in param_names
    assert "TestParam3" in param_names

    # n_params property
    nparams = the_module.n_parameters
    assert isinstance(nparams, int)
    assert nparams == 27

    # trainable_variables should return list of all variables in the model
    var_list = the_module.trainable_variables
    assert isinstance(var_list, list)
    assert len(var_list) == 6
    assert all(isinstance(v, tf.Variable) for v in var_list)

    # kl_loss should return sum of all the kl losses
    kl_loss = the_module.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # parent module's loss should be greater than child module's
    assert the_module.kl_loss().numpy() > the_module.mod.kl_loss().numpy()

    # calling a module should return a tensor
    x = tf.random.normal([5])
    sample1 = the_module(x)
    assert isinstance(sample1, tf.Tensor)
    assert sample1.ndim == 1
    assert sample1.shape[0] == 5

    # of the appropriate size
    x = tf.random.normal([5, 4])
    sample1 = the_module(x)
    assert isinstance(sample1, tf.Tensor)
    assert sample1.ndim == 2
    assert sample1.shape[0] == 5
    assert sample1.shape[1] == 4

    # Another test module which contains lists/dicts w/ parameters
    class TestModule3(Module):
        def __init__(self):
            self.a_list = [
                Parameter(name="TestParam4"),
                Parameter(name="TestParam5"),
            ]
            self.a_dict = {
                "a": Parameter(name="TestParam6"),
                "b": Parameter(name="TestParam7"),
            }

        def __call__(self, x):
            return (tf.ones([x.shape[0], 1]) + self.a_list[0]() +
                    self.a_list[1]() + self.a_dict["a"]() + self.a_dict["b"]())

    the_module = TestModule3()

    # parameters should return a list of all the parameters
    param_list = the_module.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 4
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in param_list]
    assert "TestParam4" in param_names
    assert "TestParam5" in param_names
    assert "TestParam6" in param_names
    assert "TestParam7" in param_names

    # n_params property
    nparams = the_module.n_parameters
    assert isinstance(nparams, int)
    assert nparams == 4

    # Should be able to initialize and add kl losses
    the_module.reset_kl_loss()
    assert the_module.kl_loss_batch() == 0
    the_module.add_kl_loss(3.145)
    assert is_close(the_module.kl_loss_batch().numpy(), 3.145)

    # And should also be able to pass two dists to add_kl_loss
    the_module.reset_kl_loss()
    d1 = tfd.Normal(0.0, 1.0)
    d2 = tfd.Normal(1.0, 1.0)
    assert the_module.kl_loss_batch() == 0
    the_module.add_kl_loss(d1, d2)
    assert the_module.kl_loss_batch().numpy() > 0.0
Example No. 14
def test_DenseNetwork():
    """Tests probflow.modules.DenseNetwork"""

    # Should error w/ int < 1
    with pytest.raises(ValueError):
        DenseNetwork([0, 1, 5])
    with pytest.raises(ValueError):
        DenseNetwork([5, -1, 4])

    # Create the module
    dense_net = DenseNetwork([5, 4, 3, 2])

    # Test MAP outputs are the same
    x = torch.randn([7, 5])
    samples1 = dense_net(x)
    samples2 = dense_net(x)
    assert np.all(samples1.detach().numpy() == samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2

    # Test samples are different
    with Sampling(n=1):
        samples1 = dense_net(x)
        samples2 = dense_net(x)
    assert np.all(samples1.detach().numpy() != samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2

    # parameters should return [weights, bias] for each layer
    param_list = dense_net.parameters
    assert isinstance(param_list, list)
    assert len(param_list) == 6
    assert all(isinstance(p, Parameter) for p in param_list)
    param_names = [p.name for p in dense_net.parameters]
    assert "DenseNetwork_Dense0_weights" in param_names
    assert "DenseNetwork_Dense0_bias" in param_names
    assert "DenseNetwork_Dense1_weights" in param_names
    assert "DenseNetwork_Dense1_bias" in param_names
    assert "DenseNetwork_Dense2_weights" in param_names
    assert "DenseNetwork_Dense2_bias" in param_names
    shapes = {
        "DenseNetwork_Dense0_weights": [5, 4],
        "DenseNetwork_Dense0_bias": [1, 4],
        "DenseNetwork_Dense1_weights": [4, 3],
        "DenseNetwork_Dense1_bias": [1, 3],
        "DenseNetwork_Dense2_weights": [3, 2],
        "DenseNetwork_Dense2_bias": [1, 2],
    }
    for name, shape in shapes.items():
        param = [p for p in dense_net.parameters if p.name == name]
        assert param[0].shape == shape

    # kl_loss should return sum of KL losses
    kl_loss = dense_net.kl_loss()
    assert isinstance(kl_loss, torch.Tensor)
    assert kl_loss.ndim == 0

    # test Flipout
    with Sampling(n=1, flipout=True):
        samples1 = dense_net(x)
        samples2 = dense_net(x)
    assert np.all(samples1.detach().numpy() != samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2

    # With probabilistic = False
    dense_net = DenseNetwork([5, 4, 3, 2], probabilistic=False)
    with Sampling(n=1):
        samples1 = dense_net(x)
        samples2 = dense_net(x)
    assert np.all(samples1.detach().numpy() == samples2.detach().numpy())
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2

    # With batch norm after each layer
    dense_net = DenseNetwork([5, 4, 3, 2],
                             batch_norm=True,
                             batch_norm_loc="after")
    with Sampling(n=1):
        samples1 = dense_net(x)
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2

    # With batch norm before each layer
    dense_net = DenseNetwork([5, 4, 3, 2],
                             batch_norm=True,
                             batch_norm_loc="before")
    with Sampling(n=1):
        samples1 = dense_net(x)
    assert samples1.ndim == 2
    assert samples1.shape[0] == 7
    assert samples1.shape[1] == 2
Example No. 15
def test_Parameter_scalar():
    """Tests the generic scalar Parameter"""

    # Create scalar parameter
    param = Parameter()

    # Check defaults
    assert isinstance(param.shape, list)
    assert param.shape[0] == 1
    assert isinstance(param.untransformed_variables, dict)
    assert all(isinstance(p, str) for p in param.untransformed_variables)
    assert all(
        isinstance(p, tf.Variable)
        for _, p in param.untransformed_variables.items())

    # Shape should be >0
    with pytest.raises(ValueError):
        Parameter(shape=-1)
    with pytest.raises(ValueError):
        Parameter(shape=[20, 0, 1])

    # trainable_variables should be a property returning list of vars
    assert all(isinstance(v, tf.Variable) for v in param.trainable_variables)

    # variables should be a property returning dict of transformed vars
    assert isinstance(param.variables, dict)
    assert all(isinstance(v, str) for v in param.variables)

    # loc should be variable, while scale should have been transformed->tensor
    assert isinstance(param.variables["loc"], tf.Variable)
    assert isinstance(param.variables["scale"], tf.Tensor)

    # posterior should be a distribution object
    assert isinstance(param.posterior, BaseDistribution)
    assert isinstance(param.posterior(), tfd.Normal)

    # __call__ should return the MAP estimate by default
    sample1 = param()
    sample2 = param()
    assert sample1.ndim == 1
    assert sample2.ndim == 1
    assert sample1.shape[0] == 1
    assert sample2.shape[0] == 1
    assert sample1.numpy() == sample2.numpy()

    # within a Sampling statement, should randomly sample from the dist
    with Sampling(n=1):
        sample1 = param()
        sample2 = param()
    assert sample1.ndim == 1
    assert sample2.ndim == 1
    assert sample1.shape[0] == 1
    assert sample2.shape[0] == 1
    assert sample1.numpy() != sample2.numpy()

    # sampling statement should draw N samples
    with Sampling(n=10):
        sample1 = param()
        sample2 = param()
    assert sample1.ndim == 2
    assert sample2.ndim == 2
    assert sample1.shape[0] == 10
    assert sample1.shape[1] == 1
    assert sample2.shape[0] == 10
    assert sample2.shape[1] == 1
    assert np.all(sample1.numpy() != sample2.numpy())

    # sampling statement should allow static samples
    sample1 = param()
    with Sampling(static=True):
        with Sampling(n=1):
            sample2 = param()
            sample3 = param()
    with Sampling(static=True):
        with Sampling(n=1):
            sample4 = param()
            sample5 = param()
    assert sample1.ndim == 1
    assert sample2.ndim == 1
    assert sample3.ndim == 1
    assert sample4.ndim == 1
    assert sample5.ndim == 1
    assert sample1.shape[0] == 1
    assert sample2.shape[0] == 1
    assert sample3.shape[0] == 1
    assert sample4.shape[0] == 1
    assert sample5.shape[0] == 1
    assert sample1.numpy() != sample2.numpy()
    assert sample1.numpy() != sample3.numpy()
    assert sample2.numpy() == sample3.numpy()
    assert sample1.numpy() != sample4.numpy()
    assert sample1.numpy() != sample5.numpy()
    assert sample4.numpy() == sample5.numpy()
    assert sample2.numpy() != sample4.numpy()

    # sampling statement should allow static samples (and work w/ n>1)
    with Sampling(static=True):
        with Sampling(n=5):
            sample1 = param()
            sample2 = param()
    with Sampling(static=True):
        with Sampling(n=5):
            sample3 = param()
            sample4 = param()
    assert sample1.ndim == 2
    assert sample2.ndim == 2
    assert sample3.ndim == 2
    assert sample4.ndim == 2
    assert sample1.shape[0] == 5
    assert sample1.shape[1] == 1
    assert sample2.shape[0] == 5
    assert sample2.shape[1] == 1
    assert sample3.shape[0] == 5
    assert sample3.shape[1] == 1
    assert sample4.shape[0] == 5
    assert sample4.shape[1] == 1
    assert np.all(sample1.numpy() == sample2.numpy())
    assert np.all(sample1.numpy() != sample3.numpy())
    assert np.all(sample1.numpy() != sample4.numpy())
    assert np.all(sample2.numpy() != sample3.numpy())
    assert np.all(sample2.numpy() != sample4.numpy())
    assert np.all(sample3.numpy() == sample4.numpy())

    # kl_loss should return sum of kl divergences
    kl_loss = param.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # prior_sample should be 1D
    prior_sample = param.prior_sample()
    assert prior_sample.ndim == 0
    prior_sample = param.prior_sample(n=7)
    assert prior_sample.ndim == 1
    assert prior_sample.shape[0] == 7

    # prior and posterior shouldn't be the same (post was randomly initialized)
    assert tf.reduce_all(param.prior.loc != param.posterior.loc).numpy()
    assert tf.reduce_all(param.prior.scale != param.posterior.scale).numpy()

    # but they should be the same after running bayesian_update
    param.bayesian_update()
    assert tf.reduce_all(param.prior.loc == param.posterior.loc).numpy()
    assert tf.reduce_all(param.prior.scale == param.posterior.scale).numpy()
Example No. 16
    def log_prob(
        self,
        x,
        y=None,
        individually=True,
        distribution=False,
        n=1000,
        batch_size=None,
    ):
        """Compute the log probability of `y` given the model

        TODO: Docs...


        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or Tensor
            Independent variable values of the dataset to evaluate (aka the
            "features").
        y : |ndarray| or |DataFrame| or |Series| or Tensor
            Dependent variable values of the dataset to evaluate (aka the
            "target").
        individually : bool
            If ``individually`` is True, returns the log probability of each
            sample individually, so the return shape is ``(x.shape[0], ?)``.
            If ``individually`` is False, returns the sum of all log
            probabilities, so the return shape is ``(1, ?)``.
        distribution : bool
            If ``distribution`` is True, returns the posterior distribution of
            log probabilities (``n`` samples from the model), so the return
            shape is ``(?, n)``.
            If ``distribution`` is False, returns log posterior probabilities
            computed using the maximum a posteriori estimate of each
            parameter, so the return shape is ``(?, 1)``.
        n : int
            Number of samples to draw for each distribution if
            ``distribution=True``.
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).

        Returns
        -------
        log_probs : |ndarray|
            Log probabilities. Shape is determined by ``individually``,
            ``distribution``, and ``n`` kwargs.
        """

        # Get a distribution of samples
        if distribution:
            with Sampling(n=1, flipout=False):
                probs = []
                for i in range(n):
                    t_probs = []
                    for x_data, y_data in make_generator(
                        x, y, batch_size=batch_size
                    ):
                        if x_data is None:
                            t_probs += [self().log_prob(y_data)]
                        else:
                            t_probs += [self(x_data).log_prob(y_data)]
                    probs += [np.concatenate(to_numpy(t_probs), axis=0)]
            probs = np.stack(to_numpy(probs), axis=probs[0].ndim)

        # Use MAP estimates
        else:
            probs = []
            for x_data, y_data in make_generator(x, y, batch_size=batch_size):
                if x_data is None:
                    probs += [self().log_prob(y_data)]
                else:
                    probs += [self(x_data).log_prob(y_data)]
            probs = np.concatenate(to_numpy(probs), axis=0)

        # Return log prob of each sample or sum of log probs
        if individually:
            return probs
        else:
            return np.sum(probs, axis=0)
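
A usage sketch of the different keyword combinations (model, x_val, and y_val are hypothetical names; the model is assumed to have already been fit):

lp_each = model.log_prob(x_val, y_val)                            # one log prob per datapoint
lp_sum = model.log_prob(x_val, y_val, individually=False)         # summed over datapoints
lp_dist = model.log_prob(x_val, y_val, distribution=True, n=100)  # posterior draws of the log probs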
Example No. 17
def test_Parameter_scalar():
    """Tests the generic scalar Parameter"""

    # Create scalar parameter
    param = Parameter()

    # Check defaults
    assert isinstance(param.shape, list)
    assert param.shape[0] == 1
    assert isinstance(param.untransformed_variables, dict)
    assert all(isinstance(p, str) for p in param.untransformed_variables)
    assert all(
        isinstance(p, tf.Variable)
        for _, p in param.untransformed_variables.items())

    # Shape should be >0
    with pytest.raises(ValueError):
        Parameter(shape=-1)
    with pytest.raises(ValueError):
        Parameter(shape=[20, 0, 1])

    # trainable_variables should be a property returning list of vars
    assert all(isinstance(v, tf.Variable) for v in param.trainable_variables)

    # variables should be a property returning dict of transformed vars
    assert isinstance(param.variables, dict)
    assert all(isinstance(v, str) for v in param.variables)

    # loc should be variable, while scale should have been transformed->tensor
    assert isinstance(param.variables["loc"], tf.Variable)
    assert isinstance(param.variables["scale"], tf.Tensor)

    # posterior should be a distribution object
    assert isinstance(param.posterior, BaseDistribution)
    assert isinstance(param.posterior(), tfd.Normal)

    # __call__ should return the MAP estimate by default
    sample1 = param()
    sample2 = param()
    assert sample1.ndim == 1
    assert sample2.ndim == 1
    assert sample1.shape[0] == 1
    assert sample2.shape[0] == 1
    assert sample1.numpy() == sample2.numpy()

    # within a Sampling statement, should randomly sample from the dist
    with Sampling():
        sample1 = param()
        sample2 = param()
    assert sample1.ndim == 1
    assert sample2.ndim == 1
    assert sample1.shape[0] == 1
    assert sample2.shape[0] == 1
    assert sample1.numpy() != sample2.numpy()

    # sampling statement should draw N samples
    with Sampling(n=10):
        sample1 = param()
        sample2 = param()
    assert sample1.ndim == 2
    assert sample2.ndim == 2
    assert sample1.shape[0] == 10
    assert sample1.shape[1] == 1
    assert sample2.shape[0] == 10
    assert sample2.shape[1] == 1
    assert np.all(sample1.numpy() != sample2.numpy())

    # kl_loss should return sum of kl divergences
    kl_loss = param.kl_loss()
    assert isinstance(kl_loss, tf.Tensor)
    assert kl_loss.ndim == 0

    # prior_sample should be 1D
    prior_sample = param.prior_sample()
    assert prior_sample.ndim == 0
    prior_sample = param.prior_sample(n=7)
    assert prior_sample.ndim == 1
    assert prior_sample.shape[0] == 7