def _single_credit_cluster(idx):
    """Build the applicant distribution for the credit cluster at ``idx``.

    The features are a constant one-hot ``int32`` vector (built with
    ``np.zeros(feature_dim)``) whose entry at ``idx`` is 1. The
    ``will_default`` distribution is a Bernoulli parameterised by
    ``1 - success_probs[idx]``. ``feature_dim``, ``success_probs`` and
    ``group_membership`` are taken from the enclosing scope.
    """
    one_hot = np.zeros(feature_dim, dtype=np.int32)
    one_hot[idx] = 1

    # Built in the same order as the keyword arguments they feed.
    features = distributions.Constant(mean=one_hot)
    membership = distributions.Constant(group_membership)
    will_default = distributions.Bernoulli(1 - success_probs[idx])

    return ApplicantDistribution(
        features=features,
        group_membership=membership,
        will_default=will_default,
    )
def test_bernoulli_sampling():
    """Check that the empirical mean of Bernoulli samples matches ``mean``.

    Draws 10000 samples and compares their average with the distribution's
    own ``mean`` at a relative tolerance of 1e-2.
    """
    num_draws = 10000
    bernoulli = distributions.Bernoulli(mx.nd.array([0.232]))

    draws = bernoulli.sample(num_draws)
    empirical_mean = mx.nd.mean(draws).asnumpy()
    expected_mean = bernoulli.mean.asnumpy()

    print('sampling mean, mean', empirical_mean, expected_mean)
    np.testing.assert_allclose(empirical_mean, expected_mean, rtol=1e-2)
def test_bernoulli_log_prob():
    """Compare ``Bernoulli.log_prob`` against ``scipy.stats.bernoulli.logpmf``.

    The SciPy reference is evaluated with ``p`` set to the distribution's own
    ``mean``, on the same binary observations.
    """
    observations = [0, 1, 0, 0, 1]
    bernoulli = distributions.Bernoulli(mx.nd.array([0.384]))

    # Reference value from SciPy.
    success_prob = bernoulli.mean.asnumpy()
    expected = scipy.stats.bernoulli.logpmf(
        np.array(observations), p=success_prob)

    # Value under test.
    actual = bernoulli.log_prob(mx.nd.array(observations)).asnumpy()

    np.testing.assert_allclose(actual, expected)
def _single_gmm():
    """Return a two-component mixture of Gaussian applicant distributions.

    The first component is centred on ``mean``; the second is centred on
    ``mean + intercluster_vec``. Both use ``std=0.5`` and the same constant
    ``group``. The components are weighted 0.3 and 0.7 and take their default
    likelihoods from ``default_likelihoods[0]`` and ``default_likelihoods[1]``.
    ``mean``, ``intercluster_vec``, ``group`` and ``default_likelihoods`` are
    taken from the enclosing scope.
    """
    base_cluster = ApplicantDistribution(
        features=distributions.Gaussian(mean=mean, std=0.5),
        group_membership=distributions.Constant(group),
        will_default=distributions.Bernoulli(p=default_likelihoods[0]),
    )

    shifted_mean = np.array(mean) + np.array(intercluster_vec)
    shifted_cluster = ApplicantDistribution(
        features=distributions.Gaussian(mean=shifted_mean, std=0.5),
        group_membership=distributions.Constant(group),
        will_default=distributions.Bernoulli(p=default_likelihoods[1]),
    )

    return distributions.Mixture(
        components=[base_cluster, shifted_cluster],
        weights=[0.3, 0.7],
    )
def test_improper_distributions_raise_errors(self):
    """Invalid Bernoulli ``p`` values and Mixture weights raise ValueError."""
    invalid_probabilities = [-10, -0.9, 1.3]
    for bad_p in invalid_probabilities:
        with self.assertRaises(ValueError):
            distributions.Bernoulli(p=bad_p)

    invalid_weight_vectors = [
        [0.1, 0.3, 0.5],  # Does not sum to one.
        [0.5, 0.9, -0.4],  # Has negative values.
    ]
    for bad_weights in invalid_weight_vectors:
        with self.assertRaises(ValueError):
            # One placeholder component per weight, so only the weights are
            # invalid.
            distributions.Mixture(
                weights=bad_weights,
                components=[distributions.Constant(mean=(0,))]
                * len(bad_weights),
            )
def p_x_fn(self,
           z_above: nd.NDArray,
           weight: nd.NDArray,
           bias: nd.NDArray = None) -> distributions.BaseDistribution:
    """Build the data distribution p(x | z_above) for the configured family.

    Args:
        z_above: Latent samples from the layer above, shaped
            [n_samples, batch_size, size_above].
        weight: Weights shaped [size_above, data_size].
        bias: Optional bias added to ``dot(z_above, weight)`` for the
            'gaussian' and 'bernoulli' families. When ``None`` (the default)
            no bias is added. It is not used for 'poisson'.

    Returns:
        A ``distributions.Gaussian`` (with an all-ones second argument), a
        ``distributions.Bernoulli`` built from logits, or a
        ``distributions.Poisson``, depending on ``self.data_distribution``.

    Raises:
        ValueError: If ``self.data_distribution`` is not one of 'gaussian',
            'bernoulli' or 'poisson'.
    """
    family = self.data_distribution

    if family in ('gaussian', 'bernoulli'):
        params = nd.dot(z_above, weight)
        # The signature makes `bias` optional, so only add it when given;
        # adding None to an NDArray would raise.
        if bias is not None:
            params = params + bias
        if family == 'gaussian':
            variance = nd.ones_like(params)
            return distributions.Gaussian(params, variance)
        return distributions.Bernoulli(logits=params)

    if family == 'poisson':
        # minimum intercept is 0.01
        return distributions.Poisson(
            0.01 + nd.dot(z_above, util.softplus(weight)))

    raise ValueError('Incompatible data distribution: %s' %
                     self.data_distribution)
def test_bernoulli_returns_proportionally(self):
    """The mean of 1000 Bernoulli(p=0.9) draws is within 0.1 of 0.9."""
    success_prob = 0.9
    num_draws = 1000

    bernoulli = distributions.Bernoulli(p=success_prob)
    # Fixed seed keeps the sampled sequence reproducible.
    rng = np.random.RandomState(seed=100)

    draws = [bernoulli.sample(rng) for _ in range(num_draws)]
    observed_rate = np.mean(draws)

    self.assertAlmostEqual(observed_rate, success_prob, delta=0.1)