def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            ipfn = make_initial_point_fn(
                model=self.model,
                overrides=start,
                jitter_rvs={},
                return_transformed=True,
            )
            start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
            start = pm.floatX(DictToArrayBijection.map(start))
            # Initialize particles
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t))
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def create_shared_params(self, start=None):
    start = self._prepare_start(start)
    rho = np.zeros((self.ddim,))
    return {
        "mu": aesara.shared(pm.floatX(start), "mu"),
        "rho": aesara.shared(pm.floatX(rho), "rho"),
    }
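# A minimal NumPy sketch (not part of the snippet above) of the softplus
# parametrization commonly used by mean-field ADVI-style code to map the
# unconstrained `rho` vector to a positive standard deviation. The helper
# name `rho_to_sigma` is hypothetical and only illustrates the idea.
import numpy as np

def rho_to_sigma(rho):
    # softplus keeps sigma strictly positive for any real-valued rho
    return np.log1p(np.exp(rho))

rho = np.zeros(3)          # the initialization used in create_shared_params above
print(rho_to_sigma(rho))   # -> sigma = log(2) ~ 0.693 for every entry at rho = 0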
def __call__(self, x):
    neg_value = np.float64(self.logp_func(pm.floatX(x)))
    value = -1.0 * nan_to_high(neg_value)
    if self.use_gradient:
        neg_grad = self.dlogp_func(pm.floatX(x))
        if np.all(np.isfinite(neg_grad)):
            self.previous_x = x
        grad = nan_to_num(-1.0 * neg_grad)
        grad = grad.astype(np.float64)
    else:
        self.previous_x = x
        grad = None

    if self.n_eval % 10 == 0:
        self.update_progress_desc(neg_value, grad)

    if self.n_eval > self.maxeval:
        self.update_progress_desc(neg_value, grad)
        raise StopIteration

    self.n_eval += 1
    if self.progressbar:
        assert isinstance(self.progress, ProgressBar)
        self.progress.update_bar(self.n_eval)

    if self.use_gradient:
        return value, grad
    else:
        return value
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = at.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = aesara.shared(pm.floatX([0.1, 0.1]))
    be = aesara.shared(pm.floatX(1.0))

    ad = aesara.shared(pm.floatX(1.0))
    bd = aesara.shared(pm.floatX(1.0))

    enc = x_inp.dimshuffle(0, "x") * ae.dimshuffle("x", 0) + be
    mu, rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal("zs", mu=0, sigma=1, size=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal("xs_", mu=dec, sigma=0.1, observed=x_inp)

        pm.fit(
            1,
            local_rv={zs: dict(mu=mu, rho=rho)},
            more_replacements={x_inp: x_mini},
            more_obj_params=[ae, be, ad, bd],
        )
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            start = self._prepare_start(start)
            # Initialize particles
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t)).data
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def test_observed_type(self):
    X_ = pm.floatX(np.random.randn(100, 5))
    X = pm.floatX(aesara.shared(X_))
    with pm.Model():
        x1 = pm.Normal("x1", observed=X_)
        x2 = pm.Normal("x2", observed=X)

    assert x1.type == X.type
    assert x2.type == X.type
def test_free_rv(self):
    with pm.Model() as model4:
        Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p4 = aesara.function([], model4.logp())

    with pm.Model() as model5:
        n = Normal("n", total_size=[2, Ellipsis, 2], size=(2, 2))
        p5 = aesara.function([n.tag.value_var], model5.logp())

    assert p4() == p5(pm.floatX([[1]]))
    assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
def test_fit_with_nans(score):
    X_mean = pm.floatX(np.linspace(0, 10, 10))
    y = pm.floatX(np.random.normal(X_mean * 4, 0.05))
    with pm.Model():
        inp = pm.Normal("X", X_mean, size=X_mean.shape)
        coef = pm.Normal("b", 4.0)
        mean = inp * coef
        pm.Normal("y", mean, 0.1, observed=y)
        with pytest.raises(FloatingPointError) as e:
            advi = pm.fit(100, score=score, obj_optimizer=pm.adam(learning_rate=float("nan")))
def test_var_replacement():
    X_mean = pm.floatX(np.linspace(0, 10, 10))
    y = pm.floatX(np.random.normal(X_mean * 4, 0.05))
    with pm.Model():
        inp = pm.Normal("X", X_mean, size=X_mean.shape)
        coef = pm.Normal("b", 4.0)
        mean = inp * coef
        pm.Normal("y", mean, 0.1, observed=y)
        advi = pm.fit(100)
        assert advi.sample_node(mean).eval().shape == (10,)

        x_new = pm.floatX(np.linspace(0, 10, 11))
        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11,)
def randidx(self, size=None):
    if size is None:
        size = (1,)
    elif isinstance(size, TensorVariable):
        if size.ndim < 1:
            size = size[None]
        elif size.ndim > 1:
            raise ValueError("size ndim should be no more than 1d")
        else:
            pass
    else:
        size = tuple(np.atleast_1d(size))
    return self._rng.uniform(
        size=size,
        low=pm.floatX(0),
        high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16),
    ).astype("int32")
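# A small NumPy sketch (illustrative only, not PyMC code) of the indexing trick
# used by `randidx` above and by `rslice` further down: draw uniform floats in
# [0, N - eps) and truncate them to integers, which yields random indices
# uniformly distributed over 0 .. N - 1.
import numpy as np

rng = np.random.default_rng(42)
n_rows = 100                      # stands in for histogram.shape[0] / total
idx = rng.uniform(low=0.0, high=n_rows - 1e-16, size=10).astype("int64")
assert idx.min() >= 0 and idx.max() < n_rows
print(idx)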
def test_aesara_switch_broadcast_edge_cases_1(self):
    # Tests against two subtle issues related to a previous bug in Theano
    # where `tt.switch` would not always broadcast tensors with single
    # values https://github.com/pymc-devs/aesara/issues/270

    # Known issue 1: https://github.com/pymc-devs/pymc/issues/4389
    data = pm.floatX(np.zeros(10))
    with pm.Model() as m:
        p = pm.Beta("p", 1, 1)
        obs = pm.Bernoulli("obs", p=p, observed=data)

    npt.assert_allclose(
        logpt_sum(obs).eval({p.tag.value_var: pm.floatX(np.array(0.0))}),
        np.log(0.5) * 10,
    )
def test_cloning_available(self):
    gop = generator(integers())
    res = gop**2
    shared = aesara.shared(floatX(10))
    res1 = aesara.clone_replace(res, {gop: shared})
    f = aesara.function([], res1)
    assert f() == np.float32(100)
def apply(self, f):
    # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
    stein = Stein(
        approx=self.approx,
        kernel=f,
        use_histogram=self.approx.all_histograms,
        temperature=self.temperature,
    )
    return pm.floatX(-1) * stein.grad
def rslice(self, total, size, seed):
    if size is None:
        return slice(None)
    elif isinstance(size, int):
        rng = pm.at_rng(seed)
        Minibatch.RNG[id(self)].append(rng)
        return rng.uniform(size=(size,), low=0.0, high=pm.floatX(total) - 1e-16).astype("int64")
    else:
        raise TypeError("Unrecognized size type, %r" % size)
def build_model(self, distfam, params, size, transform, initval=None):
    if initval is not None:
        initval = pm.floatX(initval)
    with pm.Model() as m:
        distfam("x", size=size, transform=transform, initval=initval, **params)
    return m
def create_shared_params(self, start=None):
    ipfn = make_initial_point_fn(
        model=self.model,
        overrides=start,
        jitter_rvs={},
        return_transformed=True,
    )
    start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    rho = np.zeros((self.ddim,))
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        rho = np.tile(rho, (self.bdim, 1))
    return {
        "mu": aesara.shared(pm.floatX(start), "mu"),
        "rho": aesara.shared(pm.floatX(rho), "rho"),
    }
def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10):
    """Returns a function that returns parameter updates.

    Instead of an accumulated estimate, uses a running window of past squared gradients.

    Parameters
    ----------
    loss_or_grads: symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params: list of shared variables
        The variables to generate update expressions for
    learning_rate: float
        Learning rate.
    epsilon: float
        Offset to avoid zero division in the normalizer of adagrad.
    n_win: int
        Number of past steps used to calculate the scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    elif loss_or_grads is None or params is None:
        raise ValueError("Please provide both `loss_or_grads` and `params` to get updates")
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        i = aesara.shared(pm.floatX(0))
        i_int = i.astype("int32")
        value = param.get_value(borrow=True)
        accu = aesara.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype))

        # Append squared gradient vector to accu_new
        accu_new = at.set_subtensor(accu[..., i_int], grad**2)
        i_new = at.switch((i + 1) < n_win, i + 1, 0)
        updates[accu] = accu_new
        updates[i] = i_new

        accu_sum = accu_new.sum(axis=-1)
        updates[param] = param - (learning_rate * grad / at.sqrt(accu_sum + epsilon))
    return updates
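# Hedged usage sketch for the curried optimizer above: calling it with only
# hyperparameters returns a partial that `pm.fit` later completes with the
# objective and parameters. The toy model is illustrative, and it assumes
# `adagrad_window` is exposed at the top level (as `pm.adam` is in the tests
# above); adjust the import if it is not.
import numpy as np
import pymc as pm  # the Aesara-based PyMC version used throughout this file

if __name__ == "__main__":
    data = pm.floatX(np.random.normal(size=100))
    with pm.Model():
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=data)
        # adagrad_window(learning_rate=..., n_win=...) is applied by pm.fit to
        # (loss, params) to build the update dictionary at compile time
        approx = pm.fit(1000, obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=10))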
def test_scale_cost_to_minibatch_works(aux_total_size):
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])
    beta = len(y_obs) / float(aux_total_size)

    # TODO: aesara_config
    # with pm.Model(aesara_config=dict(floatX='float64')):
    # did not work as expected
    # there were some numeric problems, so float64 is forced
    with aesara.config.change_flags(floatX="float64", warn_float64="ignore"):

        assert aesara.config.floatX == "float64"
        assert aesara.config.warn_float64 == "ignore"

        post_mu = np.array([1.88], dtype=aesara.config.floatX)
        post_sigma = np.array([1], dtype=aesara.config.floatX)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_1 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_1.shared_params["mu"].set_value(post_mu)
            mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

            with aesara.config.change_flags(compute_test_value="off"):
                elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_2 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_2.scale_cost_to_minibatch = False
            assert not mean_field_2.scale_cost_to_minibatch
            mean_field_2.shared_params["mu"].set_value(post_mu)
            mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

        with aesara.config.change_flags(compute_test_value="off"):
            elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)

        np.testing.assert_allclose(
            elbo_via_total_size_unscaled.eval(),
            elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta),
            rtol=0.02,
            atol=1e-1,
        )
def __call__(self, nmc, **kwargs):
    op = self.op  # type: KSD
    grad = op.apply(self.tf)
    if self.approx.all_histograms:
        z = self.approx.joint_histogram
    else:
        z = self.approx.symbolic_random
    if "more_obj_params" in kwargs:
        params = self.obj_params + kwargs["more_obj_params"]
    else:
        params = self.test_params + kwargs["more_tf_params"]
        grad *= pm.floatX(-1)
    grads = at.grad(None, params, known_grads={z: grad})
    return self.approx.set_size_and_deterministic(
        grads, nmc, 0, kwargs.get("more_replacements")
    )
class TestElementWiseLogp(SeededTest):
    def build_model(self, distfam, params, size, transform, initval=None):
        if initval is not None:
            initval = pm.floatX(initval)
        with pm.Model() as m:
            distfam("x", size=size, transform=transform, initval=initval, **params)
        return m

    def check_transform_elementwise_logp(self, model):
        x = model.free_RVs[0]
        x_val_transf = x.tag.value_var

        pt = model.initial_point(0)
        test_array_transf = floatX(np.random.randn(*pt[x_val_transf.name].shape))
        transform = x_val_transf.tag.transform
        test_array_untransf = transform.backward(test_array_transf, *x.owner.inputs).eval()

        # Create input variable with same dimensionality as untransformed test_array
        x_val_untransf = at.constant(test_array_untransf).type()

        jacob_det = transform.log_jac_det(test_array_transf, *x.owner.inputs)
        assert joint_logpt(x, sum=False)[0].ndim == x.ndim == jacob_det.ndim

        v1 = joint_logpt(x, x_val_transf, jacobian=False).eval({x_val_transf: test_array_transf})
        v2 = joint_logpt(x, x_val_untransf, transformed=False).eval(
            {x_val_untransf: test_array_untransf}
        )
        close_to(v1, v2, tol)

    def check_vectortransform_elementwise_logp(self, model):
        x = model.free_RVs[0]
        x_val_transf = x.tag.value_var

        pt = model.initial_point(0)
        test_array_transf = floatX(np.random.randn(*pt[x_val_transf.name].shape))
        transform = x_val_transf.tag.transform
        test_array_untransf = transform.backward(test_array_transf, *x.owner.inputs).eval()

        # Create input variable with same dimensionality as untransformed test_array
        x_val_untransf = at.constant(test_array_untransf).type()

        jacob_det = transform.log_jac_det(test_array_transf, *x.owner.inputs)
        # Original distribution is univariate
        if x.owner.op.ndim_supp == 0:
            assert joint_logpt(x, sum=False)[0].ndim == x.ndim == (jacob_det.ndim + 1)
        # Original distribution is multivariate
        else:
            assert joint_logpt(x, sum=False)[0].ndim == (x.ndim - 1) == jacob_det.ndim

        a = joint_logpt(x, x_val_transf, jacobian=False).eval({x_val_transf: test_array_transf})
        b = joint_logpt(x, x_val_untransf, transformed=False).eval(
            {x_val_untransf: test_array_untransf}
        )
        # Hack to get relative tolerance
        close_to(a, b, np.abs(0.5 * (a + b) * tol))

    @pytest.mark.parametrize(
        "sigma,size",
        [
            (2.5, 2),
            (5.0, (2, 3)),
            (np.ones(3) * 10.0, (4, 3)),
        ],
    )
    def test_half_normal(self, sigma, size):
        model = self.build_model(pm.HalfNormal, {"sigma": sigma}, size=size, transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))])
    def test_exponential(self, lam, size):
        model = self.build_model(pm.Exponential, {"lam": lam}, size=size, transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, 2),
            (0.5, 0.5, (2, 3)),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta(self, a, b, size):
        model = self.build_model(
            pm.Beta, {"alpha": a, "beta": b}, size=size, transform=tr.logodds
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [
            (0.0, 1.0, 2),
            (0.5, 5.5, (2, 3)),
            (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)),
        ],
    )
    def test_uniform(self, lower, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(bounds_fn=transform_params)
        model = self.build_model(
            pm.Uniform, {"lower": lower, "upper": upper}, size=size, transform=interval
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower, c, upper, size",
        [
            (0.0, 1.0, 2.0, 2),
            (-10, 0, 200, (2, 3)),
            (np.zeros(3), np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_triangular(self, lower, c, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, _, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(bounds_fn=transform_params)
        model = self.build_model(
            pm.Triangular, {"lower": lower, "c": c, "upper": upper}, size=size, transform=interval
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "mu,kappa,size",
        [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))],
    )
    def test_vonmises(self, mu, kappa, size):
        model = self.build_model(
            pm.VonMises, {"mu": mu, "kappa": kappa}, size=size, transform=tr.circular
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,size", [(np.ones(2), None), (np.ones((2, 3)) * 0.5, None), (np.ones(3), (4,))]
    )
    def test_dirichlet(self, a, size):
        model = self.build_model(pm.Dirichlet, {"a": a}, size=size, transform=tr.simplex)
        self.check_vectortransform_elementwise_logp(model)

    def test_normal_ordered(self):
        model = self.build_model(
            pm.Normal,
            {"mu": 0.0, "sigma": 1.0},
            size=3,
            initval=np.asarray([-1.0, 1.0, 4.0]),
            transform=tr.ordered,
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "sigma,size",
        [
            (2.5, (2,)),
            (np.ones(3), (4, 3)),
        ],
    )
    def test_half_normal_ordered(self, sigma, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.HalfNormal,
            {"sigma": sigma},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize("lam,size", [(2.5, (2,)), (np.ones(3), (4, 3))])
    def test_exponential_ordered(self, lam, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.Exponential,
            {"lam": lam},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, (2,)),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta_ordered(self, a, b, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Beta,
            {"alpha": a, "beta": b},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.logodds, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [(0.0, 1.0, (2,)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))],
    )
    def test_uniform_ordered(self, lower, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(bounds_fn=transform_params)
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Uniform,
            {"lower": lower, "upper": upper},
            size=size,
            initval=initval,
            transform=tr.Chain([interval, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "mu,kappa,size", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))]
    )
    def test_vonmises_ordered(self, mu, kappa, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.VonMises,
            {"mu": mu, "kappa": kappa},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.circular, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower,upper,size,transform",
        [
            (0.0, 1.0, (2,), tr.simplex),
            (0.5, 5.5, (2, 3), tr.simplex),
            (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])),
        ],
    )
    def test_uniform_other(self, lower, upper, size, transform):
        initval = np.ones(size) / size[-1]
        model = self.build_model(
            pm.Uniform,
            {"lower": lower, "upper": upper},
            size=size,
            initval=initval,
            transform=transform,
        )
        self.check_vectortransform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "mu,cov,size,shape",
        [
            (np.zeros(2), np.diag(np.ones(2)), None, (2,)),
            (np.zeros(3), np.diag(np.ones(3)), (4,), (4, 3)),
        ],
    )
    def test_mvnormal_ordered(self, mu, cov, size, shape):
        initval = np.sort(np.random.randn(*shape))
        model = self.build_model(
            pm.MvNormal, {"mu": mu, "cov": cov}, size=size, initval=initval, transform=tr.ordered
        )
        self.check_vectortransform_elementwise_logp(model)
def __init__(self, approx, beta=1.0):
    super().__init__(approx)
    self.beta = pm.floatX(beta)
def integers():
    i = 0
    while True:
        yield pm.floatX(i)
        i += 1
def gen():
    for i in range(2):
        yield floatX(np.ones((10, 10)) * i)
def cov(self):
    x = self.histogram - self.mean
    return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
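# A brief NumPy sketch (illustrative, not PyMC code) of what the `cov` property
# above computes: the biased empirical covariance of the particle histogram,
# i.e. the same result as np.cov with bias=True (division by N, not N - 1).
import numpy as np

histogram = np.random.randn(500, 3)          # stands in for self.histogram
x = histogram - histogram.mean(axis=0)       # stands in for self.histogram - self.mean
manual_cov = x.T.dot(x) / histogram.shape[0]
np.testing.assert_allclose(manual_cov, np.cov(histogram, rowvar=False, bias=True))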
class TestElementWiseLogp(SeededTest):
    def build_model(self, distfam, params, size, transform, initval=None):
        if initval is not None:
            initval = pm.floatX(initval)
        with pm.Model() as m:
            distfam("x", size=size, transform=transform, initval=initval, **params)
        return m

    def check_transform_elementwise_logp(self, model):
        x = model.free_RVs[0]
        x0 = x.tag.value_var
        assert x.ndim == logpt(x, sum=False).ndim

        pt = model.initial_point
        array = np.random.randn(*pt[x0.name].shape)
        transform = x0.tag.transform
        logp_notrans = logpt(x, transform.backward(array, *x.owner.inputs), transformed=False)

        jacob_det = transform.log_jac_det(aesara.shared(array), *x.owner.inputs)
        assert logpt(x, sum=False).ndim == jacob_det.ndim

        v1 = logpt(x, array, jacobian=False).eval()
        v2 = logp_notrans.eval()
        close_to(v1, v2, tol)

    def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
        x = model.free_RVs[0]
        x0 = x.tag.value_var
        # TODO: For some reason the ndim relations don't hold up here,
        # but the final log-probability values are what we expect.
        # assert (x.ndim - 1) == logpt(x, sum=False).ndim

        pt = model.initial_point
        array = np.random.randn(*pt[x0.name].shape)
        transform = x0.tag.transform
        logp_nojac = logpt(x, transform.backward(array, *x.owner.inputs), transformed=False)

        jacob_det = transform.log_jac_det(aesara.shared(array), *x.owner.inputs)
        # assert logpt(x).ndim == jacob_det.ndim

        # Hack to get relative tolerance
        a = logpt(x, array.astype(aesara.config.floatX), jacobian=False).eval()
        b = logp_nojac.eval()
        close_to(a, b, np.abs(0.5 * (a + b) * tol))

    @pytest.mark.parametrize(
        "sd,size",
        [
            (2.5, 2),
            (5.0, (2, 3)),
            (np.ones(3) * 10.0, (4, 3)),
        ],
    )
    def test_half_normal(self, sd, size):
        model = self.build_model(pm.HalfNormal, {"sd": sd}, size=size, transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))])
    def test_exponential(self, lam, size):
        model = self.build_model(pm.Exponential, {"lam": lam}, size=size, transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, 2),
            (0.5, 0.5, (2, 3)),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta(self, a, b, size):
        model = self.build_model(
            pm.Beta, {"alpha": a, "beta": b}, size=size, transform=tr.logodds
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [
            (0.0, 1.0, 2),
            (0.5, 5.5, (2, 3)),
            (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)),
        ],
    )
    def test_uniform(self, lower, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.interval(transform_params)
        model = self.build_model(
            pm.Uniform, {"lower": lower, "upper": upper}, size=size, transform=interval
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower, c, upper, size",
        [
            (0.0, 1.0, 2.0, 2),
            (-10, 0, 200, (2, 3)),
            (np.zeros(3), np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_triangular(self, lower, c, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, _, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.interval(transform_params)
        model = self.build_model(
            pm.Triangular, {"lower": lower, "c": c, "upper": upper}, size=size, transform=interval
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "mu,kappa,size",
        [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))],
    )
    def test_vonmises(self, mu, kappa, size):
        model = self.build_model(
            pm.VonMises, {"mu": mu, "kappa": kappa}, size=size, transform=tr.circular
        )
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,size", [(np.ones(2), None), (np.ones((2, 3)) * 0.5, None), (np.ones(3), (4,))]
    )
    def test_dirichlet(self, a, size):
        model = self.build_model(pm.Dirichlet, {"a": a}, size=size, transform=tr.simplex)
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    def test_normal_ordered(self):
        model = self.build_model(
            pm.Normal,
            {"mu": 0.0, "sd": 1.0},
            size=3,
            initval=np.asarray([-1.0, 1.0, 4.0]),
            transform=tr.ordered,
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "sd,size",
        [
            (2.5, (2,)),
            (np.ones(3), (4, 3)),
        ],
    )
    def test_half_normal_ordered(self, sd, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.HalfNormal,
            {"sd": sd},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize("lam,size", [(2.5, (2,)), (np.ones(3), (4, 3))])
    def test_exponential_ordered(self, lam, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.Exponential,
            {"lam": lam},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, (2,)),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta_ordered(self, a, b, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Beta,
            {"alpha": a, "beta": b},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.logodds, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [(0.0, 1.0, (2,)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))],
    )
    def test_uniform_ordered(self, lower, upper, size):
        def transform_params(*inputs):
            _, _, _, lower, upper = inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.interval(transform_params)
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Uniform,
            {"lower": lower, "upper": upper},
            size=size,
            initval=initval,
            transform=tr.Chain([interval, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    @pytest.mark.parametrize(
        "mu,kappa,size", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))]
    )
    def test_vonmises_ordered(self, mu, kappa, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.VonMises,
            {"mu": mu, "kappa": kappa},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.circular, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "lower,upper,size,transform",
        [
            (0.0, 1.0, (2,), tr.simplex),
            (0.5, 5.5, (2, 3), tr.simplex),
            (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])),
        ],
    )
    def test_uniform_other(self, lower, upper, size, transform):
        initval = np.ones(size) / size[-1]
        model = self.build_model(
            pm.Uniform,
            {"lower": lower, "upper": upper},
            size=size,
            initval=initval,
            transform=transform,
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    @pytest.mark.parametrize(
        "mu,cov,size,shape",
        [
            (np.zeros(2), np.diag(np.ones(2)), None, (2,)),
            (np.zeros(3), np.diag(np.ones(3)), (4,), (4, 3)),
        ],
    )
    def test_mvnormal_ordered(self, mu, cov, size, shape):
        initval = np.sort(np.random.randn(*shape))
        model = self.build_model(
            pm.MvNormal, {"mu": mu, "cov": cov}, size=size, initval=initval, transform=tr.ordered
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)
def find_constrained_prior(
    distribution: pm.Distribution,
    lower: float,
    upper: float,
    init_guess: Dict[str, float],
    mass: float = 0.95,
    fixed_params: Optional[Dict[str, float]] = None,
) -> Dict[str, float]:
    """
    Find optimal parameters to get `mass` % of probability
    of `pm_dist` between `lower` and `upper`.

    Note: only works for one- and two-parameter distributions, as there
    are exactly two constraints. Fix some combination of parameters
    if you want to use it on >=3-parameter distributions.

    Parameters
    ----------
    distribution : pm.Distribution
        PyMC distribution you want to set a prior on.
        Needs to have a ``logcdf`` method implemented in PyMC.
    lower : float
        Lower bound to get `mass` % of probability of `pm_dist`.
    upper : float
        Upper bound to get `mass` % of probability of `pm_dist`.
    init_guess: Dict[str, float]
        Initial guess for ``scipy.optimize.least_squares`` to find the
        optimal parameters of `pm_dist` fitting the interval constraint.
        Must be a dictionary with the name of the PyMC distribution's
        parameter as keys and the initial guess as values.
    mass: float, default to 0.95
        Share of the probability mass we want between ``lower`` and ``upper``.
        Defaults to 95%.
    fixed_params: Dict[str, float], Optional, default None
        Only used when `pm_dist` has at least three parameters.
        Dictionary of fixed parameters, so that there are only 2 to optimize.
        For instance, for a StudentT, you fix nu to a constant and get the
        optimized mu and sigma.

    Returns
    -------
    The optimized distribution parameters as a dictionary with the
    parameters' name as key and the optimized value as value.

    Examples
    --------
    .. code-block:: python

        # get parameters obeying constraints
        opt_params = pm.find_constrained_prior(
            pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10}
        )

        # use these parameters to draw random samples
        samples = pm.Gamma.dist(**opt_params, size=100).eval()

        # use these parameters in a model
        with pm.Model():
            x = pm.Gamma('x', **opt_params)

        # specify fixed values before optimization
        opt_params = pm.find_constrained_prior(
            pm.StudentT,
            lower=0,
            upper=1,
            init_guess={"mu": 5, "sigma": 2},
            fixed_params={"nu": 7},
        )
    """
    assert 0.01 <= mass <= 0.99, (
        "This function optimizes the mass of the given distribution +/- "
        f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}."
    )

    # exit when any parameter is not scalar:
    if np.any(np.asarray(distribution.rv_op.ndims_params) != 0):
        raise NotImplementedError(
            "`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n"
            "Feel free to open a pull request on PyMC repo if you really need this feature."
        )

    dist_params = aet.vector("dist_params")
    params_to_optim = {
        arg_name: dist_params[i]
        for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
    }

    if fixed_params is not None:
        params_to_optim.update(fixed_params)

    dist = distribution.dist(**params_to_optim)

    try:
        logcdf_lower = pm.logcdf(dist, pm.floatX(lower))
        logcdf_upper = pm.logcdf(dist, pm.floatX(upper))
    except AttributeError:
        raise AttributeError(
            f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a "
            "logcdf method yet.\nOpen an issue or, even better, a pull request on PyMC repo if "
            "you really need it."
        )

    cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass
    cdf_error_fn = pm.aesaraf.compile_pymc([dist_params], cdf_error, allow_input_downcast=True)

    try:
        aesara_jac = pm.gradient(cdf_error, [dist_params])
        jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True)
    # when PyMC cannot compute the gradient
    except (NotImplementedError, NullTypeGradError):
        jac = "2-point"

    opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac)
    if not opt.success:
        raise ValueError("Optimization of parameters failed.")

    # save optimal parameters
    opt_params = {
        param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
    }
    if fixed_params is not None:
        opt_params.update(fixed_params)

    # check mass in interval is not too far from `mass`
    opt_dist = distribution.dist(**opt_params)
    mass_in_interval = (
        pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower))
    ).eval()
    if (np.abs(mass_in_interval - mass)) > 0.01:
        warnings.warn(
            f"Final optimization has "
            f"{(mass_in_interval if mass_in_interval.ndim < 1 else mass_in_interval[0]) * 100:.0f}% "
            f"of probability mass between {lower} and {upper} instead of the requested "
            f"{mass * 100:.0f}%.\n"
            "You may need to use a more flexible distribution, change the fixed parameters in the "
            "`fixed_params` dictionary, or provide better initial guesses."
        )

    return opt_params