def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = sum(start[n.name].size for n in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)

    step.integrator._logp_dlogp_func.set_extra_values({})
    astart = DictToArrayBijection.map(start)
    p = RaveledVars(floatX(step.potential.random()), astart.point_map_info)
    q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info)
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = at.sum(X ** 2, axis=1).dimshuffle(0, "x")
    X2e = at.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2.0 * XY

    V = at.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = at.switch(
        at.eq((length % 2), 0),
        # if even vector
        at.mean(V[((length // 2) - 1) : ((length // 2) + 1)]),
        # if odd vector
        V[length // 2],
    )

    h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = at.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -at.dot(Kxy, X)
    sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

    return Kxy, dxkxy
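# Illustrative NumPy sketch (not part of the library code above): __call__
# builds an RBF kernel whose bandwidth comes from the median of the pairwise
# squared distances, h = 0.5 * median(H) / log(n + 1). The helper name
# `rbf_kernel_np` is hypothetical and only mirrors that computation numerically.
import numpy as np

def rbf_kernel_np(X):
    XY = X @ X.T
    x2 = np.sum(X ** 2, axis=1)[:, None]
    H = x2 + x2.T - 2.0 * XY                              # pairwise squared distances
    h = 0.5 * np.median(H) / np.log(X.shape[0] + 1.0)     # median-heuristic bandwidth
    Kxy = np.exp(-H / h / 2.0)                            # RBF kernel matrix
    dxkxy = (-Kxy @ X + X * Kxy.sum(axis=1, keepdims=True)) / h
    return Kxy, dxkxy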
def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs):
    if lower is not None:
        lower = at.as_tensor_variable(floatX(lower))
    if upper is not None:
        upper = at.as_tensor_variable(floatX(upper))

    if transform == "infer":
        if lower is None and upper is None:
            transform = None
            default = None
        elif lower is not None and upper is not None:
            transform = transforms.interval(lower, upper)
            default = 0.5 * (lower + upper)
        elif upper is not None:
            transform = transforms.upperbound(upper)
            default = upper - 1
        else:
            transform = transforms.lowerbound(lower)
            default = lower + 1
    else:
        default = None

    super().__init__(distribution, lower, upper, default, *args, transform=transform, **kwargs)
def extend(self, direction):
    """Double the tree size by extending the tree in the given direction.

    If direction is larger than 0, extend it to the right, otherwise
    extend it to the left.

    Return a tuple `(diverging, turning)` of type (DivergenceInfo, bool).
    `diverging` indicates that the tree extension was aborted because the
    energy change exceeded `self.Emax`. `turning` indicates that the tree
    extension was stopped because the termination criterion was reached
    (the trajectory is turning back).
    """
    if direction > 0:
        tree, diverging, turning = self._build_subtree(
            self.right, self.depth, floatX(np.asarray(self.step_size))
        )
        leftmost_begin, leftmost_end = self.left, self.right
        rightmost_begin, rightmost_end = tree.left, tree.right
        leftmost_p_sum = self.p_sum
        rightmost_p_sum = tree.p_sum
        self.right = tree.right
    else:
        tree, diverging, turning = self._build_subtree(
            self.left, self.depth, floatX(np.asarray(-self.step_size))
        )
        leftmost_begin, leftmost_end = tree.right, tree.left
        rightmost_begin, rightmost_end = self.left, self.right
        leftmost_p_sum = tree.p_sum
        rightmost_p_sum = self.p_sum
        self.left = tree.right

    self.depth += 1
    self.n_proposals += tree.n_proposals

    if diverging or turning:
        return diverging, turning

    size1, size2 = self.log_size, tree.log_size
    if logbern(size2 - size1):
        self.proposal = tree.proposal

    self.log_size = np.logaddexp(self.log_size, tree.log_size)
    self.log_weighted_accept_sum = np.logaddexp(
        self.log_weighted_accept_sum, tree.log_weighted_accept_sum
    )
    self.p_sum[:] += tree.p_sum

    # Additional turning check only when tree depth > 0 to avoid redundant work
    if self.depth > 0:
        left, right = self.left, self.right
        p_sum = self.p_sum
        turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)
        p_sum1 = leftmost_p_sum + rightmost_begin.p.data
        turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0)
        p_sum2 = leftmost_end.p.data + rightmost_p_sum
        turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0)
        turning = turning | turning1 | turning2

    return diverging, turning
def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
    point_map_info = q0.point_map_info
    q0 = q0.data

    # same tuning scheme as DEMetropolis
    if not self.steps_until_tune and self.tune:
        if self.tune_target == "scaling":
            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
        elif self.tune_target == "lambda":
            self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))

        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    epsilon = self.proposal_dist() * self.scaling

    it = len(self._history)
    # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
    if it > 1:
        # differential evolution proposal
        # select two other chains
        iz1 = np.random.randint(it)
        iz2 = np.random.randint(it)
        while iz2 == iz1:
            iz2 = np.random.randint(it)

        z1 = self._history[iz1]
        z2 = self._history[iz2]
        # propose a jump
        q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
    else:
        # propose just with noise in the first 2 iterations
        q = floatX(q0 + epsilon)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted
    self._history.append(q_new)

    self.steps_until_tune -= 1

    stats = {
        "tune": self.tune,
        "scaling": self.scaling,
        "lambda": self.lamb,
        "accept": np.exp(accept),
        "accepted": accepted,
    }

    q_new = RaveledVars(q_new, point_map_info)

    return q_new, [stats]
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
    chol = at.stack(
        [
            at.stack([at.exp(0.1 * chol_vec[0]), 0]),
            at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
        ]
    )
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = floatX(np.ones((5, 2)))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    # TODO: What's the test? Something needs to be asserted.
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def test_multinomial_bound():
    x = np.array([1, 5])
    n = x.sum()

    with pm.Model() as modelA:
        p_a = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialA("x", n, p_a, observed=x)

    with pm.Model() as modelB:
        p_b = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialB("x", n, p_b, observed=x)

    assert np.isclose(
        modelA.logp({"p_stickbreaking__": [0]}), modelB.logp({"p_stickbreaking__": [0]})
    )
def test_model_to_graphviz_for_model_with_data_container(self):
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        obs_sigma = floatX(np.sqrt(1e-2))
        pm.Normal("obs", beta * x, obs_sigma, observed=y)
        pm.sample(1000, init=None, tune=1000, chains=1)

    for formatting in {"latex", "latex_with_params"}:
        with pytest.raises(ValueError, match="Unsupported formatting"):
            pm.model_to_graphviz(model, formatting=formatting)

    exp_without = [
        'x [label="x\n~\nData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal"]',
        'obs [label="obs\n~\nNormal" style=filled]',
    ]
    exp_with = [
        'x [label="x\n~\nData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
        f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
    ]
    for formatting, expected_substrings in [
        ("plain", exp_without),
        ("plain_with_params", exp_with),
    ]:
        g = pm.model_to_graphviz(model, formatting=formatting)
        # check formatting of RV nodes
        for expected in expected_substrings:
            assert expected in g.source
def test_elemwise_velocity():
    scaling = np.array([1, 2, 3])
    x = floatX(np.ones_like(scaling))
    pot = quadpotential.quad_potential(scaling, True)
    v = pot.velocity(x)
    npt.assert_allclose(v, scaling)
    assert v.dtype == pot.dtype
def backward(self, y_):
    y = y_.T
    y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)])
    # "softmax" with vector support and no deprecation warning:
    e_y = aet.exp(y - aet.max(y, 0, keepdims=True))
    x = e_y / aet.sum(e_y, 0, keepdims=True)
    return floatX(x.T)
def test_mixture_of_mvn(self):
    mu1 = np.asarray([0.0, 1.0])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1.0, 0.0])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[0.5, 0.5], mu1, mu2])
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2,))
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack(
        (
            st.multivariate_normal.logpdf(obs, mu1, cov1),
            st.multivariate_normal.logpdf(obs, mu2, cov2),
        )
    ).T
    complogp = y.distribution._comp_logp(aesara.shared(obs)).eval()
    assert_allclose(complogp, complogp_st)

    # check logp of mixture
    testpoint = model.test_point
    mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st, axis=-1, keepdims=False)
    assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(
        x=testpoint["w"],
        alpha=np.ones(2),
    )
    assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
def test_linear():
    lam = -0.78
    sig2 = 5e-3
    N = 300
    dt = 1e-1
    sde = lambda x, lam: (lam * x, sig2)
    x = floatX(_gen_sde_path(sde, (lam,), dt, N, 5.0))
    z = x + np.random.randn(x.size) * sig2
    # build model
    with Model() as model:
        lamh = Flat("lamh")
        xh = EulerMaruyama("xh", dt, sde, (lamh,), shape=N + 1, testval=x)
        Normal("zh", mu=xh, sigma=sig2, observed=z)
    # invert
    with model:
        trace = sample(init="advi+adapt_diag", chains=1)

    ppc = sample_posterior_predictive(trace, model=model)
    ppcf = fast_sample_posterior_predictive(trace, model=model)

    # test
    p95 = [2.5, 97.5]
    lo, hi = np.percentile(trace[lamh], p95, axis=0)
    assert (lo < lam) and (lam < hi)
    lo, hi = np.percentile(ppc["zh"], p95, axis=0)
    assert ((lo < z) * (z < hi)).mean() > 0.95
    lo, hi = np.percentile(ppcf["zh"], p95, axis=0)
    assert ((lo < z) * (z < hi)).mean() > 0.95
def dlogp(inputs, gradients):
    (g_logp,) = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.0)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = aet.nlinalg.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = aet.switch(ok, g_cov, -np.nan)
    g_delta = aet.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
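# Hedged numeric sketch (an assumption, not taken from the code above): for the
# MvNormal quadratic form logp(delta) = -0.5 * sum(delta @ inv(cov) * delta), the
# analytic gradient used in dlogp reduces to d logp / d delta = -delta @ inv(cov).
# The check below compares that expression against central finite differences.
import numpy as np

cov = np.array([[2.0, 0.5], [0.5, 1.5]])
delta = np.array([[0.3, -1.2], [1.0, 0.4], [-0.7, 2.1]])

def quad_logp(d):
    # quadratic-form part of the MvNormal log-density (constants dropped)
    return -0.5 * np.sum(np.linalg.solve(cov, d.T).T * d)

analytic = -np.linalg.solve(cov, delta.T).T  # -delta @ inv(cov)
eps = 1e-6
numeric = np.zeros_like(delta)
for i in range(delta.shape[0]):
    for j in range(delta.shape[1]):
        d_plus, d_minus = delta.copy(), delta.copy()
        d_plus[i, j] += eps
        d_minus[i, j] -= eps
        numeric[i, j] = (quad_logp(d_plus) - quad_logp(d_minus)) / (2 * eps)

np.testing.assert_allclose(numeric, analytic, rtol=1e-5)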
def test_logp(self):
    np.random.seed(42)

    chol_val = floatX(np.array([[1, 0.9], [0, 2]]))
    cov_val = floatX(np.dot(chol_val, chol_val.T))
    cov = at.matrix("cov")
    cov.tag.test_value = cov_val
    delta_val = floatX(np.random.randn(5, 2))
    delta = at.matrix("delta")
    delta.tag.test_value = delta_val
    expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
    expect = expect.logpdf(delta_val).sum()
    logp = MvNormalLogp()(cov, delta)
    logp_f = aesara.function([cov, delta], logp)
    logp = logp_f(cov_val, delta_val)
    npt.assert_allclose(logp, expect)
def forward_val(self, x_, point=None):
    x = x_.T
    n = x.shape[0]
    lx = np.log(x)
    shift = np.sum(lx, 0, keepdims=True) / n
    y = lx[:-1] - shift
    return floatX(y.T)
def forward(self, x_):
    x = x_.T
    n = x.shape[0]
    lx = aet.log(x)
    shift = aet.sum(lx, 0, keepdims=True) / n
    y = lx[:-1] - shift
    return floatX(y.T)
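# Minimal NumPy round-trip sketch (illustrative, not from the original code):
# the forward transform above maps a simplex point to y = log(x)[:-1] - mean(log(x)),
# and the backward transform shown earlier (append -sum(y), then softmax) should
# recover the original point. The helper names forward_np/backward_np are hypothetical.
import numpy as np

def forward_np(x):
    lx = np.log(x)
    return lx[:-1] - lx.mean()

def backward_np(y):
    y_full = np.concatenate([y, [-y.sum()]])
    e_y = np.exp(y_full - y_full.max())
    return e_y / e_y.sum()

x = np.array([0.2, 0.3, 0.5])
np.testing.assert_allclose(backward_np(forward_np(x)), x)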
def random(self):
    """Draw random value from QuadPotential."""
    n = floatX(normal(size=self.size))
    n /= self.d_sqrt
    n = self.factor.solve_Lt(n)
    n = self.factor.apply_Pt(n)
    return n
def initialize_population(self):
    """Create an initial population from the prior distribution."""
    population = []
    var_info = OrderedDict()
    if self.start is None:
        init_rnd = sample_prior_predictive(
            self.draws,
            var_names=[v.name for v in self.model.unobserved_RVs],
            model=self.model,
        )
    else:
        init_rnd = self.start

    init = self.model.initial_point

    for v in self.variables:
        var_info[v.name] = (init[v.name].shape, init[v.name].size)

    for i in range(self.draws):
        point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
        population.append(DictToArrayBijection.map(point).data)

    self.posterior = np.array(floatX(population))
    self.var_info = var_info
def test_expand_packed_triangular():
    with pytest.raises(ValueError):
        x = at.matrix("x")
        x.tag.test_value = np.array([[1.0]], dtype=aesara.config.floatX)
        expand_packed_triangular(5, x)
    N = 5
    packed = at.vector("packed")
    packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2))
    with pytest.raises(TypeError):
        expand_packed_triangular(packed.shape[0], packed)
    np.random.seed(42)
    vals = np.random.randn(N, N)
    lower = floatX(np.tril(vals))
    lower_packed = floatX(vals[lower != 0])
    upper = floatX(np.triu(vals))
    upper_packed = floatX(vals[upper != 0])
    expand_lower = expand_packed_triangular(N, packed, lower=True)
    expand_upper = expand_packed_triangular(N, packed, lower=False)
    expand_diag_lower = expand_packed_triangular(N, packed, lower=True, diagonal_only=True)
    expand_diag_upper = expand_packed_triangular(N, packed, lower=False, diagonal_only=True)
    assert np.all(expand_lower.eval({packed: lower_packed}) == lower)
    assert np.all(expand_upper.eval({packed: upper_packed}) == upper)
    assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals)))
    assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals)))
def test_grad(self):
    np.random.seed(42)

    def func(chol_vec, delta):
        chol = at.stack(
            [
                at.stack([at.exp(0.1 * chol_vec[0]), 0]),
                at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
            ]
        )
        cov = at.dot(chol, chol.T)
        return MvNormalLogp()(cov, delta)

    chol_vec_val = floatX(np.array([0.5, 1.0, -0.1]))

    delta_val = floatX(np.random.randn(1, 2))
    verify_grad(func, [chol_vec_val, delta_val])

    delta_val = floatX(np.random.randn(5, 2))
    verify_grad(func, [chol_vec_val, delta_val])
def build_model(self):
    data = pd.read_csv(
        pm.get_data("wells.dat"),
        delimiter=" ",
        index_col="id",
        dtype={"switch": np.int8},
    )
    data.dist /= 100
    data.educ /= 4
    col = data.columns
    P = data[col[1:]]
    P -= P.mean()
    P["1"] = 1

    with pm.Model() as model:
        effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns))
        logit_p = at.dot(floatX(np.array(P)), effects)
        pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values))
    return model
def reset(self):
    self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True)
    self._var_aesara = aesara.shared(self._var)
    self._stds = np.sqrt(self._initial_diag)
    self._inv_stds = floatX(1.0) / self._stds
    self._foreground_var = _WeightedVariance(
        self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype
    )
    self._background_var = _WeightedVariance(self._n, dtype=self.dtype)
    self._n_samples = 0
def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = model.ndim
    scaling = floatX(np.random.rand(size))
    step = BaseHMC(vars=model.vars, model=model, scaling=scaling)
    step.integrator._logp_dlogp_func.set_extra_values({})
    p = floatX(step.potential.random())
    q = floatX(np.random.randn(size))
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q, start.q, rtol=1e-5)
            npt.assert_allclose(state.p, start.p, rtol=1e-5)
def test_mixture_list_of_poissons(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
        mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
        Mixture("x_obs", w, [Poisson.dist(mu[0]), Poisson.dist(mu[1])], observed=self.pois_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1)
    assert_allclose(
        np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
    )
def backward(self, rv_var, rv_value):
    if rv_var.broadcastable[-1]:
        # If this variable is just a bunch of scalars/degenerate
        # Dirichlets, we can't transform it
        return rv_value

    y = rv_value.T
    y = at.concatenate([y, -at.sum(y, 0, keepdims=True)])
    # "softmax" with vector support and no deprecation warning:
    e_y = at.exp(y - at.max(y, 0, keepdims=True))
    x = e_y / at.sum(e_y, 0, keepdims=True)
    return floatX(x.T)
def test_logpt_subtensor():
    """Make sure we can compute a log-likelihood for ``Y[I]`` where ``Y`` and ``I`` are random variables."""

    size = 5

    mu_base = floatX(np.power(10, np.arange(np.prod(size)))).reshape(size)
    mu = np.stack([mu_base, -mu_base])
    sigma = 0.001
    rng = aesara.shared(np.random.RandomState(232), borrow=True)

    A_rv = Normal.dist(mu, sigma, rng=rng)
    A_rv.name = "A"

    p = 0.5

    I_rv = Bernoulli.dist(p, size=size, rng=rng)
    I_rv.name = "I"

    A_idx = A_rv[I_rv, at.ogrid[A_rv.shape[-1] :]]

    assert isinstance(A_idx.owner.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1))

    A_idx_value_var = A_idx.type()
    A_idx_value_var.name = "A_idx_value"

    I_value_var = I_rv.type()
    I_value_var.name = "I_value"

    A_idx_logp = logpt(A_idx, {A_idx: A_idx_value_var, I_rv: I_value_var})

    logp_vals_fn = aesara.function([A_idx_value_var, I_value_var], A_idx_logp)

    # The compiled graph should not contain any `RandomVariables`
    assert_no_rvs(logp_vals_fn.maker.fgraph.outputs[0])

    decimals = select_by_precision(float64=6, float32=4)

    for i in range(10):
        bern_sp = sp.bernoulli(p)
        I_value = bern_sp.rvs(size=size).astype(I_rv.dtype)

        norm_sp = sp.norm(mu[I_value, np.ogrid[mu.shape[1] :]], sigma)
        A_idx_value = norm_sp.rvs().astype(A_idx.dtype)

        exp_obs_logps = norm_sp.logpdf(A_idx_value)
        exp_obs_logps += bern_sp.logpmf(I_value)

        logp_vals = logp_vals_fn(A_idx_value, I_value)

        np.testing.assert_almost_equal(logp_vals, exp_obs_logps, decimal=decimals)
def forward(self, rv_var, rv_value):
    if rv_var.broadcastable[-1]:
        # If this variable is just a bunch of scalars/degenerate
        # Dirichlets, we can't transform it
        return rv_value

    x = rv_value.T
    n = x.shape[0]
    lx = at.log(x)
    shift = at.sum(lx, 0, keepdims=True) / n
    y = lx[:-1] - shift
    return floatX(y.T)
def __init__(self, A, dtype=None):
    """Compute the lower Cholesky decomposition of the potential.

    Parameters
    ----------
    A: matrix, ndim = 2
        Inverse of covariance matrix for the potential vector
    """
    if dtype is None:
        dtype = aesara.config.floatX
    self.dtype = dtype
    self.L = floatX(scipy.linalg.cholesky(A, lower=True))
def test_normal_mixture(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size)
        mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
        tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
        NormalMixture("x_obs", w, mu, tau=tau, observed=self.norm_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1)
    assert_allclose(
        np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1
    )
def test_user_potential():
    model = pymc3.Model()
    with model:
        pymc3.Normal("a", mu=0, sigma=1)

    # Work around missing nonlocal in python2
    called = []

    class Potential(quadpotential.QuadPotentialDiag):
        def energy(self, x, velocity=None):
            called.append(1)
            return super().energy(x, velocity)

    pot = Potential(floatX([1]))
    with model:
        step = pymc3.NUTS(potential=pot)
        pymc3.sample(10, init=None, step=step, chains=1)
    assert called