def incomplete_beta_ps(a, b, value):
    """Power series for incomplete beta

    Use when b*x is small and value not too close to 1.
    Based on Cephes library by Steve Moshier (incbet.c)
    """
    one = aet.constant(1, dtype="float64")
    ai = one / a
    u = (one - b) * value
    t1 = u / (a + one)
    t = u
    # machine epsilon (np.MachAr is deprecated; np.finfo gives the same value)
    threshold = np.finfo(np.float64).eps * ai
    s = aet.constant(0, dtype="float64")

    def _step(i, t, s):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        return ((t, s), until(aet.abs_(step) < threshold))

    (t, s), _ = scan(
        _step,
        sequences=[aet.arange(2, 302)],
        outputs_info=[e for e in aet.cast((t, s), "float64")],
    )

    s = s[-1] + t1 + ai

    t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * aet.log(value) + aet.log(s)
    return aet.exp(t)
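# Hedged sketch (not part of the original source): a plain NumPy/SciPy re-implementation of the
# same Cephes power series, handy for sanity-checking the graph version above against
# scipy.special.betainc when b*x is small. The helper name `incomplete_beta_ps_ref` and the test
# values are invented for illustration.
import numpy as np
from scipy.special import betainc, gammaln


def incomplete_beta_ps_ref(a, b, x, max_terms=300):
    ai = 1.0 / a
    u = (1.0 - b) * x
    t1 = u / (a + 1.0)
    t = u
    s = 0.0
    threshold = np.finfo(float).eps * ai
    for i in range(2, max_terms + 2):
        t *= (i - b) * x / i
        step = t / (a + i)
        s += step
        if abs(step) < threshold:
            break
    s = s + t1 + ai
    log_res = gammaln(a + b) - gammaln(a) - gammaln(b) + a * np.log(x) + np.log(s)
    return np.exp(log_res)


# Should agree closely with SciPy for small b*x:
assert np.isclose(incomplete_beta_ps_ref(1.5, 2.5, 0.1), betainc(1.5, 2.5, 0.1))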
def symbolic_logq_not_scaled(self):
    z0 = self.symbolic_initial
    std = rho2sigma(self.rho)
    logdet = at.log(std)
    quaddist = -0.5 * z0**2 - at.log((2 * np.pi) ** 0.5)
    logq = quaddist - logdet
    return logq.sum(range(1, logq.ndim))
def check_jacobian_det(
    transform,
    domain,
    constructor=at.dscalar,
    test=0,
    make_comparable=None,
    elemwise=False,
    rv_var=None,
):
    y = constructor("y")
    y.tag.test_value = test

    if rv_var is None:
        rv_var = y

    rv_inputs = rv_var.owner.inputs if rv_var.owner else []

    x = transform.backward(y, *rv_inputs)
    if make_comparable:
        x = make_comparable(x)

    if not elemwise:
        jac = at.log(at.nlinalg.det(jacobian(x, [y])))
    else:
        jac = at.log(at.abs_(at.diag(jacobian(x, [y]))))

    # ljd = log jacobian det
    actual_ljd = aesara.function([y], jac)

    computed_ljd = aesara.function(
        [y],
        at.as_tensor_variable(transform.log_jac_det(y, *rv_inputs)),
        on_unused_input="ignore",
    )

    for yval in domain.vals:
        close_to(actual_ljd(yval), computed_ljd(yval), tol)
def logdet(self):
    z = self.z0  # sxd
    u = self.u_  # d
    w = self.w_  # d
    b = self.b  # .
    deriv = self.h.deriv  # f'
    if not self.batched:
        # f'(sxd \dot d + .) * -xd = sxd
        phi = deriv(z.dot(w) + b).dimshuffle(0, "x") * w.dimshuffle("x", 0)
        # \abs(. + sxd \dot d) = s
        det = aet.abs_(1.0 + phi.dot(u))
        return aet.log(det)
    else:
        z = z.swapaxes(0, 1)
        b = b.dimshuffle(0, "x")
        # z bxsxd
        # u bxd
        # w bxd
        # b bx-x-
        # f'(bxsxd \bdot bxd + bx-x-) * bx-xd = bxsxd
        phi = deriv(aet.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(0, "x", 1)
        # \abs(. + bxsxd \bdot bxd) = bxs
        det = aet.abs_(1.0 + aet.batched_dot(phi, u))  # bxs
        return aet.log(det).sum(0)  # s
def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    sigma2 = at.square(sigma)
    Kuu = self.cov_func(Xu)
    Kuf = self.cov_func(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = at.sum(A * A, 0)
    if self.approx == "FITC":
        Kffd = self.cov_func(X, diag=True)
        Lamd = at.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
        trace = 0.0
    elif self.approx == "VFE":
        Lamd = at.ones_like(Qffd) * sigma2
        trace = (1.0 / (2.0 * sigma2)) * (
            at.sum(self.cov_func(X, diag=True)) - at.sum(at.sum(A * A, 0))
        )
    else:  # DTC
        Lamd = at.ones_like(Qffd) * sigma2
        trace = 0.0
    A_l = A / Lamd
    L_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_l, at.transpose(A)))
    r = y - self.mean_func(X)
    r_l = r / Lamd
    c = solve_lower(L_B, at.dot(A, r_l))
    constant = 0.5 * X.shape[0] * at.log(2.0 * np.pi)
    logdet = 0.5 * at.sum(at.log(Lamd)) + at.sum(at.log(at.diag(L_B)))
    quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c))
    return -1.0 * (constant + logdet + quadratic + trace)
def symbolic_logq_not_scaled(self):
    z0 = self.symbolic_initial
    diag = at.diagonal(self.L, 0, self.L.ndim - 2, self.L.ndim - 1)
    logdet = at.log(diag)
    quaddist = -0.5 * z0**2 - at.log((2 * np.pi) ** 0.5)
    logq = quaddist - logdet
    return logq.sum(range(1, logq.ndim))
def two_gaussians(x): """ Mixture of gaussians likelihood """ log_like1 = (-0.5 * n * at.log(2 * np.pi) - 0.5 * at.log(dsigma) - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)) log_like2 = (-0.5 * n * at.log(2 * np.pi) - 0.5 * at.log(dsigma) - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)) return at.log(w1 * at.exp(log_like1) + w2 * at.exp(log_like2))
def forward(self, rv_var, rv_value):
    a, b = self.param_extract_fn(rv_var)

    if a is not None and b is not None:
        return at.log(rv_value - a) - at.log(b - rv_value)
    elif a is not None:
        return at.log(rv_value - a)
    elif b is not None:
        return at.log(b - rv_value)
    else:
        return rv_value
def logp(self, states):
    r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

    This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
    on state samples, :math:`s_t`, given by the following:

    .. math::

        \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0)
        \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
    another way to express this result is as follows:

    .. math::

        P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    """  # noqa: E501

    states_tt = at.as_tensor(states)

    if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1:
        raise NotImplementedError("Broadcasting not supported.")

    Gammas_tt = at_broadcast_to(self.Gammas, (states.shape[0],) + tuple(self.Gammas.shape)[-2:])
    gamma_0_tt = self.gamma_0

    Gamma_1_tt = Gammas_tt[0]
    P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]]

    # def S_logp_fn(S_tm1, S_t, Gamma):
    #     return at.log(Gamma[..., S_tm1, S_t])
    #
    # P_S_2T_tt, _ = aesara.scan(
    #     S_logp_fn,
    #     sequences=[
    #         {
    #             "input": states_tt,
    #             "taps": [-1, 0],
    #         },
    #         Gammas_tt,
    #     ],
    # )
    P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1], states[1:]]

    log_P_S_1T_tt = at.concatenate([at.shape_padright(at.log(P_S_1_tt)), at.log(P_S_2T_tt)])

    res = log_P_S_1T_tt.sum()
    res.name = "states_logp"

    return res
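# Hedged sketch (not in the original): the same likelihood written with plain NumPy for a tiny
# chain, to make the indexing above concrete. A single time-homogeneous transition matrix stands
# in for the per-time-step Gammas; Gamma, gamma_0 and states are toy values invented here.
import numpy as np

Gamma = np.array([[0.9, 0.1],
                  [0.3, 0.7]])      # transition matrix
gamma_0 = np.array([0.5, 0.5])      # initial state distribution
states = np.array([0, 1, 1])

# P(S_1 = s_1) marginalizes the initial distribution through the first transition;
# every later factor is a single transition probability indexed by (previous, current) state.
logp = np.log(gamma_0 @ Gamma)[states[0]]
logp += np.log(Gamma[states[:-1], states[1:]]).sum()
assert np.isclose(logp, np.log(0.6) + np.log(0.1) + np.log(0.7))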
def two_gaussians(x):
    log_like1 = (
        -0.5 * n * aet.log(2 * np.pi)
        - 0.5 * aet.log(dsigma)
        - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
    )
    log_like2 = (
        -0.5 * n * aet.log(2 * np.pi)
        - 0.5 * aet.log(dsigma)
        - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
    )
    return aet.log(w1 * aet.exp(log_like1) + w2 * aet.exp(log_like2))
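# Hedged cross-check (not in the original): the quadratic-form log-density used for each mixture
# component above equals scipy.stats.multivariate_normal.logpdf. The mean, covariance and test
# point below are toy values invented for the check.
import numpy as np
from scipy.stats import multivariate_normal

n = 3
mu = np.array([0.5, -1.0, 2.0])
sigma = np.array([[1.0, 0.2, 0.0],
                  [0.2, 1.5, 0.3],
                  [0.0, 0.3, 2.0]])
isigma = np.linalg.inv(sigma)   # precision matrix
dsigma = np.linalg.det(sigma)   # covariance determinant
x = np.array([0.0, 0.0, 1.0])

log_like = (
    -0.5 * n * np.log(2 * np.pi)
    - 0.5 * np.log(dsigma)
    - 0.5 * (x - mu).T.dot(isigma).dot(x - mu)
)
assert np.isclose(log_like, multivariate_normal(mu, sigma).logpdf(x))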
def test_tt_logdotexp():
    np.seterr(over="ignore", under="ignore")

    aesara.config.compute_test_value = "warn"

    A = np.c_[[1.0, 2.0], [3.0, 4.0], [10.0, 20.0]]
    b = np.c_[[0.1], [0.2], [30.0]].T
    A_tt = at.as_tensor_variable(A)
    b_tt = at.as_tensor_variable(b)
    test_res = tt_logdotexp(at.log(A_tt), at.log(b_tt)).eval()
    assert test_res.shape == (2, 1)
    assert np.allclose(A.dot(b), np.exp(test_res))

    b = np.r_[0.1, 0.2, 30.0]
    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
    assert test_res.shape == (2,)
    assert np.allclose(A.dot(b), np.exp(test_res))

    A = np.c_[[1.0, 2.0], [10.0, 20.0]]
    b = np.c_[[0.1], [0.2]].T
    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
    assert test_res.shape == (2, 1)
    assert np.allclose(A.dot(b), np.exp(test_res))

    b = np.r_[0.1, 0.2]
    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
    assert test_res.shape == (2,)
    assert np.allclose(A.dot(b), np.exp(test_res))
def logdet(self):
    d = float(self.dim)
    a = self.a_  # .
    b = self.b_  # .
    z_ref = self.z_ref  # d
    z = self.z0  # sxd
    h = self.h  # h(a, r)
    deriv = self.h.deriv  # h'(a, r)
    if self.batched:
        z = z.swapaxes(0, 1)
        a = a.dimshuffle(0, "x", "x")
        b = b.dimshuffle(0, "x", "x")
        z_ref = z_ref.dimshuffle(0, "x", 1)
        # a bx-x-
        # b bx-x-
        # z bxsxd
        # z_ref bx-xd
    r = (z - z_ref).norm(2, axis=-1, keepdims=True)  # s
    har = h(a, r)
    dar = deriv(a, r)
    logdet = aet.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r))
    if self.batched:
        return logdet.sum([0, -1])
    else:
        return logdet.sum(-1)
def test_detect_nan():
    # Test the code snippet example that detects NaN values.
    nan_detected = [False]

    def detect_nan(fgraph, i, node, fn):
        for output in fn.outputs:
            if np.isnan(output[0]).any():
                print("*** NaN detected ***")
                debugprint(node)
                print("Inputs : %s" % [input[0] for input in fn.inputs])
                print("Outputs: %s" % [output[0] for output in fn.outputs])
                nan_detected[0] = True
                break

    x = dscalar("x")
    f = function(
        [x],
        [log(x) * x],
        mode=MonitorMode(post_func=detect_nan),
    )
    try:
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        f(0)  # log(0) * 0 = -inf * 0 = NaN
    finally:
        sys.stdout = old_stdout
    assert nan_detected[0]
def forward(self, x_):
    x = x_.T
    n = x.shape[0]
    lx = aet.log(x)
    shift = aet.sum(lx, 0, keepdims=True) / n
    y = lx[:-1] - shift
    return floatX(y.T)
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = tt.lvector("y")

    b = tt.fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # GpuCrossentropySoftmax1HotWithBiasDx test does not fail with the error
    # "the launch timed out and was terminated" on GPU cards that are not
    # powerful enough. We need the big shape to check the corner case.
    dot_result = tt.fmatrix("dot_result")

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = tt.nnet.softmax(dot_result + b)
    y_pred = tt.argmax(p_y_given_x, axis=-1)
    loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
    dW = tt.grad(loss, dot_result)
    classify = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
    )
    classify_gpu = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_with_gpu
    )

    assert any(
        isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
def test_optimizer():
    # Test that we can exclude an optimization (fusion) from the mode.
    nan_detected = [False]

    def detect_nan(fgraph, i, node, fn):
        for output in fn.outputs:
            if np.isnan(output[0]).any():
                print("*** NaN detected ***")
                debugprint(node)
                print("Inputs : %s" % [input[0] for input in fn.inputs])
                print("Outputs: %s" % [output[0] for output in fn.outputs])
                nan_detected[0] = True
                break

    x = dscalar("x")
    mode = MonitorMode(post_func=detect_nan)
    mode = mode.excluding("fusion")
    f = function([x], [log(x) * x], mode=mode)

    # Test that the fusion wasn't done
    assert len(f.maker.fgraph.apply_nodes) == 2
    try:
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        f(0)  # log(0) * 0 = -inf * 0 = NaN
    finally:
        sys.stdout = old_stdout

    # Test that we still detect the nan
    assert nan_detected[0]
def forward(self, value, *inputs):
    """Inverse operation of softplus.

    y = Log(Exp(x) - 1)
      = Log(1 - Exp(-x)) + x
    """
    return at.log(1.0 - at.exp(-value)) + value
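# Hedged numeric check (not in the original): the identity log(exp(y) - 1) = log(1 - exp(-y)) + y
# used by the inverse-softplus transforms here, and the fact that it inverts softplus. The test
# values are arbitrary.
import numpy as np

y = np.array([0.1, 1.0, 5.0, 30.0])
x = np.log1p(-np.exp(-y)) + y                 # numerically stable inverse softplus
assert np.allclose(x, np.log(np.expm1(y)))    # same as the naive form
assert np.allclose(np.log1p(np.exp(x)), y)    # softplus(x) recovers y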
def logsumexp(x, axis=None, keepdims=True):
    # Adapted from https://github.com/Theano/Theano/issues/1563
    x_max = aet.max(x, axis=axis, keepdims=True)
    x_max = aet.switch(aet.isinf(x_max), 0, x_max)
    res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis, keepdims=True)) + x_max
    return res if keepdims else res.squeeze()
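# Hedged sketch (not in the original): the same max-shift trick in plain NumPy, cross-checked
# against scipy.special.logsumexp. The helper name `logsumexp_ref` and the test values are
# invented for illustration.
import numpy as np
from scipy.special import logsumexp as sp_logsumexp


def logsumexp_ref(x, axis=None, keepdims=True):
    x_max = np.max(x, axis=axis, keepdims=True)
    x_max = np.where(np.isinf(x_max), 0, x_max)  # avoid inf - inf = nan for all -inf slices
    res = np.log(np.sum(np.exp(x - x_max), axis=axis, keepdims=True)) + x_max
    return res if keepdims else np.squeeze(res, axis=axis)


x = np.array([[1.0, 1000.0, -np.inf], [-2.0, 0.5, 3.0]])
assert np.allclose(logsumexp_ref(x, axis=1, keepdims=False), sp_logsumexp(x, axis=1))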
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
    X2e = at.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2.0 * XY

    V = at.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = at.switch(
        at.eq((length % 2), 0),
        # if even vector
        at.mean(V[((length // 2) - 1):((length // 2) + 1)]),
        # if odd vector
        V[length // 2],
    )

    h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = at.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -at.dot(Kxy, X)
    sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

    return Kxy, dxkxy
def forward(self, rv_var, rv_value): """Inverse operation of softplus. y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ return at.log(1.0 - at.exp(-rv_value)) + rv_value
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):
    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0], at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    # Squeeze stack dimension
    # There is an Aesara bug in squeeze with negative axis
    # https://github.com/aesara-devs/aesara/issues/830
    # mix_logcdf = at.squeeze(mix_logcdf, axis=-1)
    mix_logcdf = at.squeeze(mix_logcdf, axis=mix_logcdf.ndim - 1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
def logp(self, value): """ Calculate log-probability of defined Mixture distribution at specified value. Parameters ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple values are desired the values must be provided in a numpy array or Aesara tensor Returns ------- TensorVariable """ w = self.w return bound( logsumexp(at.log(w) + self._comp_logp(value), axis=-1, keepdims=False), w >= 0, w <= 1, at.allclose(w.sum(axis=-1), 1), broadcast_conditions=False, )
def forward(self, x): """Inverse operation of softplus. y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ return aet.log(1.0 - aet.exp(-x)) + x
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):
    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0], at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
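# Hedged sketch (not in the original): the mixture-CDF identity the graphs above rely on,
# F_mix(x) = sum_i w_i * F_i(x), evaluated in log space with SciPy. The component choices and
# test point are toy values invented for illustration.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

weights = np.array([0.3, 0.7])
components = [norm(0.0, 1.0), norm(2.0, 0.5)]
x = 1.2

components_logcdf = np.array([c.logcdf(x) for c in components])
mix_logcdf = logsumexp(np.log(weights) + components_logcdf)
assert np.isclose(np.exp(mix_logcdf), sum(w * c.cdf(x) for w, c in zip(weights, components)))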
def local_log_pow(node):
    if node.op == tensor.log:
        (x,) = node.inputs
        if x.owner and x.owner.op == tensor.pow:
            base, exponent = x.owner.inputs
            # TODO: reason to be careful with dtypes?
            return [exponent * tensor.log(base)]
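# Hedged note (not in the original): the rewrite above relies on the identity
# log(base**exponent) == exponent * log(base), which is only safe for positive base.
# A quick NumPy check with toy values, plus a counterexample for a negative base:
import numpy as np

assert np.isclose(np.log(2.5**3.0), 3.0 * np.log(2.5))
with np.errstate(invalid="ignore"):
    # For a negative base the two expressions differ: log((-2)**2) is finite, 2*log(-2) is nan.
    assert np.isfinite(np.log((-2.0) ** 2)) and np.isnan(2 * np.log(-2.0))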
def normal_lccdf(mu, sigma, x):
    """Compute the log of the complementary cumulative distribution function of the normal."""
    z = (x - mu) / sigma
    return aet.switch(
        aet.gt(z, 1.0),
        aet.log(aet.erfcx(z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(-z / aet.sqrt(2.0)) / 2.0),
    )
def logpow(x, m):
    """
    Calculates log(x**m) since m*log(x) will fail when m, x = 0.
    """
    # return m * log(x)
    return aet.switch(aet.eq(x, 0), aet.switch(aet.eq(m, 0), 0.0, -np.inf), m * aet.log(x))
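# Hedged illustration (not in the original) of the corner case handled above: the naive
# m * log(x) gives nan at m = x = 0, while log(x**m) should be log(1) = 0.
import numpy as np

with np.errstate(divide="ignore", invalid="ignore"):
    naive = 0.0 * np.log(0.0)     # 0 * -inf = nan
assert np.isnan(naive)
assert np.log(0.0**0.0) == 0.0    # 0**0 evaluates to 1 in float arithmetic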
def logp(value, n, p):
    return check_parameters(
        factln(n) - factln(value).sum() + (value * at.log(p)).sum(),
        at.all(value >= 0),
        at.all(0 <= p),
        at.all(p <= 1),
        at.isclose(p.sum(), 1),
    )
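# Hedged cross-check (not in the original): the same multinomial log-pmf term written with SciPy,
# using gammaln(k + 1) in place of factln. The counts and probabilities are toy values.
import numpy as np
from scipy.special import gammaln
from scipy.stats import multinomial

n, p = 5, np.array([0.2, 0.3, 0.5])
value = np.array([1, 2, 2])

logp_manual = gammaln(n + 1) - gammaln(value + 1).sum() + (value * np.log(p)).sum()
assert np.isclose(logp_manual, multinomial(n, p).logpmf(value))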
def normal_lcdf(mu, sigma, x):
    """Compute the log of the cumulative distribution function of the normal."""
    z = (x - mu) / sigma
    return aet.switch(
        aet.lt(z, -1.0),
        aet.log(aet.erfcx(-z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(z / aet.sqrt(2.0)) / 2.0),
    )
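# Hedged cross-check (not in the original): both branches above against scipy.stats.norm.logcdf,
# including a deep-tail point where the erfcx form avoids underflow. The helper name
# `normal_lcdf_ref` and the test values are invented for illustration.
import numpy as np
from scipy.special import erfc, erfcx
from scipy.stats import norm


def normal_lcdf_ref(mu, sigma, x):
    z = (x - mu) / sigma
    return np.where(
        z < -1.0,
        np.log(erfcx(-z / np.sqrt(2.0)) / 2.0) - z**2 / 2.0,   # tail branch via erfcx
        np.log1p(-erfc(z / np.sqrt(2.0)) / 2.0),               # central branch
    )


with np.errstate(divide="ignore"):
    x = np.array([-40.0, -0.5, 1.0])
    assert np.allclose(normal_lcdf_ref(0.0, 1.0, x), norm.logcdf(x))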
def simple_2model_continuous():
    mu = -2.1
    tau = 1.3
    with Model() as model:
        x = pm.Normal("x", mu, tau=tau, initval=0.1)
        pm.Deterministic("logx", at.log(x))
        pm.Beta("y", alpha=1, beta=1, size=2)
    return model.initial_point, model