def __call__(self, x):
    neg_value = np.float64(self.logp_func(pm.floatX(x)))
    value = -1.0 * nan_to_high(neg_value)
    if self.use_gradient:
        neg_grad = self.dlogp_func(pm.floatX(x))
        if np.all(np.isfinite(neg_grad)):
            self.previous_x = x
        grad = nan_to_num(-1.0 * neg_grad)
        grad = grad.astype(np.float64)
    else:
        self.previous_x = x
        grad = None

    if self.n_eval % 10 == 0:
        self.update_progress_desc(neg_value, grad)

    if self.n_eval > self.maxeval:
        self.update_progress_desc(neg_value, grad)
        self.progress.close()
        raise StopIteration

    self.n_eval += 1
    self.progress.update(1)

    if self.use_gradient:
        return value, grad
    else:
        return value
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))
    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu, rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp)

        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini},
               more_obj_params=[ae, be, ad, bd])
def test_hh_flow():
    cov = pm.floatX([[2, -1], [-1, 3]])
    with pm.Model():
        pm.MvNormal('mvN', mu=pm.floatX([0, 1]), cov=cov, shape=2)
        nf = NFVI('scale-hh*2-loc')
        nf.fit(25000, obj_optimizer=pm.adam(learning_rate=0.001))
        trace = nf.approx.sample(10000)
    cov2 = pm.trace_cov(trace)
    np.testing.assert_allclose(cov, cov2, rtol=0.07)
def create_shared_params(self, start=None):
    if start is None:
        start = self.model.test_point
    else:
        start_ = self.model.test_point.copy()
        update_start_vals(start_, start, self.model)
        start = start_
    start = self.gbij.map(start)
    return {'mu': theano.shared(
                pm.floatX(start), 'mu'),
            'rho': theano.shared(
                pm.floatX(np.zeros((self.global_size,))), 'rho')}
def test_var_replacement():
    X_mean = pm.floatX(np.linspace(0, 10, 10))
    y = pm.floatX(np.random.normal(X_mean * 4, .05))
    with pm.Model():
        inp = pm.Normal('X', X_mean, shape=X_mean.shape)
        coef = pm.Normal('b', 4.)
        mean = inp * coef
        pm.Normal('y', mean, .1, observed=y)
        advi = pm.fit(100)
        assert advi.sample_node(mean).eval().shape == (10, )
        x_new = pm.floatX(np.linspace(0, 10, 11))
        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11, )
def test_free_rv(self):
    with pm.Model() as model4:
        Normal('n', observed=[[1, 1], [1, 1]], total_size=[2, 2])
        p4 = theano.function([], model4.logpt)

    with pm.Model() as model5:
        Normal('n', total_size=[2, Ellipsis, 2], shape=(1, 1),
               broadcastable=(False, False))
        p5 = theano.function([model5.n], model5.logpt)
    assert p4() == p5(pm.floatX([[1]]))
    assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
def test_cloning_available(self):
    gop = generator(integers())
    res = gop ** 2
    shared = theano.shared(floatX(10))
    res1 = theano.clone(res, {gop: shared})
    f = theano.function([], res1)
    assert f() == np.float32(100)
def apply(self, f):
    # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
    stein = Stein(
        approx=self.approx,
        kernel=f,
        use_histogram=self.approx.all_histograms,
        temperature=self.temperature)
    return pm.floatX(-1) * stein.grad
def __local_mu_rho(self):
    if not self.local_vars:
        mu, rho = (
            tt.constant(pm.floatX(np.asarray([]))),
            tt.constant(pm.floatX(np.asarray([])))
        )
    else:
        mu = []
        rho = []
        for var in self.local_vars:
            mu.append(self.known[var][0].ravel())
            rho.append(self.known[var][1].ravel())
        mu = tt.concatenate(mu)
        rho = tt.concatenate(rho)
    mu.name = self.__class__.__name__ + '_local_mu'
    rho.name = self.__class__.__name__ + '_local_rho'
    return mu, rho
def randidx(self, size=None):
    if size is None:
        size = (1,)
    elif isinstance(size, tt.TensorVariable):
        if size.ndim < 1:
            size = size[None]
        elif size.ndim > 1:
            raise ValueError('size ndim should be no more than 1d')
        else:
            pass
    else:
        size = tuple(np.atleast_1d(size))
    return (self._rng
            .uniform(size=size,
                     low=pm.floatX(0),
                     high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16))
            .astype('int32'))
def from_noise(cls, size, jitter=.01, local_rv=None,
               start=None, model=None, random_seed=None, **kwargs):
    """Initialize Histogram with random noise

    Parameters
    ----------
    size : `int`
        number of initial particles
    jitter : `float`
        initial sd
    local_rv : `dict`
        mapping {model_variable -> local_variable}
        Local Vars are used for Autoencoding Variational Bayes
        See (AEVB; Kingma and Welling, 2014) for details
    start : `Point`
        initial point
    model : :class:`pymc3.Model`
        PyMC3 model for inference
    random_seed : None or `int`
        leave None to use package global RandomStream or other
        valid value to create instance specific one
    kwargs : other kwargs passed to init

    Returns
    -------
    :class:`Empirical`
    """
    hist = cls(
        None,
        local_rv=local_rv,
        model=model,
        random_seed=random_seed,
        **kwargs)
    if start is None:
        start = hist.model.test_point
    else:
        start_ = hist.model.test_point.copy()
        update_start_vals(start_, start, hist.model)
        start = start_
    start = pm.floatX(hist.gbij.map(start))
    # Initialize particles
    x0 = np.tile(start, (size, 1))
    x0 += pm.floatX(np.random.normal(0, jitter, x0.shape))
    hist.histogram.set_value(x0)
    return hist
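A minimal usage sketch for `from_noise` (an assumption, not taken from the source): it presumes this classmethod is exposed as `pm.Empirical.from_noise` and uses a throwaway one-variable model.

# Hypothetical usage sketch (toy model assumed, not from the source).
import pymc3 as pm

with pm.Model() as toy_model:
    pm.Normal('x', mu=0., sd=1.)

# 100 particles jittered around the model test point.
approx = pm.Empirical.from_noise(100, jitter=0.01, model=toy_model)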
def test_observed_type(self):
    X_ = np.random.randn(100, 5)
    X = pm.floatX(theano.shared(X_))
    with pm.Model():
        x1 = pm.Normal('x1', observed=X_)
        x2 = pm.Normal('x2', observed=X)

    assert x1.type == X.type
    assert x2.type == X.type
def build_model():
    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t",
                      skiprows=1, usecols=(2, 3))
    atbats = pm.floatX(data[:, 0])
    hits = pm.floatX(data[:, 1])
    N = len(hits)
    # we want to bound the kappa below
    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
    with pm.Model() as model:
        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
        thetas = pm.Beta('thetas', alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
    return model
def apply(self, f):
    # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
    input_matrix = self.get_input()
    stein = Stein(
        approx=self.approx,
        kernel=f,
        input_matrix=input_matrix,
        temperature=self.temperature)
    return pm.floatX(-1) * stein.grad
def create_shared_params(self, start=None):
    if start is None:
        start = self.model.test_point
    else:
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = self.bij.map(start)
    rho = np.zeros((self.ddim,))
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        rho = np.tile(rho, (self.bdim, 1))
    return {'mu': theano.shared(
                pm.floatX(start), 'mu'),
            'rho': theano.shared(
                pm.floatX(rho), 'rho')}
def __init__(self, local_rv=None, model=None,
             cost_part_grad_scale=1,
             scale_cost_to_minibatch=False,
             random_seed=None, **kwargs):
    model = modelcontext(model)
    self._scale_cost_to_minibatch = theano.shared(np.int8(0))
    self.scale_cost_to_minibatch = scale_cost_to_minibatch
    if not isinstance(cost_part_grad_scale, theano.Variable):
        self.cost_part_grad_scale = theano.shared(pm.floatX(cost_part_grad_scale))
    else:
        self.cost_part_grad_scale = pm.floatX(cost_part_grad_scale)
    self._seed = random_seed
    self._rng = tt_rng(random_seed)
    self.model = model
    self.check_model(model, **kwargs)
    if local_rv is None:
        local_rv = {}

    def get_transformed(v):
        if hasattr(v, 'transformed'):
            return v.transformed
        return v

    known = {get_transformed(k): v for k, v in local_rv.items()}
    self.known = known
    self.local_vars = self.get_local_vars(**kwargs)
    self.global_vars = self.get_global_vars(**kwargs)
    self._g_order = ArrayOrdering(self.global_vars)
    self._l_order = ArrayOrdering(self.local_vars)
    self.gbij = DictToArrayBijection(self._g_order, {})
    self.lbij = DictToArrayBijection(self._l_order, {})
    self.symbolic_initial_local_matrix = tt.matrix(
        self.__class__.__name__ + '_symbolic_initial_local_matrix')
    self.symbolic_initial_global_matrix = tt.matrix(
        self.__class__.__name__ + '_symbolic_initial_global_matrix')
    self.global_flat_view = model.flatten(
        vars=self.global_vars,
        order=self._g_order,
    )
    self.local_flat_view = model.flatten(
        vars=self.local_vars,
        order=self._l_order,
    )
    self.symbolic_n_samples = self.symbolic_initial_global_matrix.shape[0]
def create_shared_params(self, trace=None):
    if trace is None:
        histogram = np.atleast_2d(self.gbij.map(self.model.test_point))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.global_size))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = self.gbij.map(trace.point(j, t))
                i += 1
    return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
def rslice(self, total, size, seed):
    if size is None:
        return slice(None)
    elif isinstance(size, int):
        rng = pm.tt_rng(seed)
        Minibatch.RNG[id(self)].append(rng)
        return (rng
                .uniform(size=(size, ), low=0.0, high=pm.floatX(total) - 1e-16)
                .astype('int64'))
    else:
        raise TypeError('Unrecognized size type, %r' % size)
def __call__(self, nmc, **kwargs):
    op = self.op  # type: KSD
    grad = op.apply(self.tf)
    if self.approx.all_histograms:
        z = self.approx.joint_histogram
    else:
        z = self.approx.symbolic_random
    if 'more_obj_params' in kwargs:
        params = self.obj_params + kwargs['more_obj_params']
    else:
        params = self.test_params + kwargs['more_tf_params']
    grad *= pm.floatX(-1)
    grads = tt.grad(None, params, known_grads={z: grad})
    return self.approx.set_size_and_deterministic(
        grads, nmc, 0, kwargs.get('more_replacements'))
def _quaddist_tau(self, delta):
    chol_tau = self.chol_tau
    _, k = delta.shape
    k = pm.floatX(k)

    diag = tt.nlinalg.diag(chol_tau)
    ok = tt.all(diag > 0)

    chol_tau = tt.switch(ok, chol_tau, 1)
    diag = tt.nlinalg.diag(chol_tau)
    delta_trans = tt.dot(delta, chol_tau)
    quaddist = (delta_trans ** 2).sum(axis=-1)
    logdet = -tt.sum(tt.log(diag))
    return quaddist, logdet, ok
def _quaddist_chol(self, delta):
    chol_cov = self.chol_cov
    _, k = delta.shape
    k = pm.floatX(k)

    diag = tt.nlinalg.diag(chol_cov)
    # Check if the covariance matrix is positive definite.
    ok = tt.all(diag > 0)
    # If not, replace the diagonal. We return -inf later, but
    # need to prevent solve_lower from throwing an exception.
    chol_cov = tt.switch(ok, chol_cov, 1)

    delta_trans = self.solve_lower(chol_cov, delta.T).T
    quaddist = (delta_trans ** 2).sum(axis=-1)
    logdet = tt.sum(tt.log(diag))
    return quaddist, logdet, ok
def normalizing_constant(self):
    """
    Constant to divide when we want to scale down loss
    from minibatches
    """
    t = self.to_flat_input(
        tt.max([v.scaling for v in self.model.basic_RVs]))
    t = theano.clone(t, {
        self.global_input: self.symbolic_random_global_matrix[0],
        self.local_input: self.symbolic_random_local_matrix[0]
    })
    # remove randomness, we do not need it here at all
    t = self.set_size_and_deterministic(t, 1, 1)
    # if not scale_cost_to_minibatch: t = 1
    t = tt.switch(self._scale_cost_to_minibatch, t,
                  tt.constant(1, dtype=t.dtype))
    return pm.floatX(t)
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError('Need `trace` or `size` to initialize')
        else:
            if start is None:
                start = self.model.test_point
            else:
                start_ = self.model.test_point.copy()
                update_start_vals(start_, start, self.model)
                start = start_
            start = pm.floatX(self.bij.map(start))
            # Initialize particles
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = self.bij.map(trace.point(j, t))
                i += 1
    return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
def create_shared_params(self, start=None):
    if start is None:
        start = self.model.test_point
    else:
        start_ = self.model.test_point.copy()
        update_start_vals(start_, start, self.model)
        start = start_
    start = pm.floatX(self.gbij.map(start))
    n = self.global_size
    L_tril = (
        np.eye(n)
        [np.tril_indices(n)]
        .astype(theano.config.floatX)
    )
    return {'mu': theano.shared(start, 'mu'),
            'L_tril': theano.shared(L_tril, 'L_tril')}
def adagrad_window(loss_or_grads=None, params=None,
                   learning_rate=0.001, epsilon=.1, n_win=10):
    """Returns a function that returns parameter updates.
    Instead of accumulated estimate, uses running window

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate.
    epsilon : float
        Offset to avoid zero-division in the normalizer of adagrad.
    n_win : int
        Number of past steps to calculate scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    elif loss_or_grads is None or params is None:
        raise ValueError('Please provide both `loss_or_grads` and `params` to get updates')
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        i = theano.shared(pm.floatX(0))
        i_int = i.astype('int32')
        value = param.get_value(borrow=True)
        accu = theano.shared(
            np.zeros(value.shape + (n_win,), dtype=value.dtype))

        # Append squared gradient vector to accu_new
        accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2)
        i_new = tt.switch((i + 1) < n_win, i + 1, 0)
        updates[accu] = accu_new
        updates[i] = i_new

        accu_sum = accu_new.sum(axis=-1)
        updates[param] = param - (learning_rate * grad /
                                  tt.sqrt(accu_sum + epsilon))
    return updates
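Because calling the optimizer without `loss_or_grads`/`params` returns a partial application, it can be handed straight to a fit as `obj_optimizer`; a minimal sketch, assuming a throwaway model that is not part of the source:

# Hypothetical usage sketch: hand adagrad_window to ADVI via obj_optimizer.
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', 0., 10.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(50))
    # Only the hyperparameters are bound here; the loss and params
    # are supplied internally during fitting.
    approx = pm.fit(1000, obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=10))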
def test_scale_cost_to_minibatch_works(aux_total_size): mu0 = 1.5 sigma = 1.0 y_obs = np.array([1.6, 1.4]) beta = len(y_obs)/float(aux_total_size) post_mu = np.array([1.88], dtype=theano.config.floatX) post_sd = np.array([1], dtype=theano.config.floatX) # TODO: theano_config # with pm.Model(theano_config=dict(floatX='float64')): # did not not work as expected # there were some numeric problems, so float64 is forced with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'): with pm.Model(): assert theano.config.floatX == 'float64' assert theano.config.warn_float64 == 'ignore' mu = pm.Normal('mu', mu=mu0, sd=sigma) pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) # Create variational gradient tensor mean_field_1 = MeanField() assert mean_field_1.scale_cost_to_minibatch mean_field_1.shared_params['mu'].set_value(post_mu) mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) with pm.theanof.change_flags(compute_test_value='off'): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): mu = pm.Normal('mu', mu=mu0, sd=sigma) pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) # Create variational gradient tensor mean_field_2 = MeanField() assert mean_field_1.scale_cost_to_minibatch mean_field_2.scale_cost_to_minibatch = False assert not mean_field_2.scale_cost_to_minibatch mean_field_2.shared_params['mu'].set_value(post_mu) mean_field_2.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) with pm.theanof.change_flags(compute_test_value='off'): elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000) np.testing.assert_allclose(elbo_via_total_size_unscaled.eval(), elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta), rtol=0.02, atol=1e-1)
def __call__(self, nmc, **kwargs):
    op = self.op  # type: KSD
    grad = op.apply(self.tf)
    loc_size = self.approx.local_size
    local_grad = grad[..., :loc_size]
    global_grad = grad[..., loc_size:]
    if 'more_obj_params' in kwargs:
        params = self.obj_params + kwargs['more_obj_params']
    else:
        params = self.test_params + kwargs['more_tf_params']
    grad *= pm.floatX(-1)
    zl, zg = self.get_input()
    zl, zg, grad, local_grad, global_grad = self.approx.set_size_and_deterministic(
        (zl, zg, grad, local_grad, global_grad), nmc, 0)
    grad = tt.grad(None, params,
                   known_grads=collections.OrderedDict([
                       (zl, local_grad),
                       (zg, global_grad)
                   ]),
                   disconnected_inputs='ignore')
    return grad
def integers():
    i = 0
    while True:
        yield pm.floatX(i)
        i += 1
xs = [z[:, np.newaxis] * rng.multivariate_normal(m, np.eye(2), size=n_samples) for z, m in zip(zs, ms)] data = np.sum(np.dstack(xs), axis=2) plt.figure(figsize=(5, 5)) plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5) plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100) plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100) from pymc3.math import logsumexp #Model original with pm.Model() as model: mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(2)), tau=pm.floatX(0.1 * np.eye(2)), shape=(2,)) for i in range(2)] pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,)) xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data) # # #Model for GMM clustering # with pm.Model() as model: # # cluster sizes # p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2) # # ensure all clusters have some points # p_min_potential = pm.Potential('p_min_potential', tt.switch(tt.min(p) < .1, -np.inf, 0)) #
class TestElementWiseLogp(SeededTest): def build_model(self, distfam, params, shape, transform, testval=None): if testval is not None: testval = pm.floatX(testval) with pm.Model() as m: distfam('x', shape=shape, transform=transform, testval=testval, **params) return m def check_transform_elementwise_logp(self, model): x0 = model.deterministics[0] x = model.free_RVs[0] assert x.ndim == x.logp_elemwiset.ndim pt = model.test_point array = np.random.randn(*pt[x.name].shape) pt[x.name] = array dist = x.distribution logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) jacob_det = dist.transform_used.jacobian_det(theano.shared(array)) assert x.logp_elemwiset.ndim == jacob_det.ndim elementwiselogp = logp_nojac + jacob_det close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol) def check_vectortransform_elementwise_logp(self, model, vect_opt=0): x0 = model.deterministics[0] x = model.free_RVs[0] assert (x.ndim - 1) == x.logp_elemwiset.ndim pt = model.test_point array = np.random.randn(*pt[x.name].shape) pt[x.name] = array dist = x.distribution logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) jacob_det = dist.transform_used.jacobian_det(theano.shared(array)) assert x.logp_elemwiset.ndim == jacob_det.ndim if vect_opt == 0: # the original distribution is univariate elementwiselogp = logp_nojac.sum(axis=-1) + jacob_det else: elementwiselogp = logp_nojac + jacob_det # Hack to get relative tolerance a = x.logp_elemwise(pt) b = elementwiselogp.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @pytest.mark.parametrize('sd,shape', [ (2.5, 2), (5., (2, 3)), (np.ones(3) * 10., (4, 3)), ]) def test_half_normal(self, sd, shape): model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize('lam,shape', [(2.5, 2), (5., (2, 3)), (np.ones(3), (4, 3))]) def test_exponential(self, lam, shape): model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize('a,b,shape', [ (1., 1., 2), (.5, .5, (2, 3)), (np.ones(3), np.ones(3), (4, 3)), ]) def test_beta(self, a, b, shape): model = self.build_model(pm.Beta, { 'alpha': a, 'beta': b }, shape=shape, transform=tr.logodds) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize('lower,upper,shape', [(0., 1., 2), (.5, 5.5, (2, 3)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))]) def test_uniform(self, lower, upper, shape): interval = tr.Interval(lower, upper) model = self.build_model(pm.Uniform, { 'lower': lower, 'upper': upper }, shape=shape, transform=interval) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize('mu,kappa,shape', [(0., 1., 2), (-.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))]) def test_vonmises(self, mu, kappa, shape): model = self.build_model(pm.VonMises, { 'mu': mu, 'kappa': kappa }, shape=shape, transform=tr.circular) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize('a,shape', [(np.ones(2), 2), (np.ones((2, 3)) * .5, (2, 3)), (np.ones(3), (4, 3))]) def test_dirichlet(self, a, shape): model = self.build_model(pm.Dirichlet, {'a': a}, shape=shape, transform=tr.stick_breaking) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): model = self.build_model(pm.Normal, { 'mu': 0., 'sd': 1. 
}, shape=3, testval=np.asarray([-1., 1., 4.]), transform=tr.ordered) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('sd,shape', [ (2.5, (2, )), (np.ones(3), (4, 3)), ]) @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") def test_half_normal_ordered(self, sd, shape): testval = np.sort(np.abs(np.random.randn(*shape))) model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered])) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('lam,shape', [(2.5, (2, )), (np.ones(3), (4, 3))]) def test_exponential_ordered(self, lam, shape): testval = np.sort(np.abs(np.random.randn(*shape))) model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered])) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('a,b,shape', [ (1., 1., (2, )), (np.ones(3), np.ones(3), (4, 3)), ]) def test_beta_ordered(self, a, b, shape): testval = np.sort(np.abs(np.random.rand(*shape))) model = self.build_model(pm.Beta, { 'alpha': a, 'beta': b }, shape=shape, testval=testval, transform=tr.Chain([tr.logodds, tr.ordered])) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('lower,upper,shape', [(0., 1., (2, )), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))]) def test_uniform_ordered(self, lower, upper, shape): interval = tr.Interval(lower, upper) testval = np.sort(np.abs(np.random.rand(*shape))) model = self.build_model(pm.Uniform, { 'lower': lower, 'upper': upper }, shape=shape, testval=testval, transform=tr.Chain([interval, tr.ordered])) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('mu,kappa,shape', [(0., 1., (2, )), (np.zeros(3), np.ones(3), (4, 3))]) def test_vonmises_ordered(self, mu, kappa, shape): testval = np.sort(np.abs(np.random.rand(*shape))) model = self.build_model(pm.VonMises, { 'mu': mu, 'kappa': kappa }, shape=shape, testval=testval, transform=tr.Chain([tr.circular, tr.ordered])) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('lower,upper,shape,transform', [(0., 1., (2, ), tr.stick_breaking), (.5, 5.5, (2, 3), tr.stick_breaking), (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds]))]) def test_uniform_other(self, lower, upper, shape, transform): testval = np.ones(shape) / shape[-1] model = self.build_model(pm.Uniform, { 'lower': lower, 'upper': upper }, shape=shape, testval=testval, transform=transform) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize('mu,cov,shape', [ (np.zeros(2), np.diag(np.ones(2)), (2, )), (np.zeros(3), np.diag(np.ones(3)), (4, 3)), ]) def test_mvnormal_ordered(self, mu, cov, shape): testval = np.sort(np.random.randn(*shape)) model = self.build_model(pm.MvNormal, { 'mu': mu, 'cov': cov }, shape=shape, testval=testval, transform=tr.ordered) self.check_vectortransform_elementwise_logp(model, vect_opt=1)
def gen():
    for i in range(2):
        yield floatX(np.ones((10, 10)) * i)
(delta(mu).dot(tau) * delta(mu)).sum(axis=1)) # Log likelihood of Gaussian mixture distribution def logp_gmix(mus, pi, taus, n_components): def logp_(value): logps = [tt.log(pi[i]) + logp_normal(mus[i,:], taus[i], value) for i in range(n_components)] return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0)) return logp_ ## Prior for model: componentMean = ms + np.random.uniform(0,5,n_dimensions) componentTau = np.random.uniform(0,2,n_dimensions) * np.eye(n_dimensions) with pm.Model() as model: mus = MvNormal('mu', mu=pm.floatX(componentMean), tau=pm.floatX(componentTau), shape=(n_components, n_dimensions)) pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)), shape=(n_components,)) packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2., sd_dist=pm.HalfCauchy.dist(2.5)) for i in range(n_components)] L = [pm.expand_packed_triangular(n_dimensions, packed_L[i]) for i in range(n_components)] sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i],L[i].T)) for i in range(n_components)] taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)] xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components), observed=data) with model: advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1)) advi_trace = advi_fit.sample(10000) advi_summary = pm.summary(advi_trace) pickle_out = open("advi_summary.pickle","wb") pickle.dump(advi_summary, pickle_out)
def run_normal_mv_model_prior(data, K=3, mus=None, mc_samples=10000, jobs=1, n_cols=10, n_rows=100, neigs=1): n_samples, n_feats = data.shape n_samples = n_cols * n_rows max_neigs = 4 * neigs * (neigs + 1) #print max_neigs to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs) inds = np.where(to_fill != -1)[0] to_fill = to_fill[to_fill != -1] aux = tt.ones(n_samples * max_neigs) * -69 with pm.Model() as model: packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2., sd_dist=pm.HalfCauchy.dist(2.5)) L = pm.expand_packed_triangular(n_feats, packed_L) sigma = pm.Deterministic('Sigma', L.dot(L.T)) mus = 0. if mus is None else mus mus = pm.Normal('mus', mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]], sd=10, shape=(K, n_feats)) #sds = pm.HalfNormal('sds',sd = 50, shape = (K,n_feats) ) #mus = pm.Normal('mus', mu = [10,55,105,155,205], sd = sds , shape=(K,n_feats) ) #nu = pm.Exponential('nu', 1./10, shape=(K,n_feats), testval=tt.ones((K,n_feats)) ) #mus = pm.StudentT('mus',nu=nu, mu = [[10],[55],[105],[155],[205]], sd = 100., shape=(K,n_feats)) pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K) #TODO one pi per voxel category = pm.Categorical('category', p=pi, shape=n_samples) #pm.Deterministic('pri', tt.as_tensor_variable(get_prior2(category))) #prior = pm.Deterministic('prior',tt.stack( [tt.sum(tt.eq(category[i], category[indxs_neig(i, n_rows=73, n_cols=74)]))/8.0 for i in range(73*74) ] )) #prior = pm.Deterministic('prior',tt.sum(tt.eq(category , category[[j for j in range(8)]].reshape( (8,1) ) ))) aux2 = tt.set_subtensor(aux[inds], category[to_fill]) prior = pm.Deterministic( 'prior', (tt.sum(tt.eq(aux2.reshape( (n_samples, max_neigs)), category.reshape((n_samples, 1))), axis=1) + 0.0) / 8.0) #prior2 = pm.Normal('prior2', mu = prior, sd = 0.5, shape= n_samples) # aux3 = tt.as_tensor_variable(pm.floatX([1,1,2,2,2,2,2,2,2,2]*100 )) # aux3 = tt.set_subtensor( aux3[(tt.eq(category,1)).nonzero()], 2 ) # prior2 = pm.Deterministic('prior2', aux3 ) # xs = DensityDist('x', logp_gmix(mus[category], L, prior, category), observed=data) with model: step2 = pm.ElemwiseCategorical(vars=[category], values=range(K)) #step = pm.CategoricalGibbsMetropolis(vars = [prior] ) trace = sample(mc_samples, step=[step2], n_jobs=jobs, tune=600) pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma']) plt.title('normal mv model 40 cols') mod = stats.mode(trace['category'][int(mc_samples * 0.75):]) #if chains > 1: # print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values())) return model, mod, trace
class TestElementWiseLogp(SeededTest): def build_model(self, distfam, params, size, transform, initval=None): if initval is not None: initval = pm.floatX(initval) with pm.Model() as m: distfam("x", size=size, transform=transform, initval=initval, **params) return m def check_transform_elementwise_logp(self, model): x = model.free_RVs[0] x0 = x.tag.value_var assert x.ndim == logpt(x).ndim pt = model.initial_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform logp_notrans = logpt(x, transform.backward(x, array), transformed=False) jacob_det = transform.jacobian_det(x, aesara.shared(array)) assert logpt(x).ndim == jacob_det.ndim v1 = logpt(x, array, jacobian=False).eval() v2 = logp_notrans.eval() close_to(v1, v2, tol) def check_vectortransform_elementwise_logp(self, model, vect_opt=0): x = model.free_RVs[0] x0 = x.tag.value_var assert (x.ndim - 1) == logpt(x).ndim pt = model.initial_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform logp_nojac = logpt(x, transform.backward(x, array), transformed=False) jacob_det = transform.jacobian_det(x, aesara.shared(array)) assert logpt(x).ndim == jacob_det.ndim # Hack to get relative tolerance a = logpt(x, array.astype(aesara.config.floatX), jacobian=False).eval() b = logp_nojac.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @pytest.mark.parametrize( "sd,size", [ (2.5, 2), (5.0, (2, 3)), (np.ones(3) * 10.0, (4, 3)), ], ) def test_half_normal(self, sd, size): model = self.build_model(pm.HalfNormal, {"sd": sd}, size=size, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) def test_exponential(self, lam, size): model = self.build_model(pm.Exponential, {"lam": lam}, size=size, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( "a,b,size", [ (1.0, 1.0, 2), (0.5, 0.5, (2, 3)), (np.ones(3), np.ones(3), (4, 3)), ], ) def test_beta(self, a, b, size): model = self.build_model(pm.Beta, { "alpha": a, "beta": b }, size=size, transform=tr.logodds) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( "lower,upper,size", [ (0.0, 1.0, 2), (0.5, 5.5, (2, 3)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)), ], ) def test_uniform(self, lower, upper, size): def transform_params(rv_var): _, _, _, lower, upper = rv_var.owner.inputs lower = at.as_tensor_variable(lower) if lower is not None else None upper = at.as_tensor_variable(upper) if upper is not None else None return lower, upper interval = tr.Interval(transform_params) model = self.build_model(pm.Uniform, { "lower": lower, "upper": upper }, size=size, transform=interval) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( "lower, c, upper, size", [ (0.0, 1.0, 2.0, 2), (-10, 0, 200, (2, 3)), (np.zeros(3), np.ones(3), np.ones(3), (4, 3)), ], ) def test_triangular(self, lower, c, upper, size): def transform_params(rv_var): _, _, _, lower, _, upper = rv_var.owner.inputs lower = at.as_tensor_variable(lower) if lower is not None else None upper = at.as_tensor_variable(upper) if upper is not None else None return lower, upper interval = tr.Interval(transform_params) model = self.build_model(pm.Triangular, { "lower": lower, "c": c, "upper": upper }, size=size, transform=interval) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("mu,kappa,size", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))]) def test_vonmises(self, mu, kappa, size): model = 
self.build_model(pm.VonMises, { "mu": mu, "kappa": kappa }, size=size, transform=tr.circular) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("a,size", [(np.ones(2), None), (np.ones((2, 3)) * 0.5, None), (np.ones(3), (4, ))]) def test_dirichlet(self, a, size): model = self.build_model(pm.Dirichlet, {"a": a}, size=size, transform=tr.stick_breaking) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): model = self.build_model( pm.Normal, { "mu": 0.0, "sd": 1.0 }, size=3, initval=np.asarray([-1.0, 1.0, 4.0]), transform=tr.ordered, ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( "sd,size", [ (2.5, (2, )), (np.ones(3), (4, 3)), ], ) @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_half_normal_ordered(self, sd, size): initval = np.sort(np.abs(np.random.randn(*size))) model = self.build_model( pm.HalfNormal, {"sd": sd}, size=size, initval=initval, transform=tr.Chain([tr.log, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize("lam,size", [(2.5, (2, )), (np.ones(3), (4, 3))]) def test_exponential_ordered(self, lam, size): initval = np.sort(np.abs(np.random.randn(*size))) model = self.build_model( pm.Exponential, {"lam": lam}, size=size, initval=initval, transform=tr.Chain([tr.log, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( "a,b,size", [ (1.0, 1.0, (2, )), (np.ones(3), np.ones(3), (4, 3)), ], ) def test_beta_ordered(self, a, b, size): initval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.Beta, { "alpha": a, "beta": b }, size=size, initval=initval, transform=tr.Chain([tr.logodds, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( "lower,upper,size", [(0.0, 1.0, (2, )), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))], ) def test_uniform_ordered(self, lower, upper, size): def transform_params(rv_var): _, _, _, lower, upper = rv_var.owner.inputs lower = at.as_tensor_variable(lower) if lower is not None else None upper = at.as_tensor_variable(upper) if upper is not None else None return lower, upper interval = tr.Interval(transform_params) initval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.Uniform, { "lower": lower, "upper": upper }, size=size, initval=initval, transform=tr.Chain([interval, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) @pytest.mark.parametrize("mu,kappa,size", [(0.0, 1.0, (2, )), (np.zeros(3), np.ones(3), (4, 3))]) def test_vonmises_ordered(self, mu, kappa, size): initval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.VonMises, { "mu": mu, "kappa": kappa }, size=size, initval=initval, transform=tr.Chain([tr.circular, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( "lower,upper,size,transform", [ (0.0, 1.0, (2, ), tr.stick_breaking), (0.5, 5.5, (2, 3), tr.stick_breaking), (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])), ], ) def test_uniform_other(self, lower, upper, size, transform): initval = np.ones(size) / size[-1] model = self.build_model( pm.Uniform, { "lower": lower, "upper": upper }, size=size, initval=initval, transform=transform, ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) @pytest.mark.parametrize( "mu,cov,size,shape", [ (np.zeros(2), 
np.diag(np.ones(2)), None, (2, )), (np.zeros(3), np.diag(np.ones(3)), (4, ), (4, 3)), ], ) def test_mvnormal_ordered(self, mu, cov, size, shape): initval = np.sort(np.random.randn(*shape)) model = self.build_model(pm.MvNormal, { "mu": mu, "cov": cov }, size=size, initval=initval, transform=tr.ordered) self.check_vectortransform_elementwise_logp(model, vect_opt=1)
zij = pm.Deterministic('zij', tt.lt(zij_, phii[companyABC]))
beta_mu = pm.Deterministic('beta_mu', tt.switch(zij, linerpredi, pi_ij))
# Observed_pred = pm.Weibull("Observed_pred", alpha=mu, beta=sigma, shape=elec_faults.shape)  # observed values
Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu, observed=elec_faults)  # observed values

# start = pm.find_MAP()
# step = pm.Slice([beta1, u])
# step = pm.NUTS(scaling=cov, is_cov=True)
# trace = pm.sample(3000, init='advi', tune=1000)

with model1:
    s = shared(pm.floatX(1))
    inference = pm.ADVI(cost_part_grad_scale=s)
    # ADVI has nearly converged
    inference.fit(n=20000)
    # It is time to set `s` to zero
    s.set_value(0)
    approx = inference.fit(n=10000)
    trace = approx.sample(3000, include_transformed=True)
    elbos1 = -inference.hist

chain = trace[2000:]
varnames2 = ['beta', 'beta1', 'beta2', 'beta3']
# pm.plot_posterior(chain2, varnames2, ref_val=0)
pm.traceplot(chain)
plt.show()
pm.traceplot(chain, varnames2)
def logp(self, value):
    trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
    m = self.m
    n = self.n
    norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi))
    return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet
## Build model and sample # Number of iterations for sampler draws = 2000 # Prepare lists of starting points for mu to prevent label-switching problem testvals = [[-2, -2], [0, 0], [2, 2]] # Model structure with pm.Model() as mvgmm: # Prior over component weights p = pm.Dirichlet('p', a=np.array([1.] * K)) # Prior over component means mus = [ pm.MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(D)), tau=pm.floatX(0.1 * np.eye(D)), shape=(D, ), testval=pm.floatX(testvals[i])) for i in range(K) ] # Cholesky decomposed LKJ prior over component covariance matrices packed_L = [ pm.LKJCholeskyCov('packed_L_%d' % i, n=D, eta=2., sd_dist=pm.HalfCauchy.dist(1)) for i in range(K) ] # Unpack packed_L into full array L = [pm.expand_packed_triangular(D, packed_L[i]) for i in range(K)]
def trainBNN(self,inputsTrain,errInputsTrain, targetsTrain,errTargetsTrain, neuronsPerHiddenlayer,sampler, nsamp,bnnmodelpkl,plotdir, ncores=2,viewBNN=False): """ TRAINS BAYESIAN NEURAL NETWORK ACCORDING TO SPECIFIED TRAINING DATA, SAVES MODEL, AND VISUALIZES BNN IF DESIRED Arguments: inputsTrain - input training set (, [ntrain*ninputs], where ntrain is the number of training measurements and ninputs is the number of inputs) errInputsTrain - errors on input training set (, [ntrain*ninputs]) targetsTrain - target training set (, [ntrain*ntargets], where ntargets is the number of targets) errTargetsTrain - errors on target training set (, [ntrain*ninputs]) neuronsPerHiddenlayer - number of neurons in hidden layer sampler - ADVI variational inference sampler or No U-Turn Sampler (NUTS) (much slower) nsamp - number of samples to generate bnnmodelpkl - name of pickle file to store trained BNN plotdir - directory for storing any associated plots ncores - number of cores to use for NUTS sampler (default 2) viewBNN - whether to visualize and plot BNN (default False) Returns: scaleData - scaled dataset (, [ndata*nvars]) scaleEData - scaled observed errors on dataset (, [ndata*nvars]) """ ntrain,ninputs = np.shape(inputsTrain) ntrain,ntargets = np.shape(targetsTrain) # Calculate and scale inputs and targets inputsMu,inputsSig = self.calcScale(inputsTrain,errInputsTrain) targetsMu,targetsSig = self.calcScale(targetsTrain,errTargetsTrain) inputsTrainScale,errInputsTrainScale = \ self.scaleData(inputsTrain,errInputsTrain,inputsMu,inputsSig) targetsTrainScale,errTargetsTrainScale = \ self.scaleData(targetsTrain,errTargetsTrain,targetsMu,targetsSig) # Initialize weights, biases on neurons, and true X and Y np.random.seed(30) initWtsInHid = np.random.randn(ninputs,neuronsPerHiddenlayer) initBiasInHid = np.random.randn(neuronsPerHiddenlayer) initWtsHidHid = np.random.randn(neuronsPerHiddenlayer,neuronsPerHiddenlayer) initBiasHidHid = np.random.randn(neuronsPerHiddenlayer) initWtsHidOut = np.random.randn(neuronsPerHiddenlayer,ntargets) initBiasHidOut = np.random.randn(ntargets) initX = np.random.randn(ntrain,ninputs) # Specify neural network with pm.Model() as neural_network: # Priors on weights and biases from input to first hidden layer wtsInHid = pm.Normal('wtsInHid', mu = 0, sd = 1, shape = (ninputs,neuronsPerHiddenlayer), testval = initWtsInHid) biasInHid = pm.Normal('biasInHid', mu = 0, sd = 1, shape = (neuronsPerHiddenlayer,), testval = initBiasInHid) # Priors on weights and biases from first hidden layer to second hidden layer wtsHidHid = pm.Normal('wtsHidHid', mu = 0, sd = 1, shape = (neuronsPerHiddenlayer,neuronsPerHiddenlayer), testval = initWtsHidHid) biasHidHid = pm.Normal('biasHidHid', mu = 0, sd = 1, shape = (neuronsPerHiddenlayer,), testval = initBiasHidHid) # Priors on weights and biases from second hidden layer to output wtsHidOut = pm.Normal('wtsHidOut', mu = 0, sd = 1, shape = (neuronsPerHiddenlayer,ntargets), testval = initWtsHidOut) biasHidOut = pm.Normal('biasHidOut', mu = 0, sd = 1, shape = (ntargets,), testval = initBiasHidOut) # Priors on true inputs (mean zeros assuming they have been scaled, and std 1 means similar to measured values) xTrue = pm.Normal('xTrue', mu = 0, sd = 10, shape = (ntrain, ninputs), testval = initX) # Expected outcome actHid1 = TT.nnet.sigmoid(TT.dot(xTrue,wtsInHid)+biasInHid) actHid2 = TT.nnet.sigmoid(TT.dot(actHid1,wtsHidHid)+biasHidHid) actOut = TT.dot(actHid2,wtsHidOut)+biasHidOut yTrue = pm.Deterministic('yTrue',actOut) # Likelihoods of observations 
(sampling distribution - fixed) xTrainObs = pm.Normal('xTrainObs', mu = xTrue, sd = errInputsTrainScale, observed = inputsTrainScale, total_size = (ntrain,ninputs)) yTrainObs = pm.Normal('yTrainObs', mu = yTrue, sd = errTargetsTrainScale, observed = targetsTrainScale, total_size = (ntrain,ntargets)) # Train BNN print("Training Bayesian neural network with...") with neural_network: if (sampler=="advi"): # Fit with ADVI sampler print(" ...the ADVI sampler...") s = theano.shared(pm.floatX(1)) inference = pm.ADVI(cost_part_grad_scale=s) ftt = pm.fit(n=nsamp, method=inference) trace = ftt.sample(nsamp) fig = plt.figure(figsize=(6,4)) plt.plot(-ftt.hist) plt.ylabel('ELBO') fig.savefig(plotdir+"advi_fitprogress.eps") else: # Fit with NUTS sampler print("... ...the NUTS sampler...") step = pm.NUTS(target_accept=0.95) ntune = 1000 trace = pm.sample(nsamp,random_seed=10,step=step,tune=ntune,cores=ncores) print("...done.") # Save BNN to file print("Saving BNN, trace, and scaling of inputs and outputs to "+bnnmodelpkl+"...") with open(bnnmodelpkl,"wb") as buff: pickle.dump({'inputsMu':inputsMu,\ 'inputsSig':inputsSig,\ 'targetsMu':targetsMu,\ 'targetsSig':targetsSig,\ 'model': neural_network,\ 'neuronsPerHiddenlayer': neuronsPerHiddenlayer,\ 'trace': trace}, buff) print("...done.") if (viewBNN==True): # View neural_network model neural_network # View the free random variables (i.e. the ones you are obtaining posteriors for!) in the model neural_network.free_RVs # If desired plot neural network fig,ax=plt.subplots(7,2,figsize=(16,6)) pm.traceplot(trace,ax=ax) fig.savefig(plotdir+"neural_network.eps",format='eps',dpi=100,bbox_inches='tight') return
h = self.act(self.dfc2(h)) h = h.reshape([zs.shape[0], *self.conved_shape[1:]]) h = self.act(self.dconv1(h)) h = self.dconv2(h) return tt.nnet.sigmoid(h) logger.info("loading dataset") batch_size = 128 train_mnist = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])) x_train = train_mnist.train_data.numpy() data = pm.floatX(x_train.reshape(-1, 1, 28, 28)) data /= numpy.max(data) logger.info("defining symbols") n_latent = 2 vae = VAE(n_latent) xs = tt.tensor4("xs") xs.tag.test_value = numpy.zeros((batch_size, 1, 28, 28)).astype('float32') logger.info("building model") with pm.Model() as model: zs = pm.Normal("zs", mu=0, sd=1, shape=(batch_size, n_latent), dtype=theano.config.floatX, total_size=len(data)) xs_ = pm.Normal("xs_", mu=vae.decode(zs), sd=0.1, observed=xs, dtype=theano.config.floatX, total_size=len(data))
def cov(self):
    x = (self.histogram - self.mean)
    return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
def _get_scaling(total_size, shape, ndim):
    """
    Gets scaling constant for logp

    Parameters
    ----------
    total_size : int or list[int]
    shape : shape
        shape to scale
    ndim : int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = pm.floatX(1)
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = pm.floatX(total_size) / pm.floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(isinstance(i, int) for i in total_size
                   if (i is not Ellipsis and i is not None)):
            raise TypeError('Unrecognized `total_size` type, expected '
                            'int or list of ints, got %r' % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep + 1:]
            if Ellipsis in end:
                raise ValueError(
                    'Double Ellipsis in `total_size` is restricted, got %r'
                    % total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError('Length of `total_size` is too big, '
                             'number of scalings is bigger than ndim, got %r'
                             % total_size)
        elif (len(begin) + len(end)) == 0:
            return pm.floatX(1)
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [
            pm.floatX(t) / shp_begin[i]
            for i, t in enumerate(begin) if t is not None
        ]
        end_coef = [
            pm.floatX(t) / shp_end[i]
            for i, t in enumerate(end) if t is not None
        ]
        coefs = begin_coef + end_coef
        coef = tt.prod(coefs)
    else:
        raise TypeError('Unrecognized `total_size` type, expected '
                        'int or list of ints, got %r' % total_size)
    return tt.as_tensor(pm.floatX(coef))
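A worked example of the list-form scaling rule above (plain arithmetic; the concrete numbers are an illustrative assumption, not from the source):

# With total_size=[1000, Ellipsis, 5] and a minibatch of shape (100, 5),
# begin=[1000] scales the leading dim by 1000/100 and end=[5] scales the
# trailing dim by 5/5, so the minibatch logp is multiplied by 10.
shape = (100, 5)
coef = (1000 / shape[0]) * (5 / shape[-1])
assert coef == 10.0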
data = data[data['treatment'] != 'cellblaster']
# Rename treatments
data['treatment'] = data['treatment'].apply(lambda x: renamed_treatments[x])
# Sort the data according to the treatments.
treatment_order = ['FM1', 'FM2', 'FM3', 'FM4', 'CTRL1', 'CTRL2']
data['treatment'] = data['treatment'].astype('category')
data['treatment'].cat.set_categories(treatment_order, inplace=True)
data['treatment'] = data['treatment'].cat.codes.astype('int32')
data = data.sort_values(['treatment']).reset_index(drop=True)
data['site'] = data['site'].astype('category').cat.codes.astype('int32')
data['frac_change_colonies'] = (
    (data['colonies_post'] - data['colonies_pre'])
    / data['colonies_pre'])
data['frac_change_colonies'] = pm.floatX(data['frac_change_colonies'])
del data['screen protector']

# Change dtypes to int32 for GPU usage.
def change_dtype(data, dtype='int32'):
    return data.astype(dtype)

cols_to_change_ints = [
    'sample_id', 'colonies_pre', 'colonies_post',
    'morphologies_pre', 'morphologies_post', 'phone ID'
]
cols_to_change_floats = [
    'year',
def create_minibatch(data):
    while True:
        data = np.roll(data, 100, axis=0)
        yield pm.floatX(data[:100])
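A minimal sketch (data and model are assumed, not from the source) of wrapping this rolling-window generator with `pm.generator` so each evaluation of the graph sees a fresh 100-element slice:

# Hypothetical usage sketch for create_minibatch with pm.generator.
import numpy as np
import pymc3 as pm

full_data = pm.floatX(np.random.randn(1000))
minibatch_x = pm.generator(create_minibatch(full_data))

with pm.Model():
    mu = pm.Normal('mu', 0., 10.)
    pm.Normal('obs', mu=mu, sd=1., observed=minibatch_x, total_size=len(full_data))
    approx = pm.fit(1000)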
def run_lda(args): tf_vectorizer, docs_tr, docs_te = prepare_sparse_matrix_nonlabel(args.n_tr, args.n_te, args.n_word) feature_names = tf_vectorizer.get_feature_names() doc_tr_minibatch = pm.Minibatch(docs_tr.toarray(), args.bsz) doc_tr = shared(docs_tr.toarray()[:args.bsz]) def log_prob(beta, theta): """Returns the log-likelihood function for given documents. K : number of topics in the model V : number of words (size of vocabulary) D : number of documents (in a mini-batch) Parameters ---------- beta : tensor (K x V) Word distributions. theta : tensor (D x K) Topic distributions for documents. """ def ll_docs_f(docs): dixs, vixs = docs.nonzero() vfreqs = docs[dixs, vixs] ll_docs = (vfreqs * pmmath.logsumexp(tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel()) return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9) return ll_docs_f with pm.Model() as model: beta = Dirichlet("beta", a=pm.floatX((1. / args.n_topic) * np.ones((args.n_topic, args.n_word))), shape=(args.n_topic, args.n_word), ) theta = Dirichlet("theta", a=pm.floatX((10. / args.n_topic) * np.ones((args.bsz, args.n_topic))), shape=(args.bsz, args.n_topic), total_size=args.n_tr, ) doc = pm.DensityDist("doc", log_prob(beta, theta), observed=doc_tr) encoder = ThetaEncoder(n_words=args.n_word, n_hidden=100, n_topics=args.n_topic) local_RVs = OrderedDict([(theta, encoder.encode(doc_tr))]) encoder_params = encoder.get_params() s = shared(args.lr) def reduce_rate(a, h, i): s.set_value(args.lr / ((i / args.bsz) + 1) ** 0.7) with model: approx = pm.MeanField(local_rv=local_RVs) approx.scale_cost_to_minibatch = False inference = pm.KLqp(approx) inference.fit(args.n_iter, callbacks=[reduce_rate, pm.callbacks.CheckParametersConvergence(diff="absolute")], obj_optimizer=pm.adam(learning_rate=s), more_obj_params=encoder_params, total_grad_norm_constraint=200, more_replacements={ doc_tr: doc_tr_minibatch }, ) doc_tr.set_value(docs_tr.toarray()) inp = tt.matrix(dtype="int64") sample_vi_theta = theano.function([inp], approx.sample_node(approx.model.theta, args.n_sample, more_replacements={doc_tr: inp}), ) test = docs_te.toarray() test_n = test.sum(1) beta_pymc3 = pm.sample_approx(approx, draws=args.n_sample)['beta'] theta_pymc3 = sample_vi_theta(test) assert beta_pymc3.shape == (args.n_sample, args.n_topic, args.n_word) assert theta_pymc3.shape == (args.n_sample, args.n_te, args.n_topic) beta_mean = beta_pymc3.mean(0) theta_mean = theta_pymc3.mean(0) pred_rate = theta_mean.dot(beta_mean) pp_test = (test * np.log(pred_rate)).sum(1) / test_n posteriors = { 'theta': theta_pymc3, 'beta': beta_pymc3,} log_top_words(beta_pymc3.mean(0), feature_names, n_top_words=args.n_top_word) save_elbo(approx.hist) save_pp(pp_test) save_draws(posteriors)
def __init__(self, age, site_id, gender, y, model_type='poly2'): self.site_num = len(np.unique(site_id)) self.gender_num = len(np.unique(gender)) self.model_type = model_type self.s = theano.shared(site_id) self.g = theano.shared(gender) self.a = theano.shared(age) if model_type != 'nn': with pm.Model() as model: # Priors mu_prior_intercept = pm.Normal('mu_prior_intercept', mu=0., sigma=1e5) sigma_prior_intercept = pm.HalfCauchy('sigma_prior_intercept', 5) mu_prior_slope = pm.Normal('mu_prior_slope', mu=0., sigma=1e5) sigma_prior_slope = pm.HalfCauchy('sigma_prior_slope', 5) # Random intercepts intercepts = pm.Normal('intercepts', mu=mu_prior_intercept, sigma=sigma_prior_intercept, shape=(self.gender_num, self.site_num)) # Expected value if model_type == 'lin_rand_int': # Random slopes slopes = pm.Normal('slopes', mu=mu_prior_slope, sigma=sigma_prior_slope, shape=(self.gender_num, )) y_hat = intercepts[(self.g, self.s)] + self.a * slopes[(self.g)] # Model error sigma_error = pm.Uniform('sigma_error', lower=0, upper=100) sigma_y = sigma_error elif model_type == 'lin_rand_int_slp': # Random slopes slopes = pm.Normal('slopes', mu=mu_prior_slope, sigma=sigma_prior_slope, shape=(self.gender_num, self.site_num)) y_hat = intercepts[(self.g, self.s)] + self.a * slopes[ (self.g, self.s)] # Model error sigma_error = pm.Uniform('sigma_error', lower=0, upper=100) sigma_y = sigma_error elif model_type == 'lin_rand_int_slp_nse': # Random slopes slopes = pm.Normal('slopes', mu=mu_prior_slope, sigma=sigma_prior_slope, shape=(self.gender_num, self.site_num)) y_hat = intercepts[(self.g, self.s)] + self.a * slopes[ (self.g, self.s)] # Model error sigma_error_site = pm.Uniform('sigma_error_site', lower=0, upper=100, shape=(self.site_num, )) sigma_error_gender = pm.Uniform('sigma_error_gender', lower=0, upper=100, shape=(self.gender_num, )) sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 + sigma_error_gender[(self.g)]**2) elif model_type == 'lin_rand_int_nse': # Random slopes slopes = pm.Normal('slopes', mu=mu_prior_slope, sigma=sigma_prior_slope, shape=(self.gender_num, )) y_hat = intercepts[(self.g, self.s)] + self.a * slopes[(self.g)] # Model error sigma_error_site = pm.Uniform('sigma_error_site', lower=0, upper=100, shape=(self.site_num, )) sigma_error_gender = pm.Uniform('sigma_error_gender', lower=0, upper=100, shape=(self.gender_num, )) sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 + sigma_error_gender[(self.g)]**2) elif model_type == 'poly2': slopes = pm.Normal('slopes', mu=mu_prior_slope, sigma=sigma_prior_slope, shape=(self.gender_num, )) mu_prior_slope_2 = pm.Normal('mu_prior_slope_2', mu=0., sigma=1e5) sigma_prior_slope_2 = pm.HalfCauchy( 'sigma_prior_slope_2', 5) slopes_2 = pm.Normal('slopes_2', mu=mu_prior_slope_2, sigma=sigma_prior_slope_2, shape=(self.gender_num, )) y_hat = intercepts[(self.g, self.s)] + self.a * slopes[ (self.g)] + self.a**2 * slopes_2[(self.g)] # Model error sigma_error_site = pm.Uniform('sigma_error_site', lower=0, upper=100, shape=(self.site_num, )) sigma_error_gender = pm.Uniform('sigma_error_gender', lower=0, upper=100, shape=(self.gender_num, )) sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 + sigma_error_gender[(self.g)]**2) # Data likelihood y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=y) elif model_type == 'nn': age = np.expand_dims(age, axis=1) self.a = theano.shared(age) n_hidden = 2 n_data = 1 init_1 = pm.floatX(np.random.randn(n_data, n_hidden)) init_out = pm.floatX(np.random.randn(n_hidden)) std_init_1 = pm.floatX(np.ones([n_data, n_hidden])) 
std_init_out = pm.floatX(np.ones([ n_hidden, ])) with pm.Model() as model: weights_in_1_grp = pm.Normal('w_in_1_grp', 0, sd=1., shape=(n_data, n_hidden), testval=init_1) # Group standard-deviation weights_in_1_grp_sd = pm.HalfNormal('w_in_1_grp_sd', sd=1., shape=(n_data, n_hidden), testval=std_init_1) # Group mean distribution from hidden layer to output weights_1_out_grp = pm.Normal('w_1_out_grp', 0, sd=1., shape=(n_hidden, ), testval=init_out) weights_1_out_grp_sd = pm.HalfNormal('w_1_out_grp_sd', sd=1., shape=(n_hidden, ), testval=std_init_out) # Separate weights for each different model weights_in_1_raw = pm.Normal('w_in_1', shape=(self.gender_num, self.site_num, n_data, n_hidden)) # Non-centered specification of hierarchical model weights_in_1 = weights_in_1_raw[ self.g, self.s, :, :] * weights_in_1_grp_sd + weights_in_1_grp weights_1_out_raw = pm.Normal('w_1_out', shape=(self.gender_num, self.site_num, n_hidden)) weights_1_out = weights_1_out_raw[ self.g, self.s, :] * weights_1_out_grp_sd + weights_1_out_grp # Build neural-network using tanh activation function act_1 = pm.math.tanh( theano.tensor.batched_dot(self.a, weights_in_1)) y_hat = theano.tensor.batched_dot(act_1, weights_1_out) sigma_error_site = pm.Uniform('sigma_error_site', lower=0, upper=100, shape=(self.site_num, )) sigma_error_gender = pm.Uniform('sigma_error_gender', lower=0, upper=100, shape=(self.gender_num, )) sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 + sigma_error_gender[(self.g)]**2) # Data likelihood y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=y) self.model = model
def __init__(self, approx, beta=1.0):
    Operator.__init__(self, approx)
    self.beta = pm.floatX(beta)
def build_model(self, distfam, params, shape, transform, testval=None):
    if testval is not None:
        testval = pm.floatX(testval)
    with pm.Model() as m:
        distfam("x", shape=shape, transform=transform, testval=testval, **params)
    return m
def cov(self):
    x = self.histogram - self.mean
    return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
def logp_gmix(mus, pi, taus, n_components): def logp_(value): logps = [ tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value) for i in range(n_components) ] return tt.sum( logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0)) return logp_ # Sparse model with diagonal covariance: with pm.Model() as model: # Weights of each component: w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, )) # Impose sparse structure onto mean with off-diagonal elements all being the same, because background should be the same throughout. mus_signal = MvNormal( 'mus_signal', mu=pm.floatX(signalMean_priorMean), tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2), shape=n_dimensions) mus_background = MvNormal('mus_background', mu=pm.floatX(backgroundMean_priorMean), tau=pm.floatX( np.eye(n_dimensions) / backgroundMean_priorSD**2), shape=n_dimensions) mus = tt.fill_diagonal( tt.reshape(tt.tile(mus_background, n_components),
sns.despine() ax.set(title='Predicted labels in testing set', xlabel='X', ylabel='Y') # In[17]: print('Accuracy = {}%'.format((Y_test == pred).mean() * 100)) # Hey, our neural network did all right! # ## Lets look at what the classifier has learned # # For this, we evaluate the class probability predictions on a grid over the whole input space. # In[18]: grid = pm.floatX(np.mgrid[-3:3:100j, -3:3:100j]) grid_2d = grid.reshape(2, -1).T dummy_out = np.ones(grid.shape[1], dtype=np.int8) # In[19]: ppc = sample_proba(grid_2d, 500) # ### Probability surface # In[20]: cmap = sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True) fig, ax = plt.subplots(figsize=(12, 9)) contour = ax.contourf(grid[0], grid[1],
def logp(self, value):
    quaddist, logdet, ok = self._quaddist(value)
    k = value.shape[-1].astype(theano.config.floatX)
    norm = -0.5 * k * pm.floatX(np.log(2 * np.pi))
    return bound(norm - 0.5 * quaddist - logdet, ok)
def trainBNN(self, inputsTrain, errInputsTrain, targetsTrain, errTargetsTrain,
             neuronsPerHiddenlayer, sampler, nsamp, bnnmodelpkl, plotdir,
             ncores=2, viewBNN=False):
    """
    Trains the Bayesian neural network on the specified training data, saves
    the model, and optionally visualizes the BNN.

    Arguments:
        inputsTrain - input training set (array, [ntrain*ninputs], where ntrain
            is the number of training measurements and ninputs is the number of
            inputs), specifically ra, dec, appJmag, appHmag, appKmag, parallax,
            Teff, logg, [M/H], [a/M], [C/M], [N/M]
        errInputsTrain - errors on the input training set (array, [ntrain*ninputs])
        targetsTrain - target training set (array, [ntrain*ntargets], where
            ntargets is the number of targets)
        errTargetsTrain - errors on the target training set (array, [ntrain*ntargets])
        neuronsPerHiddenlayer - number of neurons in the hidden layer
        sampler - "advi" for ADVI variational inference, otherwise the
            No-U-Turn Sampler (NUTS, much slower) is used
        nsamp - number of samples to generate
        bnnmodelpkl - name of the pickle file in which to store the trained BNN
        plotdir - directory for storing any associated plots
        ncores - number of cores to use for the NUTS sampler (default 2)
        viewBNN - whether to visualize and plot the BNN (default False)

    Returns:
        Nothing. The trained model, trace, and target scaling are pickled to
        bnnmodelpkl.
    """
    ntrain, ninputs = np.shape(inputsTrain)
    ntrain, ntargets = np.shape(targetsTrain)

    # Calculate and apply the scaling of the targets
    targetsMu, targetsSig = self.calcScale(targetsTrain, errTargetsTrain)
    targetsTrainScale, errTargetsTrainScale = \
        self.scaleData(targetsTrain, errTargetsTrain, targetsMu, targetsSig)

    # Initialize BNN weights and biases on neurons using a Gaussian with
    # mean 0 and standard deviation 1
    np.random.seed(30)
    ninputsBNN = np.copy(ninputs)
    initWtsInHid = np.random.randn(ninputsBNN, neuronsPerHiddenlayer)
    initBiasInHid = np.random.randn(neuronsPerHiddenlayer)
    initWtsHidOut = np.random.randn(neuronsPerHiddenlayer, ntargets)
    initBiasHidOut = np.random.randn(ntargets)

    # Specify the neural network
    with pm.Model() as neural_network:

        # Priors for true inputs
        # CHANGE DURING THE FIT
        xTrue = pm.Normal('xTrue', mu=inputsTrain, sd=errInputsTrain,
                          shape=(ntrain, ninputs), testval=inputsTrain)

        # Calculate absolute magnitudes from apparent magnitudes and parallax
        truera = xTrue[:, 0]
        truedec = xTrue[:, 1]
        trueappJmag = xTrue[:, 2]
        trueappHmag = xTrue[:, 3]
        trueappKmag = xTrue[:, 4]
        trueparallax = xTrue[:, 5]
        trueabsJmag = trueappJmag - 5 * np.log10(100. / trueparallax)
        trueabsHmag = trueappHmag - 5 * np.log10(100. / trueparallax)
        trueabsKmag = trueappKmag - 5 * np.log10(100. / trueparallax)
        trueJminH = trueabsJmag - trueabsHmag
        trueHminK = trueabsHmag - trueabsKmag

        # Priors for true inputs to the BNN
        # CHANGE DURING THE FIT
        xTrueBNN = TT.stack([
            truera, truedec, trueabsJmag, trueJminH, trueHminK, trueparallax,
            xTrue[:, 6], xTrue[:, 7], xTrue[:, 8], xTrue[:, 9], xTrue[:, 10],
            xTrue[:, 11]
        ], axis=0)
        xTrueBNN = xTrueBNN.reshape([ntrain, ninputs])

        # Priors on unknown BNN parameters (weights and biases from the input
        # to the hidden layer and from the hidden to the output layer)
        # CHANGE DURING THE FIT
        # testval overrides the default test value, which is the mean
        wtsInHid = pm.Normal('wtsInHid', mu=0, sd=1,
                             shape=(ninputsBNN, neuronsPerHiddenlayer),
                             testval=initWtsInHid)
        biasInHid = pm.Normal('biasInHid', mu=0, sd=1,
                              shape=(neuronsPerHiddenlayer, ),
                              testval=initBiasInHid)
        wtsHidOut = pm.Normal('wtsHidOut', mu=0, sd=1,
                              shape=(neuronsPerHiddenlayer, ntargets),
                              testval=initWtsHidOut)
        biasHidOut = pm.Normal('biasHidOut', mu=0, sd=1,
                               shape=(ntargets, ),
                               testval=initBiasHidOut)

        # Apply the ANN to get the expected value of the outcome
        actHid = TT.nnet.sigmoid(TT.dot(xTrueBNN, wtsInHid) + biasInHid)
        actOut = TT.dot(actHid, wtsHidOut) + biasHidOut
        yTrue = pm.Deterministic('yTrue', actOut)

        # Likelihoods of the observations (i.e. the sampling distributions)
        # FIXED DURING THE FIT
        xTrainObs = pm.Normal('xTrainObs', mu=xTrue, sd=errInputsTrain,
                              observed=inputsTrain,
                              total_size=(ntrain, ninputs))
        yTrainObs = pm.Normal('yTrainObs', mu=yTrue, sd=errTargetsTrainScale,
                              observed=targetsTrainScale,
                              total_size=(ntrain, ntargets))

    # Train the BNN
    print("Training Bayesian neural network with...")
    with neural_network:
        if sampler == "advi":
            # Fit with the ADVI sampler
            print(" ...the ADVI sampler...")
            s = theano.shared(pm.floatX(1))
            inference = pm.ADVI(cost_part_grad_scale=s)
            ftt = pm.fit(n=nsamp, method=inference)
            trace = ftt.sample(nsamp)
            fig = plt.figure(figsize=(6, 4))
            plt.plot(-ftt.hist)
            plt.ylabel('ELBO')
            fig.savefig(plotdir + "advi_fitprogress.eps")
        else:
            # Fit with the NUTS sampler
            print(" ...the NUTS sampler...")
            step = pm.NUTS(target_accept=0.95)
            ntune = 1000
            trace = pm.sample(nsamp, random_seed=10, step=step, tune=ntune,
                              cores=ncores)
    print("...done.")

    # Save the BNN to file
    print("Saving BNN, trace, and scaling of inputs and outputs to " +
          bnnmodelpkl + "...")
    with open(bnnmodelpkl, "wb") as buff:
        pickle.dump({'targetsMu': targetsMu,
                     'targetsSig': targetsSig,
                     'model': neural_network,
                     'neuronsPerHiddenlayer': neuronsPerHiddenlayer,
                     'trace': trace}, buff)
    print("...done.")

    if viewBNN:
        # View the model and its free random variables (i.e. the ones you are
        # obtaining posteriors for!); these bare expressions only display
        # output in an interactive session
        neural_network
        neural_network.free_RVs

        # If desired, plot the neural network trace
        fig, ax = plt.subplots(7, 2, figsize=(16, 6))
        pm.traceplot(trace, ax=ax)
        fig.savefig(plotdir + "neural_network.eps", format='eps', dpi=100,
                    bbox_inches='tight')
    return
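A minimal usage sketch. The owning class is not shown here, so `bnn` is a hypothetical instance exposing this method, and the array names and hyperparameter values are illustrative assumptions based on the docstring above:

# Hypothetical invocation; `bnn` stands for whatever object defines trainBNN,
# and the arrays are assumed to have the shapes described in the docstring.
bnn.trainBNN(inputsTrain, errInputsTrain,      # [ntrain, ninputs] each
             targetsTrain, errTargetsTrain,    # [ntrain, ntargets] each
             neuronsPerHiddenlayer=20,
             sampler="advi",                   # anything else selects NUTS
             nsamp=10000,
             bnnmodelpkl="bnn_model.pkl",
             plotdir="./plots/",
             ncores=2,
             viewBNN=False)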
def apply(self, f):
    # f: kernel function for KSD  f(histogram) -> (k(x,.), \nabla_x k(x,.))
    stein = Stein(self.approx, f, self.input_matrix)
    return pm.floatX(-1) * stein.grad
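For context, the Stein gradient being negated here corresponds (up to the kernel and temperature details hidden inside the Stein class, which are not shown) to the standard SVGD update direction of Liu and Wang (2016):

    \phi^{*}(x) = \frac{1}{n}\sum_{j=1}^{n}
        \Big[ k(x_j, x)\,\nabla_{x_j}\log p(x_j) + \nabla_{x_j} k(x_j, x) \Big]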
    # (continues the `model_2` model context opened earlier; not shown here)
    probitphi = pm.Normal('probitphi', mu=mu_p, sd=sigma_p,
                          shape=companiesABC, testval=np.ones(companiesABC))
    phii = pm.Deterministic('phii', Phi(probitphi))

    pi_ij = pm.Uniform('pi_ij', lower=0, upper=1,
                       shape=len(Num_shared.get_value()))
    zij_ = pm.theanof.tt_rng().uniform(size=companyABC.shape)
    zij = pm.Deterministic('zij', tt.lt(zij_, phii[Num_shared]))
    beta_mu = pm.Deterministic('beta_mu', tt.switch(zij, liner, pi_ij))

    Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu,
                          observed=ys_faults)  # observed values

import theano

with model_2:
    s = theano.shared(pm.floatX(1))
    inference = pm.ADVI(cost_part_grad_scale=s)
    inference.fit(n=20000)
    # ADVI has nearly converged, so it is time to set `s` to zero
    s.set_value(0)
    approx = inference.fit(n=10000)

trace_2 = approx.sample(3000, include_transformed=True)
elbos1 = -inference.hist
chain_2 = trace_2[2000:]
# varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'u', 'beta4']
pm.traceplot(chain_2)
plt.show()

njob = 1
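The `cost_part_grad_scale` schedule used above (1 during the first fit, 0 once the fit has nearly converged) follows the "Sticking the Landing" idea referenced in the PyMC3 ADVI docstring, which can reduce gradient variance close to the optimum. A small sketch, assuming `inference` as defined above, for checking that the ELBO has actually plateaued before `s` is zeroed:

import matplotlib.pyplot as plt

# ELBO history so far; a flat tail suggests ADVI is near the optimum
plt.plot(-inference.hist)
plt.xlabel('iteration')
plt.ylabel('ELBO')
plt.show()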
def test_scale_cost_to_minibatch_works(aux_total_size):
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])
    beta = len(y_obs) / float(aux_total_size)

    # TODO: theano_config
    # with pm.Model(theano_config=dict(floatX='float64')):
    # did not work as expected: there were some numeric problems, so float64
    # is forced
    with theano.config.change_flags(floatX="float64", warn_float64="ignore"):

        assert theano.config.floatX == "float64"
        assert theano.config.warn_float64 == "ignore"

        post_mu = np.array([1.88], dtype=theano.config.floatX)
        post_sigma = np.array([1], dtype=theano.config.floatX)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_1 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_1.shared_params["mu"].set_value(post_mu)
            mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

            with theano.config.change_flags(compute_test_value="off"):
                elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_2 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_2.scale_cost_to_minibatch = False
            assert not mean_field_2.scale_cost_to_minibatch
            mean_field_2.shared_params["mu"].set_value(post_mu)
            mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

        with theano.config.change_flags(compute_test_value="off"):
            elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)

        np.testing.assert_allclose(
            elbo_via_total_size_unscaled.eval(),
            elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta),
            rtol=0.02,
            atol=1e-1,
        )
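The final assertion is easiest to read as a statement about the scaling factor: with `total_size` set, the data term of the objective is up-weighted relative to the two observed points, and `scale_cost_to_minibatch` brings the objective back to minibatch scale, so (roughly)

    \text{ELBO}_{\text{unscaled}} \;\approx\; \frac{1}{\beta}\,\text{ELBO}_{\text{scaled}},
    \qquad \beta = \frac{\lvert y_{\text{obs}}\rvert}{\texttt{aux\_total\_size}}

which is exactly the factor `pm.floatX(1 / beta)` applied in the assertion.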
def run_normal_mv_model_mixture(data, K=3, mus=None, mc_samples=10000, jobs=1,
                                n_cols=10, n_rows=100, neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)

    with pm.Model() as model:
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        sds = pm.HalfNormal('sds', sd=tt.ones(shp) * 100, shape=shp)
        mus = pm.Normal('mus', mu=tt.as_tensor_variable(mus_start), sd=sds, shape=shp)

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO: one pi per voxel
        #category = pm.Categorical('category', p=pi, shape=n_samples)
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L) for i in range(K)]

        #aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        #prior = pm.Deterministic('prior', (tt.sum(tt.eq(aux2.reshape((n_samples, max_neigs)),
        #                                                category.reshape((n_samples, 1))), axis=1) + 1) / 1.0)

        pesos = pm.Dirichlet('pesos', a=np.ones((K,)))
        #obs = pm.Mixture('obs', w=pesos, comp_dists=mvs, observed=data)
        obs = my_mixture('obs', w=pesos, comp_dists=mvs, observed=data)

    with model:
        #step2 = pm.CategoricalGibbsMetropolis(vars=[category])
        trace = sample(mc_samples, n_jobs=jobs, tune=500)

    # 'mvs' are unnamed component distributions, so they are not in the trace
    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma', 'pesos'])
    plt.title('normal mv model 40 cols')

    # The explicit `category` variable is commented out above (the mixture is
    # marginalized), so the original post-processing of trace['category'] can
    # no longer run:
    #logp_simple(mus, category, aux3)
    #mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    mod = None

    #if chains > 1:
    #    print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
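A minimal usage sketch, under the assumption that `data` is an (n_rows * n_cols, n_feats) array of voxel features and that the project-specific helpers used above (indxs_neigs, my_mixture, sample) are importable from the surrounding code:

# Hypothetical invocation; the data shape and K are illustrative only.
data = pm.floatX(np.random.randn(100 * 10, 3))   # (n_rows * n_cols, n_feats)
model, mod, trace = run_normal_mv_model_mixture(data, K=3, mc_samples=2000,
                                                n_cols=10, n_rows=100, neigs=1)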
n_comp = 2
concentration = 1

with pm.Model() as model:
    # Prior for covariance matrix
    # Per-component covariances (commented out; a single shared covariance is used instead):
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1.,
    #                               sd_dist=pm.Gamma.dist(mu=2, sigma=1)) for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i]) for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T)) for i in range(n_comp)]
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1.,
                                 sd_dist=pm.Gamma.dist(mu=2, sigma=1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))

    # Prior for means (note: the tau and the logp_gmix call below assume dimensions == 2):
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)),
                    tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,))
           for i in range(n_comp)]

    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)), shape=(n_comp,))

    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)

# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:, 0, 1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method='advi')
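Once both fits finish, the ADVI approximation can be sampled and compared against the HMC trace. A brief sketch (posterior means of the component locations only), assuming the notebook keeps the variable names used above:

# Draw from the fitted ADVI approximation and compare posterior means with HMC.
advi_trace = fit_advi.sample(2000)
for i in range(n_comp):
    print('mu_%d  HMC: %s  ADVI: %s'
          % (i,
             hmc_trace['mu_%d' % i].mean(axis=0),
             advi_trace['mu_%d' % i].mean(axis=0)))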
        Topic distributions for documents.
    """
    def ll_docs_f(docs):
        dixs, vixs = docs.nonzero()
        vfreqs = docs[dixs, vixs]
        # Per-word log-likelihood times the number of tokens in the whole dataset
        ll_docs = vfreqs * pmmath.logsumexp(
            tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel()
        return tt.sum(ll_docs)

    return ll_docs_f


with pm.Model() as lda_model:
    theta = Dirichlet('theta',
                      a=pm.floatX(1.0 / n_topics) * np.ones((sim_counts.shape[0], n_topics)),
                      shape=(sim_counts.shape[0], n_topics),
                      transform=t_stick_breaking(1e-9))
    beta = Dirichlet('beta',
                     a=pm.floatX(1.0 / n_topics) * np.ones((n_topics, sim_counts.shape[1])),
                     shape=(n_topics, sim_counts.shape[1]),
                     transform=t_stick_breaking(1e-9))
    doc = pm.DensityDist('doc', logp_lda_doc(beta, theta), observed=sim_counts)


###### Auto-Encoding Variational Bayes

## Encoder

class LDAEncoder:
    """Encode (term-frequency) document vectors to variational means and (log-transformed) stds.