Example #1
    def __call__(self, x):
        neg_value = np.float64(self.logp_func(pm.floatX(x)))
        value = -1.0 * nan_to_high(neg_value)
        if self.use_gradient:
            neg_grad = self.dlogp_func(pm.floatX(x))
            if np.all(np.isfinite(neg_grad)):
                self.previous_x = x
            grad = nan_to_num(-1.0*neg_grad)
            grad = grad.astype(np.float64)
        else:
            self.previous_x = x
            grad = None

        if self.n_eval % 10 == 0:
            self.update_progress_desc(neg_value, grad)

        if self.n_eval > self.maxeval:
            self.update_progress_desc(neg_value, grad)
            self.progress.close()
            raise StopIteration

        self.n_eval += 1
        self.progress.update(1)

        if self.use_gradient:
            return value, grad
        else:
            return value
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))

    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu,  rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp)

        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd])
def test_hh_flow():
    cov = pm.floatX([[2, -1], [-1, 3]])
    with pm.Model():
        pm.MvNormal('mvN', mu=pm.floatX([0, 1]), cov=cov, shape=2)
        nf = NFVI('scale-hh*2-loc')
        nf.fit(25000, obj_optimizer=pm.adam(learning_rate=0.001))
        trace = nf.approx.sample(10000)
        cov2 = pm.trace_cov(trace)
    np.testing.assert_allclose(cov, cov2, rtol=0.07)
Example #4
 def create_shared_params(self, start=None):
     if start is None:
         start = self.model.test_point
     else:
         start_ = self.model.test_point.copy()
         update_start_vals(start_, start, self.model)
         start = start_
     start = self.gbij.map(start)
     return {'mu': theano.shared(
                 pm.floatX(start), 'mu'),
             'rho': theano.shared(
                 pm.floatX(np.zeros((self.global_size,))), 'rho')}
def test_var_replacement():
    X_mean = pm.floatX(np.linspace(0, 10, 10))
    y = pm.floatX(np.random.normal(X_mean*4, .05))
    with pm.Model():
        inp = pm.Normal('X', X_mean, shape=X_mean.shape)
        coef = pm.Normal('b', 4.)
        mean = inp * coef
        pm.Normal('y', mean, .1, observed=y)
        advi = pm.fit(100)
        assert advi.sample_node(mean).eval().shape == (10, )
        x_new = pm.floatX(np.linspace(0, 10, 11))
        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11, )
Example #6
    def test_free_rv(self):
        with pm.Model() as model4:
            Normal('n', observed=[[1, 1],
                                  [1, 1]], total_size=[2, 2])
            p4 = theano.function([], model4.logpt)

        with pm.Model() as model5:
            Normal('n', total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False))
            p5 = theano.function([model5.n], model5.logpt)
        assert p4() == p5(pm.floatX([[1]]))
        assert p4() == p5(pm.floatX([[1, 1],
                                     [1, 1]]))
Example #7
 def test_cloning_available(self):
     gop = generator(integers())
     res = gop ** 2
     shared = theano.shared(floatX(10))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.float32(100)
Example #8
 def apply(self, f):
     # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
     stein = Stein(
         approx=self.approx,
         kernel=f,
         use_histogram=self.approx.all_histograms,
         temperature=self.temperature)
     return pm.floatX(-1) * stein.grad
Example #9
File: opvi.py  Project: aasensio/pymc3
 def __local_mu_rho(self):
     if not self.local_vars:
         mu, rho = (
             tt.constant(pm.floatX(np.asarray([]))),
             tt.constant(pm.floatX(np.asarray([])))
         )
     else:
         mu = []
         rho = []
         for var in self.local_vars:
             mu.append(self.known[var][0].ravel())
             rho.append(self.known[var][1].ravel())
         mu = tt.concatenate(mu)
         rho = tt.concatenate(rho)
     mu.name = self.__class__.__name__ + '_local_mu'
     rho.name = self.__class__.__name__ + '_local_rho'
     return mu, rho
Example #10
 def randidx(self, size=None):
     if size is None:
         size = (1,)
     elif isinstance(size, tt.TensorVariable):
         if size.ndim < 1:
             size = size[None]
         elif size.ndim > 1:
             raise ValueError('size ndim should be no more than 1d')
         else:
             pass
     else:
         size = tuple(np.atleast_1d(size))
     return (self._rng
             .uniform(size=size,
                      low=pm.floatX(0),
                      high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16))
             .astype('int32'))
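In effect the call above draws Uniform(0, N - 1e-16) with N = histogram.shape[0], and the .astype('int32') cast floors the result, so the sampled indices land in {0, ..., N-1} and the upper endpoint N itself can never be returned.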
Example #11
    def from_noise(cls, size, jitter=.01, local_rv=None,
                   start=None, model=None, random_seed=None, **kwargs):
        """Initialize Histogram with random noise

        Parameters
        ----------
        size : `int`
            number of initial particles
        jitter : `float`
            initial sd
        local_rv : `dict`
            mapping {model_variable -> local_variable}
            Local Vars are used for Autoencoding Variational Bayes
            See (AEVB; Kingma and Welling, 2014) for details
        start : `Point`
            initial point
        model : :class:`pymc3.Model`
            PyMC3 model for inference
        random_seed : None or `int`
            leave None to use package global RandomStream or other
            valid value to create instance specific one
        kwargs : other kwargs passed to init

        Returns
        -------
        :class:`Empirical`
        """
        hist = cls(
            None,
            local_rv=local_rv,
            model=model,
            random_seed=random_seed,
            **kwargs)
        if start is None:
            start = hist.model.test_point
        else:
            start_ = hist.model.test_point.copy()
            update_start_vals(start_, start, hist.model)
            start = start_
        start = pm.floatX(hist.gbij.map(start))
        # Initialize particles
        x0 = np.tile(start, (size, 1))
        x0 += pm.floatX(np.random.normal(0, jitter, x0.shape))
        hist.histogram.set_value(x0)
        return hist
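A minimal usage sketch for the classmethod above (the toy model is hypothetical; this assumes the method lives on pymc3's Empirical approximation, as the Returns section states):

with pm.Model() as toy_model:
    pm.Normal('x', mu=0., sd=1.)
    # 200 particles jittered around the model test point
    approx = Empirical.from_noise(200, jitter=.01)
print(approx.histogram.get_value().shape)  # (200, 1) for this one-dimensional model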
Example #12
    def test_observed_type(self):
        X_ = np.random.randn(100, 5)
        X = pm.floatX(theano.shared(X_))
        with pm.Model():
            x1 = pm.Normal('x1', observed=X_)
            x2 = pm.Normal('x2', observed=X)

        assert x1.type == X.type
        assert x2.type == X.type
Example #13
def build_model():
    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", 
                      skiprows=1, usecols=(2,3))
    
    atbats = pm.floatX(data[:,0])
    hits = pm.floatX(data[:,1])
    
    N = len(hits)
    
    # we want to bound kappa from below
    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
    
    with pm.Model() as model:
        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
        thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
    return model
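A short usage sketch for build_model (the sampler settings here are illustrative assumptions, not part of the source):

model = build_model()
with model:
    trace = pm.sample(1000, tune=1000)
pm.summary(trace)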
Example #14
 def apply(self, f):
     # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
     input_matrix = self.get_input()
     stein = Stein(
         approx=self.approx,
         kernel=f,
         input_matrix=input_matrix,
         temperature=self.temperature)
     return pm.floatX(-1) * stein.grad
Example #15
 def create_shared_params(self, start=None):
     if start is None:
         start = self.model.test_point
     else:
         start_ = start.copy()
         update_start_vals(start_, self.model.test_point, self.model)
         start = start_
     if self.batched:
         start = start[self.group[0].name][0]
     else:
         start = self.bij.map(start)
     rho = np.zeros((self.ddim,))
     if self.batched:
         start = np.tile(start, (self.bdim, 1))
         rho = np.tile(rho, (self.bdim, 1))
     return {'mu': theano.shared(
                 pm.floatX(start), 'mu'),
             'rho': theano.shared(
                 pm.floatX(rho), 'rho')}
Example #16
File: opvi.py  Project: aasensio/pymc3
    def __init__(self, local_rv=None, model=None,
                 cost_part_grad_scale=1,
                 scale_cost_to_minibatch=False,
                 random_seed=None, **kwargs):
        model = modelcontext(model)
        self._scale_cost_to_minibatch = theano.shared(np.int8(0))
        self.scale_cost_to_minibatch = scale_cost_to_minibatch
        if not isinstance(cost_part_grad_scale, theano.Variable):
            self.cost_part_grad_scale = theano.shared(pm.floatX(cost_part_grad_scale))
        else:
            self.cost_part_grad_scale = pm.floatX(cost_part_grad_scale)
        self._seed = random_seed
        self._rng = tt_rng(random_seed)
        self.model = model
        self.check_model(model, **kwargs)
        if local_rv is None:
            local_rv = {}

        def get_transformed(v):
            if hasattr(v, 'transformed'):
                return v.transformed
            return v

        known = {get_transformed(k): v for k, v in local_rv.items()}
        self.known = known
        self.local_vars = self.get_local_vars(**kwargs)
        self.global_vars = self.get_global_vars(**kwargs)
        self._g_order = ArrayOrdering(self.global_vars)
        self._l_order = ArrayOrdering(self.local_vars)
        self.gbij = DictToArrayBijection(self._g_order, {})
        self.lbij = DictToArrayBijection(self._l_order, {})
        self.symbolic_initial_local_matrix = tt.matrix(self.__class__.__name__ + '_symbolic_initial_local_matrix')
        self.symbolic_initial_global_matrix = tt.matrix(self.__class__.__name__ + '_symbolic_initial_global_matrix')

        self.global_flat_view = model.flatten(
            vars=self.global_vars,
            order=self._g_order,
        )
        self.local_flat_view = model.flatten(
            vars=self.local_vars,
            order=self._l_order,
        )
        self.symbolic_n_samples = self.symbolic_initial_global_matrix.shape[0]
Example #17
 def create_shared_params(self, trace=None):
     if trace is None:
         histogram = np.atleast_2d(self.gbij.map(self.model.test_point))
     else:
         histogram = np.empty((len(trace) * len(trace.chains), self.global_size))
         i = 0
         for t in trace.chains:
             for j in range(len(trace)):
                 histogram[i] = self.gbij.map(trace.point(j, t))
                 i += 1
     return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
Example #18
 def rslice(self, total, size, seed):
     if size is None:
         return slice(None)
     elif isinstance(size, int):
         rng = pm.tt_rng(seed)
         Minibatch.RNG[id(self)].append(rng)
         return (rng
                 .uniform(size=(size, ), low=0.0, high=pm.floatX(total) - 1e-16)
                 .astype('int64'))
     else:
         raise TypeError('Unrecognized size type, %r' % size)
Example #19
 def __call__(self, nmc, **kwargs):
     op = self.op  # type: KSD
     grad = op.apply(self.tf)
     if self.approx.all_histograms:
         z = self.approx.joint_histogram
     else:
         z = self.approx.symbolic_random
     if 'more_obj_params' in kwargs:
         params = self.obj_params + kwargs['more_obj_params']
     else:
         params = self.test_params + kwargs['more_tf_params']
         grad *= pm.floatX(-1)
     grads = tt.grad(None, params, known_grads={z: grad})
     return self.approx.set_size_and_deterministic(grads, nmc, 0, kwargs.get('more_replacements'))
Example #20
    def _quaddist_tau(self, delta):
        chol_tau = self.chol_tau
        _, k = delta.shape
        k = pm.floatX(k)

        diag = tt.nlinalg.diag(chol_tau)
        ok = tt.all(diag > 0)

        chol_tau = tt.switch(ok, chol_tau, 1)
        diag = tt.nlinalg.diag(chol_tau)
        delta_trans = tt.dot(delta, chol_tau)
        quaddist = (delta_trans ** 2).sum(axis=-1)
        logdet = -tt.sum(tt.log(diag))
        return quaddist, logdet, ok
Example #21
    def _quaddist_chol(self, delta):
        chol_cov = self.chol_cov
        _, k = delta.shape
        k = pm.floatX(k)
        diag = tt.nlinalg.diag(chol_cov)
        # Check if the covariance matrix is positive definite.
        ok = tt.all(diag > 0)
        # If not, replace the diagonal. We return -inf later, but
        # need to prevent solve_lower from throwing an exception.
        chol_cov = tt.switch(ok, chol_cov, 1)

        delta_trans = self.solve_lower(chol_cov, delta.T).T
        quaddist = (delta_trans ** 2).sum(axis=-1)
        logdet = tt.sum(tt.log(diag))
        return quaddist, logdet, ok
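For reference, with the covariance factored as Sigma = L L^T (L = chol_cov), the two returned quantities are

    quaddist = delta Sigma^{-1} delta^T = ||L^{-1} delta^T||^2    (computed via solve_lower)
    logdet   = sum_i log(L_ii) = (1/2) log|Sigma|

so a caller can assemble the MvNormal log-density as -0.5 * (k * log(2*pi) + quaddist) - logdet, returning -inf when `ok` is false (as the comment above indicates).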
Example #22
File: opvi.py  Project: aasensio/pymc3
 def normalizing_constant(self):
     """
      Constant to divide by when we want to scale down the loss from minibatches
     """
     t = self.to_flat_input(
         tt.max([v.scaling for v in self.model.basic_RVs]))
     t = theano.clone(t, {
         self.global_input: self.symbolic_random_global_matrix[0],
         self.local_input: self.symbolic_random_local_matrix[0]
     })
      t = self.set_size_and_deterministic(t, 1, 1)  # remove randomness, we do not need it here at all
     # if not scale_cost_to_minibatch: t=1
     t = tt.switch(self._scale_cost_to_minibatch, t,
                   tt.constant(1, dtype=t.dtype))
     return pm.floatX(t)
Example #23
    def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
        if trace is None:
            if size is None:
                raise opvi.ParametrizationError('Need `trace` or `size` to initialize')
            else:
                if start is None:
                    start = self.model.test_point
                else:
                    start_ = self.model.test_point.copy()
                    update_start_vals(start_, start, self.model)
                    start = start_
                start = pm.floatX(self.bij.map(start))
                # Initialize particles
                histogram = np.tile(start, (size, 1))
                histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))

        else:
            histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
            i = 0
            for t in trace.chains:
                for j in range(len(trace)):
                    histogram[i] = self.bij.map(trace.point(j, t))
                    i += 1
        return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
Example #24
 def create_shared_params(self, start=None):
     if start is None:
         start = self.model.test_point
     else:
         start_ = self.model.test_point.copy()
         update_start_vals(start_, start, self.model)
         start = start_
     start = pm.floatX(self.gbij.map(start))
     n = self.global_size
     L_tril = (
         np.eye(n)
         [np.tril_indices(n)]
         .astype(theano.config.floatX)
     )
     return {'mu': theano.shared(start, 'mu'),
             'L_tril': theano.shared(L_tril, 'L_tril')}
Example #25
def adagrad_window(loss_or_grads=None, params=None,
                   learning_rate=0.001, epsilon=.1, n_win=10):
    """Returns a function that returns parameter updates.
    Instead of accumulated estimate, uses running window

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate.
    epsilon : float
        Offset to avoid zero-division in the normalizer of adagrad.
    n_win : int
        Number of past steps to calculate scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    elif loss_or_grads is None or params is None:
        raise ValueError('Please provide both `loss_or_grads` and `params` to get updates')
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        i = theano.shared(pm.floatX(0))
        i_int = i.astype('int32')
        value = param.get_value(borrow=True)
        accu = theano.shared(
            np.zeros(value.shape + (n_win,), dtype=value.dtype))

        # Write the squared gradient into slot i of the rolling window (accu_new)
        accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2)
        i_new = tt.switch((i + 1) < n_win, i + 1, 0)
        updates[accu] = accu_new
        updates[i] = i_new

        accu_sum = accu_new.sum(axis=-1)
        updates[param] = param - (learning_rate * grad /
                                  tt.sqrt(accu_sum + epsilon))
    return updates
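A hedged usage sketch: called without `loss_or_grads` and `params`, adagrad_window returns a partially applied optimizer, which can be passed to `pm.fit` as `obj_optimizer` just like the built-in optimizers used elsewhere in these examples (the toy model below is an illustration):

with pm.Model():
    pm.Normal('x', mu=0., sd=1.)
    approx = pm.fit(1000, obj_optimizer=adagrad_window(learning_rate=0.002, n_win=20))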
def test_scale_cost_to_minibatch_works(aux_total_size):
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])
    beta = len(y_obs)/float(aux_total_size)
    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)

    # TODO: theano_config
    # with pm.Model(theano_config=dict(floatX='float64')):
    # did not work as expected
    # there were some numeric problems, so float64 is forced
    with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'):
        with pm.Model():
            assert theano.config.floatX == 'float64'
            assert theano.config.warn_float64 == 'ignore'
            mu = pm.Normal('mu', mu=mu0, sd=sigma)
            pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_1 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_1.shared_params['mu'].set_value(post_mu)
            mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

            with pm.theanof.change_flags(compute_test_value='off'):
                elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)

        with pm.Model():
            mu = pm.Normal('mu', mu=mu0, sd=sigma)
            pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_2 = MeanField()
            assert mean_field_2.scale_cost_to_minibatch
            mean_field_2.scale_cost_to_minibatch = False
            assert not mean_field_2.scale_cost_to_minibatch
            mean_field_2.shared_params['mu'].set_value(post_mu)
            mean_field_2.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

        with pm.theanof.change_flags(compute_test_value='off'):
            elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)

        np.testing.assert_allclose(elbo_via_total_size_unscaled.eval(),
                                   elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta), rtol=0.02, atol=1e-1)
Example #27
 def __call__(self, nmc, **kwargs):
     op = self.op  # type: KSD
     grad = op.apply(self.tf)
     loc_size = self.approx.local_size
     local_grad = grad[..., :loc_size]
     global_grad = grad[..., loc_size:]
     if 'more_obj_params' in kwargs:
         params = self.obj_params + kwargs['more_obj_params']
     else:
         params = self.test_params + kwargs['more_tf_params']
         grad *= pm.floatX(-1)
     zl, zg = self.get_input()
     zl, zg, grad, local_grad, global_grad = self.approx.set_size_and_deterministic(
         (zl, zg, grad, local_grad, global_grad),
         nmc, 0)
     grad = tt.grad(None, params, known_grads=collections.OrderedDict([
         (zl, local_grad),
         (zg, global_grad)
     ]), disconnected_inputs='ignore')
     return grad
def integers():
    i = 0
    while True:
        yield pm.floatX(i)
        i += 1
Example #29
    xs = [z[:, np.newaxis] * rng.multivariate_normal(m, np.eye(2), size=n_samples)
          for z, m in zip(zs, ms)]
    data = np.sum(np.dstack(xs), axis=2)
    
    plt.figure(figsize=(5, 5))
    plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5)
    plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100)
    plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100)
    
    from pymc3.math import logsumexp


    #Model original
    with pm.Model() as model:
        mus = [MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(2,))
               for i in range(2)]
        pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,))
        
        xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
        
#   
#    #Model for GMM clustering
#    with pm.Model() as model:
#        # cluster sizes
#        p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2)
#        # ensure all clusters have some points
#        p_min_potential = pm.Potential('p_min_potential', tt.switch(tt.min(p) < .1, -np.inf, 0))
#    
Example #30
class TestElementWiseLogp(SeededTest):
    def build_model(self, distfam, params, shape, transform, testval=None):
        if testval is not None:
            testval = pm.floatX(testval)
        with pm.Model() as m:
            distfam('x',
                    shape=shape,
                    transform=transform,
                    testval=testval,
                    **params)
        return m

    def check_transform_elementwise_logp(self, model):
        x0 = model.deterministics[0]
        x = model.free_RVs[0]
        assert x.ndim == x.logp_elemwiset.ndim

        pt = model.test_point
        array = np.random.randn(*pt[x.name].shape)
        pt[x.name] = array
        dist = x.distribution
        logp_nojac = x0.distribution.logp(dist.transform_used.backward(array))
        jacob_det = dist.transform_used.jacobian_det(theano.shared(array))
        assert x.logp_elemwiset.ndim == jacob_det.ndim

        elementwiselogp = logp_nojac + jacob_det

        close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol)
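        # Editorial note: the check above verifies the change-of-variables identity
        #     logp_transformed(y) = logp_original(backward(y)) + log|det J_backward(y)|
        # which is exactly the `elementwiselogp = logp_nojac + jacob_det` computed here.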

    def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
        x0 = model.deterministics[0]
        x = model.free_RVs[0]
        assert (x.ndim - 1) == x.logp_elemwiset.ndim

        pt = model.test_point
        array = np.random.randn(*pt[x.name].shape)
        pt[x.name] = array
        dist = x.distribution
        logp_nojac = x0.distribution.logp(dist.transform_used.backward(array))
        jacob_det = dist.transform_used.jacobian_det(theano.shared(array))
        assert x.logp_elemwiset.ndim == jacob_det.ndim

        if vect_opt == 0:
            # the original distribution is univariate
            elementwiselogp = logp_nojac.sum(axis=-1) + jacob_det
        else:
            elementwiselogp = logp_nojac + jacob_det
        # Hack to get relative tolerance
        a = x.logp_elemwise(pt)
        b = elementwiselogp.eval()
        close_to(a, b, np.abs(0.5 * (a + b) * tol))

    @pytest.mark.parametrize('sd,shape', [
        (2.5, 2),
        (5., (2, 3)),
        (np.ones(3) * 10., (4, 3)),
    ])
    def test_half_normal(self, sd, shape):
        model = self.build_model(pm.HalfNormal, {'sd': sd},
                                 shape=shape,
                                 transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize('lam,shape', [(2.5, 2), (5., (2, 3)),
                                           (np.ones(3), (4, 3))])
    def test_exponential(self, lam, shape):
        model = self.build_model(pm.Exponential, {'lam': lam},
                                 shape=shape,
                                 transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize('a,b,shape', [
        (1., 1., 2),
        (.5, .5, (2, 3)),
        (np.ones(3), np.ones(3), (4, 3)),
    ])
    def test_beta(self, a, b, shape):
        model = self.build_model(pm.Beta, {
            'alpha': a,
            'beta': b
        },
                                 shape=shape,
                                 transform=tr.logodds)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize('lower,upper,shape',
                             [(0., 1., 2), (.5, 5.5, (2, 3)),
                              (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)),
                               (4, 3))])
    def test_uniform(self, lower, upper, shape):
        interval = tr.Interval(lower, upper)
        model = self.build_model(pm.Uniform, {
            'lower': lower,
            'upper': upper
        },
                                 shape=shape,
                                 transform=interval)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize('mu,kappa,shape',
                             [(0., 1., 2), (-.5, 5.5, (2, 3)),
                              (np.zeros(3), np.ones(3), (4, 3))])
    def test_vonmises(self, mu, kappa, shape):
        model = self.build_model(pm.VonMises, {
            'mu': mu,
            'kappa': kappa
        },
                                 shape=shape,
                                 transform=tr.circular)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize('a,shape', [(np.ones(2), 2),
                                         (np.ones((2, 3)) * .5, (2, 3)),
                                         (np.ones(3), (4, 3))])
    def test_dirichlet(self, a, shape):
        model = self.build_model(pm.Dirichlet, {'a': a},
                                 shape=shape,
                                 transform=tr.stick_breaking)
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    def test_normal_ordered(self):
        model = self.build_model(pm.Normal, {
            'mu': 0.,
            'sd': 1.
        },
                                 shape=3,
                                 testval=np.asarray([-1., 1., 4.]),
                                 transform=tr.ordered)
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('sd,shape', [
        (2.5, (2, )),
        (np.ones(3), (4, 3)),
    ])
    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"),
                       reason="Fails on float32")
    def test_half_normal_ordered(self, sd, shape):
        testval = np.sort(np.abs(np.random.randn(*shape)))
        model = self.build_model(pm.HalfNormal, {'sd': sd},
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.Chain([tr.log, tr.ordered]))
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('lam,shape', [(2.5, (2, )), (np.ones(3), (4, 3))])
    def test_exponential_ordered(self, lam, shape):
        testval = np.sort(np.abs(np.random.randn(*shape)))
        model = self.build_model(pm.Exponential, {'lam': lam},
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.Chain([tr.log, tr.ordered]))
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('a,b,shape', [
        (1., 1., (2, )),
        (np.ones(3), np.ones(3), (4, 3)),
    ])
    def test_beta_ordered(self, a, b, shape):
        testval = np.sort(np.abs(np.random.rand(*shape)))
        model = self.build_model(pm.Beta, {
            'alpha': a,
            'beta': b
        },
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.Chain([tr.logodds, tr.ordered]))
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('lower,upper,shape',
                             [(0., 1., (2, )),
                              (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)),
                               (4, 3))])
    def test_uniform_ordered(self, lower, upper, shape):
        interval = tr.Interval(lower, upper)
        testval = np.sort(np.abs(np.random.rand(*shape)))
        model = self.build_model(pm.Uniform, {
            'lower': lower,
            'upper': upper
        },
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.Chain([interval, tr.ordered]))
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('mu,kappa,shape',
                             [(0., 1., (2, )),
                              (np.zeros(3), np.ones(3), (4, 3))])
    def test_vonmises_ordered(self, mu, kappa, shape):
        testval = np.sort(np.abs(np.random.rand(*shape)))
        model = self.build_model(pm.VonMises, {
            'mu': mu,
            'kappa': kappa
        },
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.Chain([tr.circular, tr.ordered]))
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('lower,upper,shape,transform',
                             [(0., 1., (2, ), tr.stick_breaking),
                              (.5, 5.5, (2, 3), tr.stick_breaking),
                              (np.zeros(3), np.ones(3),
                               (4, 3), tr.Chain([tr.sum_to_1, tr.logodds]))])
    def test_uniform_other(self, lower, upper, shape, transform):
        testval = np.ones(shape) / shape[-1]
        model = self.build_model(pm.Uniform, {
            'lower': lower,
            'upper': upper
        },
                                 shape=shape,
                                 testval=testval,
                                 transform=transform)
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize('mu,cov,shape', [
        (np.zeros(2), np.diag(np.ones(2)), (2, )),
        (np.zeros(3), np.diag(np.ones(3)), (4, 3)),
    ])
    def test_mvnormal_ordered(self, mu, cov, shape):
        testval = np.sort(np.random.randn(*shape))
        model = self.build_model(pm.MvNormal, {
            'mu': mu,
            'cov': cov
        },
                                 shape=shape,
                                 testval=testval,
                                 transform=tr.ordered)
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)
 def gen():
     for i in range(2):
         yield floatX(np.ones((10, 10)) * i)
Example #32
                         (delta(mu).dot(tau) * delta(mu)).sum(axis=1))

# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):        
        logps = [tt.log(pi[i]) + logp_normal(mus[i,:], taus[i], value) for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))
    return logp_
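In other words, logp_ evaluates the Gaussian-mixture log-density

    log p(x) = logsumexp_i( log pi_i + log N(x | mu_i, tau_i) )

summed over the observed points; the slice [:, :n_samples] assumes n_samples is defined in the enclosing scope.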

## Prior for model:

componentMean = ms + np.random.uniform(0,5,n_dimensions)
componentTau = np.random.uniform(0,2,n_dimensions) * np.eye(n_dimensions)

with pm.Model() as model:
    mus = MvNormal('mu', mu=pm.floatX(componentMean), tau=pm.floatX(componentTau), shape=(n_components, n_dimensions))
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)), shape=(n_components,))
    packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2., sd_dist=pm.HalfCauchy.dist(2.5)) for i in range(n_components)]
    L = [pm.expand_packed_triangular(n_dimensions, packed_L[i]) for i in range(n_components)]
    sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i],L[i].T)) for i in range(n_components)]
    taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)]
    xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components), observed=data)
    
with model:
    advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1))  
    
advi_trace = advi_fit.sample(10000)    
advi_summary = pm.summary(advi_trace)

with open("advi_summary.pickle", "wb") as pickle_out:
    pickle.dump(advi_summary, pickle_out)
Example #33
def run_normal_mv_model_prior(data,
                              K=3,
                              mus=None,
                              mc_samples=10000,
                              jobs=1,
                              n_cols=10,
                              n_rows=100,
                              neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69

    with pm.Model() as model:

        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        mus = pm.Normal('mus',
                        mu=[[10, 10], [55, 55], [105, 105], [155, 155],
                            [205, 205]],
                        sd=10,
                        shape=(K, n_feats))
        #sds = pm.HalfNormal('sds',sd = 50, shape = (K,n_feats) )
        #mus = pm.Normal('mus', mu = [10,55,105,155,205], sd = sds , shape=(K,n_feats) )
        #nu = pm.Exponential('nu', 1./10, shape=(K,n_feats), testval=tt.ones((K,n_feats)) )
        #mus = pm.StudentT('mus',nu=nu, mu = [[10],[55],[105],[155],[205]], sd = 100., shape=(K,n_feats))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        #pm.Deterministic('pri', tt.as_tensor_variable(get_prior2(category)))

        #prior = pm.Deterministic('prior',tt.stack( [tt.sum(tt.eq(category[i], category[indxs_neig(i, n_rows=73, n_cols=74)]))/8.0 for i in range(73*74) ] ))

        #prior = pm.Deterministic('prior',tt.sum(tt.eq(category  , category[[j for j in range(8)]].reshape( (8,1) ) )))

        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        prior = pm.Deterministic(
            'prior', (tt.sum(tt.eq(aux2.reshape(
                (n_samples, max_neigs)), category.reshape((n_samples, 1))),
                             axis=1) + 0.0) / 8.0)
        #prior2 = pm.Normal('prior2', mu = prior, sd = 0.5, shape= n_samples)

        # aux3 = tt.as_tensor_variable(pm.floatX([1,1,2,2,2,2,2,2,2,2]*100 ))
        #        aux3 = tt.set_subtensor( aux3[(tt.eq(category,1)).nonzero()], 2  )
        # prior2 = pm.Deterministic('prior2', aux3 )
        #
        xs = DensityDist('x',
                         logp_gmix(mus[category], L, prior, category),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        #step = pm.CategoricalGibbsMetropolis(vars = [prior] )
        trace = sample(mc_samples, step=[step2], n_jobs=jobs, tune=600)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model 40 cols')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
Example #34
class TestElementWiseLogp(SeededTest):
    def build_model(self, distfam, params, size, transform, initval=None):
        if initval is not None:
            initval = pm.floatX(initval)
        with pm.Model() as m:
            distfam("x",
                    size=size,
                    transform=transform,
                    initval=initval,
                    **params)
        return m

    def check_transform_elementwise_logp(self, model):
        x = model.free_RVs[0]
        x0 = x.tag.value_var
        assert x.ndim == logpt(x).ndim

        pt = model.initial_point
        array = np.random.randn(*pt[x0.name].shape)
        transform = x0.tag.transform
        logp_notrans = logpt(x,
                             transform.backward(x, array),
                             transformed=False)

        jacob_det = transform.jacobian_det(x, aesara.shared(array))
        assert logpt(x).ndim == jacob_det.ndim

        v1 = logpt(x, array, jacobian=False).eval()
        v2 = logp_notrans.eval()
        close_to(v1, v2, tol)

    def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
        x = model.free_RVs[0]
        x0 = x.tag.value_var
        assert (x.ndim - 1) == logpt(x).ndim

        pt = model.initial_point
        array = np.random.randn(*pt[x0.name].shape)
        transform = x0.tag.transform
        logp_nojac = logpt(x, transform.backward(x, array), transformed=False)

        jacob_det = transform.jacobian_det(x, aesara.shared(array))
        assert logpt(x).ndim == jacob_det.ndim

        # Hack to get relative tolerance
        a = logpt(x, array.astype(aesara.config.floatX), jacobian=False).eval()
        b = logp_nojac.eval()
        close_to(a, b, np.abs(0.5 * (a + b) * tol))

    @pytest.mark.parametrize(
        "sd,size",
        [
            (2.5, 2),
            (5.0, (2, 3)),
            (np.ones(3) * 10.0, (4, 3)),
        ],
    )
    def test_half_normal(self, sd, size):
        model = self.build_model(pm.HalfNormal, {"sd": sd},
                                 size=size,
                                 transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)),
                                          (np.ones(3), (4, 3))])
    def test_exponential(self, lam, size):
        model = self.build_model(pm.Exponential, {"lam": lam},
                                 size=size,
                                 transform=tr.log)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, 2),
            (0.5, 0.5, (2, 3)),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta(self, a, b, size):
        model = self.build_model(pm.Beta, {
            "alpha": a,
            "beta": b
        },
                                 size=size,
                                 transform=tr.logodds)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [
            (0.0, 1.0, 2),
            (0.5, 5.5, (2, 3)),
            (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)),
        ],
    )
    def test_uniform(self, lower, upper, size):
        def transform_params(rv_var):
            _, _, _, lower, upper = rv_var.owner.inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(transform_params)
        model = self.build_model(pm.Uniform, {
            "lower": lower,
            "upper": upper
        },
                                 size=size,
                                 transform=interval)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize(
        "lower, c, upper, size",
        [
            (0.0, 1.0, 2.0, 2),
            (-10, 0, 200, (2, 3)),
            (np.zeros(3), np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_triangular(self, lower, c, upper, size):
        def transform_params(rv_var):
            _, _, _, lower, _, upper = rv_var.owner.inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(transform_params)
        model = self.build_model(pm.Triangular, {
            "lower": lower,
            "c": c,
            "upper": upper
        },
                                 size=size,
                                 transform=interval)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize("mu,kappa,size",
                             [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)),
                              (np.zeros(3), np.ones(3), (4, 3))])
    def test_vonmises(self, mu, kappa, size):
        model = self.build_model(pm.VonMises, {
            "mu": mu,
            "kappa": kappa
        },
                                 size=size,
                                 transform=tr.circular)
        self.check_transform_elementwise_logp(model)

    @pytest.mark.parametrize("a,size", [(np.ones(2), None),
                                        (np.ones((2, 3)) * 0.5, None),
                                        (np.ones(3), (4, ))])
    def test_dirichlet(self, a, size):
        model = self.build_model(pm.Dirichlet, {"a": a},
                                 size=size,
                                 transform=tr.stick_breaking)
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    def test_normal_ordered(self):
        model = self.build_model(
            pm.Normal,
            {
                "mu": 0.0,
                "sd": 1.0
            },
            size=3,
            initval=np.asarray([-1.0, 1.0, 4.0]),
            transform=tr.ordered,
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "sd,size",
        [
            (2.5, (2, )),
            (np.ones(3), (4, 3)),
        ],
    )
    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"),
                       reason="Fails on float32")
    def test_half_normal_ordered(self, sd, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.HalfNormal,
            {"sd": sd},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize("lam,size", [(2.5, (2, )), (np.ones(3), (4, 3))])
    def test_exponential_ordered(self, lam, size):
        initval = np.sort(np.abs(np.random.randn(*size)))
        model = self.build_model(
            pm.Exponential,
            {"lam": lam},
            size=size,
            initval=initval,
            transform=tr.Chain([tr.log, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "a,b,size",
        [
            (1.0, 1.0, (2, )),
            (np.ones(3), np.ones(3), (4, 3)),
        ],
    )
    def test_beta_ordered(self, a, b, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Beta,
            {
                "alpha": a,
                "beta": b
            },
            size=size,
            initval=initval,
            transform=tr.Chain([tr.logodds, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "lower,upper,size",
        [(0.0, 1.0, (2, )),
         (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))],
    )
    def test_uniform_ordered(self, lower, upper, size):
        def transform_params(rv_var):
            _, _, _, lower, upper = rv_var.owner.inputs
            lower = at.as_tensor_variable(lower) if lower is not None else None
            upper = at.as_tensor_variable(upper) if upper is not None else None
            return lower, upper

        interval = tr.Interval(transform_params)

        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.Uniform,
            {
                "lower": lower,
                "upper": upper
            },
            size=size,
            initval=initval,
            transform=tr.Chain([interval, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    @pytest.mark.parametrize("mu,kappa,size",
                             [(0.0, 1.0, (2, )),
                              (np.zeros(3), np.ones(3), (4, 3))])
    def test_vonmises_ordered(self, mu, kappa, size):
        initval = np.sort(np.abs(np.random.rand(*size)))
        model = self.build_model(
            pm.VonMises,
            {
                "mu": mu,
                "kappa": kappa
            },
            size=size,
            initval=initval,
            transform=tr.Chain([tr.circular, tr.ordered]),
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=0)

    @pytest.mark.parametrize(
        "lower,upper,size,transform",
        [
            (0.0, 1.0, (2, ), tr.stick_breaking),
            (0.5, 5.5, (2, 3), tr.stick_breaking),
            (np.zeros(3), np.ones(3),
             (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])),
        ],
    )
    def test_uniform_other(self, lower, upper, size, transform):
        initval = np.ones(size) / size[-1]
        model = self.build_model(
            pm.Uniform,
            {
                "lower": lower,
                "upper": upper
            },
            size=size,
            initval=initval,
            transform=transform,
        )
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)

    @pytest.mark.parametrize(
        "mu,cov,size,shape",
        [
            (np.zeros(2), np.diag(np.ones(2)), None, (2, )),
            (np.zeros(3), np.diag(np.ones(3)), (4, ), (4, 3)),
        ],
    )
    def test_mvnormal_ordered(self, mu, cov, size, shape):
        initval = np.sort(np.random.randn(*shape))
        model = self.build_model(pm.MvNormal, {
            "mu": mu,
            "cov": cov
        },
                                 size=size,
                                 initval=initval,
                                 transform=tr.ordered)
        self.check_vectortransform_elementwise_logp(model, vect_opt=1)
Example #35
    zij = pm.Deterministic('zij', tt.lt(zij_, phii[companyABC]))

    beta_mu = pm.Deterministic('beta_mu', tt.switch(zij, linerpredi, pi_ij))
    # Observed_pred = pm.Weibull("Observed_pred",  alpha=mu, beta=sigma, shape=elec_faults.shape)  # observed values
    Observed = pm.Weibull("Observed",
                          alpha=alpha,
                          beta=beta_mu,
                          observed=elec_faults)  # observed values

    # start = pm.find_MAP()
    # step = pm.Slice([beta1, u])
    # step = pm.NUTS(scaling=cov, is_cov=True)
    # trace = pm.sample(3000, init='advi', tune=1000)

with model1:
    s = shared(pm.floatX(1))
    inference = pm.ADVI(cost_part_grad_scale=s)
    # ADVI has nearly converged
    inference.fit(n=20000)
    # It is time to set `s` to zero
    s.set_value(0)
    approx = inference.fit(n=10000)
    trace = approx.sample(3000, include_transformed=True)
    elbos1 = -inference.hist

chain = trace[2000:]
varnames2 = ['beta', 'beta1', 'beta2', 'beta3']
# # pm.plot_posterior(chain2, varnames2, ref_val=0)
pm.traceplot(chain)
plt.show()
pm.traceplot(chain, varnames2)
Example #36
 def logp(self, value):
     trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
     m = self.m
     n = self.n
     norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi))
     return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet
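For reference, writing U for the row covariance and V for the column covariance (so half_rowlogdet = (1/2) log|U| and half_collogdet = (1/2) log|V|) and X for an m-by-n observation with mean M, the returned value corresponds to the matrix normal log-density

    log p(X) = -(m*n/2) log(2*pi) - (1/2) tr[ V^{-1} (X - M)^T U^{-1} (X - M) ] - (m/2) log|V| - (n/2) log|U|

with the trace term supplied by _trquaddist.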
Example #37
## Build model and sample
# Number of iterations for sampler
draws = 2000
# Prepare lists of starting points for mu to prevent label-switching problem
testvals = [[-2, -2], [0, 0], [2, 2]]

# Model structure
with pm.Model() as mvgmm:
    # Prior over component weights
    p = pm.Dirichlet('p', a=np.array([1.] * K))

    # Prior over component means
    mus = [
        pm.MvNormal('mu_%d' % i,
                    mu=pm.floatX(np.zeros(D)),
                    tau=pm.floatX(0.1 * np.eye(D)),
                    shape=(D, ),
                    testval=pm.floatX(testvals[i])) for i in range(K)
    ]

    # Cholesky decomposed LKJ prior over component covariance matrices
    packed_L = [
        pm.LKJCholeskyCov('packed_L_%d' % i,
                          n=D,
                          eta=2.,
                          sd_dist=pm.HalfCauchy.dist(1)) for i in range(K)
    ]

    # Unpack packed_L into full array
    L = [pm.expand_packed_triangular(D, packed_L[i]) for i in range(K)]
Example #38
    def trainBNN(self,inputsTrain,errInputsTrain,
                 targetsTrain,errTargetsTrain,
                 neuronsPerHiddenlayer,sampler,
                 nsamp,bnnmodelpkl,plotdir,
                 ncores=2,viewBNN=False):
        
        """ TRAINS BAYESIAN NEURAL NETWORK ACCORDING TO SPECIFIED TRAINING DATA, SAVES MODEL,
        AND VISUALIZES BNN IF DESIRED

        Arguments:
            inputsTrain           - input training set (, [ntrain*ninputs], where ntrain is the number of training measurements and ninputs is the number of inputs)
            errInputsTrain        - errors on input training set (, [ntrain*ninputs]) 
            targetsTrain          - target training set (, [ntrain*ntargets], where ntargets is the number of targets)
            errTargetsTrain       - errors on target training set (, [ntrain*ninputs]) 
            neuronsPerHiddenlayer - number of neurons in hidden layer
            sampler               - ADVI variational inference sampler or No U-Turn Sampler (NUTS) (much slower)
            nsamp                 - number of samples to generate
            bnnmodelpkl           - name of pickle file to store trained BNN
            plotdir               - directory for storing any associated plots
            ncores                - number of cores to use for NUTS sampler (default 2)
            viewBNN               - whether to visualize and plot BNN (default False)
        Returns:
            None - the trained model, trace, and input/output scalings are pickled to bnnmodelpkl
        """
                     
        ntrain,ninputs  = np.shape(inputsTrain)
        ntrain,ntargets = np.shape(targetsTrain)
        
        # Calculate and scale inputs and targets
        inputsMu,inputsSig   = self.calcScale(inputsTrain,errInputsTrain)
        targetsMu,targetsSig = self.calcScale(targetsTrain,errTargetsTrain)
        inputsTrainScale,errInputsTrainScale = \
            self.scaleData(inputsTrain,errInputsTrain,inputsMu,inputsSig)
        targetsTrainScale,errTargetsTrainScale = \
            self.scaleData(targetsTrain,errTargetsTrain,targetsMu,targetsSig)
                     
        # Initialize weights, biases on neurons, and true X and Y
        np.random.seed(30)
        initWtsInHid   = np.random.randn(ninputs,neuronsPerHiddenlayer)
        initBiasInHid  = np.random.randn(neuronsPerHiddenlayer)
        initWtsHidHid  = np.random.randn(neuronsPerHiddenlayer,neuronsPerHiddenlayer)
        initBiasHidHid = np.random.randn(neuronsPerHiddenlayer)
        initWtsHidOut  = np.random.randn(neuronsPerHiddenlayer,ntargets)
        initBiasHidOut = np.random.randn(ntargets)
        initX          = np.random.randn(ntrain,ninputs)

        # Specify neural network
        with pm.Model() as neural_network:
    
    
            # Priors on weights and biases from input to first hidden layer
            wtsInHid  = pm.Normal('wtsInHid',
                                  mu      = 0,
                                  sd      = 1,
                                  shape   = (ninputs,neuronsPerHiddenlayer),
                                  testval = initWtsInHid)
            biasInHid = pm.Normal('biasInHid',
                                  mu      = 0,
                                  sd      = 1,
                                  shape   = (neuronsPerHiddenlayer,),
                                  testval = initBiasInHid)
                                  
            # Priors on weights and biases from first hidden layer to second hidden layer
            wtsHidHid  = pm.Normal('wtsHidHid',
                                   mu      = 0,
                                   sd      = 1,
                                   shape   = (neuronsPerHiddenlayer,neuronsPerHiddenlayer),
                                   testval = initWtsHidHid)
            biasHidHid = pm.Normal('biasHidHid',
                                   mu      = 0,
                                   sd      = 1,
                                   shape   = (neuronsPerHiddenlayer,),
                                   testval = initBiasHidHid)
    
            # Priors on weights and biases from second hidden layer to output
            wtsHidOut  = pm.Normal('wtsHidOut',
                                   mu      = 0,
                                   sd      = 1,
                                   shape   = (neuronsPerHiddenlayer,ntargets),
                                   testval = initWtsHidOut)
            biasHidOut = pm.Normal('biasHidOut',
                                   mu      = 0,
                                   sd      = 1,
                                   shape   = (ntargets,),
                                   testval = initBiasHidOut)
            
            # Priors on true inputs (mean zero assuming they have been scaled; the wide sd keeps them only loosely tied to the measured values)
            xTrue  = pm.Normal('xTrue',
                               mu      = 0,
                               sd      = 10,
                               shape   = (ntrain, ninputs),
                               testval = initX) 
    
            # Expected outcome
            actHid1 = TT.nnet.sigmoid(TT.dot(xTrue,wtsInHid)+biasInHid)
            actHid2 = TT.nnet.sigmoid(TT.dot(actHid1,wtsHidHid)+biasHidHid)
            actOut  = TT.dot(actHid2,wtsHidOut)+biasHidOut 
            yTrue   = pm.Deterministic('yTrue',actOut)
            
            # Likelihoods of observations (sampling distribution - fixed)
            xTrainObs = pm.Normal('xTrainObs',
                                   mu         = xTrue,
                                   sd         = errInputsTrainScale,
                                   observed   = inputsTrainScale,
                                   total_size = (ntrain,ninputs))
            yTrainObs = pm.Normal('yTrainObs',
                                   mu         = yTrue,
                                   sd         = errTargetsTrainScale,
                                   observed   = targetsTrainScale,
                                   total_size = (ntrain,ntargets))          
                               
        # Train BNN
        print("Training Bayesian neural network with...")
        with neural_network:
    
            if (sampler=="advi"):
                # Fit with ADVI sampler
                print("   ...the ADVI sampler...")
                s         = theano.shared(pm.floatX(1))
                inference = pm.ADVI(cost_part_grad_scale=s)
                ftt       = pm.fit(n=nsamp, method=inference)
                trace     = ftt.sample(nsamp)
                fig       = plt.figure(figsize=(6,4))
                plt.plot(-ftt.hist)
                plt.ylabel('ELBO')
                fig.savefig(plotdir+"advi_fitprogress.eps")
                
            else:
                # Fit with NUTS sampler
                print("... ...the NUTS sampler...")
                step  = pm.NUTS(target_accept=0.95)
                ntune = 1000
                trace = pm.sample(nsamp,random_seed=10,step=step,tune=ntune,cores=ncores)
            print("...done.")
            
        # Save BNN to file
        print("Saving BNN, trace, and scaling of inputs and outputs to "+bnnmodelpkl+"...")
        with open(bnnmodelpkl,"wb") as buff:
            pickle.dump({'inputsMu':inputsMu,\
                         'inputsSig':inputsSig,\
                         'targetsMu':targetsMu,\
                         'targetsSig':targetsSig,\
                         'model': neural_network,\
                         'neuronsPerHiddenlayer': neuronsPerHiddenlayer,\
                         'trace': trace}, buff)
        print("...done.")
        
        if viewBNN:

            # View neural_network model
            print(neural_network)

            # View the free random variables (i.e. the ones you are obtaining posteriors for!) in the model
            print(neural_network.free_RVs)
        
            # If desired plot neural network
            fig,ax=plt.subplots(7,2,figsize=(16,6))
            pm.traceplot(trace,ax=ax)
            fig.savefig(plotdir+"neural_network.eps",format='eps',dpi=100,bbox_inches='tight')
            
        return
Example #39
        h = self.act(self.dfc2(h))
        h = h.reshape([zs.shape[0], *self.conved_shape[1:]])
        h = self.act(self.dconv1(h))
        h = self.dconv2(h)
        return tt.nnet.sigmoid(h)


logger.info("loading dataset")
batch_size = 128
train_mnist = datasets.MNIST('./data', train=True, download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.1307,), (0.3081,))
                             ]))
x_train = train_mnist.train_data.numpy()
data = pm.floatX(x_train.reshape(-1, 1, 28, 28))
data /= numpy.max(data)


logger.info("defining symbols")
n_latent = 2
vae = VAE(n_latent)
xs = tt.tensor4("xs")
xs.tag.test_value = numpy.zeros((batch_size, 1, 28, 28)).astype('float32')

logger.info("building model")
with pm.Model() as model:
    zs = pm.Normal("zs", mu=0, sd=1, shape=(batch_size, n_latent),
                   dtype=theano.config.floatX, total_size=len(data))
    xs_ = pm.Normal("xs_", mu=vae.decode(zs), sd=0.1, observed=xs,
                    dtype=theano.config.floatX, total_size=len(data))
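The model above only declares the prior and the decoder likelihood; the fitting step is not shown. Below is a minimal sketch of how it could be trained with ADVI, under the assumption (not part of the snippet) that the VAE class also exposes an encoder `vae.encode(xs)` returning per-sample `(mu, rho)` tensors and a list of trainable shared variables `vae.params`.

logger.info("fitting (sketch)")
x_minibatch = pm.Minibatch(data, batch_size)      # stream minibatches through xs
enc_mu, enc_rho = vae.encode(xs)                  # assumed encoder outputs (hypothetical API)
with model:
    approx = pm.fit(
        15000,
        local_rv={zs: dict(mu=enc_mu, rho=enc_rho)},   # amortize zs with the encoder
        more_obj_params=vae.params,                    # assumed encoder/decoder parameters
        more_replacements={xs: x_minibatch},
    )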
Example #40
 def cov(self):
     x = (self.histogram - self.mean)
     return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
Example #41
File: model.py  Project: hhy5277/pymc3
def _get_scaling(total_size, shape, ndim):
    """
    Gets scaling constant for logp

    Parameters
    ----------
    total_size : int or list[int]
    shape : shape
        shape to scale
    ndim : int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = pm.floatX(1)
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = pm.floatX(total_size) / pm.floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(
                isinstance(i, int)
                for i in total_size if (i is not Ellipsis and i is not None)):
            raise TypeError('Unrecognized `total_size` type, expected '
                            'int or list of ints, got %r' % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep + 1:]
            if Ellipsis in end:
                raise ValueError(
                    'Double Ellipsis in `total_size` is restricted, got %r' %
                    total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError('Length of `total_size` is too big, '
                             'number of scalings is bigger than ndim, got %r' %
                             total_size)
        elif (len(begin) + len(end)) == 0:
            return pm.floatX(1)
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [
            pm.floatX(t) / shp_begin[i] for i, t in enumerate(begin)
            if t is not None
        ]
        end_coef = [
            pm.floatX(t) / shp_end[i] for i, t in enumerate(end)
            if t is not None
        ]
        coefs = begin_coef + end_coef
        coef = tt.prod(coefs)
    else:
        raise TypeError('Unrecognized `total_size` type, expected '
                        'int or list of ints, got %r' % total_size)
    return tt.as_tensor(pm.floatX(coef))
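As a quick illustration of the scaling described above (a minimal sketch, assuming PyMC3 on the Theano backend): observing 2 points with `total_size=10` multiplies their log-probability by 10/2 = 5.

import numpy as np
import pymc3 as pm

with pm.Model() as scaled:
    pm.Normal('x', mu=0, sd=1, observed=pm.floatX([0., 1.]), total_size=10)

with pm.Model() as unscaled:
    pm.Normal('x', mu=0, sd=1, observed=pm.floatX([0., 1.]))

# coef = total_size / shape[0] = 10 / 2, so the scaled logp is 5x the unscaled one
np.testing.assert_allclose(scaled.logp(scaled.test_point),
                           5 * unscaled.logp(unscaled.test_point))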
Example #42
data = data[data['treatment'] != 'cellblaster']

# Rename treatments
data['treatment'] = data['treatment'].apply(lambda x: renamed_treatments[x])

# Sort the data according to the treatments.
treatment_order = ['FM1', 'FM2', 'FM3', 'FM4', 'CTRL1', 'CTRL2']
data['treatment'] = data['treatment'].astype('category')
data['treatment'].cat.set_categories(treatment_order, inplace=True)
data['treatment'] = data['treatment'].cat.codes.astype('int32')
data = data.sort_values(['treatment']).reset_index(drop=True)
data['site'] = data['site'].astype('category').cat.codes.astype('int32')

data['frac_change_colonies'] = (
    (data['colonies_post'] - data['colonies_pre']) / data['colonies_pre'])
data['frac_change_colonies'] = pm.floatX(data['frac_change_colonies'])
del data['screen protector']


# Change dtypes to int32 for GPU usage.
def change_dtype(data, dtype='int32'):
    return data.astype(dtype)


cols_to_change_ints = [
    'sample_id', 'colonies_pre', 'colonies_post', 'morphologies_pre',
    'morphologies_post', 'phone ID'
]

cols_to_change_floats = [
    'year',
Example #43
 def create_minibatch(data):
     while True:
         data = np.roll(data, 100, axis=0)
         yield pm.floatX(data[:100])
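A minimal sketch (an illustration, not part of the original snippet) of how a generator like `create_minibatch` could drive ADVI: a shared variable holds the current minibatch and a fit callback swaps in the next one each iteration. It treats the function as standalone and assumes the `callback(approx, losses, i)` signature used elsewhere in these examples.

import numpy as np
import theano
import pymc3 as pm

full_data = pm.floatX(np.random.randn(1000))
batches = create_minibatch(full_data)
x_shared = theano.shared(next(batches))

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    pm.Normal('obs', mu=mu, sd=1, observed=x_shared, total_size=len(full_data))

    def next_batch(approx, losses, i):
        x_shared.set_value(next(batches))   # fresh 100-point minibatch every iteration

    approx = pm.fit(1000, callbacks=[next_batch])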
Example #44
def run_lda(args):
    tf_vectorizer, docs_tr, docs_te = prepare_sparse_matrix_nonlabel(args.n_tr, args.n_te, args.n_word)
    feature_names = tf_vectorizer.get_feature_names()
    doc_tr_minibatch = pm.Minibatch(docs_tr.toarray(), args.bsz)
    doc_tr = shared(docs_tr.toarray()[:args.bsz])

    def log_prob(beta, theta):
        """Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distributions.
        theta : tensor (D x K)
            Topic distributions for documents.
        """

        def ll_docs_f(docs):
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = (vfreqs * pmmath.logsumexp(tt.log(theta[dixs]) + tt.log(beta.T[vixs]),
                                                 axis=1).ravel())
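            # ll_docs has one term per (document, word) pair; the value returned
            # below is the average per-token log-likelihood of the minibatch, with
            # 1e-9 guarding against division by zero for empty documents.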

            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9)

        return ll_docs_f

    with pm.Model() as model:
        beta = Dirichlet("beta",
                         a=pm.floatX((1. / args.n_topic) * np.ones((args.n_topic, args.n_word))),
                         shape=(args.n_topic, args.n_word), )

        theta = Dirichlet("theta",
                          a=pm.floatX((10. / args.n_topic) * np.ones((args.bsz, args.n_topic))),
                          shape=(args.bsz, args.n_topic), total_size=args.n_tr, )

        doc = pm.DensityDist("doc", log_prob(beta, theta), observed=doc_tr)

    encoder = ThetaEncoder(n_words=args.n_word, n_hidden=100, n_topics=args.n_topic)
    local_RVs = OrderedDict([(theta, encoder.encode(doc_tr))])
    encoder_params = encoder.get_params()

    s = shared(args.lr)

    def reduce_rate(a, h, i):
        s.set_value(args.lr / ((i / args.bsz) + 1) ** 0.7)

    with model:
        approx = pm.MeanField(local_rv=local_RVs)
        approx.scale_cost_to_minibatch = False
        inference = pm.KLqp(approx)

    inference.fit(args.n_iter,
                  callbacks=[reduce_rate, pm.callbacks.CheckParametersConvergence(diff="absolute")],
                  obj_optimizer=pm.adam(learning_rate=s),
                  more_obj_params=encoder_params,
                  total_grad_norm_constraint=200,
                  more_replacements={ doc_tr: doc_tr_minibatch }, )

    doc_tr.set_value(docs_tr.toarray())
    inp = tt.matrix(dtype="int64")
    sample_vi_theta = theano.function([inp],
        approx.sample_node(approx.model.theta, args.n_sample, more_replacements={doc_tr: inp}), )

    test = docs_te.toarray()
    test_n = test.sum(1)

    beta_pymc3 = pm.sample_approx(approx, draws=args.n_sample)['beta']
    theta_pymc3 = sample_vi_theta(test)

    assert beta_pymc3.shape == (args.n_sample, args.n_topic, args.n_word)
    assert theta_pymc3.shape == (args.n_sample, args.n_te, args.n_topic)

    beta_mean = beta_pymc3.mean(0)
    theta_mean = theta_pymc3.mean(0)

    pred_rate = theta_mean.dot(beta_mean)
    pp_test = (test * np.log(pred_rate)).sum(1) / test_n

    posteriors = { 'theta': theta_pymc3, 'beta': beta_pymc3,}

    log_top_words(beta_pymc3.mean(0), feature_names, n_top_words=args.n_top_word)
    save_elbo(approx.hist)
    save_pp(pp_test)
    save_draws(posteriors)
Example #45
File: hbr.py  Project: lindenmp/nispat
    def __init__(self, age, site_id, gender, y, model_type='poly2'):
        self.site_num = len(np.unique(site_id))
        self.gender_num = len(np.unique(gender))
        self.model_type = model_type
        self.s = theano.shared(site_id)
        self.g = theano.shared(gender)
        self.a = theano.shared(age)
        if model_type != 'nn':
            with pm.Model() as model:
                # Priors
                mu_prior_intercept = pm.Normal('mu_prior_intercept',
                                               mu=0.,
                                               sigma=1e5)
                sigma_prior_intercept = pm.HalfCauchy('sigma_prior_intercept',
                                                      5)
                mu_prior_slope = pm.Normal('mu_prior_slope', mu=0., sigma=1e5)
                sigma_prior_slope = pm.HalfCauchy('sigma_prior_slope', 5)

                # Random intercepts
                intercepts = pm.Normal('intercepts',
                                       mu=mu_prior_intercept,
                                       sigma=sigma_prior_intercept,
                                       shape=(self.gender_num, self.site_num))

                # Expected value
                if model_type == 'lin_rand_int':
                    # Random slopes
                    slopes = pm.Normal('slopes',
                                       mu=mu_prior_slope,
                                       sigma=sigma_prior_slope,
                                       shape=(self.gender_num, ))
                    y_hat = intercepts[(self.g,
                                        self.s)] + self.a * slopes[(self.g)]
                    # Model error
                    sigma_error = pm.Uniform('sigma_error', lower=0, upper=100)
                    sigma_y = sigma_error
                elif model_type == 'lin_rand_int_slp':
                    # Random slopes
                    slopes = pm.Normal('slopes',
                                       mu=mu_prior_slope,
                                       sigma=sigma_prior_slope,
                                       shape=(self.gender_num, self.site_num))
                    y_hat = intercepts[(self.g, self.s)] + self.a * slopes[
                        (self.g, self.s)]
                    # Model error
                    sigma_error = pm.Uniform('sigma_error', lower=0, upper=100)
                    sigma_y = sigma_error
                elif model_type == 'lin_rand_int_slp_nse':
                    # Random slopes
                    slopes = pm.Normal('slopes',
                                       mu=mu_prior_slope,
                                       sigma=sigma_prior_slope,
                                       shape=(self.gender_num, self.site_num))
                    y_hat = intercepts[(self.g, self.s)] + self.a * slopes[
                        (self.g, self.s)]
                    # Model error
                    sigma_error_site = pm.Uniform('sigma_error_site',
                                                  lower=0,
                                                  upper=100,
                                                  shape=(self.site_num, ))
                    sigma_error_gender = pm.Uniform('sigma_error_gender',
                                                    lower=0,
                                                    upper=100,
                                                    shape=(self.gender_num, ))
                    sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 +
                                      sigma_error_gender[(self.g)]**2)
                elif model_type == 'lin_rand_int_nse':
                    # Random slopes
                    slopes = pm.Normal('slopes',
                                       mu=mu_prior_slope,
                                       sigma=sigma_prior_slope,
                                       shape=(self.gender_num, ))
                    y_hat = intercepts[(self.g,
                                        self.s)] + self.a * slopes[(self.g)]
                    # Model error
                    sigma_error_site = pm.Uniform('sigma_error_site',
                                                  lower=0,
                                                  upper=100,
                                                  shape=(self.site_num, ))
                    sigma_error_gender = pm.Uniform('sigma_error_gender',
                                                    lower=0,
                                                    upper=100,
                                                    shape=(self.gender_num, ))
                    sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 +
                                      sigma_error_gender[(self.g)]**2)
                elif model_type == 'poly2':
                    slopes = pm.Normal('slopes',
                                       mu=mu_prior_slope,
                                       sigma=sigma_prior_slope,
                                       shape=(self.gender_num, ))
                    mu_prior_slope_2 = pm.Normal('mu_prior_slope_2',
                                                 mu=0.,
                                                 sigma=1e5)
                    sigma_prior_slope_2 = pm.HalfCauchy(
                        'sigma_prior_slope_2', 5)
                    slopes_2 = pm.Normal('slopes_2',
                                         mu=mu_prior_slope_2,
                                         sigma=sigma_prior_slope_2,
                                         shape=(self.gender_num, ))
                    y_hat = intercepts[(self.g, self.s)] + self.a * slopes[
                        (self.g)] + self.a**2 * slopes_2[(self.g)]
                    # Model error
                    sigma_error_site = pm.Uniform('sigma_error_site',
                                                  lower=0,
                                                  upper=100,
                                                  shape=(self.site_num, ))
                    sigma_error_gender = pm.Uniform('sigma_error_gender',
                                                    lower=0,
                                                    upper=100,
                                                    shape=(self.gender_num, ))
                    sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 +
                                      sigma_error_gender[(self.g)]**2)
                # Data likelihood
                y_like = pm.Normal('y_like',
                                   mu=y_hat,
                                   sigma=sigma_y,
                                   observed=y)

        elif model_type == 'nn':
            age = np.expand_dims(age, axis=1)
            self.a = theano.shared(age)
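            # age is reshaped to (n, 1) so that theano.tensor.batched_dot below can
            # pair each observation with its own (1 x n_hidden) weight matrix.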
            n_hidden = 2
            n_data = 1
            init_1 = pm.floatX(np.random.randn(n_data, n_hidden))
            init_out = pm.floatX(np.random.randn(n_hidden))
            std_init_1 = pm.floatX(np.ones([n_data, n_hidden]))
            std_init_out = pm.floatX(np.ones([
                n_hidden,
            ]))
            with pm.Model() as model:
                weights_in_1_grp = pm.Normal('w_in_1_grp',
                                             0,
                                             sd=1.,
                                             shape=(n_data, n_hidden),
                                             testval=init_1)
                # Group standard-deviation
                weights_in_1_grp_sd = pm.HalfNormal('w_in_1_grp_sd',
                                                    sd=1.,
                                                    shape=(n_data, n_hidden),
                                                    testval=std_init_1)
                # Group mean distribution from hidden layer to output
                weights_1_out_grp = pm.Normal('w_1_out_grp',
                                              0,
                                              sd=1.,
                                              shape=(n_hidden, ),
                                              testval=init_out)
                weights_1_out_grp_sd = pm.HalfNormal('w_1_out_grp_sd',
                                                     sd=1.,
                                                     shape=(n_hidden, ),
                                                     testval=std_init_out)
                # Separate weights for each different model
                weights_in_1_raw = pm.Normal('w_in_1',
                                             shape=(self.gender_num,
                                                    self.site_num, n_data,
                                                    n_hidden))
                # Non-centered specification of hierarchical model
                weights_in_1 = weights_in_1_raw[
                    self.g,
                    self.s, :, :] * weights_in_1_grp_sd + weights_in_1_grp

                weights_1_out_raw = pm.Normal('w_1_out',
                                              shape=(self.gender_num,
                                                     self.site_num, n_hidden))
                weights_1_out = weights_1_out_raw[
                    self.g,
                    self.s, :] * weights_1_out_grp_sd + weights_1_out_grp
                # Build neural-network using tanh activation function
                act_1 = pm.math.tanh(
                    theano.tensor.batched_dot(self.a, weights_in_1))
                y_hat = theano.tensor.batched_dot(act_1, weights_1_out)

                sigma_error_site = pm.Uniform('sigma_error_site',
                                              lower=0,
                                              upper=100,
                                              shape=(self.site_num, ))
                sigma_error_gender = pm.Uniform('sigma_error_gender',
                                                lower=0,
                                                upper=100,
                                                shape=(self.gender_num, ))
                sigma_y = np.sqrt(sigma_error_site[(self.s)]**2 +
                                  sigma_error_gender[(self.g)]**2)
                # Data likelihood
                y_like = pm.Normal('y_like',
                                   mu=y_hat,
                                   sigma=sigma_y,
                                   observed=y)

        self.model = model
Example #46
 def __init__(self, approx, beta=1.0):
     Operator.__init__(self, approx)
     self.beta = pm.floatX(beta)
Example #47
 def build_model(self, distfam, params, shape, transform, testval=None):
     if testval is not None:
         testval = pm.floatX(testval)
     with pm.Model() as m:
         distfam("x", shape=shape, transform=transform, testval=testval, **params)
     return m
Example #48
 def cov(self):
     x = self.histogram - self.mean
     return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
Example #49
        def logp_gmix(mus, pi, taus, n_components):
            def logp_(value):
                logps = [
                    tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value)
                    for i in range(n_components)
                ]
                return tt.sum(
                    logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

            return logp_

        # Sparse model with diagonal covariance:
        with pm.Model() as model:

            # Weights of each component:
            w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, ))

            # Impose sparse structure onto mean with off-diagonal elements all being the same, because background should be the same throughout.
            mus_signal = MvNormal(
                'mus_signal',
                mu=pm.floatX(signalMean_priorMean),
                tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                shape=n_dimensions)
            mus_background = MvNormal('mus_background',
                                      mu=pm.floatX(backgroundMean_priorMean),
                                      tau=pm.floatX(
                                          np.eye(n_dimensions) /
                                          backgroundMean_priorSD**2),
                                      shape=n_dimensions)
            mus = tt.fill_diagonal(
                tt.reshape(tt.tile(mus_background, n_components),
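The construction of `mus` above is cut off. The following NumPy sketch (an assumed reading of the comment about the sparse mean structure, taking n_components == n_dimensions) shows the intended pattern: every entry is the shared background mean except the diagonal, which carries the per-component signal mean.

import numpy as np

n_components = n_dimensions = 3
mus_background = np.array([0.1, 0.2, 0.3])
mus_signal = np.array([5.0, 6.0, 7.0])

mus = np.tile(mus_background, n_components).reshape(n_components, n_dimensions)
np.fill_diagonal(mus, mus_signal)   # component k takes the signal mean in dimension k
# mus == [[5.0, 0.2, 0.3],
#         [0.1, 6.0, 0.3],
#         [0.1, 0.2, 7.0]]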
Example #50
sns.despine()
ax.set(title='Predicted labels in testing set', xlabel='X', ylabel='Y')

# In[17]:

print('Accuracy = {}%'.format((Y_test == pred).mean() * 100))

# Hey, our neural network did all right!

# ## Lets look at what the classifier has learned
#
# For this, we evaluate the class probability predictions on a grid over the whole input space.

# In[18]:

grid = pm.floatX(np.mgrid[-3:3:100j, -3:3:100j])
grid_2d = grid.reshape(2, -1).T
dummy_out = np.ones(grid.shape[1], dtype=np.int8)

# In[19]:

ppc = sample_proba(grid_2d, 500)

# ### Probability surface

# In[20]:

cmap = sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True)
fig, ax = plt.subplots(figsize=(12, 9))
contour = ax.contourf(grid[0],
                      grid[1],
Example #51
def integers():
    i = 0
    while True:
        yield pm.floatX(i)
        i += 1
Example #52
 def logp(self, value):
     quaddist, logdet, ok = self._quaddist(value)
     k = value.shape[-1].astype(theano.config.floatX)
     norm = -0.5 * k * pm.floatX(np.log(2 * np.pi))
     return bound(norm - 0.5 * quaddist - logdet, ok)
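For reference, the value returned above is the standard multivariate normal log-density, written here assuming that `quaddist` is the quadratic form and that `logdet` equals one half of log|Σ| (as it would when computed from a Cholesky factor; `_quaddist` itself is not shown):

\log p(x) = -\tfrac{k}{2}\log(2\pi) - \tfrac{1}{2}(x-\mu)^\top \Sigma^{-1}(x-\mu) - \tfrac{1}{2}\log\lvert\Sigma\rvert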
Example #53
 def gen():
     for i in range(2):
         yield floatX(np.ones((10, 10)) * i)
Example #54
    def trainBNN(self,
                 inputsTrain,
                 errInputsTrain,
                 targetsTrain,
                 errTargetsTrain,
                 neuronsPerHiddenlayer,
                 sampler,
                 nsamp,
                 bnnmodelpkl,
                 plotdir,
                 ncores=2,
                 viewBNN=False):
        """ TRAINS BAYESIAN NEURAL NETWORK ACCORDING TO SPECIFIED TRAINING DATA, SAVES MODEL,
        AND VISUALIZES BNN IF DESIRED

        Arguments:
            inputsTrain           - input training set (, [ntrain*ninputs], where 
                                    ntrain is the number of training measurements 
                                    and ninputs is the number of inputs) and is
                                    specifically ra, dec, appJmag, appHmag, appKmag, 
                                    parallax, Teff, logg, [M/H], [a/M], [C/M], [N/M]
            errInputsTrain        - errors on input training set (, [ntrain*ninputs]) 
            targetsTrain          - target training set (, [ntrain*ntargets], where ntargets is the number of targets)
            errTargetsTrain       - errors on target training set (, [ntrain*ntargets])
            neuronsPerHiddenlayer - number of neurons in hidden layer
            sampler               - ADVI variational inference sampler or No U-Turn Sampler (NUTS) (much slower)
            nsamp                 - number of samples to generate
            bnnmodelpkl           - name of pickle file to store trained BNN
            plotdir               - directory for storing any associated plots
            ncores                - number of cores to use for NUTS sampler (default 2)
            viewBNN               - whether to visualize and plot BNN (default False)
        Returns:
            None - the trained model, trace, and target scaling are saved to bnnmodelpkl
        """

        ntrain, ninputs = np.shape(inputsTrain)
        ntrain, ntargets = np.shape(targetsTrain)

        # Calculate and scale inputs and targets
        targetsMu, targetsSig = self.calcScale(targetsTrain, errTargetsTrain)
        targetsTrainScale,errTargetsTrainScale = \
            self.scaleData(targetsTrain,errTargetsTrain,targetsMu,targetsSig)

        # Initialize BNN weights and biases on neurons using a
        # Gaussian with mean 0 and standard deviation 1
        np.random.seed(30)
        ninputsBNN = np.copy(ninputs)
        initWtsInHid = np.random.randn(ninputsBNN, neuronsPerHiddenlayer)
        initBiasInHid = np.random.randn(neuronsPerHiddenlayer)
        initWtsHidOut = np.random.randn(neuronsPerHiddenlayer, ntargets)
        initBiasHidOut = np.random.randn(ntargets)

        # Specify neural network
        with pm.Model() as neural_network:

            # Priors for true inputs
            # CHANGE DURING THE FIT
            xTrue = pm.Normal('xTrue',
                              mu=inputsTrain,
                              sd=errInputsTrain,
                              shape=(ntrain, ninputs),
                              testval=inputsTrain)

            # Calculate absmag from appmag and parallax
            truera = xTrue[:, 0]
            truedec = xTrue[:, 1]
            trueappJmag = xTrue[:, 2]
            trueappHmag = xTrue[:, 3]
            trueappKmag = xTrue[:, 4]
            trueparallax = xTrue[:, 5]
            trueabsJmag = trueappJmag - 5 * np.log10(100. / trueparallax)
            trueabsHmag = trueappHmag - 5 * np.log10(100. / trueparallax)
            trueabsKmag = trueappKmag - 5 * np.log10(100. / trueparallax)
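            # Distance modulus, assuming parallax is given in milliarcseconds:
            # d[pc] = 1000/parallax, so absMag = appMag - 5*log10(d/10) = appMag - 5*log10(100/parallax)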
            trueJminH = trueabsJmag - trueabsHmag
            trueHminK = trueabsHmag - trueabsKmag

            # Priors for true inputs to BNN
            # CHANGE DURING THE FIT
            xTrueBNN = TT.stack([
                truera, truedec, trueabsJmag, trueJminH, trueHminK,
                trueparallax, xTrue[:, 6], xTrue[:, 7], xTrue[:, 8],
                xTrue[:, 9], xTrue[:, 10], xTrue[:, 11]
            ],
                                axis=0)
            xTrueBNN = xTrueBNN.reshape([ntrain, ninputs])

            # Priors on unknown BNN parameters (weights and biases from inner to
            # hidden layer and hidden to output layer)
            # CHANGE DURING THE FIT
            # testval overrides the default test value, which is the mean
            wtsInHid = pm.Normal('wtsInHid',
                                 mu=0,
                                 sd=1,
                                 shape=(ninputsBNN, neuronsPerHiddenlayer),
                                 testval=initWtsInHid)
            biasInHid = pm.Normal('biasInHid',
                                  mu=0,
                                  sd=1,
                                  shape=(neuronsPerHiddenlayer, ),
                                  testval=initBiasInHid)
            wtsHidOut = pm.Normal('wtsHidOut',
                                  mu=0,
                                  sd=1,
                                  shape=(neuronsPerHiddenlayer, ntargets),
                                  testval=initWtsHidOut)
            biasHidOut = pm.Normal('biasHidOut',
                                   mu=0,
                                   sd=1,
                                   shape=(ntargets, ),
                                   testval=initBiasHidOut)

            # Apply ANN to get expected value of outcome
            actHid = TT.nnet.sigmoid(TT.dot(xTrueBNN, wtsInHid) + biasInHid)
            actOut = TT.dot(actHid, wtsHidOut) + biasHidOut
            yTrue = pm.Deterministic('yTrue', actOut)

            # Likelihoods of observations (i.e. the sampling distributions)
            # FIXED DURING THE FIT
            xTrainObs = pm.Normal('xTrainObs',
                                  mu=xTrue,
                                  sd=errInputsTrain,
                                  observed=inputsTrain,
                                  total_size=(ntrain, ninputs))
            yTrainObs = pm.Normal('yTrainObs',
                                  mu=yTrue,
                                  sd=errTargetsTrainScale,
                                  observed=targetsTrainScale,
                                  total_size=(ntrain, ntargets))

        # Train BNN
        print("Training Bayesian neural network with...")
        with neural_network:

            if (sampler == "advi"):
                # Fit with ADVI sampler
                print("   ...the ADVI sampler...")
                s = theano.shared(pm.floatX(1))
                inference = pm.ADVI(cost_part_grad_scale=s)
                ftt = pm.fit(n=nsamp, method=inference)
                trace = ftt.sample(nsamp)
                fig = plt.figure(figsize=(6, 4))
                plt.plot(-ftt.hist)
                plt.ylabel('ELBO')
                fig.savefig(plotdir + "advi_fitprogress.eps")

            else:
                # Fit with NUTS sampler
                print("... ...the NUTS sampler...")
                step = pm.NUTS(target_accept=0.95)
                ntune = 1000
                trace = pm.sample(nsamp,
                                  random_seed=10,
                                  step=step,
                                  tune=ntune,
                                  cores=ncores)
            print("...done.")

        # Save BNN to file
        print("Saving BNN, trace, and scaling of inputs and outputs to " +
              bnnmodelpkl + "...")
        with open(bnnmodelpkl, "wb") as buff:
            pickle.dump({'targetsMu':targetsMu,\
                         'targetsSig':targetsSig,\
                         'model': neural_network,\
                         'neuronsPerHiddenlayer': neuronsPerHiddenlayer,\
                         'trace': trace}, buff)
        print("...done.")

        if viewBNN:

            # View neural_network model
            print(neural_network)

            # View the free random variables (i.e. the ones you are obtaining posteriors for!) in the model
            print(neural_network.free_RVs)

            # If desired plot neural network
            fig, ax = plt.subplots(7, 2, figsize=(16, 6))
            pm.traceplot(trace, ax=ax)
            fig.savefig(plotdir + "neural_network.eps",
                        format='eps',
                        dpi=100,
                        bbox_inches='tight')
        return
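A minimal usage sketch (assuming `bnn` is an instance of the class that defines trainBNN and that the arrays are prepared as the docstring describes; the hyperparameters and file names below are placeholders):

bnn.trainBNN(inputsTrain, errInputsTrain,
             targetsTrain, errTargetsTrain,
             neuronsPerHiddenlayer=10,
             sampler="advi",
             nsamp=20000,
             bnnmodelpkl="bnn_model.pkl",
             plotdir="./plots/",
             ncores=2,
             viewBNN=False)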
Example #55
 def apply(self, f):
     # f: kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
     stein = Stein(self.approx, f, self.input_matrix)
     return pm.floatX(-1) * stein.grad
Example #56
    probitphi = pm.Normal('probitphi', mu=mu_p, sd=sigma_p, shape=companiesABC, testval=np.ones(companiesABC))
    phii = pm.Deterministic('phii', Phi(probitphi))

    pi_ij = pm.Uniform('pi_ij', lower=0, upper=1, shape=len(Num_shared.get_value()))

    zij_ = pm.theanof.tt_rng().uniform(size=companyABC.shape)
    zij = pm.Deterministic('zij', tt.lt(zij_, phii[Num_shared]))

    beta_mu = pm.Deterministic('beta_mu', tt.switch(zij, liner, pi_ij))

    Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu, observed=ys_faults)  # observed values

import theano

with model_2:
    s = theano.shared(pm.floatX(1))
    inference = pm.ADVI(cost_part_grad_scale=s)
    # ADVI has nearly converged
    inference.fit(n=20000)
    # It is time to set `s` to zero
    s.set_value(0)
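    # Once the ELBO has nearly converged, removing this part of the gradient
    # estimator typically reduces its variance, which is the point of the
    # two-stage fit used here.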
    approx = inference.fit(n=10000)
    trace_2 = approx.sample(3000, include_transformed=True)
    elbos1 = -inference.hist

chain_2 = trace_2[2000:]
# varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'u', 'beta4']
pm.traceplot(chain_2)
plt.show()

njob = 1
Example #57
def test_scale_cost_to_minibatch_works(aux_total_size):
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])
    beta = len(y_obs) / float(aux_total_size)

    # TODO: theano_config
    # with pm.Model(theano_config=dict(floatX='float64')):
    # did not work as expected
    # there were some numeric problems, so float64 is forced
    with theano.config.change_flags(floatX="float64", warn_float64="ignore"):

        assert theano.config.floatX == "float64"
        assert theano.config.warn_float64 == "ignore"

        post_mu = np.array([1.88], dtype=theano.config.floatX)
        post_sigma = np.array([1], dtype=theano.config.floatX)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y",
                      mu=mu,
                      sigma=1,
                      observed=y_obs,
                      total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_1 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_1.shared_params["mu"].set_value(post_mu)
            mean_field_1.shared_params["rho"].set_value(
                np.log(np.exp(post_sigma) - 1))

            with theano.config.change_flags(compute_test_value="off"):
                elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(
                    10000)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y",
                      mu=mu,
                      sigma=1,
                      observed=y_obs,
                      total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_2 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_2.scale_cost_to_minibatch = False
            assert not mean_field_2.scale_cost_to_minibatch
            mean_field_2.shared_params["mu"].set_value(post_mu)
            mean_field_2.shared_params["rho"].set_value(
                np.log(np.exp(post_sigma) - 1))

        with theano.config.change_flags(compute_test_value="off"):
            elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(
                10000)

        np.testing.assert_allclose(
            elbo_via_total_size_unscaled.eval(),
            elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta),
            rtol=0.02,
            atol=1e-1,
        )
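For concreteness: with `aux_total_size = 10` and two observed points, `beta = 2/10 = 0.2`, so the assertion checks that the unscaled ELBO is about `1/beta = 5` times the total-size-scaled one.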
Example #58
def run_normal_mv_model_mixture(data,
                                K=3,
                                mus=None,
                                mc_samples=10000,
                                jobs=1,
                                n_cols=10,
                                n_rows=100,
                                neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)

    with pm.Model() as model:

        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        sds = pm.HalfNormal('sds', sd=tt.ones(shp) * 100, shape=shp)

        mus = pm.Normal('mus',
                        mu=tt.as_tensor_variable(mus_start),
                        sd=sds,
                        shape=shp)

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #        #TODO one pi per voxel
        #category = pm.Categorical('category', p=pi, shape = n_samples )
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L) for i in range(K)]

        #
        #aux2 = tt.set_subtensor(aux[inds],category[to_fill])
        #prior = pm.Deterministic('prior',(tt.sum(tt.eq( aux2.reshape( (n_samples,max_neigs ) ),
        #                                               category.reshape( (n_samples,1)) ), axis = 1 )+1)/1.0 )

        pesos = pm.Dirichlet('pesos', a=np.ones((K, )))
        #obs = pm.Mixture('obs',w = pesos, comp_dists = mvs, observed = data)
        obs = my_mixture('obs', w=pesos, comp_dists=mvs, observed=data)

        with model:
            #step2 = pm.CategoricalGibbsMetropolis(vars=[category] )
            trace = sample(mc_samples, n_jobs=jobs, tune=500)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma', 'mvs', 'pesos'])
    plt.title('normal mv model 40 cols')
    # NOTE: the two lines below rely on the commented-out `category` variable above
    # (and on `logp_simple`/`aux3`, which are not defined in this snippet), so they
    # will fail unless that part of the model is re-enabled.
    logp_simple(mus, category, aux3)
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
Example #59
n_comp = 2
concentration = 1

with pm.Model() as model:
    # Prior for covariance matrix
    
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1)) for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i]) for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T)) for i in range(n_comp)]
   
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))
    
    # Prior for mean:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)), tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,)) for i in range(n_comp)]
    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)), shape=(n_comp,))   
    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
    
# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:,0,1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method = 'advi')
Example #60
        Topic distributions for documents.
    """
    def ll_docs_f(docs):
        dixs, vixs = docs.nonzero()
        vfreqs = docs[dixs, vixs]
        ll_docs = vfreqs * pmmath.logsumexp(
            tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel()
        # Per-word log-likelihood times num of tokens in the whole dataset
        return tt.sum(ll_docs)

    return ll_docs_f


with pm.Model() as lda_model:
    theta = Dirichlet('theta',
                      a=pm.floatX(1.0 / n_topics) * np.ones(
                          (sim_counts.shape[0], n_topics)),
                      shape=(sim_counts.shape[0], n_topics),
                      transform=t_stick_breaking(1e-9))
    beta = Dirichlet('beta',
                     a=pm.floatX(1.0 / n_topics) * np.ones(
                         (n_topics, sim_counts.shape[1])),
                     shape=(n_topics, sim_counts.shape[1]),
                     transform=t_stick_breaking(1e-9))
    doc = pm.DensityDist('doc', logp_lda_doc(beta, theta), observed=sim_counts)


###### Auto-Encoding Variational Bayes
## Encoder
class LDAEncoder:
    """Encode (term-frequency) document vectors to variational means and (log-transformed) stds.