Example #1
def create_changepoint_model(spike_array, states, fit, samples):
    """
    spike_array :: Shape : tastes, trials, neurons, time_bins
    states :: number of states to include in the model 
    fit :: number of iterations to fit for
    samples :: number of samples to generate from the fit model
    """
    # If the model doesn't already exist, create a new one
    #spike_array = this_dat_binned
    # Unroll arrays along taste axis
    spike_array_long = np.reshape(spike_array, (-1, *spike_array.shape[-2:]))

    # Find mean firing for initial values
    tastes = spike_array.shape[0]
    split_list = np.array_split(spike_array, states, axis=-1)
    # Cut all to the same size
    min_val = min([x.shape[-1] for x in split_list])
    split_array = np.array([x[..., :min_val] for x in split_list])
    mean_vals = np.mean(split_array, axis=(2, -1)).swapaxes(0, 1)
    mean_vals += 0.01  # To avoid zero starting prob
    mean_nrn_vals = np.mean(mean_vals, axis=(0, 1))

    # Find evenly spaced switchpoints for initial values
    idx = np.arange(spike_array.shape[-1])  # Index
    array_idx = np.broadcast_to(idx, spike_array_long.shape)
    idx_range = idx.max() - idx.min()
    even_switches = np.linspace(0, idx.max(), states + 1)
    even_switches_normal = even_switches / np.max(even_switches)

    taste_label = np.repeat([0, 1, 2, 3], 30)
    trial_num = array_idx.shape[0]

    # Begin constructing model
    with pm.Model() as model:

        # Hierarchical firing rates
        # Refer to model diagram
        # Mean firing rate of neuron AT ALL TIMES
        lambda_nrn = pm.Exponential('lambda_nrn',
                                    1 / mean_nrn_vals,
                                    shape=(mean_vals.shape[-1]))
        # Priors for each state, derived from each neuron
        # Mean firing rate of neuron IN EACH STATE (averaged across tastes)
        lambda_state = pm.Exponential('lambda_state',
                                      lambda_nrn,
                                      shape=(mean_vals.shape[1:]))
        # Mean firing rate of neuron PER STATE PER TASTE
        lambda_latent = pm.Exponential('lambda',
                                       lambda_state[np.newaxis, :, :],
                                       testval=mean_vals,
                                       shape=(mean_vals.shape))

        # Changepoint time variable
        # INDEPENDENT TAU FOR EVERY TRIAL
        a = pm.HalfNormal('a_tau', 3., shape=states - 1)
        b = pm.HalfNormal('b_tau', 3., shape=states - 1)

        # Stack produces states x trials --> That gets transposed
        # to trials x states and gets sorted along states (axis=-1)
        # Sort should work the same way as the Ordered transform -->
        # see rv_sort_test.ipynb
        tau_latent = pm.Beta('tau_latent', a, b,
                             shape=(trial_num, states - 1),
                             testval=tt.tile(even_switches_normal[1:states],
                                             (array_idx.shape[0], 1))).sort(axis=-1)

        tau = pm.Deterministic(
            'tau',
            idx.min() + (idx.max() - idx.min()) * tau_latent)

        # Sigmoid weights to create transitions based on tau
        # Hardcoded 3-5 states
        weight_1_stack = tt.nnet.sigmoid(\
                array_idx - tau[:,0][...,np.newaxis,np.newaxis])
        weight_2_stack = tt.nnet.sigmoid(\
                array_idx - tau[:,1][...,np.newaxis,np.newaxis])
        if states > 3:
            weight_3_stack = tt.nnet.sigmoid(\
                    array_idx - tau[:,2][...,np.newaxis,np.newaxis])
        if states > 4:
            weight_4_stack = tt.nnet.sigmoid(\
                    array_idx - tau[:,3][...,np.newaxis,np.newaxis])

        # Generate firing rates from lambda and sigmoid weights
        if states == 3:
            # 3 states
            lambda_ = np.multiply(1 - weight_1_stack,
                            lambda_latent[taste_label,0][:,:,np.newaxis]) + \
                    np.multiply(weight_1_stack * (1 - weight_2_stack),
                            lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \
                    np.multiply(weight_2_stack,
                                lambda_latent[taste_label,2][:,:,np.newaxis])

        elif states == 4:
            # 4 states
            lambda_ = np.multiply(1 - weight_1_stack,
                            lambda_latent[taste_label,0][:,:,np.newaxis]) + \
                    np.multiply(weight_1_stack * (1 - weight_2_stack),
                            lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \
                    np.multiply(weight_2_stack * (1 - weight_3_stack),
                            lambda_latent[taste_label][:,2][:,:,np.newaxis]) + \
                    np.multiply(weight_3_stack,
                                lambda_latent[taste_label,3][:,:,np.newaxis])

        elif states == 5:
            # 5 states
            lambda_ = np.multiply(1 - weight_1_stack,
                            lambda_latent[taste_label,0][:,:,np.newaxis]) + \
                    np.multiply(weight_1_stack * (1 - weight_2_stack),
                            lambda_latent[taste_label][:,1][:,:,np.newaxis]) + \
                    np.multiply(weight_2_stack * (1 - weight_3_stack),
                            lambda_latent[taste_label][:,2][:,:,np.newaxis]) +\
                    np.multiply(weight_3_stack * (1 - weight_4_stack),
                            lambda_latent[taste_label][:,3][:,:,np.newaxis])+ \
                    np.multiply(weight_4_stack,
                            lambda_latent[taste_label,4][:,:,np.newaxis])

        # Add observations
        observation = pm.Poisson("obs", lambda_, observed=spike_array_long)

    return model
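
# A minimal usage sketch, not part of the original source: the synthetic data shape,
# the ADVI fit, and the parameter values below are assumptions. The function itself only
# reads spike_array and states; fit/samples document the intended inference settings.
import numpy as np
import pymc3 as pm

# Hypothetical synthetic data: (tastes, trials, neurons, time_bins) = (4, 30, 10, 100),
# matching the hard-coded taste_label of 4 tastes x 30 trials inside the function.
spike_array = np.random.poisson(1.0, size=(4, 30, 10, 100))
model = create_changepoint_model(spike_array, states=4, fit=20000, samples=500)
with model:
    approx = pm.fit(n=20000, method='advi')   # 'fit' iterations of ADVI
    trace = approx.sample(500)                # 'samples' draws from the fitted approximation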
Example #2
    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
],
                                    value=-999)
year = np.arange(1851, 1962)
'''Model generation'''

HansModel = pm.Model()

with HansModel:
    switchpoint = pm.DiscreteUniform('switchpoint',
                                     lower=year.min(),
                                     upper=year.max(),
                                     testval=1900)

    # prior
    early_rate = pm.Exponential('early_rate', 1)
    late_rate = pm.Exponential('late_rate', 1)

    # Allocate rate
    rate = pm.math.switch(switchpoint >= year, early_rate, late_rate)

    # Likelihood
    disaster = pm.Poisson('disaster', mu=rate, observed=disaster_data)
''' MCMC setting '''
with HansModel:
    # Step1 = pm.Slice(vars=[early_rate,late_rate,switchpoint,disaster.missing_values[0]])
    trace = pm.sample(1000, step=pm.NUTS())

pm.traceplot(trace)
print(pm.summary(trace))
plt.show()
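
# A brief follow-up sketch (an assumption, not part of the original snippet):
# summarise the posterior of the inferred switchpoint from the trace above.
switch_samples = trace['switchpoint']
print("Posterior mean switch year: {:.1f}".format(switch_samples.mean()))
print("95% interval: {:.0f} - {:.0f}".format(*np.percentile(switch_samples, [2.5, 97.5])))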
Example #3
from pandas_datareader import data
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

returns = data.get_data_google('SPY', start='2008-5-1',
                               end='2009-12-1')['Close'].pct_change()
print(returns)

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1. / 10, testval=5.)
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s))
    r = pm.StudentT('r', nu, lam=volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(2000)
pm.traceplot(trace, [nu, sigma])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'r', alpha=.03)
ax.set(title='volatility_process', xlabel='time', ylabel='volatility')
ax.legend(['S&P500', 'stochastic volatility process'])
plt.show()
        predict = tt.set_subtensor(predict[counter:counter + 1], th_g_pred_s)

    return predict


# In[ ]:

model_C = pm.Model()
alpha1 = 3.
beta1 = 0.05
alpha2 = 1.0
# define the distribution
with model_C:
    sigma2s = pm.InverseGamma('sigma2s', alpha=alpha1, beta=beta1, shape=1)
    sigma2 = pm.Deterministic('sigma2', tt.tile(sigma2s, th.shape[0]))
    gamma2 = pm.Exponential(name='gamma2', lam=alpha2)
    ln_k_guess = pm.Normal(name='ln_k_guess',
                           mu=0,
                           sigma=tt.sqrt(gamma2),
                           shape=1)
    y_mean = pm.Deterministic('y_mean', Solver(ln_k_guess))
    y = pm.Normal(name='y', mu=y_mean, sigma=tt.sqrt(sigma2), observed=thg)

# In[12]:

with model_C:
    mcmc_res_C = pm.sample(draws=5000, step=pm.NUTS())

#_=pm.plot_posterior(mcmc_res_C, var_names=['ln_k_guess'])

# In[ ]:
Example #5
  plt.plot(x_values, x_pdf, label=r'$\nu$ = {}'.format(df))

x_pdf = stats.norm.pdf(x_values)
plt.plot(x_values, x_pdf, label=r'$\nu = \infty$')
plt.xlabel('$x$')
plt.ylabel('$p(x)$')
plt.legend(loc=0, fontsize=14)
plt.xlim(-7, 7)
plt.savefig('img306.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

with pm.Model() as model_t:
  mu = pm.Uniform('mu', 40, 75)
  sigma = pm.HalfNormal('sigma', sd=10)
  nu = pm.Exponential('nu', 1/30)
  y = pm.StudentT('y', mu=mu, sd=sigma, nu=nu, observed=data)
  trace_t = pm.sample(1100)

chain_t = trace_t[100:]
pm.traceplot(chain_t)
plt.savefig('img308.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

#pm.df_summary(chain_t)
pm.summary(chain_t)

y_pred = pm.sample_ppc(chain_t, 100, model_t, size=len(data))
sns.kdeplot(data, c='b')
for i in y_pred['y']:
Example #6
    def set_likelihood(self):
        """
        Convert any bilby likelihoods to PyMC3 distributions.
        """

        # create theano Op for the log likelihood if not using a predefined model
        pymc3, STEP_METHODS, floatX = self._import_external_sampler()
        theano, tt, as_op = self._import_theano()

        class LogLike(tt.Op):

            itypes = [tt.dvector]
            otypes = [tt.dscalar]

            def __init__(self, parameters, loglike, priors):
                self.parameters = parameters
                self.likelihood = loglike
                self.priors = priors

                # set the fixed parameters
                for key in self.priors.keys():
                    if isinstance(self.priors[key], float):
                        self.likelihood.parameters[key] = self.priors[key]

                self.logpgrad = LogLikeGrad(self.parameters, self.likelihood,
                                            self.priors)

            def perform(self, node, inputs, outputs):
                theta, = inputs
                for i, key in enumerate(self.parameters):
                    self.likelihood.parameters[key] = theta[i]

                outputs[0][0] = np.array(self.likelihood.log_likelihood())

            def grad(self, inputs, g):
                theta, = inputs
                return [g[0] * self.logpgrad(theta)]

        # create theano Op for calculating the gradient of the log likelihood
        class LogLikeGrad(tt.Op):

            itypes = [tt.dvector]
            otypes = [tt.dvector]

            def __init__(self, parameters, loglike, priors):
                self.parameters = parameters
                self.Nparams = len(parameters)
                self.likelihood = loglike
                self.priors = priors

                # set the fixed parameters
                for key in self.priors.keys():
                    if isinstance(self.priors[key], float):
                        self.likelihood.parameters[key] = self.priors[key]

            def perform(self, node, inputs, outputs):
                theta, = inputs

                # define version of likelihood function to pass to derivative function
                def lnlike(values):
                    for i, key in enumerate(self.parameters):
                        self.likelihood.parameters[key] = values[i]
                    return self.likelihood.log_likelihood()

                # calculate gradients
                grads = derivatives(theta,
                                    lnlike,
                                    abseps=1e-5,
                                    mineps=1e-12,
                                    reltol=1e-2)

                outputs[0][0] = grads

        with self.pymc3_model:
            # check if it is a predefined likelihood function
            if isinstance(self.likelihood, GaussianLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'sigma')
                        or not hasattr(self.likelihood, 'x')
                        or not hasattr(self.likelihood, 'y')):
                    raise ValueError(
                        "Gaussian Likelihood does not have all the correct attributes!"
                    )

                if 'sigma' in self.pymc3_priors:
                    # if sigma is supplied use that value
                    if self.likelihood.sigma is None:
                        self.likelihood.sigma = self.pymc3_priors.pop('sigma')
                    else:
                        del self.pymc3_priors['sigma']

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError(
                            "Prior key '{}' is not a function key!".format(
                                key))

                model = self.likelihood.func(self.likelihood.x,
                                             **self.pymc3_priors)

                # set the distribution
                pymc3.Normal('likelihood',
                             mu=model,
                             sd=self.likelihood.sigma,
                             observed=self.likelihood.y)
            elif isinstance(self.likelihood, PoissonLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x')
                        or not hasattr(self.likelihood, 'y')):
                    raise ValueError(
                        "Poisson Likelihood does not have all the correct attributes!"
                    )

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError(
                            "Prior key '{}' is not a function key!".format(
                                key))

                # get rate function
                model = self.likelihood.func(self.likelihood.x,
                                             **self.pymc3_priors)

                # set the distribution
                pymc3.Poisson('likelihood',
                              mu=model,
                              observed=self.likelihood.y)
            elif isinstance(self.likelihood, ExponentialLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x')
                        or not hasattr(self.likelihood, 'y')):
                    raise ValueError(
                        "Exponential Likelihood does not have all the correct attributes!"
                    )

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError(
                            "Prior key '{}' is not a function key!".format(
                                key))

                # get mean function
                model = self.likelihood.func(self.likelihood.x,
                                             **self.pymc3_priors)

                # set the distribution
                pymc3.Exponential('likelihood',
                                  lam=1. / model,
                                  observed=self.likelihood.y)
            elif isinstance(self.likelihood, StudentTLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x')
                        or not hasattr(self.likelihood, 'y')
                        or not hasattr(self.likelihood, 'nu')
                        or not hasattr(self.likelihood, 'sigma')):
                    raise ValueError(
                        "StudentT Likelihood does not have all the correct attributes!"
                    )

                if 'nu' in self.pymc3_priors:
                    # if nu is supplied use that value
                    if self.likelihood.nu is None:
                        self.likelihood.nu = self.pymc3_priors.pop('nu')
                    else:
                        del self.pymc3_priors['nu']

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError(
                            "Prior key '{}' is not a function key!".format(
                                key))

                model = self.likelihood.func(self.likelihood.x,
                                             **self.pymc3_priors)

                # set the distribution
                pymc3.StudentT('likelihood',
                               nu=self.likelihood.nu,
                               mu=model,
                               sd=self.likelihood.sigma,
                               observed=self.likelihood.y)
            elif isinstance(
                    self.likelihood,
                (GravitationalWaveTransient, BasicGravitationalWaveTransient)):
                # set theano Op - pass _search_parameter_keys, which only contains non-fixed variables
                logl = LogLike(self._search_parameter_keys, self.likelihood,
                               self.pymc3_priors)

                parameters = dict()
                for key in self._search_parameter_keys:
                    try:
                        parameters[key] = self.pymc3_priors[key]
                    except KeyError:
                        raise KeyError(
                            "Unknown key '{}' when setting GravitationalWaveTransient likelihood"
                            .format(key))

                # convert to theano tensor variable
                values = tt.as_tensor_variable(list(parameters.values()))

                pymc3.DensityDist('likelihood',
                                  lambda v: logl(v),
                                  observed={'v': values})
            else:
                raise ValueError("Unknown likelihood has been provided")
Example #7
    def __init__(
        self,
        cell_state_mat: np.ndarray,
        X_data: np.ndarray,
        n_comb: int = 50,
        data_type: str = "float32",
        n_iter=20000,
        learning_rate=0.005,
        total_grad_norm_constraint=200,
        verbose=True,
        var_names=None,
        var_names_read=None,
        obs_names=None,
        fact_names=None,
        sample_id=None,
        cell_number_prior={
            "cells_per_spot": 8,
            "factors_per_spot": 7,
            "combs_per_spot": 2.5
        },
        cell_number_var_prior={
            "cells_mean_var_ratio": 1,
            "factors_mean_var_ratio": 1,
            "combs_mean_var_ratio": 1
        },
        phi_hyp_prior={
            "mean": 3,
            "sd": 1
        },
        spot_fact_mean_var_ratio=5,
        exper_gene_level_mean_var_ratio=10,
    ):

        ############# Initialise parameters ################
        super().__init__(
            cell_state_mat,
            X_data,
            data_type,
            n_iter,
            learning_rate,
            total_grad_norm_constraint,
            verbose,
            var_names,
            var_names_read,
            obs_names,
            fact_names,
            sample_id,
        )

        self.phi_hyp_prior = phi_hyp_prior
        self.n_comb = n_comb
        self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio
        self.exper_gene_level_mean_var_ratio = exper_gene_level_mean_var_ratio

        # generate parameters for samples
        self.spot2sample_df = pd.get_dummies(sample_id)
        # convert to np.ndarray
        self.spot2sample_mat = self.spot2sample_df.values
        self.n_exper = self.spot2sample_mat.shape[1]
        # assign extra data to dictionary with (1) shared parameters (2) input data
        self.extra_data_tt = {
            "spot2sample":
            theano.shared(self.spot2sample_mat.astype(self.data_type))
        }
        self.extra_data = {
            "spot2sample": self.spot2sample_mat.astype(self.data_type)
        }

        cell_number_prior["factors_per_combs"] = (
            cell_number_prior["factors_per_spot"] /
            cell_number_prior["combs_per_spot"])
        for k in cell_number_var_prior.keys():
            cell_number_prior[k] = cell_number_var_prior[k]
        self.cell_number_prior = cell_number_prior

        ############# Define the model ################
        self.model = pm.Model()

        with self.model:

            # =====================Gene expression level scaling======================= #
            # scale cell state factors by gene_level
            self.gene_factors = pm.Deterministic("gene_factors",
                                                 self.cell_state)
            # self.gene_factors = self.cell_state
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0))

            # =====================Spot factors======================= #
            # prior on spot factors reflects the number of cells, the fraction of their cytoplasm captured,
            # times the heterogeneity in the total amount of mRNA between individual cells within each cell type
            self.cells_per_spot = pm.Gamma(
                "cells_per_spot",
                mu=cell_number_prior["cells_per_spot"],
                sigma=np.sqrt(cell_number_prior["cells_per_spot"] /
                              cell_number_prior["cells_mean_var_ratio"]),
                shape=(self.n_obs, 1),
            )
            self.comb_per_spot = pm.Gamma(
                "combs_per_spot",
                mu=cell_number_prior["combs_per_spot"],
                sigma=np.sqrt(cell_number_prior["combs_per_spot"] /
                              cell_number_prior["combs_mean_var_ratio"]),
                shape=(self.n_obs, 1),
            )

            shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1))
            rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot
            self.combs_factors = pm.Gamma("combs_factors",
                                          alpha=shape,
                                          beta=rate,
                                          shape=(self.n_obs, self.n_comb))

            self.factors_per_combs = pm.Gamma(
                "factors_per_combs",
                mu=cell_number_prior["factors_per_combs"],
                sigma=np.sqrt(cell_number_prior["factors_per_combs"] /
                              cell_number_prior["factors_mean_var_ratio"]),
                shape=(self.n_comb, 1),
            )
            c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape(
                (1, 1))
            self.comb2fact = pm.Gamma("comb2fact",
                                      alpha=c2f_shape,
                                      beta=self.factors_per_combs,
                                      shape=(self.n_comb, self.n_fact))

            self.spot_factors = pm.Gamma(
                "spot_factors",
                mu=pm.math.dot(self.combs_factors, self.comb2fact),
                sigma=pm.math.sqrt(
                    pm.math.dot(self.combs_factors, self.comb2fact) /
                    self.spot_fact_mean_var_ratio),
                shape=(self.n_obs, self.n_fact),
            )

            # =====================Spot-specific additive component======================= #
            # molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed between all genes not just expressed genes
            self.spot_add_hyp = pm.Gamma("spot_add_hyp", 1, 1, shape=2)
            self.spot_add = pm.Gamma("spot_add",
                                     self.spot_add_hyp[0],
                                     self.spot_add_hyp[1],
                                     shape=(self.n_obs, 1))

            # =====================Gene-specific additive component ======================= #
            # per gene molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed equally between all spots (e.g. background, free-floating RNA)
            self.gene_add_hyp = pm.Gamma("gene_add_hyp", 1, 1, shape=2)
            self.gene_add = pm.Gamma("gene_add",
                                     self.gene_add_hyp[0],
                                     self.gene_add_hyp[1],
                                     shape=(self.n_exper, self.n_var))

            # =====================Gene-specific overdispersion ======================= #
            self.phi_hyp = pm.Gamma("phi_hyp",
                                    mu=phi_hyp_prior["mean"],
                                    sigma=phi_hyp_prior["sd"],
                                    shape=(1, 1))
            self.gene_E = pm.Exponential("gene_E",
                                         self.phi_hyp,
                                         shape=(self.n_exper, self.n_var))

            # =====================Expected expression ======================= #
            # expected expression
            self.mu_biol = (
                pm.math.dot(self.spot_factors, self.gene_factors.T) +
                pm.math.dot(self.extra_data_tt["spot2sample"], self.gene_add) +
                self.spot_add)
            # tt.printing.Print('mu_biol')(self.mu_biol.shape)

            # =====================DATA likelihood ======================= #
            # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
            self.data_target = pm.NegativeBinomial(
                "data_target",
                mu=self.mu_biol,
                alpha=pm.math.dot(self.extra_data_tt["spot2sample"],
                                  1 / tt.pow(self.gene_E, 2)),
                observed=self.x_data,
                total_size=self.X_data.shape,
            )

            # =====================Compute nUMI from each factor in spots  ======================= #
            self.nUMI_factors = pm.Deterministic("nUMI_factors",
                                                 (self.spot_factors *
                                                  (self.gene_factors).sum(0)))
Example #8
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv",
                                np.r_[0.0],
                                np.c_[1.0],
                                shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv",
                                    np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv",
                                          0.1,
                                          0.1,
                                          10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv",
                                        np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv",
                                        5,
                                        np.r_[0.5, 0.5],
                                        observed=np.r_[2])
        negbinomial_rv = pm.NegativeBinomial("negbinomial_rv",
                                             10.2,
                                             0.5,
                                             observed=5)

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1]
        for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
Example #9
with pm.Model() as model:

    # Community's prior.
    community_prior = pm.HalfCauchy('community_diric', beta=1, shape=93)
    # Community distribution for this user
    community_weight = pm.Dirichlet('community_weight',
                                    a=community_prior,
                                    shape=93)

    # Action's prior.
    action_prior = pm.HalfCauchy('action_diric', beta=1, shape=3)
    # Action distribution for this user
    action_weight = pm.Dirichlet('action_weight', a=action_prior, shape=3)

    # Score Prior
    score_sd = pm.Exponential('score_sd', lam=1)
    # Score for this action
    score_numeral = pm.Lognormal('score_numeral',
                                 mu=data[:, 2].astype(float),
                                 sd=score_sd,
                                 shape=len(data))

    # Numerize community and action
    community_numeral = tt.dot(community_matrix, community_weight)
    action_numeral = tt.dot(action_matrix, action_weight)

    # Draw coefficient of community, action, score and intercept
    community_coef = pm.Normal('community_coef', mu=0, sd=1)
    action_coef = pm.Normal('action_coef', mu=0, sd=1)
    score_coef = pm.Normal('score_coef', mu=0, sd=1)
    intercept = pm.Normal('intercept', mu=0, sd=1)
    return std_series


# %%
data["Divorce_std"] = standardize(data["Divorce"])
data["Marriage_std"] = standardize(data["Marriage"])
data["MedianAgeMarriage_std"] = standardize(data["MedianAgeMarriage"])

# %%
data["MedianAgeMarriage"].std()

# %%
with pm.Model() as m_5_1:
    a = pm.Normal("a", 0, 0.2)
    bA = pm.Normal("bA", 0, 0.5)
    sigma = pm.Exponential("sigma", 1)
    mu = pm.Deterministic("mu", a + bA * data["MedianAgeMarriage_std"])

    divorce_rate_std = pm.Normal("divorce_rate_std",
                                 mu=mu,
                                 sigma=sigma,
                                 observed=data["Divorce_std"].values)
    prior_samples = pm.sample_prior_predictive()
    m_5_1_trace = pm.sample()

# %%
az.plot_trace(m_5_1_trace, var_names=["a", "bA"])

# %%
fig, ax = plt.subplots()
Example #11
# chain2 = trace2
# varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
# pm.traceplot(chain2, varnames1)
# plt.show()
#
# # Plot the autocorrelation curves
# pm.autocorrplot(chain2)
# plt.show()

# ======================================================================
# The Student-t distribution performs well, but convergence of some parameters is not great
# An error term has been added
with pm.Model() as mulpartial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 10)
    nu = pm.Exponential('nu', 1 / 10)
    mu_a = pm.Uniform('mu_a', -10, 10)
    sigma_a = pm.HalfNormal('sigma_a', sd=20)
    sigma_a1 = pm.HalfCauchy('sigma_a1', 10)

    beta = pm.Normal('beta', mu=mu_a, sd=sigma_a, shape=companiesABC)
    beta1 = pm.Normal('beta1', 0, 5)
    beta2 = pm.Normal('beta2', 0, 12)
    beta3 = pm.Normal('beta3', 0, 20)
    beta4 = pm.Normal('beta4', 0, sd=sigma_a1)

    # define likelihood: build the time-dependent function
    theta = beta[
        companyABC] + beta1 * elec_year1 + beta2 * elec_tem1 + beta3 * elec_RH1 + beta4 * elec_tem1 * elec_RH1
    Observed = pm.StudentT("Observed",
                           mu=theta,
Example #12
def find_self_ref_increases_for_spec(refcounts_main,
                                     stats_table,
                                     expecrefs,
                                     provs_and_specs,
                                     specialty=None):
    ## Takes referral data with "dater", "self_ref", and "ref_spec" columns and returns potential
    ## increases in self-referrals at change points.
    if specialty is None:
        provs_and_specs = provs_and_specs
    else:
        provs_and_specs = provs_and_specs[provs_and_specs['ref_spec'].isin(
            specialty)]
    provs = list(set(provs_and_specs['ref_prov']))
    length = len(provs)
    counter = 0
    for spec in specialty:
        provs = list(
            set(provs_and_specs[provs_and_specs['ref_spec'] == spec]
                ['ref_prov']))
        for idx, prov in enumerate(provs):
            counter += 1
            print('{0:0.4f} complete'.format(counter / length))
            ## assign lambdas and tau to stochastic variables
            refcounts = np.array(
                refcounts_main.loc[np.in1d(refcounts_main['ref_prov'], prov),
                                   'self_ref'])
            n_refcounts = len(refcounts)

            with pm.Model() as model:
                alpha = 1.0 / refcounts.mean()  # Recall refcounts is the
                # variable that holds our referral counts
                lambda_1 = pm.Exponential("lambda_1", alpha)
                lambda_2 = pm.Exponential("lambda_2", alpha)
                tau = pm.DiscreteUniform("tau", lower=0, upper=n_refcounts)

            ## create a combined function for lambda (it is still a RV)
            with model:
                idx = np.arange(n_refcounts)  # Index
                lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)

            ## combine the data with our proposed data generation scheme
            with model:
                observation = pm.Poisson("obs", lambda_, observed=refcounts)

            ## inference
            with model:
                step = pm.Metropolis()
                trace = pm.sample(25, tune=2500, step=step)

            lambda_1_samples = trace['lambda_1']
            lambda_2_samples = trace['lambda_2']
            tau_samples = trace['tau']

            N = tau_samples.shape[0]
            expected_refs_per_week = np.zeros(n_refcounts)
            for week in range(0, n_refcounts):
                ix = week < tau_samples
                expected_refs_per_week[week] = (
                    lambda_1_samples[ix].sum() +
                    lambda_2_samples[~ix].sum()) / N

            expecrefs[prov] = expected_refs_per_week
            stats_table.loc[prov, 'specialty'] = spec
            stats_table.loc[prov, 'tau_mean'] = np.mean(tau_samples)
            stats_table.loc[prov, 'tau_std'] = np.std(tau_samples)
            stats_table.loc[prov, 'mean1'] = np.mean(lambda_1_samples)
            stats_table.loc[prov, 'mean2'] = np.mean(lambda_2_samples)
            stats_table.loc[prov, 'mean_diff'] = st.ttest_ind(
                lambda_1_samples, lambda_2_samples)[1]
    return stats_table, expecrefs
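
# A hypothetical usage sketch: all column names, values, and the specialty label below are
# illustrative assumptions inferred from the function body (the module is assumed to already
# import pymc3 as pm and scipy.stats as st, which the function itself requires).
import numpy as np
import pandas as pd

refcounts_main = pd.DataFrame({
    'ref_prov': ['A'] * 52,
    'dater': pd.date_range('2018-01-01', periods=52, freq='W'),
    'self_ref': np.random.poisson(3, size=52),
})
provs_and_specs = pd.DataFrame({'ref_prov': ['A'], 'ref_spec': ['cardiology']})
stats_table = pd.DataFrame(index=['A'])
expecrefs = {}

stats_table, expecrefs = find_self_ref_increases_for_spec(
    refcounts_main, stats_table, expecrefs, provs_and_specs,
    specialty=['cardiology'])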
Example #13
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
import pymc3 as pm

plt.figure(figsize=(8.5, 4.5))

with pm.Model() as model:
    parameter = pm.Exponential("poisson_param", 1)
    data_generator = pm.Poisson("data_generator", parameter)
    data_plus_one = data_generator + 1

print(parameter.tag.test_value)

with pm.Model() as model:
    theta = pm.Exponential("theta", 2)
    data_generator = pm.Poisson("data_generator", theta)

print(theta.tag.test_value)

with pm.Model() as ab_testing:
    p_A = pm.Uniform("P(A)", 0, 1)
    p_B = pm.Uniform("P(B)", 0, 1)

print(theta.random)

print("parameter.tag.test_value =", parameter.tag.test_value)
print("data_generator.tag.test_value =", data_generator.tag.test_value)
print("data_plus_one.tag.test_value =", data_plus_one.tag.test_value)

with pm.Model() as model:
    # -------------------------------------------------------------------------
    # Priors
    # -------------------------------------------------------------------------
    beta = pm.Normal('beta', mu=0, sd=10)
    beta_day = pm.Normal('beta_day', mu=0, sd=10)

    # -------------------------------------------------------------------------
    # Likelihood
    # -------------------------------------------------------------------------
    loglamb_observed = beta + beta_day * day_within_period1
    lamb_observed = np.exp(loglamb_observed)
    #    Y_hat_observed = pm.Exponential('Y_hat_observed', lam = lamb_observed, observed = time_to_next_event[~censored])
    Y_latent = pm.Exponential('Y_latent',
                              lam=lamb_observed,
                              shape=len(test_obs1),
                              testval=test_obs1)
    Y_observed = pm.Potential(
        'Y_observed',
        selfreport_mem(Y_latent, time_to_next_event1, windowmin1, windowmax1))

    loglamb_censored = beta + beta_day * day_within_period[
        censored]  # Switched model to 1 parameter for both censored/uncensored (makes sense if final obs is "real")
    lamb_censored = np.exp(loglamb_censored)
    Y_hat_censored = pm.Potential(
        'Y_hat_censored',
        exponential_log_complementary_cdf(x=time_to_next_event[censored],
                                          lam=lamb_censored))

#%%
# Sample from posterior distribution
Example #15
def exponential_beta(n=2):
    with pm.Model() as model:
        x = pm.Beta('x', 3, 1, shape=n, transform=None)
        y = pm.Exponential('y', 1, shape=n, transform=None)

    return model.test_point, model, None
# deaths
total_deaths = deaths.groupby("day").sum()
dt_d = total_deaths.index
xx_d = np.arange(len(total_cases.index))
yy_d = total_deaths['deaths']

n_samples = 300
n_tune = 300
SEED = 1
N_COMPS = 2
N_DATA = len(xx)
xx2 = np.stack([xx, xx]).T
yy2 = np.stack([yy, yy]).T
with pm.Model() as model:
    k = pm.TruncatedNormal('k', mu=2 * yy[-1], sigma=yy[-1], lower=0, shape=N)
    sigma = pm.Exponential('sigma', lam=1 / 1e5, shape=N)
    dt = pm.Normal('dt', mu=30, sd=10, shape=N)
    tm = pm.Uniform('tm', lower=xx[0], upper=xx[-1], shape=N)
    yhat = k * pm.math.invlogit(np.log(81) / dt * (xx2 - tm))
    comps = pm.Normal.dist(mu=yhat, sigma=sigma, shape=(N, len(xx)))
    w = pm.Dirichlet('w', np.ones(N))
    obs = pm.Mixture('obs', w=w, comp_dists=comps, observed=yy)

    trace = pm.sample(draws=n_samples, tune=n_tune, random_seed=SEED, cores=3)

ALPHA = 0.05
params = np.vstack([trace['k'], trace['dt'], trace['tm']])


def plot_projection(ax, p=0.05, **kwargs):
    extended_xx = np.arange(len(xx) * 2)
Example #17
# 50%      52.875000
# 75%      54.960000
# max      68.580000

# normal
with pm.Model() as model_g:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1000)

# students t
with pm.Model() as model_t:
    mu = pm.Uniform('mu', 40, 70)
    sigma = pm.HalfNormal('sigma', sd=10)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=v, observed=data)
    trace_t = pm.sample(1000)

data2 = Series(data, copy=True)
data2[48] = 65
data2[49] = 63
data2[50] = 69

data2.loc[data2 < 60].describe()
data2.describe()

# add some outliers
with pm.Model() as model_g2:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    distri = stats.t(df)
    x_pdf = distri.pdf(x_values)
    plt.plot(x_values, x_pdf, label=fr'$\nu = {df}$', lw=3)

x_pdf = stats.norm.pdf(x_values)
plt.plot(x_values, x_pdf, 'k--', label=r'$\nu = \infty$')
plt.xlabel('x')
plt.yticks([])
plt.legend()
plt.xlim(-5, 5)

# %%
with pm.Model() as model_t:
    μ = pm.Uniform('μ', 40, 75)
    σ = pm.HalfNormal('σ', sd = 10)
    ν = pm.Exponential('ν', 1/30)
    y = pm.StudentT('y', mu=μ, sd=σ, nu=ν, observed=data)
    trace_t = pm.sample(1000)

az.plot_trace(trace_t)

# %%
az.summary(trace_t)


# %%
y_ppc_t = pm.sample_posterior_predictive(
    trace_t, 100, model_t, random_seed=123)
y_pred_t = az.from_pymc3(trace=trace_t, posterior_predictive=y_ppc_t)
az.plot_ppc(y_pred_t, figsize=(12, 6), mean=False)
ax[0].legend(fontsize=15)
def model():
    global data
    alpha_prior = 10.
    beta_prior = 0.1
    alpha_init = np.ones((N_GROUPS,1))
    noise_init = np.ones((N_GROUPS,1))*1e-2

    parts_ones = np.ones((TOTAL_PARTS))
    data_ones = np.ones(len(data[0]))

    hds = store_hds_old(paren_lst,filt)
    ns = np.sum(data, axis=1)


    m_ass = np.where(assignments == 0)
    k_ass = np.where(assignments == 1)
    t_ass = np.where(assignments==2)
    a_ass = np.where(assignments==3)
    n_monk = len(m_ass[0])
    n_kid = len(k_ass[0])
    n_tsim = len(t_ass[0])
    n_adult = len(a_ass[0])

    smooth =  np.ones((TOTAL_PARTS,N_ALGS)) * beta_prior

    #bias in choice of starting parenthesis
    start_p = store_start_p(paren_lst, n=TOTAL_PARTS, lst = ["("])
    start_np = 1 - start_p

    with pm.Model() as m:
        alpha = pm.Exponential('alpha', alpha_prior,
                 shape=(N_GROUPS,1))
 
        alpha = np.ones((N_GROUPS, 1)) * 10.


        beta = pm.Dirichlet('beta', np.ones((N_GROUPS, N_ALGS))*beta_prior,
                        # testval=np.ones(N_ALGS),
                            shape=(N_GROUPS,N_ALGS)) 



        theta = pm.Dirichlet('theta',  alpha[assignments] * beta[assignments], 
                           shape=(TOTAL_PARTS,N_ALGS)) 


        #noise_pr_a = pm.Exponential('n_pr_a', 1.,shape=N_GROUPS)
        #noise_pr_b = pm.Exponential('n_pr_b', 1.,shape=N_GROUPS)

        #noise_pr_a = np.ones(N_GROUPS) * 10.
        #noise_pr_b = np.ones(N_GROUPS) * 10.


        noise = pm.Beta("noise", 1,2, shape=TOTAL_PARTS, testval=0.1)
        #noise = pm.Beta("noise", noise_pr_a[assignments],noise_pr_b[assignments], 
                    #    shape=TOTAL_PARTS)


       # noise = pm.Beta("noise", 1,1, shape=N_GROUPS, testval=0.1)




        new_algs = map(lambda x: theta[x].dot(format_algs_theano(hds, noise[x])), np.arange(TOTAL_PARTS))
        theta_resp = tt.concatenate([new_algs], axis=0)
        #theta_resp = theta.dot(algorithms)

        """
        noise = pm.Beta("noise", 1,9, shape=N_GROUPS, testval=0.1)
        noise_alg = algorithms + noise[assignments]
        new_algs = format_algs_theano_bypart(hds, noise, 
                                    total_parts=TOTAL_PARTS, 
                                   n_algs=N_ALGS,max_hd=max_hd)
        theta_resp = theta.dot(new_algs)

        #theta_resp = theta.dot(algorithms) 
        monkey_theta = theta[m_ass]
        kid_theta = theta[k_ass]
        tsim_theta = theta[t_ass]
        adult_theta = theta[a_ass]


        new_algs_monkey = format_algs_theano(hds, noise[0])
        new_algs_kid = format_algs_theano(hds, noise[1])
        new_algs_tsim = format_algs_theano(hds, noise[2])
        new_algs_adult = format_algs_theano(hds, noise[3])


        monkey_algs = monkey_theta.dot(new_algs_monkey)
        kid_algs = kid_theta.dot(new_algs_kid)
        tsim_algs = tsim_theta.dot(new_algs_tsim)
        adult_algs = adult_theta.dot(new_algs_adult)

        theta_resp = tt.concatenate([monkey_algs, kid_algs,
                 tsim_algs, adult_algs], axis=0)
        """
        bias = pm.Beta("bias", 1,1,shape=(TOTAL_PARTS,1))

        biased_theta_resps = start_p * bias * theta_resp + start_np * (1.-bias) * theta_resp
        sum_norm = biased_theta_resps.sum(axis=1).reshape((TOTAL_PARTS,1))
        biased_theta_resps = biased_theta_resps / sum_norm

        #biased_theta_resps = theta_resp

        pm.Multinomial('resp', n=ns, p = biased_theta_resps, 
               shape=(TOTAL_PARTS, N_RESPS), observed=data)

        #db = Text('trace')

        trace = pm.sample(MCMC_STEPS,
            tune=BURNIN,target_accept=0.9, thin=MCMC_THIN)
        print_star("Model Finished!")


    if MCMC_CHAINS > 1:
        print(pm.gelman_rubin(trace))

    summary = pm.df_summary(trace)
    which = 45
    samp =100



    return trace, summary
import pymc3 as pm
from scipy.stats import poisson
import seaborn as sns

# Config
os.chdir("/home/jovyan/work")
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (12, 3)

# Preparation
N = 1000
true_lams = [20, 50]
true_tau = 300
data = np.hstack([
    poisson(true_lams[0]).rvs(true_tau),
    poisson(true_lams[1]).rvs(N - true_tau),
])

# Modeling
with pm.Model() as model:
    lam_1 = pm.Exponential("lam_1", data.mean())
    lam_2 = pm.Exponential("lam_2", data.mean())
    tau = pm.DiscreteUniform("tau", lower=0, upper=N-1)
    idx = np.arange(N)
    lam = pm.math.switch(tau > idx, lam_1, lam_2)
    female = pm.Poisson("target", lam, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(20000, tune=5000, step=step, chains=10)
    pm.traceplot(trace[1000:], grid=True)
plt.savefig("./results/3-15-c-inference.png")
    def __init__(self,
                 cell_state_mat: np.ndarray,
                 X_data: np.ndarray,
                 Y_data: np.ndarray,
                 n_comb: int = 50,
                 data_type: str = 'float32',
                 n_iter=20000,
                 learning_rate=0.005,
                 total_grad_norm_constraint=200,
                 verbose=True,
                 var_names=None,
                 var_names_read=None,
                 obs_names=None,
                 fact_names=None,
                 sample_id=None,
                 gene_level_prior={
                     'mean': 1 / 2,
                     'sd': 1 / 4
                 },
                 gene_level_var_prior={'mean_var_ratio': 1},
                 cell_number_prior={
                     'cells_per_spot': 8,
                     'factors_per_spot': 7,
                     'combs_per_spot': 2.5
                 },
                 cell_number_var_prior={
                     'cells_mean_var_ratio': 1,
                     'factors_mean_var_ratio': 1,
                     'combs_mean_var_ratio': 1
                 },
                 phi_hyp_prior={
                     'mean': 3,
                     'sd': 1
                 },
                 spot_fact_mean_var_ratio=0.5):

        ############# Initialise parameters ################
        super().__init__(cell_state_mat, X_data, data_type, n_iter,
                         learning_rate, total_grad_norm_constraint, verbose,
                         var_names, var_names_read, obs_names, fact_names,
                         sample_id)

        self.Y_data = Y_data
        self.n_npro = Y_data.shape[1]
        self.y_data = theano.shared(Y_data.astype(self.data_type))
        self.n_rois = Y_data.shape[0]
        # Total number of gene counts in each region of interest, divided by 10^5:
        self.l_r = np.array([np.sum(X_data[i, :]) for i in range(self.n_rois)
                             ]).reshape(self.n_rois, 1) * 10**(-5)

        for k in gene_level_var_prior.keys():
            gene_level_prior[k] = gene_level_var_prior[k]

        self.gene_level_prior = gene_level_prior
        self.phi_hyp_prior = phi_hyp_prior
        self.n_comb = n_comb
        self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio

        cell_number_prior['factors_per_combs'] = (
            cell_number_prior['factors_per_spot'] /
            cell_number_prior['combs_per_spot'])
        for k in cell_number_var_prior.keys():
            cell_number_prior[k] = cell_number_var_prior[k]
        self.cell_number_prior = cell_number_prior

        ############# Define the model ################
        self.model = pm.Model()

        with self.model:

            # ============================ Negative Probe Binding ===================== #
            # Negative probe counts scale linearly with the total number of counts in a region of interest.
            # The linear slope is drawn from a gamma distribution. Mean and variance are inferred from the data
            # and are the same for the non-specific binding term for gene probes further below.
            self.b_n_hyper = pm.Gamma('b_n_hyper',
                                      alpha=np.array((3, 1)),
                                      beta=np.array((1, 1)),
                                      shape=2)
            self.b_n = pm.Gamma('b_n',
                                mu=self.b_n_hyper[0],
                                sigma=self.b_n_hyper[1],
                                shape=(1, self.n_npro))
            self.y_rn = self.b_n * self.l_r

            # ===================== Non-specific binding additive component ======================= #
            # Additive term for non-specific binding of gene probes are drawn from a gamma distribution with
            # the same mean and variance as for negative probes above.
            self.gene_add = pm.Gamma('gene_add',
                                     mu=self.b_n_hyper[0],
                                     sigma=self.b_n_hyper[1],
                                     shape=(1, self.n_genes))

            # =====================Gene expression level scaling======================= #
            # Explains difference in expression between genes and
            # how it differs in single cell and spatial technology
            # compute hyperparameters from mean and sd
            shape = gene_level_prior['mean']**2 / gene_level_prior['sd']**2
            rate = gene_level_prior['mean'] / gene_level_prior['sd']**2
            shape_var = shape / gene_level_prior['mean_var_ratio']
            rate_var = rate / gene_level_prior['mean_var_ratio']
            self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp',
                                                 mu=shape,
                                                 sigma=np.sqrt(shape_var),
                                                 shape=(1, 1))
            self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp',
                                                mu=rate,
                                                sigma=np.sqrt(rate_var),
                                                shape=(1, 1))

            self.gene_level = pm.Gamma('gene_level',
                                       self.gene_level_alpha_hyp,
                                       self.gene_level_beta_hyp,
                                       shape=(self.n_genes, 1))

            self.gene_factors = pm.Deterministic('gene_factors',
                                                 self.cell_state)

            # =====================Spot factors======================= #
            # prior on spot factors reflects the number of cells, the fraction of their cytoplasm captured,
            # times the heterogeneity in the total amount of mRNA between individual cells within each cell type
            self.cells_per_spot = pm.Gamma('cells_per_spot',
                                           mu=cell_number_prior['cells_per_spot'],
                                           sigma=np.sqrt(cell_number_prior['cells_per_spot'] \
                                                         / cell_number_prior['cells_mean_var_ratio']),
                                           shape=(self.n_cells, 1))
            self.comb_per_spot = pm.Gamma('combs_per_spot',
                                          mu=cell_number_prior['combs_per_spot'],
                                          sigma=np.sqrt(cell_number_prior['combs_per_spot'] \
                                                        / cell_number_prior['combs_mean_var_ratio']),
                                          shape=(self.n_cells, 1))

            shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1))
            rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot
            self.combs_factors = pm.Gamma('combs_factors',
                                          alpha=shape,
                                          beta=rate,
                                          shape=(self.n_cells, self.n_comb))

            self.factors_per_combs = pm.Gamma('factors_per_combs',
                                              mu=cell_number_prior['factors_per_combs'],
                                              sigma=np.sqrt(cell_number_prior['factors_per_combs'] \
                                                            / cell_number_prior['factors_mean_var_ratio']),
                                              shape=(self.n_comb, 1))
            c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape(
                (1, 1))
            self.comb2fact = pm.Gamma('comb2fact',
                                      alpha=c2f_shape,
                                      beta=self.factors_per_combs,
                                      shape=(self.n_comb, self.n_fact))

            self.spot_factors = pm.Gamma('spot_factors', mu=pm.math.dot(self.combs_factors, self.comb2fact),
                                         sigma=pm.math.sqrt(pm.math.dot(self.combs_factors, self.comb2fact) \
                                                            / self.spot_fact_mean_var_ratio),
                                         shape=(self.n_cells, self.n_fact))

            # =====================Spot-specific additive component======================= #
            # molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed between all genes not just expressed genes
            self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2)
            self.spot_add = pm.Gamma('spot_add',
                                     self.spot_add_hyp[0],
                                     self.spot_add_hyp[1],
                                     shape=(self.n_cells, 1))

            # =====================Gene-specific overdispersion ======================= #
            self.phi_hyp = pm.Gamma('phi_hyp',
                                    mu=phi_hyp_prior['mean'],
                                    sigma=phi_hyp_prior['sd'],
                                    shape=(1, 1))
            self.gene_E = pm.Exponential('gene_E',
                                         self.phi_hyp,
                                         shape=(self.n_genes, 1))

            # =====================Expected expression ======================= #
            # Expected counts for negative probes and gene probes concatenated into one array. Note that non-specific binding
            # scales linearly with the total number of counts (l_r) in this model.
            self.mu_biol = tt.concatenate([self.y_rn, pm.math.dot(self.spot_factors, self.gene_factors.T) * self.gene_level.T \
                                    + self.gene_add * self.l_r + self.spot_add], axis = 1)

            # =====================DATA likelihood ======================= #
            # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
            self.data_target = pm.NegativeBinomial(
                'data_target',
                mu=self.mu_biol,
                alpha=tt.concatenate([
                    np.repeat(10**10, self.n_npro).reshape(1, self.n_npro), 1 /
                    (self.gene_E.T * self.gene_E.T)
                ],
                                     axis=1),
                observed=tt.concatenate([self.y_data, self.x_data], axis=1))

            # =====================Compute nUMI from each factor in spots  ======================= #
            self.nUMI_factors = pm.Deterministic(
                'nUMI_factors', (self.spot_factors *
                                 (self.gene_factors * self.gene_level).sum(0)))
Example #22
from IPython.core.pylabtools import figsize
import numpy as np
import pymc3 as pm
import theano.tensor as tt
import matplotlib.pyplot as plt

count_data = np.loadtxt("demos/bayesian-programming/014_poisson.csv")
n_count_data = len(count_data)

with pm.Model() as model:
    alpha = 1.0 / count_data.mean()  # Recall count_data is the
    # variable that holds our txt counts
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)  # Index
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson("obs", lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']
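
# A brief follow-up sketch (assuming the samples above, not part of the original
# snippet): the expected count on each day averages lambda_1 before the switchpoint
# and lambda_2 after it, over the posterior draws.
N = tau_samples.shape[0]
expected_counts = np.zeros(n_count_data)
for day in range(n_count_data):
    use_lambda_1 = tau_samples > day        # draws where the switch has not yet happened
    expected_counts[day] = (lambda_1_samples[use_lambda_1].sum()
                            + lambda_2_samples[~use_lambda_1].sum()) / N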
Example #23
    def build(self):
        """ Builds and returns the Generative model. Also sets self.model """

        p_delay = get_delay_distribution(incubation=self.delay)
        nonzero_days = self.observed.total.gt(0)
        len_observed = len(self.observed)
        convolution_ready_gt = self._get_convolution_ready_gt(len_observed)
        x = np.arange(len_observed)[:, None]

        coords = {
            "date": self.observed.index.values,
            "nonzero_date":
            self.observed.index.values[self.observed.total.gt(0)],
        }
        with pm.Model(coords=coords) as self.model:

            # Let log_r_t walk randomly with a fixed prior of ~0.035. Think
            # of this number as how quickly r_t can react.
            log_r_t = pm.GaussianRandomWalk("log_r_t",
                                            sigma=0.035,
                                            dims=["date"])
            r_t = pm.Deterministic("r_t", pm.math.exp(log_r_t), dims=["date"])

            # For a given seed population and R_t curve, we calculate the
            # implied infection curve by simulating an outbreak. While this may
            # look daunting, it's simply a way to recreate the outbreak
            # simulation math inside the model:
            # https://staff.math.su.se/hoehle/blog/2020/04/15/effectiveR0.html
            seed = pm.Exponential("seed", 1 / 0.02)
            y0 = tt.zeros(len_observed)
            y0 = tt.set_subtensor(y0[0], seed)
            outputs, _ = theano.scan(
                fn=lambda t, gt, y, r_t: tt.set_subtensor(
                    y[t], tt.sum(r_t * y * gt)),
                sequences=[tt.arange(1, len_observed), convolution_ready_gt],
                outputs_info=y0,
                non_sequences=r_t,
                n_steps=len_observed - 1,
            )
            infections = pm.Deterministic("infections",
                                          outputs[-1],
                                          dims=["date"])

            # Convolve infections to confirmed positive reports based on a known
            # p_delay distribution. See patients.py for details on how we calculate
            # this distribution.
            test_adjusted_positive = pm.Deterministic(
                "test_adjusted_positive",
                conv2d(
                    tt.reshape(infections, (1, len_observed)),
                    tt.reshape(p_delay, (1, len(p_delay))),
                    border_mode="full",
                )[0, :len_observed],
                dims=["date"])

            # Picking an exposure with a prior that exposure never goes below
            # 0.1 * max_tests. The 0.1 only affects early values of Rt when
            # testing was minimal or when data errors cause underreporting
            # of tests.
            tests = pm.Data("tests", self.observed.total.values, dims=["date"])
            exposure = pm.Deterministic("exposure",
                                        pm.math.clip(
                                            tests,
                                            self.observed.total.max() * 0.1,
                                            1e9),
                                        dims=["date"])

            # Adjust reported cases for test volume based on an assumed exposure.
            # Note: this is similar to the exposure parameter in a Poisson
            # regression.
            positive = pm.Deterministic("positive",
                                        exposure * test_adjusted_positive,
                                        dims=["date"])

            # Save data as part of trace so we can access in inference_data
            observed_positive = pm.Data("observed_positive",
                                        self.observed.positive.values,
                                        dims=["date"])
            nonzero_observed_positive = pm.Data(
                "nonzero_observed_positive",
                self.observed.positive[nonzero_days.values].values,
                dims=["nonzero_date"])

            positive_nonzero = pm.NegativeBinomial(
                "nonzero_positive",
                mu=positive[nonzero_days.values],
                alpha=pm.Gamma("alpha", mu=6, sigma=1),
                observed=nonzero_observed_positive,
                dims=["nonzero_date"])

        return self.model
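
The theano.scan above implements a discrete renewal equation. A rough NumPy sketch of the same bookkeeping (not from the source; the name gt_pmf and its indexing convention are illustrative assumptions, and r_t is applied at the infector's day, mirroring the scan):

import numpy as np

def simulate_infections(seed, r_t, gt_pmf):
    """y[t] = sum over earlier days s of y[s] * r_t[s] * gt_pmf[t - s - 1]."""
    y = np.zeros(len(r_t))
    y[0] = seed
    for t in range(1, len(y)):
        for s in range(t):
            lag = t - s
            if lag <= len(gt_pmf):
                y[t] += y[s] * r_t[s] * gt_pmf[lag - 1]
    return y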
Example #24
File: model.py  Project: smba/pm-prophet
    def generate_priors(self):
        """Set up the priors for the model."""
        with self.model:
            if "sigma" not in self.priors:
                self.priors["sigma"] = pm.HalfCauchy("sigma_%s" % self.name,
                                                     10,
                                                     testval=1.0)

            if "seasonality" not in self.priors and self.seasonality:
                self.priors["seasonality"] = pm.Laplace(
                    "seasonality_%s" % self.name,
                    0,
                    self.seasonality_prior_scale,
                    shape=len(self.seasonality),
                )
            if "holidays" not in self.priors and self.holidays:
                self.priors["holidays"] = pm.Laplace(
                    "holidays_%s" % self.name,
                    0,
                    self.holidays_prior_scale,
                    shape=len(self.holidays),
                )
            if "regressors" not in self.priors and self.regressors:
                if self.positive_regressors_coefficients:
                    self.priors["regressors"] = pm.Exponential(
                        "regressors_%s" % self.name,
                        self.regressors_prior_scale,
                        shape=len(self.regressors),
                    )
                else:
                    self.priors["regressors"] = pm.Laplace(
                        "regressors_%s" % self.name,
                        0,
                        self.regressors_prior_scale,
                        shape=len(self.regressors),
                    )
            if self.growth and "growth" not in self.priors:
                self.priors["growth"] = pm.Normal("growth_%s" % self.name, 0,
                                                  0.1)
            if len(self.changepoints) and "changepoints" not in self.priors:
                if self.auto_changepoints:
                    k = self.n_changepoints
                    alpha = pm.Gamma("alpha", 1.0, 1.0)
                    beta = pm.Beta("beta", 1.0, alpha, shape=k)
                    w1 = pm.Deterministic(
                        "w1",
                        tt.concatenate(
                            [[1], tt.extra_ops.cumprod(1 - beta)[:-1]]) * beta,
                    )
                    w, _ = theano.map(
                        fn=lambda x: tt.switch(tt.gt(x, 1e-4), x, 0),
                        sequences=[w1])
                    self.w = pm.Deterministic("w", w)
                else:
                    k = len(self.changepoints)
                    w = 1
                cgpt = pm.Deterministic(
                    "cgpt",
                    pm.Laplace("cgpt_inner",
                               0,
                               self.changepoints_prior_scale,
                               shape=k) * w,
                )
                self.priors["changepoints"] = pm.Deterministic(
                    "changepoints_%s" % self.name, cgpt)
            if self.intercept and "intercept" not in self.priors:
                self.priors["intercept"] = pm.Normal(
                    "intercept_%s" % self.name,
                    self.data["y"].mean(),
                    self.data["y"].std() * 2,
                )

        self.priors_names = {k: v.name for k, v in self.priors.items()}
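
The automatic-changepoint branch above uses a stick-breaking construction for the changepoint weights. A minimal NumPy illustration (not part of pm-prophet; the values are arbitrary) of why those weights are non-negative and sum to at most one:

import numpy as np

rng = np.random.default_rng(0)
k, alpha = 5, 1.0
beta = rng.beta(1.0, alpha, size=k)                               # stick-breaking fractions
remaining = np.concatenate([[1.0], np.cumprod(1.0 - beta)[:-1]])  # stick left before each break
w1 = remaining * beta                                             # weight of each changepoint
print(w1, w1.sum())                                               # all >= 0, total <= 1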
Example #25
               help='Toggle to print summary of trace')
p.add_argument('--samples',
               type=int,
               default=15000,
               help='Number of sampling iterations')
args = p.parse_args()

# Get the dataset
# Format:
# 10 rows of <T_{i}, X_{i}> pairs
dataset = np.genfromtxt('dataset.txt', delimiter=' ')

# Create the model
pumps_mcmc_model = pymc3.Model()
with pumps_mcmc_model:
    alpha = pymc3.Exponential('alpha', 1.0)
    beta = pymc3.Gamma('beta', 0.1, 1.0)
    for i in range(dataset.shape[0]):
        theta = pymc3.Gamma('theta{}'.format(i), alpha, beta)
        lambd = pymc3.Deterministic('lambda{}'.format(i),
                                    theta * dataset[i, 0])
        x = pymc3.Poisson('x{}'.format(i), lambd, observed=dataset[i, 1])

# Perform Metropolis-Hastings algorithm step
# and print the trace of variables
with pumps_mcmc_model:
    step = pymc3.Metropolis(proposal_dist=getattr(
        pymc3.step_methods.metropolis, args.proposal_dist))
    trace = pymc3.sample(args.samples, step=step)
    if args.print_summary:
        print(pymc3.summary(trace))
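
The loop above creates ten separate theta/lambda/x nodes. For comparison, a vectorized sketch of the same hierarchy (an alternative formulation, not what this script does):

with pymc3.Model() as pumps_vectorized:
    alpha = pymc3.Exponential('alpha', 1.0)
    beta = pymc3.Gamma('beta', 0.1, 1.0)
    theta = pymc3.Gamma('theta', alpha, beta, shape=dataset.shape[0])
    lambd = pymc3.Deterministic('lambda', theta * dataset[:, 0])   # rate = theta * exposure time
    x = pymc3.Poisson('x', lambd, observed=dataset[:, 1])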
Example #26
import pymc3 as pm
import matplotlib.pyplot as plt
import pandas as pd
from pymc3.distributions.timeseries import GaussianRandomWalk
from scipy import optimize

# load data
returns = pd.read_csv(
    'https://raw.githubusercontent.com/pymc-devs/pymc3/master/pymc3/examples/data/SP500.csv',
    index_col='date')['change']
## data exploration
#fig, ax = plt.subplots(figsize=(14, 8))
#returns.plot(label='S&P500')
#ax.set(xlabel='time', ylabel='returns')
#ax.legend();

with pm.Model() as model:
    step_size = pm.Exponential('step_size', 50.)
    s = GaussianRandomWalk('s', sd=step_size, shape=len(returns))

    nu = pm.Exponential('nu', .1)

    r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns)
with model:
    trace = pm.sample(2000, cores=1, target_accept=0.9)

with model:
    pm.traceplot(trace, varnames=['step_size', 'nu'])

    fig, ax = plt.subplots()
    plt.plot(trace['s'].T, 'b', alpha=.03)
    ax.set(title=str(s), xlabel='time', ylabel='log volatility')
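
In this parameterisation lam = exp(-2 * s), so exp(s) is the latent volatility path. A brief follow-up sketch (assuming the trace above, not part of the original snippet):

import numpy as np
volatility = np.exp(trace['s'].T)           # shape: (time, posterior draws)
mean_volatility = volatility.mean(axis=1)   # posterior-mean volatility over time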
Example #27
def sim_prior_lin(model, x, vars=['α', 'β']):
    prior_ = pm.sample_prior_predictive(vars=vars, model=model)
    prior_array = pd.DataFrame(prior_).to_numpy()
    prior_mu = prior_array.dot(x)
    return prior_mu

def link(sim, x):
    sim_array = pd.DataFrame(sim).to_numpy()
    return sim_array.dot(x)


with pm.Model() as m1:
    α = pm.Normal('α', 1, 1)
    β = pm.Normal('β', 0, 1)
    σ = pm.Exponential('σ',  1)
    μi = α + β * (dA1.rugged_s.values - rbar)
    log_yi = pm.Normal('log_yi', μi, σ, observed=dA1.log_gdp_s)

rugged_seq = np.linspace(-0.1, 1.1, num=30).reshape(-1, 1)
rugged_seq.T.shape
x = np.r_[np.ones((1,30)), rugged_seq.T]

m1_μ = sim_prior_lin(m1, x, )
m1_μ.shape

with pm.Model() as m1i:
    α = pm.Normal('α', 1, 0.1)
    β = pm.Normal('β', 0, 0.3)
    σ = pm.Exponential('σ',  1)
    μi = α + β * (dA1.rugged_s.values - rbar)
Example #28
File: model.py  Project: pindash/best
    def __init__(self, y1, y2):
        self.y1 = y1 = np.array(y1)
        self.y2 = y2 = np.array(y2)

        assert y1.ndim == 1
        assert y2.ndim == 1

        y_all = np.concatenate((y1, y2))

        self.mu_loc = mu_loc = np.mean(y_all)
        self.mu_scale = mu_scale = np.std(y_all) * 1000

        self.sigma_low = sigma_low = np.std(y_all) / 1000
        self.sigma_high = sigma_high = np.std(y_all) * 1000

        self.nu_min = nu_min = 2.5
        self.nu_mean = nu_mean = 30
        self._nu_param = nu_mean - nu_min

        with pm.Model() as self._model:
            # Note: the IDE might give a warning for these because it thinks
            #  distributions like pm.Normal() don't have a string "name" argument,
            #  but this is false – pm.Distribution redefined __new__, so the
            #  first argument indeed is the name (a string).
            group1_mean = pm.Normal('Group 1 mean', mu=mu_loc, sd=mu_scale)
            group2_mean = pm.Normal('Group 2 mean', mu=mu_loc, sd=mu_scale)

            nu = pm.Exponential('nu - %g' % nu_min, 1 /
                                (nu_mean - nu_min)) + nu_min
            _ = pm.Deterministic('Normality', nu)

            group1_logsigma = pm.Uniform('Group 1 log sigma',
                                         lower=np.log(sigma_low),
                                         upper=np.log(sigma_high))
            group2_logsigma = pm.Uniform('Group 2 log sigma',
                                         lower=np.log(sigma_low),
                                         upper=np.log(sigma_high))
            group1_sigma = pm.Deterministic('Group 1 sigma',
                                            np.exp(group1_logsigma))
            group2_sigma = pm.Deterministic('Group 2 sigma',
                                            np.exp(group2_logsigma))

            lambda1 = group1_sigma**(-2)
            lambda2 = group2_sigma**(-2)

            group1_sd = pm.Deterministic('Group 1 SD',
                                         group1_sigma * (nu / (nu - 2))**0.5)
            group2_sd = pm.Deterministic('Group 2 SD',
                                         group2_sigma * (nu / (nu - 2))**0.5)

            _ = pm.StudentT('Group 1 data',
                            observed=y1,
                            nu=nu,
                            mu=group1_mean,
                            lam=lambda1)
            _ = pm.StudentT('Group 2 data',
                            observed=y2,
                            nu=nu,
                            mu=group2_mean,
                            lam=lambda2)

            diff_of_means = pm.Deterministic('Difference of means',
                                             group1_mean - group2_mean)
            _ = pm.Deterministic('Difference of SDs', group1_sd - group2_sd)
            _ = pm.Deterministic(
                'Effect size', diff_of_means / np.sqrt(
                    (group1_sd**2 + group2_sd**2) / 2))
Example #29
    def __init__(
            self,
            cell_state_mat: np.ndarray,
            X_data: np.ndarray,
            n_comb: int = 50,
            data_type: str = 'float32',
            n_iter=20000,
            learning_rate=0.005,
            total_grad_norm_constraint=200,
            verbose=True,
            var_names=None, var_names_read=None,
            obs_names=None, fact_names=None, sample_id=None,
            gene_level_prior={'mean': 1 / 2, 'sd': 1 / 4},
            gene_level_var_prior={'mean_var_ratio': 1},
            cell_number_prior={'cells_per_spot': 8,
                               'factors_per_spot': 7,
                               'combs_per_spot': 2.5},
            cell_number_var_prior={'cells_mean_var_ratio': 1,
                                   'factors_mean_var_ratio': 1,
                                   'combs_mean_var_ratio': 1},
            phi_hyp_prior={'mean': 3, 'sd': 1},
            spot_fact_mean_var_ratio=0.5
    ):

        ############# Initialise parameters ################
        super().__init__(cell_state_mat, X_data,
                         data_type, n_iter,
                         learning_rate, total_grad_norm_constraint,
                         verbose, var_names, var_names_read,
                         obs_names, fact_names, sample_id)

        for k in gene_level_var_prior.keys():
            gene_level_prior[k] = gene_level_var_prior[k]

        self.gene_level_prior = gene_level_prior
        self.phi_hyp_prior = phi_hyp_prior
        self.n_comb = n_comb
        self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio

        cell_number_prior['factors_per_combs'] = (cell_number_prior['factors_per_spot'] /
                                                  cell_number_prior['combs_per_spot'])
        for k in cell_number_var_prior.keys():
            cell_number_prior[k] = cell_number_var_prior[k]
        self.cell_number_prior = cell_number_prior

        ############# Define the model ################
        self.model = pm.Model()

        with self.model:

            # =====================Gene expression level scaling======================= #
            # Explains the difference in expression between genes and
            # how it differs between single-cell and spatial technologies
            # compute hyperparameters from mean and sd
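            # (added note) for Gamma(shape, rate): mean = shape / rate and var = shape / rate**2,
            # so shape = mean**2 / sd**2 and rate = mean / sd**2, as computed below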
            shape = gene_level_prior['mean'] ** 2 / gene_level_prior['sd'] ** 2
            rate = gene_level_prior['mean'] / gene_level_prior['sd'] ** 2
            shape_var = shape / gene_level_prior['mean_var_ratio']
            rate_var = rate / gene_level_prior['mean_var_ratio']
            self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp',
                                                 mu=shape, sigma=np.sqrt(shape_var),
                                                 shape=(1, 1))
            self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp',
                                                mu=rate, sigma=np.sqrt(rate_var),
                                                shape=(1, 1))

            self.gene_level = pm.Gamma('gene_level', self.gene_level_alpha_hyp,
                                       self.gene_level_beta_hyp, shape=(self.n_genes, 1))

            # scale cell state factors by gene_level
            self.gene_factors = pm.Deterministic('gene_factors', self.cell_state)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0))

            # =====================Spot factors======================= #
            # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured, 
            # times heterogeneity in the total amount of mRNA between individual cells within each cell type
            self.cells_per_spot = pm.Gamma('cells_per_spot',
                                           mu=cell_number_prior['cells_per_spot'],
                                           sigma=np.sqrt(cell_number_prior['cells_per_spot'] \
                                                         / cell_number_prior['cells_mean_var_ratio']),
                                           shape=(self.n_cells, 1))
            self.comb_per_spot = pm.Gamma('combs_per_spot',
                                          mu=cell_number_prior['combs_per_spot'],
                                          sigma=np.sqrt(cell_number_prior['combs_per_spot'] \
                                                        / cell_number_prior['combs_mean_var_ratio']),
                                          shape=(self.n_cells, 1))

            shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1))
            rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot
            self.combs_factors = pm.Gamma('combs_factors', alpha=shape, beta=rate,
                                          shape=(self.n_cells, self.n_comb))

            self.factors_per_combs = pm.Gamma('factors_per_combs',
                                              mu=cell_number_prior['factors_per_combs'],
                                              sigma=np.sqrt(cell_number_prior['factors_per_combs'] \
                                                            / cell_number_prior['factors_mean_var_ratio']),
                                              shape=(self.n_comb, 1))
            c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape((1, 1))
            self.comb2fact = pm.Gamma('comb2fact', alpha=c2f_shape, beta=self.factors_per_combs,
                                      shape=(self.n_comb, self.n_fact))

            self.spot_factors = pm.Gamma('spot_factors', mu=pm.math.dot(self.combs_factors, self.comb2fact),
                                         sigma=pm.math.sqrt(pm.math.dot(self.combs_factors, self.comb2fact) \
                                                            / self.spot_fact_mean_var_ratio),
                                         shape=(self.n_cells, self.n_fact))

            # =====================Spot-specific additive component======================= #
            # molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed across all genes, not just expressed genes
            self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2)
            self.spot_add = pm.Gamma('spot_add', self.spot_add_hyp[0],
                                     self.spot_add_hyp[1], shape=(self.n_cells, 1))

            # =====================Gene-specific additive component ======================= #
            # per gene molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed equally between all spots (e.g. background, free-floating RNA)
            self.gene_add_hyp = pm.Gamma('gene_add_hyp', 1, 1, shape=2)
            self.gene_add = pm.Gamma('gene_add', self.gene_add_hyp[0],
                                     self.gene_add_hyp[1], shape=(self.n_genes, 1))

            # =====================Gene-specific overdispersion ======================= #
            self.phi_hyp = pm.Gamma('phi_hyp', mu=phi_hyp_prior['mean'],
                                    sigma=phi_hyp_prior['sd'], shape=(1, 1))
            self.gene_E = pm.Exponential('gene_E', self.phi_hyp, shape=(self.n_genes, 1))

            # =====================Expected expression ======================= #
            # expected expression
            self.mu_biol = pm.math.dot(self.spot_factors, self.gene_factors.T) * self.gene_level.T \
                           + self.gene_add.T + self.spot_add
            # tt.printing.Print('mu_biol')(self.mu_biol.shape)

            # =====================DATA likelihood ======================= #
            # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
            self.data_target = pm.NegativeBinomial('data_target', mu=self.mu_biol,
                                                   alpha=1 / (self.gene_E.T * self.gene_E.T),
                                                   observed=self.x_data,
                                                   total_size=self.X_data.shape)

            # =====================Compute nUMI from each factor in spots  ======================= #                          
            self.nUMI_factors = pm.Deterministic('nUMI_factors',
                                                 (self.spot_factors * (self.gene_factors * self.gene_level).sum(0)))
Example #30
ax.set_ylabel("y")
ax.set_title("The third Ascombe's quartet")
plt.show()
# center the x data
x = x - x.mean()

# ----------------------- specify a probabilistic model for the data ----------------------- #

with pm.Model() as model_t:
    # set the prior over the intercept and the coefficient
    alpha = pm.Normal("alpha", mu=y.mean(), sd=1)
    beta = pm.Normal("beta", mu=0, sd=1)
    # set the prior over the errors variance
    sigma = pm.HalfNormal("sigma", 5)
    # set the prior on the degrees of freedom
    vu_ = pm.Exponential("vu_", 1 / 29)
    vu = pm.Deterministic("vu", vu_ + 1)
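    # (added note) the +1 shift keeps the degrees of freedom above 1, so the
    # Student-t likelihood has a finite mean while still allowing heavy tails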
    # get the likelihood on the data
    obs = pm.StudentT("obs",
                      mu=alpha + beta * x,
                      sigma=sigma,
                      nu=vu,
                      observed=y)
    # inference step
    trace = pm.sample(2000)

# ------------------ compare the result of a simple linear regression (which assumes Gaussian errors) and the robust linear regression ------------------ #

# get the coefficient and intercept from a scipy linear regression
beta_c, alpha_c = ss.linregress(x, y)[:2]
# plot the non robust linear regression