Example #1
    def generate_priors(self):
        """Set up the priors for the model."""
        with self.model:
            if 'sigma' not in self.priors:
                self.priors['sigma'] = pm.HalfCauchy('sigma_%s' % self.name, 10, testval=1.)

            if 'seasonality' not in self.priors and self.seasonality:
                self.priors['seasonality'] = pm.Laplace('seasonality_%s' % self.name, 0, self.seasonality_prior_scale,
                                                        shape=len(self.seasonality))
            if 'holidays' not in self.priors and self.holidays:
                self.priors['holidays'] = pm.Laplace('holidays_%s' % self.name, 0, self.holidays_prior_scale,
                                                     shape=len(self.holidays))
            if 'regressors' not in self.priors and self.regressors:
                if self.positive_regressors_coefficients:
                    self.priors['regressors'] = pm.Exponential('regressors_%s' % self.name, self.regressors_prior_scale,
                                                               shape=len(self.regressors))
                else:
                    self.priors['regressors'] = pm.Laplace('regressors_%s' % self.name, 0, self.regressors_prior_scale,
                                                           shape=len(self.regressors))
            if self.growth and 'growth' not in self.priors:
                self.priors['growth'] = pm.Normal('growth_%s' % self.name, 0, 10)
            if 'changepoints' not in self.priors and len(self.changepoints):
                self.priors['changepoints'] = pm.Laplace('changepoints_%s' % self.name, 0,
                                                         self.changepoints_prior_scale,
                                                         shape=len(self.changepoints))
            if self.intercept and 'intercept' not in self.priors:
                self.priors['intercept'] = pm.Normal('intercept_%s' % self.name, self.data['y'].mean(),
                                                     self.data['y'].std() * 2)

        self.priors_names = {k: v.name for k, v in self.priors.items()}
Example #2
def lasso_regression(X, y_obs, ylabel='y'):
    num_obs, num_feats = X.eval().shape
    with pm.Model() as mlasso:
        sd_beta = pm.HalfCauchy('sd_beta', beta=2.5)
        sig = pm.HalfCauchy('sigma', beta=2.5)
        bias = pm.Laplace('bias', mu=0, b=sd_beta)
        w = pm.Laplace('w', mu=0, b=sd_beta, shape=num_feats)
        mu_ = pm.Deterministic('mu', bias + tt.dot(X, w))
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs.squeeze())
    return mlasso
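A minimal usage sketch for the function above (illustrative data; it assumes the usual imports, and passes a Theano shared variable because lasso_regression calls X.eval()):

import numpy as np
import theano
import pymc3 as pm

X_shared = theano.shared(np.random.randn(100, 5))  # 100 observations, 5 features
y = np.random.randn(100, 1)
model = lasso_regression(X_shared, y)
with model:
    trace = pm.sample(1000, tune=1000)  # sample the lasso posterior with NUTS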
Example #3
def hier_lasso_regr(X, y_obs, add_bias=True, ylabel='y'):
    num_obs, num_feats = X.shape
    with pm.Model() as mlasso:
        hyp_beta = pm.HalfCauchy('hyp_beta', beta=2.5)
        hyp_mu = pm.HalfCauchy('hyp_mu', beta=2.5)  # HalfCauchy takes no mu argument
        sig = pm.HalfCauchy('sigma', beta=2.5)
        bias = pm.Laplace('bias', mu=hyp_mu, b=hyp_beta)
        w = pm.Laplace('w', mu=hyp_mu, b=hyp_beta, shape=num_feats)
        mu_ = pm.Deterministic('mu', bias + tt.dot(X, w))
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs.squeeze())
    return mlasso
Example #4
def lasso_regr_impute_y(X, y_obs, ylabel='y'):
    num_obs, num_feats = X.eval().shape
    with pm.Model() as mlass_y_na:
        sd_beta = pm.HalfCauchy('sd_beta', beta=2.5)
        sig = pm.HalfCauchy('sigma', beta=2.5)
        bias = pm.Laplace('bias', mu=0, b=sd_beta)
        w = pm.Laplace('w', mu=0, b=sd_beta, shape=num_feats)
        mu_ = pm.Deterministic('mu', bias + tt.dot(X, w))
        mu_y_obs = pm.Normal('mu_y_obs', 0.5, 1)
        sigma_y_obs = pm.HalfCauchy('sigma_y_obs', 1)
        y_obs_ = pm.Normal('y_obs', mu_y_obs, sigma_y_obs, observed=y_obs.squeeze())
        y = pm.Normal(ylabel, mu=y_obs_, sd=sig)
    return mlass_y_na
Example #5
def hier_lasso_regr(X, y_obs, add_bias=True, ylabel='y'):
    X_ = pm.floatX(X)
    Y_ = pm.floatX(y_obs)
    n_features = X_.eval().shape[1]
    with pm.Model() as mlasso:
        hyp_beta = pm.HalfCauchy('hyp_beta', beta=2.5)
        hyp_mu = pm.HalfCauchy('hyp_mu', beta=2.5)  # HalfCauchy takes no mu argument
        sig = pm.HalfCauchy('sigma', beta=2.5)
        bias = pm.Laplace('bias', mu=hyp_mu, b=hyp_beta)
        w = pm.Laplace('w', mu=hyp_mu, b=hyp_beta, shape=n_features)
        mu_ = pm.Deterministic('mu', bias + tt.dot(X_, w))
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=Y_)
    return mlasso
Example #6
    def definition(self, model, X, scale_factor):
        t = X["t"].values
        group, n_groups, self.groups_ = get_group_definition(
            X, self.pool_cols, self.pool_type)
        self.s = np.linspace(0, np.max(t), self.n_changepoints + 2)[1:-1]

        with model:
            A = (t[:, None] > self.s) * 1.0

            if self.pool_type == "partial":
                sigma_k = pm.HalfCauchy(self._param_name("sigma_k"),
                                        beta=self.growth_prior_scale)
                offset_k = pm.Normal(self._param_name("offset_k"),
                                     mu=0,
                                     sd=1,
                                     shape=n_groups)
                k = pm.Deterministic(self._param_name("k"), offset_k * sigma_k)

                sigma_delta = pm.HalfCauchy(self._param_name("sigma_delta"),
                                            beta=self.changepoints_prior_scale)
                offset_delta = pm.Laplace(
                    self._param_name("offset_delta"),
                    0,
                    1,
                    shape=(n_groups, self.n_changepoints),
                )
                delta = pm.Deterministic(self._param_name("delta"),
                                         offset_delta * sigma_delta)

            else:
                delta = pm.Laplace(
                    self._param_name("delta"),
                    0,
                    self.changepoints_prior_scale,
                    shape=(n_groups, self.n_changepoints),
                )
                k = pm.Normal(self._param_name("k"),
                              0,
                              self.growth_prior_scale,
                              shape=n_groups)

            m = pm.Normal(self._param_name("m"), 0, 5, shape=n_groups)

            gamma = -self.s * delta[group, :]

            g = (k[group] + pm.math.sum(A * delta[group], axis=1)) * t + (
                m[group] + pm.math.sum(A * gamma, axis=1))
        return g
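The construction above is the Prophet-style piecewise-linear trend: g(t) = (k + A·delta)·t + (m + A·gamma) with gamma = -s·delta, which keeps the trend continuous at every changepoint. A small numpy sketch (illustrative values, independent of the model) that demonstrates this:

import numpy as np

t = np.linspace(0.0, 10.0, 1001)
s = np.array([3.0, 7.0])        # changepoint locations
delta = np.array([0.5, -1.0])   # rate adjustments at the changepoints
k, m = 1.0, 0.0                 # base growth rate and offset
A = (t[:, None] > s) * 1.0      # changepoint indicator matrix
g = (k + A @ delta) * t + (m + A @ (-s * delta))
# the slope of g changes by delta at each changepoint, but g itself has no jumps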
Example #7
    def trend_model(self, model, t, n_changepoints, jump_prior_scale,
                    growth_prior_scale, changepoint_range):
        s = np.linspace(0, changepoint_range * np.max(t),
                        n_changepoints + 1)[1:]

        # * 1 casts the boolean to integers
        A = (t[:, None] > s) * 1

        with model:
            # initial growth
            k = pm.Normal('k', 0, growth_prior_scale)

            if jump_prior_scale is None:
                jump_prior_scale = pm.Exponential('tau', 1.5)

            # rate of change
            delta = pm.Laplace('delta',
                               0,
                               jump_prior_scale,
                               shape=n_changepoints)
            # offset
            m = pm.Normal('m', 0, 0.25)
            gamma = -s * delta

            g = (k + self.det_dot(A, delta)) * t + (m + self.det_dot(A, gamma))
        return g, A, s
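self.det_dot is not shown in this snippet; a common definition of this helper in PyMC3/Theano Prophet reimplementations (a sketch, assuming it is the usual broadcast dot product) is:

def det_dot(a, b):
    # Dot product of an (n, m) matrix with an m-vector, written with
    # broadcasting so it also works when b is a Theano random variable.
    return (a * b).sum(axis=-1)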
Example #8
def reg_hs_regression(X_, y_obs, ylabel='likelihood', tau_0=None, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""
    n_features = X_.eval().shape[1]
    if tau_0 is None:
        m0 = n_features/2
        n_obs = X_.eval().shape[0]
        tau_0 = m0 / ((n_features - m0) * np.sqrt(n_obs))
    with pm.Model() as model:
        tau = pm.HalfCauchy('tau', tau_0)
        sd_bias = pm.HalfCauchy('sd_bias', beta=2.5)
        lamb_m = pm.HalfCauchy('lambda_m', beta=1)
        slab_scale = kwargs.pop('slab_scale', 3)
        slab_scale_sq = slab_scale ** 2
        slab_df = kwargs.pop('slab_df', 8)
        half_slab_df = slab_df / 2
        # Regularization bit
        c_sq = pm.InverseGamma('c_sq', alpha=half_slab_df,
                               beta=half_slab_df * slab_scale_sq)
        lamb_m_bar = tt.sqrt(c_sq) * lamb_m / (tt.sqrt(c_sq +
                                                       tt.pow(tau, 2) *
                                                       tt.pow(lamb_m, 2)
                                                      )
                                              )
        w = pm.Normal('w', mu=0, sd=tau*lamb_m_bar, shape=n_features)
        bias = pm.Laplace('bias', mu=0, b=sd_bias)
        mu_ = tt.dot(X_, w) + bias
        sig = pm.HalfCauchy('sigma', beta=5)
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs)
        model.name = "regularized_hshoe_reg"
    return model
Example #9
    def mcmc(self, prior='normal'):
        model = pm.Model()
        with model:
            # set the prior distribution of weights
            if prior == 'normal':
                W = pm.Normal('w', mu=0, sigma=1, shape=self.X.shape[1])
            elif prior == 'laplace':
                W = pm.Laplace('w', 0, b=1, shape=self.X.shape[1])
            elif prior == 'horseshoe':
                sigma = pm.HalfNormal('sigma', 2)
                tau_0 = 10 / (self.X.shape[1] - 10) * sigma / tt.sqrt(
                    self.X.shape[0])
                tau = pm.HalfCauchy('tau', tau_0)
                lambda_m = pm.HalfCauchy('lambda', 1)

                W = pm.Normal('w',
                              mu=0,
                              sigma=tau * lambda_m,
                              shape=self.X.shape[1])
            elif prior == 'spike':
                pass  # TODO: spike-and-slab not implemented; W stays undefined here
            else:
                print("Invalid prior type.")
                return None

        return self.get_trace(W, model)
Example #10
    def run_model(self, **kwargs):
        """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
        # Importing here since Theano base_compiledir needs to be set prior to import
        import pymc3 as pm

        click.echo("Building model")
        with pm.Model() as self.model:
            # Constants
            N = len(self.backgrounds)
            M = len(self.training_genes)
            MN = M * N

            # Prior constants
            mu_exp = self.df[self.training_genes].mean().mean()
            sd_exp = self.df[self.training_genes].std().mean()

            # Gene Model Priors
            gm_sd = pm.InverseGamma("gm_sd", 1, 1, shape=MN)
            gm_mu = pm.Normal("gm_mu", mu_exp, sd_exp, shape=MN)

            # Gene model
            pm.Normal(
                "x_hat",
                mu=gm_mu[self.x_ix],
                sd=gm_sd[self.x_ix],
                shape=MN,
                observed=self.index_df.value,
            )
            x = pm.Normal("x", mu=gm_mu, sd=gm_sd, shape=MN)

            # Likelihood priors
            eps = pm.InverseGamma("eps", 1, 1)
            if N == 1:
                beta = [1]
            else:
                beta = pm.Dirichlet("beta", a=np.ones(N))

            # Likelihood
            norm = np.zeros(M)
            gm_sd_2d = gm_sd.reshape((M, N))
            for i in range(N):
                norm += beta[i] / gm_sd_2d[:, i]
            norm = pm.Deterministic("norm", norm)

            y = pm.Deterministic(
                "y", pm.math.dot((x / gm_sd).reshape((M, N)), beta))
            norm_eps = pm.Deterministic("norm_eps", eps / norm)
            sample_genes = self.sample[self.training_genes].values
            pm.Laplace("y_hat",
                       mu=(y / norm)[self.s_ix],
                       b=norm_eps,
                       observed=sample_genes)

            trace = pm.sample(**kwargs)
        self.trace = trace
        click.echo("Calculating posterior predictive samples")
        self.ppc = pm.sample_posterior_predictive(trace, model=self.model)
Example #11
def run_model(sample: pd.Series,
              df: pd.DataFrame,
              training_genes: List[str],
              group: str = 'tissue',
              **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        group: Column used to distinguish the different groups
        **kwargs: Keyword arguments forwarded to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    # Importing here since Theano base_compiledir needs to be set prior to import
    import pymc3 as pm

    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    click.echo('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)
        b = [1] if len(classes) == 1 else pm.Dirichlet('b',
                                                       a=np.ones(len(classes)))
        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += b[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])
        # Sample
        trace = pm.sample(**kwargs)
    return model, trace
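A hypothetical invocation (the variable names are illustrative; the extra keyword arguments are forwarded to pm.sample):

model, trace = run_model(sample, background_df, training_genes,
                         group='tissue', draws=500, tune=1000)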
Example #12
    def generate_priors(self):
        with self.model:
            if 'sigma' not in self.priors:
                self.priors['sigma'] = pm.HalfCauchy('sigma_%s' % self.name, 10, testval=1.)
            if 'seasonality' not in self.priors and self.seasonality:
                self.priors['seasonality'] = pm.Laplace('seasonality_%s' % self.name, 0, 10,
                                                        shape=len(self.seasonality))
            if 'holidays' not in self.priors and self.holidays:
                self.priors['holidays'] = pm.Laplace('holidays_%s' % self.name, 0, 10, shape=len(self.holidays))
            if 'regressors' not in self.priors and self.regressors:
                self.priors['regressors'] = pm.Normal('regressors_%s' % self.name, 0, 10,
                                                      shape=len(self.regressors))
            if self.growth and 'growth' not in self.priors:
                self.priors['growth'] = pm.Normal('growth_%s' % self.name, 0, 0.5)
            if self.growth and 'change_points' not in self.priors and len(self.change_points):
                self.priors['change_points'] = pm.Laplace('change_points_%s' % self.name, 0, 0.5,
                                                          shape=len(self.change_points))
            if self.intercept and 'intercept' not in self.priors:
                self.priors['intercept'] = pm.Normal('intercept_%s' % self.name, self.data['y'].mean(),
                                                     self.data['y'].std() * 2, testval=1.0)
Example #13
def get_mcmc_sample_for_laplace_prior(X, y):
    # This should return a pymc3 Trace object
    with pm.Model() as laplace_model:
        theta = pm.Laplace("theta", mu=0, b=.5,
                           shape=X.shape[1])  # mu and b are hyperparameters
        mu = tt.dot(X, theta)
        y_obs = pm.Normal("y_obs", mu=mu, sigma=1, observed=y)

        trace = pm.sample(
            500, return_inferencedata=False)  # we choose to sample 500 points

    return trace
Example #14
def run_model(sample,
              df,
              training_genes,
              weights,
              group: str = 'tissue',
              **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        weights: Weight applied to each group's fit when composing mu
        group: Column to use to distinguish different groups
        **kwargs: Keyword arguments forwarded to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    print('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)
        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # TODO: Try tt.stack to declare mu more intelligently via b * y
        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += weights[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])
        # Sample
        trace = pm.sample(**kwargs)
    return model, trace
Example #15
def pm_lasso_model(X, y, b):

    lasso = pm.Model()
    with lasso:
        beta = pm.Laplace('beta', 0, b=1, shape=X.shape[1])
        y_hat = tt.dot(X, beta)
        likelihood = pm.Normal('likelihood', y_hat, observed=y)

        trace = pm.sample(1000)

    b_hat = trace.get_values('beta').mean(0)
    b_sig = trace.get_values('beta').std(0)
    plot_beta(b, b_hat, std=b_sig)

    return trace
Example #16
    def definition(self, model, X, scale_factor):
        t = X["t"].values
        self.s = np.linspace(0, np.max(t), self.n_changepoints + 2)[1:-1]

        with model:
            A = (t[:, None] > self.s) * 1.0
            k = pm.Normal("k", 0, self.growth_prior_scale)
            delta = pm.Laplace("delta",
                               0,
                               self.changepoints_prior_scale,
                               shape=self.n_changepoints)
            m = pm.Normal("m", 0, 5)
            gamma = -self.s * delta

            g = (k + dot(A, delta)) * t + (m + dot(A, gamma))
        return g
Example #17
def fit_bayesian_model(cases: np.ndarray) -> List[AtgModelFit]:
    base_length = 800
    with pm.Model() as model:  # noqa: F841
        days = np.arange(len(cases))

        alpha = pm.Uniform("alpha", 1, 20)
        shift = pm.Uniform("shift", 0, 40)
        peak = pm.Uniform("peak", 30, 80)
        mult = pm.Uniform("mult", -30, 20)

        tg = pm.Deterministic("tg", peak / alpha)
        x = pm.math.maximum(0, (days - shift) / tg)
        x_prev = pm.math.maximum(0, (days - 1 - shift) / tg)
        exp_cases = pm.Deterministic(
            "exp_cases",
            pm.math.exp(mult - x) * (x**alpha) - pm.math.exp(mult - x_prev) *
            (x_prev**alpha),
        )

        sigma = pm.HalfCauchy("sigma", beta=500)
        # likelihood = pm.Normal("y", mu=exp_cases, sigma=sigma, observed=cases)
        # likelihood = pm.Cauchy("y", alpha=exp_cases, beta=sigma, observed=cases)
        likelihood = pm.Laplace("obs", mu=exp_cases, b=sigma,
                                observed=cases)  # noqa: F841

        step = pm.NUTS(target_accept=0.9)
        start = pm.find_MAP()
        trace = pm.sample(2 * base_length,
                          chains=4,
                          cores=4,
                          tune=base_length,
                          start=start,
                          step=step)
        trace = trace[base_length:]

    pm.traceplot(trace)
    plt.show()

    alpha_t = trace["alpha"][-base_length:]
    shift_t = trace["shift"][-base_length:]
    tg_t = trace["tg"][-base_length:]
    a_t = np.exp(trace["mult"][-base_length:]) * tg_t

    return [
        AtgModelFit(exp=alpha, tg=tg, a=a, t0=shift + 1)
        for alpha, tg, a, shift in zip(alpha_t, tg_t, a_t, shift_t)
    ]
Example #18
def get_mcmc_sample_for_laplace_prior(X, y):
    # This should return a pymc3 Trace object

    lasso = pm.Model()
    with lasso:
        prior_location = 0
        prior_scale = 1
        theta = pm.Laplace('theta',
                           mu=prior_location,
                           b=prior_scale,
                           shape=X.shape[1])
        y_noiseless = tt.dot(X, theta)
        likelihood = pm.Normal('likelihood', y_noiseless, observed=y)
        step = pm.Metropolis(tune_interval=1)
        trace = pm.sample(1000, step=step)

    return trace
Example #19
def hs_regression(X_, y_obs, ylabel='likelihood', tau_0=None, regularized=False, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""

    n_features = X_.eval().shape[1]
    if tau_0 is None:
        m0 = n_features/2
        n_obs = X_.eval().shape[0]
        tau_0 = m0 / ((n_features - m0) * np.sqrt(n_obs))
    with pm.Model() as model:
        tau = pm.HalfCauchy('tau', tau_0)
        sd_bias = pm.HalfCauchy('sd_bias', beta=2.5)
        lamb_m = pm.HalfCauchy('lambda_m', beta=1)
        w = pm.Normal('w', mu=0, sd=tau*lamb_m, shape=n_features)
        bias = pm.Laplace('bias', mu=0, b=sd_bias)
        mu_ = tt.dot(X_, w) + bias
        sig = pm.HalfCauchy('sigma', beta=5)
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs)
        model.name = "horseshoe_reg"
    return model
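For reference, the default tau_0 above implements the Piironen & Vehtari heuristic with m0 = n_features / 2 assumed nonzero coefficients. A quick numeric check with illustrative sizes:

import numpy as np

m0, n_features, n_obs = 10, 20, 100
tau_0 = m0 / ((n_features - m0) * np.sqrt(n_obs))
print(tau_0)  # 0.1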
Example #20
    def add_trend(self, t, idx, s, A):

        if self.trend_hierarchical:

            with self.model:
                
                # Hyper priors, RVs
                k_mu = pm.Normal('k_mu', mu=0., sd=10) # sd=10
                k_sigma = pm.HalfCauchy('k_sigma', testval=1, beta=5) # beta=5
                
                m_mu = pm.Normal('m_mu', mu=0., sd=10) # sd=10
                m_sigma = pm.HalfCauchy('m_sigma', testval=1, beta=5) # beta=5

                delta_b = pm.HalfCauchy('delta_b', testval=0.1, beta=0.1) # beta=0.1

        else:

            # No RVs, fixed parameters
            k_mu = 0
            k_sigma = 5
            
            m_mu = 0
            m_sigma = 10

            delta_b = 0.1

        with self.model:
            
            # Priors
            k = pm.Normal('k', k_mu, k_sigma, shape=self.n_series)
            m = pm.Normal('m', m_mu, m_sigma, shape=self.n_series)

            delta = pm.Laplace('delta', 0, delta_b, shape=(self.n_series, self.n_changepoints))
                    
            # Starting point (offset)
            g_t = m[idx]
            
            # Linear trend w/ changepoints
            gamma = -s * delta[idx, :]
            g_t += (k[idx] + (A * delta[idx, :]).sum(axis=1)) * t + (A * gamma).sum(axis=1)

        return g_t
Example #21
def get_bayes_markers(obs, inp, flevel, mgenes):
    basic_model = pm.Model()
    with basic_model:
        # priors for random intercept
        mu_a = pm.Normal('mu_a', mu=0., sigma=2)
        sigma_a = pm.HalfNormal('sigma_a', 1)
        # Intercept for each cluster, distributed around group mean mu_a
        # Above we just set mu and sd to a fixed value while here we
        # plug in a common group distribution for all a and b (which are
        # vectors of length n_counties).
        num_levels = len(np.unique(flevel))
        alpha = pm.Normal('alpha', mu=mu_a, sigma=sigma_a, shape=num_levels)

        # Priors for unknown model parameters
        #beta = pm.Beta("beta", alpha=1/2, beta = 1/2, shape=20)
        beta = pm.Laplace("beta", mu=0, b=1, shape=mgenes)
        #beta = pm.Normal("beta", mu=0, sigma = 0.5, shape=30)

        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Bernoulli("Y_obs",
                             logit_p=alpha[flevel] + inp @ beta,
                             observed=obs)

        map_estimate = pm.find_MAP(model=basic_model)
        map_betas = np.where(map_estimate['beta'] > 0)[0]
        #print(map_betas)
    with basic_model:
        # draw 500 posterior samples
        trace = pm.sample(200)
    df = az.summary(trace, round_to=2)
    #print(df)
    betas = df.iloc[9:-1, :]
    # betas with significant values
    sample_betas = betas[betas["hpd_3%"] > 0].index.values
    # return the actual number (0..mgenes-1) of the genes that were significant
    offset = num_levels + 1
    beta_values = np.where(df.index.isin(sample_betas))[0] - offset
    print(beta_values)
    return beta_values
Example #22
    def run_model(self, **kwargs):
        """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
        # Importing here since Theano base_compiledir needs to be set prior to import
        import pymc3 as pm

        # Collect fits
        self.fits = self.t_fits()

        click.echo("Building model")
        with pm.Model() as self.model:
            # Convex model priors
            b = ([1] if len(self.backgrounds) == 1 else pm.Dirichlet(
                "b", a=np.ones(len(self.backgrounds))))
            # Model error
            eps = pm.InverseGamma("eps", 1, 1)

            # Convex model declaration
            for gene in tqdm(self.training_genes):
                y, norm_term = 0, 0
                for i, dataset in enumerate(self.backgrounds):
                    name = f"{gene}={dataset}"
                    fit = self.fits.loc[name]
                    x = pm.StudentT(name, nu=fit.nu, mu=fit.mu, lam=fit.lam)
                    y += (b[i] / fit.sd) * x
                    norm_term += b[i] / fit.sd

                # y_g = \frac{\sum_d \frac{\beta * x}{\sigma} + \epsilon}{\sum_d\frac{\beta}{\sigma}}
                # Embed mu in laplacian distribution
                pm.Laplace(
                    gene,
                    mu=y / norm_term,
                    b=eps / norm_term,
                    observed=self.sample[gene],
                )
            # Sample
            self.trace = pm.sample(**kwargs)
Example #23
with pm.Model() as model:
    sib_mean = pm.Exponential("sib_mean", 1.0)
    siblings_imp = pm.Poisson("siblings_imp", sib_mean, observed=siblings)

    p_disab = pm.Beta("p_disab", 1.0, 1.0)
    disability_imp = pm.Bernoulli("disability_imp",
                                  p_disab,
                                  observed=masked_values(disability,
                                                         value=-999))

    p_mother = pm.Beta("p_mother", 1.0, 1.0)
    mother_imp = pm.Bernoulli("mother_imp",
                              p_mother,
                              observed=masked_values(mother_hs, value=-999))

    s = pm.HalfCauchy("s", 5.0, testval=5)
    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)

    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp +
                      beta[3] * disability_imp + beta[4] * age +
                      beta[5] * mother_imp + beta[6] * early_ident)

    observed_score = pm.Normal("observed_score",
                               expected_score,
                               s,
                               observed=score)

with model:
    start = pm.find_MAP()
    step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
    step2 = pm.BinaryGibbsMetropolis(
        [mother_imp.missing_values, disability_imp.missing_values])
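masked_values above comes from numpy's masked-array module; PyMC3 treats the masked entries of an observed array as missing values and imputes them during sampling. A small sketch of the convention (illustrative data):

import numpy as np
from numpy.ma import masked_values

disability = np.array([0, 1, -999, 1])          # -999 flags a missing response
masked = masked_values(disability, value=-999)
print(masked.mask)                              # [False False  True False]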
Example #24
def model_factory(X_continuos, X_categorical_selection, X_categorical_gender,
                  X_categorical_audience, X_categorical_browser,
                  X_categorical_city, X_categorical_device, y_data,
                  variables_to_be_used, variant_df, arviz_inference, samples):
    """ please check run_model_oob's function docstring below for a description  
        of the inputs.
    """

    with pm.Model(coords=coords) as varying_intercept_slope_noncentered:  # `coords` is assumed to be defined in the enclosing scope

        # build tensors from Pandas DataFrame/Series
        X_continuos_var = pm.Data('X_continuos',
                                  X_continuos,
                                  dims=("X_continuos_index"))
        X_categorical_selection_var = pm.Data(
            'X_categorical_selection',
            X_categorical_selection,
            dims=("X_categorical_selection_index"))
        X_categorical_gender_var = pm.Data('X_categorical_gender',
                                           X_categorical_gender,
                                           dims=("X_categorical_gender_index"))
        X_categorical_audience_var = pm.Data(
            'X_categorical_audience',
            X_categorical_audience,
            dims=("X_categorical_audience_index"))
        X_categorical_browser_var = pm.Data(
            'X_categorical_browser',
            X_categorical_browser,
            dims=("X_categorical_browser_index"))
        X_categorical_city_var = pm.Data('X_categorical_city',
                                         X_categorical_city,
                                         dims=("X_categorical_city_index"))
        X_categorical_device_var = pm.Data('X_categorical_device',
                                           X_categorical_device,
                                           dims=("X_categorical_device_index"))

        # hyperpriors for intercept
        mu_alpha_tmp = pm.Laplace('mu_alpha_tmp',
                                  mu=0.05,
                                  b=1.,
                                  shape=(variant_df.shape[0] - 1))
        mu_alpha = theano.tensor.concatenate([[0], mu_alpha_tmp])

        sigma_alpha_tmp = pm.HalfNormal('sigma_alpha_tmp',
                                        sigma=1.,
                                        shape=(variant_df.shape[0] - 1))
        sigma_alpha = theano.tensor.concatenate([[0], sigma_alpha_tmp])

        # prior for non-centered random intercepts
        u = pm.Laplace('u', mu=0.05, b=1.)

        # random intercept
        alpha_eq = mu_alpha + u * sigma_alpha
        alpha_eq_deter = pm.Deterministic('alpha_eq_deter', alpha_eq)
        alpha = pm.Laplace('alpha',
                           mu=alpha_eq_deter,
                           b=1.,
                           shape=(variant_df.shape[0]))

        #######################################################################

        # hyperpriors for slopes (continuous)
        mu_beta_continuos_tmp = pm.Laplace('mu_beta_continuos_tmp',
                                           mu=0.05,
                                           b=1.,
                                           shape=(1,
                                                  (variant_df.shape[0] - 2)))
        mu_beta_continuos = theano.tensor.concatenate(
            [np.zeros((1, 1)), mu_beta_continuos_tmp], axis=1)
        sigma_beta_continuos_tmp = pm.HalfNormal(
            'sigma_beta_continuos_tmp',
            sigma=1.,
            shape=(1, (variant_df.shape[0] - 2)))
        sigma_beta_continuos = theano.tensor.concatenate(
            [np.zeros((1, 1)), sigma_beta_continuos_tmp], axis=1)

        # prior for non-centered random slope (continuous)
        g = pm.Laplace('g', mu=0.05, b=1., shape=(1, 1))

        # random slopes (continuous)
        beta_continuos_eq = mu_beta_continuos + pm.math.dot(
            g, sigma_beta_continuos)
        beta_con_deter_percentage = pm.Deterministic(
            'beta_con_deter_percentage', beta_continuos_eq)
        beta_con_tmp_percentage = pm.Laplace('beta_con_tmp_percentage',
                                             mu=beta_con_deter_percentage,
                                             b=1.,
                                             shape=(1,
                                                    (variant_df.shape[0] - 1)))
        beta_con_percentage = theano.tensor.concatenate(
            [np.zeros((1, 1)), beta_con_tmp_percentage], axis=1)

        # expected value (continuous)
        dot_product_continuos = pm.math.dot(
            theano.tensor.shape_padaxis(X_continuos_var, axis=1),
            beta_con_percentage)

        #######################################################################

        # hyperpriors for slopes (categorical_selection)
        mu_beta_categorical_selection_tmp = pm.Laplace(
            'mu_beta_categorical_selection_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            mu_beta_categorical_selection_tmp
        ],
                                                                  axis=1)
        sigma_beta_categorical_selection_tmp = pm.HalfNormal(
            'sigma_beta_categorical_selection_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            sigma_beta_categorical_selection_tmp
        ],
                                                                     axis=1)

        # prior for non-centered random slope (categorical_selection)
        non_centered_selection = pm.Laplace(
            'non_centered_selection',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   len(pd.unique(X_categorical_selection))))

        # random slopes (categorical_selection)
        beta_categorical_eq_selection = mu_beta_categorical_selection + pm.math.dot(
            non_centered_selection, sigma_beta_categorical_selection)
        beta_cat_deter_selection = pm.Deterministic(
            'beta_cat_deter_selection', beta_categorical_eq_selection)
        beta_cat_tmp_selection = pm.Laplace(
            'beta_cat_tmp_selection',
            mu=beta_cat_deter_selection,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 1)))
        beta_cat_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            beta_cat_tmp_selection
        ],
                                                       axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_gender)
        mu_beta_categorical_gender_tmp = pm.Laplace(
            'mu_beta_categorical_gender_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_gender)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_gender = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_gender)), 1)),
            mu_beta_categorical_gender_tmp
        ],
                                                               axis=1)
        sigma_beta_categorical_gender_tmp = pm.HalfNormal(
            'sigma_beta_categorical_gender_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_gender)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_gender = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_gender)), 1)),
            sigma_beta_categorical_gender_tmp
        ],
                                                                  axis=1)

        # prior for non-centered random slope (categorical_gender)
        non_centered_gender = pm.Laplace(
            'non_centered_gender',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_gender)),
                   len(pd.unique(X_categorical_gender))))

        # random slopes (categorical_gender)
        beta_categorical_eq_gender = mu_beta_categorical_gender + pm.math.dot(
            non_centered_gender, sigma_beta_categorical_gender)
        beta_cat_deter_gender = pm.Deterministic('beta_cat_deter_gender',
                                                 beta_categorical_eq_gender)
        beta_cat_tmp_gender = pm.Laplace('beta_cat_tmp_gender',
                                         mu=beta_cat_deter_gender,
                                         b=1.,
                                         shape=(len(
                                             pd.unique(X_categorical_gender)),
                                                (variant_df.shape[0] - 1)))
        beta_cat_gender = theano.tensor.concatenate([
            np.zeros(
                (len(pd.unique(X_categorical_gender)), 1)), beta_cat_tmp_gender
        ],
                                                    axis=1)

        # hyperpriors for slopes (categorical_audience)
        mu_beta_categorical_audience_tmp = pm.Laplace(
            'mu_beta_categorical_audience_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            mu_beta_categorical_audience_tmp
        ],
                                                                 axis=1)
        sigma_beta_categorical_audience_tmp = pm.HalfNormal(
            'sigma_beta_categorical_audience_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            sigma_beta_categorical_audience_tmp
        ],
                                                                    axis=1)

        # prior for non-centered random slope (categorical_audience)
        non_centered_audience = pm.Laplace(
            'non_centered_audience',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   len(pd.unique(X_categorical_audience))))

        # random slopes (categorical_audience)
        beta_categorical_eq_audience = mu_beta_categorical_audience + pm.math.dot(
            non_centered_audience, sigma_beta_categorical_audience)
        beta_cat_deter_audience = pm.Deterministic(
            'beta_cat_deter_audience', beta_categorical_eq_audience)
        beta_cat_tmp_audience = pm.Laplace(
            'beta_cat_tmp_audience',
            mu=beta_cat_deter_audience,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 1)))
        beta_cat_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            beta_cat_tmp_audience
        ],
                                                      axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_browser)
        mu_beta_categorical_browser_tmp = pm.Laplace(
            'mu_beta_categorical_browser_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            mu_beta_categorical_browser_tmp
        ],
                                                                axis=1)
        sigma_beta_categorical_browser_tmp = pm.HalfNormal(
            'sigma_beta_categorical_browser_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            sigma_beta_categorical_browser_tmp
        ],
                                                                   axis=1)

        # prior for non-centered random slope (categorical_browser)
        non_centered_browser = pm.Laplace(
            'non_centered_browser',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   len(pd.unique(X_categorical_browser))))

        # random slopes (categorical_browser)
        beta_categorical_eq_browser = mu_beta_categorical_browser + pm.math.dot(
            non_centered_browser, sigma_beta_categorical_browser)
        beta_cat_deter_browser = pm.Deterministic('beta_cat_deter_browser',
                                                  beta_categorical_eq_browser)
        beta_cat_tmp_browser = pm.Laplace(
            'beta_cat_tmp_browser',
            mu=beta_cat_deter_browser,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 1)))
        beta_cat_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            beta_cat_tmp_browser
        ],
                                                     axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_city)
        mu_beta_categorical_city_tmp = pm.Laplace(
            'mu_beta_categorical_city_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_city = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_city)), 1)),
            mu_beta_categorical_city_tmp
        ],
                                                             axis=1)
        sigma_beta_categorical_city_tmp = pm.HalfNormal(
            'sigma_beta_categorical_city_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_city = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_city)), 1)),
            sigma_beta_categorical_city_tmp
        ],
                                                                axis=1)

        # prior for non-centered random slope (categorical_city)
        non_centered_city = pm.Laplace(
            'non_centered_city',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   len(pd.unique(X_categorical_city))))

        # random slopes (categorical_city)
        beta_categorical_eq_city = mu_beta_categorical_city + pm.math.dot(
            non_centered_city, sigma_beta_categorical_city)
        beta_cat_deter_city = pm.Deterministic('beta_cat_deter_city',
                                               beta_categorical_eq_city)
        beta_cat_tmp_city = pm.Laplace('beta_cat_tmp_city',
                                       mu=beta_cat_deter_city,
                                       b=1.,
                                       shape=(len(
                                           pd.unique(X_categorical_city)),
                                              (variant_df.shape[0] - 1)))
        beta_cat_city = theano.tensor.concatenate([
            np.zeros(
                (len(pd.unique(X_categorical_city)), 1)), beta_cat_tmp_city
        ],
                                                  axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_device)
        mu_beta_categorical_device_tmp = pm.Laplace(
            'mu_beta_categorical_device_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_device = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_device)), 1)),
            mu_beta_categorical_device_tmp
        ],
                                                               axis=1)
        sigma_beta_categorical_device_tmp = pm.HalfNormal(
            'sigma_beta_categorical_device_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_device = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_device)), 1)),
            sigma_beta_categorical_device_tmp
        ],
                                                                  axis=1)

        # prior for non-centered random slope (categorical_device)
        non_centered_device = pm.Laplace(
            'non_centered_device',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   len(pd.unique(X_categorical_device))))

        # random slopes (categorical_device)
        beta_categorical_eq_device = mu_beta_categorical_device + pm.math.dot(
            non_centered_device, sigma_beta_categorical_device)
        beta_cat_deter_device = pm.Deterministic('beta_cat_deter_device',
                                                 beta_categorical_eq_device)
        beta_cat_tmp_device = pm.Laplace('beta_cat_tmp_device',
                                         mu=beta_cat_deter_device,
                                         b=1.,
                                         shape=(len(
                                             pd.unique(X_categorical_device)),
                                                (variant_df.shape[0] - 1)))
        beta_cat_device = theano.tensor.concatenate([
            np.zeros(
                (len(pd.unique(X_categorical_device)), 1)), beta_cat_tmp_device
        ],
                                                    axis=1)
        # theano.printing.Print('vector', attrs=['shape'])(beta_cat_device)

        #######################################################################

        # hyperpriors for epsilon
        sigma_epsilon = pm.HalfNormal('sigma_epsilon',
                                      sigma=1.,
                                      shape=(variant_df.shape[0]))

        # epsilon
        epsilon = pm.HalfNormal(
            'epsilon',
            sigma=sigma_epsilon,  # not working
            shape=(variant_df.shape[0]))

        #######################################################################

        y_hat_tmp = (alpha + dot_product_continuos +
                     beta_cat_selection[X_categorical_selection_var, :] +
                     beta_cat_gender[X_categorical_gender_var, :] +
                     beta_cat_audience[X_categorical_audience_var, :] +
                     beta_cat_browser[X_categorical_browser_var, :] +
                     beta_cat_city[X_categorical_city_var, :] +
                     beta_cat_device[X_categorical_device_var, :] + epsilon)

        # softmax
        y_hat = theano.tensor.nnet.softmax(y_hat_tmp)
        # theano.printing.Print('vector', attrs=['shape'])(y_hat)

        # likelihood
        y_likelihood = pm.Categorical('y_likelihood', p=y_hat, observed=y_data)

        # predicting new values from the posterior distribution of the previously trained model
        # Check whether the predicted output is correct (e.g. if we have 4 classes to be predicted,
        # then there should be present the numbers 0, 1, 2, 3 ... no more, no less!)
        post_pred_big_tmp = pm.sample_posterior_predictive(
            trace=arviz_inference, samples=samples)

    return post_pred_big_tmp
Example #25
    def _get_beta(self):
        beta = pymc3.Laplace('beta',
                             0,
                             self.weight_prior_scale,
                             shape=self.input_data_dimension)
        return beta
Example #26
    def _get_alpha(self):
        alpha = pymc3.Laplace('alpha', 0, self.weight_prior_scale)
        return alpha
Example #27
    def generate_priors(self):
        """Set up the priors for the model."""
        with self.model:

            if self.minibatch:
                self.series = self.g['train']['n_series_idx'].eval()
            else:
                self.series = self.g['train']['n_series_idx']

            self.series_full = self.g['train']['n_series_idx_full']

            # In the case of a normal likelihood we need to define sigma
            self.priors['sigma'] = pm.HalfNormal('sigma',
                                                 0.001,
                                                 shape=self.g['train']['s'])

            if self.piecewise_out:
                # Normal likelihood
                if self.partial_pool_mean:
                    for group in self.levels:
                        # priors for the group effects
                        # we want a partial pooling effect, so reduce the sd of the several parameters,
                        # while defining a wider hyperparameter

                        # Hyperprior for the mean
                        self.priors["mu_b_%s" % group] = pm.Normal("mu_b_%s" %
                                                                   group,
                                                                   mu=0.0,
                                                                   sd=0.01)
                        self.priors["mu_k_%s" % group] = pm.Normal(
                            'mu_k_%s' % group, 0.0, 0.01)
                        self.priors["mu_m_%s" % group] = pm.Normal(
                            'mu_m_%s' % group, 0.0, 0.01)
                        self.priors["mu_a_%s" % group] = pm.Normal(
                            'mu_a_%s' % group, 0.0, 0.1)
                        # Hyperprior for the std
                        self.priors["sd_b_%s" % group] = pm.HalfNormal(
                            "sd_b_%s" % group, sd=0.01)
                        self.priors["sd_k_%s" % group] = pm.HalfNormal(
                            'sd_k_%s' % group, 0.01)
                        self.priors["sd_m_%s" % group] = pm.HalfNormal(
                            'sd_m_%s' % group, 0.01)
                        self.priors["sd_a_%s" % group] = pm.HalfNormal(
                            'sd_a_%s' % group, 0.01)

                        # Partially pooled parameters
                        self.priors["b_%s" % group] = pm.Normal(
                            "b_%s" % group,
                            self.priors["mu_b_%s" % group],
                            self.priors["sd_b_%s" % group],
                            shape=(self.changepoints.shape[0],
                                   self.g['train']['groups_n'][group]))
                        self.priors["k_%s" % group] = pm.Normal(
                            "k_%s" % group,
                            self.priors["mu_k_%s" % group],
                            self.priors["sd_k_%s" % group],
                            shape=self.g['train']['groups_n'][group])
                        self.priors["m_%s" % group] = pm.Normal(
                            "m_%s" % group,
                            self.priors["mu_m_%s" % group],
                            self.priors["sd_m_%s" % group],
                            shape=self.g['train']['groups_n'][group])
                        self.priors["a_%s" % group] = pm.Normal(
                            "a_%s" % group,
                            self.priors["mu_a_%s" % group],
                            self.priors["sd_a_%s" % group],
                            shape=self.g['train']['groups_n'][group])
                else:
                    self.priors["k"] = pm.Normal('k',
                                                 0.0,
                                                 0.01,
                                                 shape=self.g['train']['s'])
                    self.priors["m"] = pm.Normal('m',
                                                 0.0,
                                                 0.01,
                                                 shape=self.g['train']['s'])
                    self.priors["b"] = pm.Normal(
                        'b',
                        0.,
                        0.01,
                        shape=(self.changepoints.shape[0],
                               self.g['train']['s']))

            # prior for the periodic kernel (seasonality)
            self.priors["period"] = pm.Laplace("period", self.season, 0.1)

            for group in self.levels:

                # priors for the kernels of each group

                # The inverse gamma is very useful to inform our prior dist of the length scale
                # because it suppresses both zero and infinity.
                # The data don't inform length scales larger than the maximum covariate distance
                # or shorter than the minimum covariate distance (the distance between time
                # points, which is always 1 in our case).

                # Parameters expQuad kernel
                self.priors["l_t_%s" % group] = pm.InverseGamma(
                    'l_t_%s' % group,
                    4,
                    self.g['train']['n'],
                    shape=self.g['train']['groups_n'][group])
                self.priors["eta_t_%s" % group] = pm.HalfNormal(
                    'eta_t_%s' % group,
                    1,
                    shape=self.g['train']['groups_n'][group])

                # Parameters periodic kernel
                self.priors["l_p_%s" % group] = pm.HalfNormal(
                    'l_p_%s' % group,
                    0.5,
                    shape=self.g['train']['groups_n'][group])
                self.priors["eta_p_%s" % group] = pm.HalfNormal(
                    'eta_p_%s' % group,
                    1.5,
                    shape=self.g['train']['groups_n'][group])

                # Parameters white noise kernel
                self.priors["sigma_%s" % group] = pm.HalfNormal(
                    'sigma_%s' % group,
                    0.001,
                    shape=self.g['train']['groups_n'][group])

                if self.piecewise_out:
                    # If piecewise_out is true the piecewise function is defined outside
                    # of the Gaussian processes, therefore nothing is done here
                    pass
                elif np.any(self.changepoints):
                    if self.partial_pool_mean:
                        # Parameters for the piecewise linear function defined as GPs mean functions
                        # with a normal likelihood -> wider intervals as we don't have the log-link
                        # function
                        self.priors["hy_b_%s" % group] = pm.Normal("hy_b_%s" %
                                                                   group,
                                                                   mu=0.0,
                                                                   sd=0.1)
                        self.priors["hy_k_%s" % group] = pm.Normal(
                            'hy_k_%s' % group, 0.0, 0.1)
                        self.priors["hy_m_%s" % group] = pm.Normal(
                            'hy_m_%s' % group, 0.0, 0.1)
                        # priors for the group effects
                        # we want a partial pooling effect, so reduce the sd of the several parameters,
                        # while defining a wider hyperparameter
                        # Partially pooled parameters
                        self.priors["b_%s" % group] = pm.Normal(
                            "b_%s" % group,
                            self.priors["hy_b_%s" % group],
                            0.01,
                            shape=(self.changepoints.shape[0],
                                   self.g['train']['groups_n'][group]))
                        self.priors["k_%s" % group] = pm.Normal(
                            "k_%s" % group,
                            self.priors["hy_k_%s" % group],
                            0.01,
                            shape=self.g['train']['groups_n'][group])
                        self.priors["m_%s" % group] = pm.Normal(
                            "m_%s" % group,
                            self.priors["hy_m_%s" % group],
                            0.01,
                            shape=self.g['train']['groups_n'][group])
                    else:
                        # Parameters for the piecewise linear function defined as GPs mean functions
                        # with a normal likelihood -> wider intervals as we don't have the log-link
                        # function
                        self.priors["b_%s" % group] = pm.Normal(
                            'b_%s' % group,
                            0.0,
                            0.05,
                            shape=(self.changepoints.shape[0],
                                   self.g['train']['groups_n'][group]))
                        self.priors["k_%s" % group] = pm.Normal(
                            'k_%s' % group,
                            0.0,
                            0.1,
                            shape=self.g['train']['groups_n'][group])
                        self.priors["m_%s" % group] = pm.Normal(
                            'm_%s' % group,
                            0.0,
                            0.1,
                            shape=self.g['train']['groups_n'][group])
                elif self.kernel_lin_mean:
                    # Parameters linear kernel to model the mean of the GP
                    self.priors["c_%s" % group] = pm.Normal(
                        'c_%s' % group,
                        0,
                        0.1,
                        shape=self.g['train']['groups_n'][group])
                    self.priors["sigma_l_%s" % group] = pm.HalfNormal(
                        'sigma_l_%s' % group,
                        1,
                        shape=self.g['train']['groups_n'][group])
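The comment block above motivates the InverseGamma(4, n) length-scale prior: it places little mass near zero and near infinity. A sketch that checks this numerically with scipy (illustrative n; scipy's scale corresponds to PyMC3's beta):

import scipy.stats as st

n = 100                            # number of time points
prior = st.invgamma(a=4, scale=n)  # matches pm.InverseGamma(..., 4, n)
print(prior.ppf([0.01, 0.99]))     # central 98% of the mass sits well away from 0 and from values much larger than n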
Example #28
    Ex_t = pm.Deterministic(
        'Ex',
        initialize_elasticity(ll.N,
                              b=0.01,
                              sd=1,
                              alpha=None,
                              m_compartments=m_compartments,
                              r_compartments=r_compartments))

    Ey_t = T.as_tensor_variable(Ey)

    e_measured = pm.Normal('log_e_measured',
                           mu=np.log(en),
                           sd=0.2,
                           shape=(n_exp, len(e_inds)))
    e_unmeasured = pm.Laplace('log_e_unmeasured',
                              mu=0,
                              b=0.1,
                              shape=(n_exp, len(e_laplace_inds)))
    log_en_t = T.concatenate(
        [e_measured, e_unmeasured,
         T.zeros((n_exp, len(e_zero_inds)))], axis=1)[:, e_indexer]

    pm.Deterministic('log_en_t', log_en_t)

    # Priors on external concentrations
    yn_t = pm.Normal('yn_t',
                     mu=0,
                     sd=10,
                     shape=(n_exp, ll.ny),
                     testval=0.1 * np.random.randn(n_exp, ll.ny))

    chi_ss, vn_ss = ll.steady_state_theano(Ex_t, Ey_t, T.exp(log_en_t), yn_t)
Example #29
def initialize_elasticity(N, name=None, b=0.01, alpha=5, sd=1,
                          m_compartments=None, r_compartments=None):
    """ Initialize the elasticity matrix, adjusting priors to account for
    reaction stoichiometry. Uses `SkewNormal(mu=0, sd=sd, alpha=sign*alpha)`
    for reactions in which a metabolite participates, and a `Laplace(mu=0,
    b=b)` for off-target regulation. 

    Also accepts compartments for metabolites and reactions. If given,
    metabolites are only given regulatory priors if they come from the same
    compartment as the reaction.
    
    Parameters
    ==========

    N : np.ndarray
        A (nm x nr) stoichiometric matrix for the given reactions and metabolites
    name : string
        A name to be used for the returned pymc3 probabilities
    b : float
        Hyperprior to use for the Laplace distributions on regulatory interactions
    alpha : float
        Hyperprior to use for the SkewNormal distributions. As alpha ->
        infinity, these priors begin to resemble half-normal distributions.
    sd : float
        Scale parameter for the SkewNormal distribution.
    m_compartments : list
        Compartments of metabolites. If None, use a densely connected
        regulatory prior.
    r_compartments : list
        Compartments of reactions

    Returns
    =======

    E : pymc3 matrix
        constructed elasticity matrix

    """
    
    if name is None:
        name = 'ex'

    if m_compartments is not None:
        assert r_compartments is not None, \
            "reaction and metabolite compartments must both be given"

        regulation_array = np.array(
            [[m_comp in r_comp for m_comp in m_compartments]
             for r_comp in r_compartments]).flatten()
        
    else:
        # If compartment information is not given, assume all metabolites and
        # reactions are in the same compartment
        regulation_array = np.array([True] * (N.shape[0] * N.shape[1]))


    # Guess an elasticity matrix from the smallbone approximation
    e_guess = -N.T

    # Find where the guessed E matrix has zero entries
    e_flat = e_guess.flatten()
    nonzero_inds = np.where(e_flat != 0)[0]
    offtarget_inds = np.where(e_flat == 0)[0]
    e_sign = np.sign(e_flat[nonzero_inds])

    # For the zero entries, determine whether regulation is feasible based on
    # the compartment comparison
    offtarget_reg = regulation_array[offtarget_inds]
    reg_inds = offtarget_inds[offtarget_reg]
    zero_inds = offtarget_inds[~offtarget_reg]

    num_nonzero = len(nonzero_inds)
    num_regulations = len(reg_inds)
    num_zeros = len(zero_inds)
    
    # Get an index vector that 'unrolls' a stacked [kinetic, capacity, zero]
    # vector into the correct order
    flat_indexer = np.hstack([nonzero_inds, reg_inds, zero_inds]).argsort()
        
    if alpha is not None:
        e_kin_entries = pm.SkewNormal(
            name + '_kinetic_entries', sd=sd, alpha=alpha, shape=num_nonzero,
            testval= 0.1 + np.abs(np.random.randn(num_nonzero)))
    else:
        e_kin_entries = pm.HalfNormal(
            name + '_kinetic_entries', sd=sd, shape=num_nonzero,
            testval= 0.1 + np.abs(np.random.randn(num_nonzero)))
    
    e_cap_entries = pm.Laplace(
        name + '_capacity_entries', mu=0, b=b, shape=num_regulations,
        testval=b * np.random.randn(num_regulations))
    
    flat_e_entries = T.concatenate(
        [e_kin_entries * e_sign,  # kinetic entries
         e_cap_entries,           # capacity entries
         T.zeros(num_zeros)])     # different compartments
        
    E = flat_e_entries[flat_indexer].reshape(N.T.shape)
    
    return E
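A hypothetical usage sketch inside a model context (toy 2-metabolite, 3-reaction stoichiometry; it assumes numpy, pymc3 as pm, and theano.tensor as T are imported, as the function above requires):

import numpy as np
import pymc3 as pm

N = np.array([[1., -1., 0.],
              [0., 1., -1.]])
with pm.Model():
    Ex = pm.Deterministic('Ex', initialize_elasticity(N, name='ex'))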
Example #30
jpfont = FontProperties(fname=FontPath)
#%% Generate data from the regression model
n = 50
np.random.seed(99)
u = st.norm.rvs(scale=0.7, size=n)
x = st.uniform.rvs(loc=-np.sqrt(3.0), scale=2.0 * np.sqrt(3.0), size=n)
y = 1.0 + 2.0 * x + u
#%% Set up the posterior for the regression coefficients and the error-term variance (Laplace + half-Cauchy priors)
b0 = np.zeros(2)
tau_coef = np.ones(2)
tau_sigma = 1.0
regression_laplace_halfcauchy = pm.Model()
with regression_laplace_halfcauchy:
    sigma = pm.HalfCauchy('sigma', beta=tau_sigma)
    a = pm.Laplace('a', mu=b0[0], b=tau_coef[0])
    b = pm.Laplace('b', mu=b0[1], b=tau_coef[1])
    y_hat = a + b * x
    likelihood = pm.Normal('y', mu=y_hat, sigma=sigma, observed=y)
#%% Sample from the posterior
n_draws = 5000
n_chains = 4
n_tune = 1000
with regression_laplace_halfcauchy:
    trace = pm.sample(draws=n_draws,
                      chains=n_chains,
                      tune=n_tune,
                      random_seed=123)
    print(pm.summary(trace))
#%% Plot the posterior distributions
k = b0.size