Example #1
File: shlm.py Project: dirmeier/shm
    def _hlm(self, model, gamma):
        with model:
            logger.info("Using tau_b_alpha: {}".format(self.tau_b_alpha))
            tau_b = pm.InverseGamma("tau_b",
                                    alpha=self.tau_b_alpha,
                                    beta=1.,
                                    shape=1)
            beta = pm.Normal("beta", 0, sd=tau_b, shape=self.n_gene_condition)

            logger.info("Using tau_iota_alpha: {}".format(self.tau_iota_alpha))
            l_tau = pm.InverseGamma("tau_iota",
                                    alpha=self.tau_iota_alpha,
                                    beta=1.,
                                    shape=1)
            l = pm.Normal("iota", mu=0, sd=l_tau, shape=self.n_interventions)

            mu = (gamma[self._gene_data_idx] + beta[self._gene_cond_data_idx] +
                  l[self._intervention_data_idx])

            if self.family == Family.gaussian:
                logger.info("Using sd_alpha: {}".format(self.sd_alpha))
                sd = pm.InverseGamma("sd",
                                     alpha=self.sd_alpha,
                                     beta=1.,
                                     shape=1)
                pm.Normal("x",
                          mu=mu,
                          sd=sd,
                          observed=np.squeeze(self.data[READOUT].values))
            else:
                raise NotImplementedError("Only gaussian family so far")

        return tau_b, beta, l_tau, l, sd
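A minimal standalone sketch of the same pattern (InverseGamma-scaled Normal coefficients indexed into a Gaussian likelihood), assuming pymc3 and synthetic index arrays; every name below is illustrative and not part of the original class:

import numpy as np
import pymc3 as pm

n_gene_condition, n_obs = 4, 100
gene_cond_idx = np.random.randint(0, n_gene_condition, size=n_obs)
readout = np.random.normal(size=n_obs)

with pm.Model():
    tau_b = pm.InverseGamma("tau_b", alpha=3., beta=1., shape=1)
    beta = pm.Normal("beta", 0, sd=tau_b, shape=n_gene_condition)
    sd = pm.InverseGamma("sd", alpha=3., beta=1., shape=1)
    pm.Normal("x", mu=beta[gene_cond_idx], sd=sd, observed=readout)
    trace = pm.sample(500, tune=500, cores=1)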
Example #2
    def _hlm(self, model, gamma):
        with model:
            logger.info("Using tau_b_alpha: {}".format(self.tau_b_alpha))
            tau_b = pm.InverseGamma("tau_b",
                                    alpha=self.tau_b_alpha,
                                    beta=1.,
                                    shape=1)
            beta = pm.Normal("beta", 0, sd=tau_b, shape=self.n_gene_condition)

            logger.info("Using tau_iota_alpha: {}".format(self.tau_iota_alpha))
            l_tau = pm.InverseGamma("tau_iota",
                                    alpha=self.tau_iota_alpha,
                                    beta=1.,
                                    shape=1)
            l = pm.Normal("iota", mu=0, sd=l_tau, shape=self.n_interventions)

            logger.info("Using kappa_sd: {}".format(self.kappa_sd))
            c = pm.Normal("kappa", 0, self.kappa_sd, shape=1)

            if self._affinity == "data":
                logger.info("Using affinity from data")
                q = self.data[AFFINITY].values
            elif self._affinity == "leaveout":
                logger.info("Using no affinity")
                q = 1
            elif self._affinity == "estimate":
                logger.info("Estimating affinity from data")
                q = pm.Uniform("aff",
                               lower=0,
                               upper=1,
                               shape=self.n_interventions)
            else:
                raise ValueError("Wrong affinity")

            mu = l[self._intervention_data_idx]

            ll = (gamma[self._gene_data_idx] + beta[self._gene_cond_data_idx] +
                  c * self.data[COPYNUMBER].values)
            if self._affinity == "estimate":
                mu += q[self._intervention_data_idx] * ll
            else:
                mu += q * ll

            if self.family == Family.gaussian:
                logger.info("Using sd_alpha: {}".format(self.sd_alpha))
                sd = pm.InverseGamma("sd",
                                     alpha=self.sd_alpha,
                                     beta=1.,
                                     shape=1)
                pm.Normal("x",
                          mu=mu,
                          sd=sd,
                          observed=np.squeeze(self.data[READOUT].values))
            else:
                raise NotImplementedError("Only gaussian family so far")

        if self._affinity == "estimate":
            return tau_b, beta, l_tau, l, sd, q, c
        return tau_b, beta, l_tau, l, sd, c
Example #3
def garch_baseline_model(data):
    with pm.Model() as model:
        omega = pm.InverseGamma("omega", alpha=2.5, beta=0.05)
        alpha1 = pm.Uniform("alpha1", 0, 1)
        beta1 = pm.Uniform("beta1", 0, 1)
        # The initial volatility needs its own variable name: reusing "omega"
        # would raise a duplicate-name error.
        init_vol = pm.InverseGamma("initial_vol", alpha=2.5, beta=0.05)
        # pymc3's GARCH11 takes alpha_1/beta_1 keyword arguments.
        returns = pm.GARCH11('returns', omega=omega, alpha_1=alpha1, beta_1=beta1,
                             initial_vol=init_vol, shape=len(data),
                             observed=data['returns'])
    return model
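A hedged usage sketch for the fixed function above, assuming a DataFrame with a 'returns' column; pm.GARCH11 comes from pymc3.distributions.timeseries and can be slow to sample:

import numpy as np
import pandas as pd
import pymc3 as pm

df = pd.DataFrame({"returns": np.random.normal(0, 0.01, size=250)})
model = garch_baseline_model(df)
with model:
    trace = pm.sample(500, tune=500, cores=1)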
Example #4
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd, inv_a_upd,
                             inv_b_upd, iv_upd, strategy, stock_price,
                             strike_price, risk_free, time):
    with pm.Model() as update_model:
        prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd)

        likelihood = pm.InverseGamma('like',
                                     inv_a_upd,
                                     inv_b_upd,
                                     observed=iv_upd)

    with update_model:
        # step = pm.Metropolis()

        v_trace_update = pm.sample(10000, tune=1000)
        #print(v_trace['bv'][:])
        trace_update = v_trace_update['bv'][:]
        #print(trace)

    pm.traceplot(v_trace_update)
    plt.show()

    pm.autocorrplot(v_trace_update)
    plt.show()

    pm.plot_posterior(v_trace_update[100:],
                      color='#87ceeb',
                      point_estimate='mean')
    plt.show()

    s = pm.summary(v_trace_update).round(2)
    print("\n Summary")
    print(s)

    a = np.random.choice(trace_update, 10000, replace=True)
    ar = []
    for i in range(9999):
        t = a[i] / 100
        ar.append(t)
    #print("Bayesian Volatility Values", ar)

    op = []
    for i in range(9999):
        temp = BS_price(strategy, stock_price, strike_price, risk_free, ar[i],
                        time)
        op.append(temp)
    #print("Bayesian Option Prices", op)

    plt.hist(ar, bins=50)
    plt.title("Volatility")
    plt.ylabel("Frequency")
    plt.show()

    plt.hist(op, bins=50)
    plt.title("Option Price")
    plt.ylabel("Frequency")
    plt.show()
    return trace_update
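BS_price is not defined in this snippet; a plausible Black-Scholes helper consistent with the call signature might look like the sketch below, where treating `strategy == 'C'` as a call option is an assumption:

import numpy as np
from scipy.stats import norm

def BS_price(strategy, S, K, r, sigma, T):
    # Standard Black-Scholes terms.
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    if strategy == 'C':  # hypothetical flag for a call; anything else priced as a put
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)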
Example #5
    def run_model(self, **kwargs):
        """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
        # Importing here since Theano base_compiledir needs to be set prior to import
        import pymc3 as pm

        click.echo("Building model")
        with pm.Model() as self.model:
            # Constants
            N = len(self.backgrounds)
            M = len(self.training_genes)
            MN = M * N

            # Prior constants
            mu_exp = self.df[self.training_genes].mean().mean()
            sd_exp = self.df[self.training_genes].std().mean()

            # Gene Model Priors
            gm_sd = pm.InverseGamma("gm_sd", 1, 1, shape=MN)
            gm_mu = pm.Normal("gm_mu", mu_exp, sd_exp, shape=MN)

            # Gene model
            pm.Normal(
                "x_hat",
                mu=gm_mu[self.x_ix],
                sd=gm_sd[self.x_ix],
                shape=MN,
                observed=self.index_df.value,
            )
            x = pm.Normal("x", mu=gm_mu, sd=gm_sd, shape=MN)

            # Likelihood priors
            eps = pm.InverseGamma("eps", 1, 1)
            if N == 1:
                beta = [1]
            else:
                beta = pm.Dirichlet("beta", a=np.ones(N))

            # Likelihood
            norm = np.zeros(M)
            gm_sd_2d = gm_sd.reshape((M, N))
            for i in range(N):
                norm += beta[i] / gm_sd_2d[:, i]
            norm = pm.Deterministic("norm", norm)

            y = pm.Deterministic(
                "y", pm.math.dot((x / gm_sd).reshape((M, N)), beta))
            norm_eps = pm.Deterministic("norm_eps", eps / norm)
            sample_genes = self.sample[self.training_genes].values
            pm.Laplace("y_hat",
                       mu=(y / norm)[self.s_ix],
                       b=norm_eps,
                       observed=sample_genes)

            trace = pm.sample(**kwargs)
        self.trace = trace
        click.echo("Calculating posterior predictive samples")
        self.ppc = pm.sample_posterior_predictive(trace, model=self.model)
Example #6
def run_pymc3(model_error):
    np.random.seed(182152)

    my_loglike = LogLike(model_error)
    logl = LogLikeWithGrad(my_loglike)
    with pm.Model():
        # Define priors. Make sure this list matches the extraction order of latent_parameters.
        b = pm.Normal("b", 3.0, sd=1.0)
        # For InverseGamma(alpha, beta): mean = beta/(alpha-1) and
        # var = beta**2 / ((alpha-1)**2 * (alpha-2)), so alpha must exceed 2 for a
        # finite variance; alpha=4 is a common choice, which gives
        # beta = mean * (alpha - 1) = mean * 3.
        noise_function_prior_mean = 0.2
        sigma_std_function = pm.InverseGamma("sigma_std_function",
                                             alpha=4.,
                                             beta=noise_function_prior_mean * 3)
        noise_derivative_prior_mean = 0.2
        sigma_std_derivative = pm.InverseGamma("sigma_std_derivative",
                                               alpha=4.,
                                               beta=noise_derivative_prior_mean * 3)

        theta = tt.as_tensor_variable(
            [b, sigma_std_function, sigma_std_derivative])
        #        pm.DensityDist("likelihood", lambda v: logl(v), observed={"v": theta})
        pm.Potential("likelihood", logl(theta))

        # Inference!
        trace = pm.sample(
            draws=2000,
            step=pm.Metropolis(),
            chains=4,
            tune=100,
            discard_tuned_samples=True,
        )

    print(trace)
    s = pm.summary(trace)
    print(s)
    means = s["mean"]
    sds = s["sd"]
    print(
        f"PM: Posterior for 'b' = {means['b']:6.3f} with sd {sds['b']:6.3f}.")
    print(
        f"PM: Posterior for 'sigma_std_function' = {means['sigma_std_function']:6.3f} with sd "
        f"{sds['sigma_std_function']:6.3f}.")
    print(
        f"PM: Posterior for 'sigma_std_derivative' = {means['sigma_std_derivative']:6.3f} with sd "
        f"{sds['sigma_std_derivative']:6.3f}.")
    #print(trace.stat_names)
    #accept = trace.get_sampler_stats('accept')
    #print("accept", accept)
    #pm.traceplot(trace, priors=[b.distribution,sigma_std_function.distribution, sigma_std_derivative.distribution]);
    plt.show()
    return trace
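To sanity-check the prior heuristic in the comments (mean = beta/(alpha-1), so alpha=4 implies beta = 3 * mean), the moments can be evaluated directly; scipy's invgamma uses `a` for alpha and `scale` for beta:

from scipy.stats import invgamma

prior_mean = 0.2
alpha = 4.0
beta = prior_mean * (alpha - 1)              # 0.6
print(invgamma(a=alpha, scale=beta).mean())  # 0.2, the target prior mean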
Example #7
    def build_model(self, name='normal_model'):
        # Define Stochastic variables
        with pm.Model(name=name) as self.model:
            # Global mean pitch angle
            self.mu_phi = pm.Uniform('mu_phi', lower=0, upper=90)
            self.sigma_phi = pm.InverseGamma('sigma_phi',
                                             alpha=2,
                                             beta=15,
                                             testval=8)
            self.sigma_gal = pm.InverseGamma('sigma_gal',
                                             alpha=2,
                                             beta=15,
                                             testval=8)
            # define a mean galaxy pitch angle
            self.phi_gal = pm.TruncatedNormal(
                'phi_gal',
                mu=self.mu_phi,
                sd=self.sigma_phi,
                lower=0,
                upper=90,
                shape=len(self.galaxies),
            )
            # draw arm pitch angles centred around this mean
            self.phi_arm = pm.TruncatedNormal(
                'phi_arm',
                mu=self.phi_gal[self.gal_arm_map],
                sd=self.sigma_gal,
                lower=0,
                upper=90,
                shape=len(self.gal_arm_map),
            )
            # convert to a gradient for a linear fit
            self.b = tt.tan(np.pi / 180 * self.phi_arm)
            # arm offset parameter
            self.c = pm.Cauchy('c',
                               alpha=0,
                               beta=10,
                               shape=self.n_arms,
                               testval=np.tile(0, self.n_arms))
            # radial noise
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)
            r = pm.Deterministic(
                'r',
                tt.exp(self.b[self.point_arm_map] * self.data['theta'] +
                       self.c[self.point_arm_map]))
            # likelihood function
            self.likelihood = pm.Normal(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                observed=self.data['r'],
            )
Example #8
    def build_model(self, name=''):
        # Define Stochastic variables
        with pm.Model(name=name) as self.model:
            # Global mean pitch angle
            self.phi_gal = pm.Uniform('phi_gal',
                                      lower=0,
                                      upper=90,
                                      shape=len(self.galaxies))
            # note we don't model inter-galaxy dispersion here
            # intra-galaxy dispersion
            self.sigma_gal = pm.InverseGamma('sigma_gal',
                                             alpha=2,
                                             beta=20,
                                             testval=5)
            # arm offset parameter
            self.c = pm.Cauchy('c',
                               alpha=0,
                               beta=10,
                               shape=self.n_arms,
                               testval=np.tile(0, self.n_arms))

            # radial noise
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

            # define prior for Student T degrees of freedom
            # self.nu = pm.Uniform('nu', lower=1, upper=100)

            # Define Dependent variables
            self.phi_arm = pm.TruncatedNormal(
                'phi_arm',
                mu=self.phi_gal[self.gal_arm_map],
                sd=self.sigma_gal,
                lower=0,
                upper=90,
                shape=self.n_arms)

            # convert to a gradient for a linear fit
            self.b = tt.tan(np.pi / 180 * self.phi_arm)
            r = pm.Deterministic(
                'r',
                tt.exp(self.b[self.data['arm_index'].values] *
                       self.data['theta'] +
                       self.c[self.data['arm_index'].values]))

            # likelihood function
            self.likelihood = pm.StudentT(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                nu=1,  #self.nu,
                observed=self.data['r'],
            )
示例#9
0
    def build_model(self, name=''):
        # Define Stochastic variables
        with pm.Model(name=name) as self.model:
            # Global mean pitch angle
            self.phi_gal = pm.Uniform('phi_gal',
                                      lower=0,
                                      upper=90,
                                      shape=len(self.galaxies))
            # note we don't model inter-galaxy dispersion here
            # intra-galaxy dispersion
            self.sigma_gal = pm.InverseGamma('sigma_gal',
                                             alpha=2,
                                             beta=20,
                                             testval=5)
            # arm offset parameter
            self.c = pm.Cauchy('c',
                               alpha=0,
                               beta=10,
                               shape=self.n_arms,
                               testval=np.tile(0, self.n_arms))

            # radial noise
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

            # ----- Define Dependent variables -----

            # Phi arm is drawn from a truncated normal centred on phi_gal with
            # spread sigma_gal
            gal_idx = self.gal_arm_map.astype('int32')
            self.phi_arm = pm.TruncatedNormal('phi_arm',
                                              mu=self.phi_gal[gal_idx],
                                              sd=self.sigma_gal,
                                              lower=0,
                                              upper=90,
                                              shape=self.n_arms)

            # transform to gradient for fitting
            self.b = tt.tan(np.pi / 180 * self.phi_arm)

            # r = exp(theta * tan(phi) + c)
            # do not track this as it uses a lot of memory
            arm_idx = self.data['arm_index'].values.astype('int32')
            r = tt.exp(self.b[arm_idx] * self.data['theta'] + self.c[arm_idx])

            # likelihood function (a Normal likelihood is assumed here)
            self.likelihood = pm.Normal(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                observed=self.data['r'],
            )
Example #10
def reg_hs_regression(X, y_obs, ylabel='likelihood', tau_0=None, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""
    n_features = X.shape[1]
    if tau_0 is None:
        m0 = n_features / 2
        n_obs = X.shape[0]
        tau_0 = m0 / ((n_features - m0) * np.sqrt(n_obs))
    with pm.Model() as model:
        tau = pm.HalfCauchy('tau', tau_0)
        sd_bias = pm.HalfCauchy('sd_bias', beta=2.5)
        lamb_m = pm.HalfCauchy('lambda_m', beta=1)
        slab_scale = kwargs.pop('slab_scale', 3)
        slab_scale_sq = slab_scale ** 2
        slab_df = kwargs.pop('slab_df', 8)
        half_slab_df = slab_df / 2
        # Regularization bit
        c_sq = pm.InverseGamma('c_sq', alpha=half_slab_df,
                               beta=half_slab_df * slab_scale_sq)
        lamb_m_bar = tt.sqrt(c_sq) * lamb_m / tt.sqrt(
            c_sq + tt.pow(tau, 2) * tt.pow(lamb_m, 2))
        w = pm.Normal('w', mu=0, sd=tau*lamb_m_bar, shape=n_features)
        bias = pm.Laplace('bias', mu=0, b=sd_bias)
        mu_ = tt.dot(X, w) + bias
        sig = pm.HalfCauchy('sigma', beta=5)
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs)
        model.name = "regularized_hshoe_reg"
    return model
Example #11
def make_state_model_AR1(data, observe):
    '''
    model for Two-State StoVol

    :param data: observation data
    :param observe: column name of y
    :return: PyMC model
    '''
    # Prepare data
    nstate = data['covid_state_US'].nunique()
    log_returns = data[observe].to_numpy()
    state_idx = data["covid_state_US"].to_numpy()

    with pm.Model() as model:
        # Data
        _returns = pm.Data("_returns", log_returns)
        _state_idx = pm.intX(pm.Data("state_idx", state_idx))
        # Prior
        scale = pm.InverseGamma("scale", alpha=2.5, beta=0.05, shape=nstate)
        log_vol = pm.GaussianRandomWalk('log_vol',
                                        mu=0,
                                        sigma=scale[_state_idx],
                                        shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        # Likelihood
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=pm.math.exp(-2 * log_vol),
                              observed=_returns)
    return model
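A usage sketch with synthetic data, assuming two hypothetical regimes in the 'covid_state_US' column:

import numpy as np
import pandas as pd
import pymc3 as pm

df = pd.DataFrame({
    "log_returns": np.random.normal(0, 0.02, size=200),
    "covid_state_US": np.repeat([0, 1], 100),
})
model = make_state_model_AR1(df, observe="log_returns")
with model:
    trace = pm.sample(500, tune=500, cores=1)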
Example #12
def hs_regression(X, y_obs, ylabel='y', tau_0=None, regularized=False, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""
    if tau_0 is None:
        M = X.shape[1]
        m0 = M / 2
        N = X.shape[0]
        tau_0 = m0 / ((M - m0) * np.sqrt(N))
    if regularized:
        slab_scale = kwargs.pop('slab_scale', 3)
        slab_scale_sq = slab_scale**2
        slab_df = kwargs.pop('slab_df', 8)
        half_slab_df = slab_df / 2
        with pm.Model() as mhsr:
            tau = pm.HalfCauchy('tau', tau_0)
            c_sq = pm.InverseGamma('c_sq',
                                   alpha=half_slab_df,
                                   beta=half_slab_df * slab_scale_sq)
            lamb_m = pm.HalfCauchy('lambda_m', beta=1)
            lamb_m_bar = tt.sqrt(c_sq) * lamb_m / (
                tt.sqrt(c_sq + tt.pow(tau, 2) * tt.pow(lamb_m, 2)))
            w = pm.Normal('w', mu=0, sd=tau * lamb_m_bar, shape=X.shape[1])
            mu_ = pm.Deterministic('mu', tt.dot(X, w))
            sig = pm.HalfCauchy('sigma', beta=10)
            y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs.squeeze())
        return mhsr
    else:
        with pm.Model() as mhs:
            tau = pm.HalfCauchy('tau', tau_0)
            lamb_m = pm.HalfCauchy('lambda_m', beta=1)
            w = pm.Normal('w', mu=0, sd=tau * lamb_m, shape=X.shape[1])
            mu_ = pm.Deterministic('mu', tt.dot(X, w))
            sig = pm.HalfCauchy('sigma', beta=10)
            y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs.squeeze())
        return mhs
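A usage sketch under assumed synthetic data; the sparse-coefficient setup mirrors the regime the horseshoe prior is designed for:

import numpy as np
import pymc3 as pm

np.random.seed(0)
N, M = 100, 20
X = np.random.normal(size=(N, M))
w_true = np.zeros(M)
w_true[:3] = [2.0, -1.5, 1.0]  # only a few nonzero effects
y_obs = X @ w_true + np.random.normal(0, 0.5, size=N)

mhs = hs_regression(X, y_obs)
with mhs:
    trace = pm.sample(500, tune=500, cores=1)
print(pm.summary(trace))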
Example #13
def analyze_data(X, y, if_scale=True):
    """
    Function to analyze data
    :param X: input features
    :param y: output
    :param if_scale: if True, standardize X and y column-wise
    :return: trace, result, yticks
    """
    epa_cols = X.columns.tolist()
    if if_scale:
        X = scale(X, axis=0)
        y = scale(y, axis=0)
    with pm.Model() as Model_Linthipe_SOC:
        alpha = pm.Normal('alpha', mu=0, sd=1)
        beta = pm.Normal('beta', mu=0, sd=1, shape=X.shape[1])
        sigma = pm.InverseGamma('sigma', alpha=2, beta=1)
        y_fit = pm.Normal('y_fit',
                          mu=alpha + pm.math.dot(X, beta),
                          sd=sigma**(1.0 / 2),
                          observed=y)
    with Model_Linthipe_SOC:
        trace = pm.sample(1000, step=pm.Metropolis(), chains=2)
    result = pm.summary(trace)
    ind = result.index.tolist()
    ind[1:len(epa_cols) + 1] = epa_cols
    result.index = ind
    yticks = ['alpha'] + epa_cols + ["sigma"]
    return trace, result, yticks
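The `scale` call above is assumed to be sklearn.preprocessing.scale; a usage sketch with synthetic inputs:

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.normal(size=(80, 3)), columns=['clay', 'silt', 'ph'])
y = np.random.normal(size=80)
trace, result, yticks = analyze_data(X, y)
print(result)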
Example #14
    def group_model(self):

        with pm.Model() as gmodel:
            # uniform priors on h
            m = pm.DiscreteUniform('h', 0., 20.)
            std = pm.InverseGamma('s', 3., 0.5)
            mean = 2 * m + 1
            alphas = np.arange(1., 101., 5.)
            p = self.discreteNormal(alphas, mean, std)

            for i in range(self.nruns):
                hab_ten = pm.Categorical('h_{}'.format(i), p)

                alpha = tt.as_tensor_variable([hab_ten])
                probs_a, probs_r = self.inferrer(alpha)

                # use a DensityDist
                pm.Categorical('actions_{}'.format(i),
                               probs_a,
                               observed=self.actions[i])
                pm.Categorical('rewards_{}'.format(i),
                               probs_r,
                               observed=self.rewards[i])

        return gmodel
Example #15
def pm_horseshoe(X, y, b):

    m = 10
    ss = 3
    dof = 25

    horseshoe = pm.Model()
    with horseshoe:
        sigma = pm.HalfNormal('sigma', 2)
        tau_0 = m / (X.shape[1] - m) * sigma / tt.sqrt(X.shape[0])

        tau = pm.HalfCauchy('tau', tau_0)
        c2 = pm.InverseGamma('c2', dof / 2, dof / 2 * ss**2)
        lam = pm.HalfCauchy('lam', 1, shape=X.shape[1])

        l1 = lam * tt.sqrt(c2)
        l2 = tt.sqrt(c2 + tau * tau * lam * lam)
        lam_d = l1 / l2

        beta = pm.Normal('beta', 0, tau * lam_d, shape=X.shape[1])
        y_hat = tt.dot(X, beta)

        likelihood = pm.Normal('likelihood', y_hat, observed=y)
        trace = pm.sample(1000)

    b_hat = trace.get_values('beta').mean(0)
    b_sig = trace.get_values('beta').std(0)
    plot_beta(b, b_hat, std=b_sig)
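plot_beta is not shown here; one plausible helper matching the call plot_beta(b, b_hat, std=b_sig) would be:

import matplotlib.pyplot as plt
import numpy as np

def plot_beta(b, b_hat, std=None):
    # Hypothetical helper: compare true coefficients against posterior estimates.
    idx = np.arange(len(b))
    plt.scatter(idx, b, marker='x', label='true beta')
    plt.errorbar(idx, b_hat, yerr=std, fmt='o', label='posterior mean')
    plt.xlabel('coefficient index')
    plt.legend()
    plt.show()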
Example #16
    def _gamma_mix(self, model, z):
        with model:
            logger.info("Using tau_g_alpha: {}".format(self.tau_g_alpha))
            tau_g = pm.InverseGamma("tau_g",
                                    alpha=self.tau_g_alpha,
                                    beta=1.,
                                    shape=self.n_states)

            logger.info("Using mean_g: {}".format(self.gamma_means))
            if self.n_states == 2:
                logger.info("Building two-state model")
                mean_g = pm.Normal("mu_g",
                                   mu=self.gamma_means,
                                   sd=1,
                                   shape=self.n_states)
                pm.Potential("m_opot",
                             var=tt.switch(mean_g[1] - mean_g[0] < 0., -np.inf,
                                           0.))
            else:
                logger.info("Building three-state model")
                mean_g = pm.Normal("mu_g",
                                   mu=self.gamma_means,
                                   sd=1,
                                   shape=self.n_states)
                pm.Potential(
                    'm_opot',
                    tt.switch(mean_g[1] - mean_g[0] < 0, -np.inf, 0) +
                    tt.switch(mean_g[2] - mean_g[1] < 0, -np.inf, 0))

            gamma = pm.Normal("gamma", mean_g[z], tau_g[z], shape=self.n_genes)

        return tau_g, mean_g, gamma
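The -inf Potential rejects any draw in which the state means are unordered; pymc3's ordered transform enforces the same constraint directly, sketched here for the two-state case:

import numpy as np
import pymc3 as pm

with pm.Model():
    mean_g = pm.Normal("mu_g",
                       mu=np.array([-1., 1.]),
                       sd=1.,
                       shape=2,
                       transform=pm.distributions.transforms.ordered,
                       testval=np.array([-1., 1.]))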
Example #17
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv",
                                np.r_[0.0],
                                np.c_[1.0],
                                shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv",
                                    np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv",
                                          0.1,
                                          0.1,
                                          10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv",
                                        np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv",
                                        5,
                                        np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1]
        for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
Example #18
def run_model(sample: pd.Series,
              df: pd.DataFrame,
              training_genes: List[str],
              group: str = 'tissue',
              **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        group: Column to use to distinguish different groups
        **kwargs: Additional keyword arguments passed to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    # Importing here since Theano base_compiledir needs to be set prior to import
    import pymc3 as pm

    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    click.echo('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)
        b = [1] if len(classes) == 1 else pm.Dirichlet('b',
                                                       a=np.ones(len(classes)))
        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += b[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])
        # Sample
        trace = pm.sample(**kwargs)
    return model, trace
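A usage sketch with hypothetical inputs (the module is assumed to import scipy.stats as st, tqdm, and click): a background frame carrying a 'tissue' column plus one column per gene, and an N-of-1 sample indexed by gene:

import numpy as np
import pandas as pd

genes = ['g1', 'g2', 'g3']
df = pd.DataFrame(np.random.normal(size=(50, 3)), columns=genes)
df['tissue'] = np.repeat(['brain', 'liver'], 25)
sample = pd.Series(np.random.normal(size=3), index=genes)

model, trace = run_model(sample, df, training_genes=genes, draws=500, tune=500, cores=1)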
Example #19
    def sample(self, y, locations, X=None, **kwargs):
        self.X = X
        self.y = y
        self.locations = locations

        param_dicts_and_new_names = zip(
            (self.error_scale_parameter, self.scale_distribution_params),
            ('(sigma): noise scale', '(eta): kernel scale multiplier'))

        for param_dicts, new_name in param_dicts_and_new_names:
            if 'name' not in param_dicts:
                param_dicts.update({'name': new_name})

        with self.model:

            if self.kernel_type is None:
                self.kernel_type = Matern52
                self.kernel_parameter = {
                    'ls':
                    pm.InverseGamma(name='(rho): spatial correlation',
                                    alpha=1,
                                    beta=1),
                    'input_dim':
                    self.locations.shape[1]
                }

            gp_kernel = self.kernel_type(**self.kernel_parameter)

            if self.scale_distribution_for_kernel is None:
                self.scale_distribution_for_kernel = InverseGamma
                self.scale_distribution_params = {'alpha': 1, 'beta': 1}

            scale_for_kernel = pm.math.sqr(
                self.scale_distribution_for_kernel(
                    **self.scale_distribution_params))

            if self.error_scale_distribution is None:
                self.error_scale_distribution = HalfCauchy
                self.error_scale_parameter = {'beta': 5}

            self.gp = pm.gp.MarginalSparse(cov_func=scale_for_kernel *
                                           gp_kernel,
                                           approx="FITC")
            inducing_points = pm.gp.util.kmeans_inducing_points(
                20, self.locations)
            error_variable = self.error_scale_distribution(
                **self.error_scale_parameter)

            y_ = self.gp.marginal_likelihood("y",
                                             X=self.locations,
                                             Xu=inducing_points,
                                             y=self.y,
                                             noise=error_variable)
            self.trace = pm.sample(**kwargs)
Example #20
    def build_model(self, n=None, name='archimedian_model'):
        with pm.Model(name=name) as self.model:
            if n is None:
                # one n per galaxy, or per arm?
                self.n_choice = pm.Categorical('n_choice', [1, 1, 0, 1, 1],
                                               testval=1,
                                               shape=len(self.galaxies))
                self.n = pm.Deterministic('n', self.n_choice - 2)
                self.chirality_correction = tt.switch(self.n < 0, -1, 1)
            else:
                msg = 'Parameter $n$ must be a nonzero float'
                try:
                    n = float(n)
                except ValueError:
                    pass
                finally:
                    assert isinstance(n, float) and n != 0, msg

                self.n_choice = None
                self.n = pm.Deterministic('n',
                                          np.repeat(n, len(self.galaxies)))

            self.chirality_correction = tt.switch(self.n < 0, -1, 1)
            self.a = pm.HalfCauchy('a', beta=1, testval=1, shape=self.n_arms)
            self.psi = pm.Normal(
                'psi',
                mu=0,
                sigma=1,
                testval=0.1,
                shape=self.n_arms,
            )
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)
            # Unfortunately, as we need to reverse the theta points for arms
            # with n < 0, and rotate all arms to start at theta = 0,
            # we need to do some model-mangling
            self.t_mins = Series({
                i: self.data.query('arm_index == @i')['theta'].min()
                for i in np.unique(self.data['arm_index'])
            })
            r_stack = [
                self.a[i] * tt.power(
                    (self.data.query('arm_index == @i')['theta'].values -
                     self.t_mins[i] + self.psi[i]),
                    1 / self.n[int(self.gal_arm_map[i])])
                [::self.chirality_correction[int(self.gal_arm_map[i])]]
                for i in np.unique(self.data['arm_index'])
            ]
            r = pm.Deterministic('r', tt.concatenate(r_stack))
            self.likelihood = pm.StudentT(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                observed=self.data['r'].values,
            )
Example #21
def fixture_model():
    with pm.Model() as model:
        n = 5
        dim = 4
        with pm.Model():
            cov = pm.InverseGamma("cov", alpha=1, beta=1)
            x = pm.Normal("x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov), shape=(n, dim))
            eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
            # `at` is aesara.tensor; this fixture targets the Aesara-based PyMC API.
            mu = pm.Deterministic("mu", at.sum(x + eps, axis=-1))
            y = pm.Normal("y", mu=mu, sigma=1, shape=(n,))
    return model, [cov, x, eps, y]
Example #22
def run_model(sample,
              df,
              training_genes,
              weights,
              group: str = 'tissue',
              **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        group: Column to use to distinguish different groups
        **kwargs: Additional keyword arguments passed to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    print('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)
        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # TODO: Try tt.stack to declare mu more intelligently via b * y
        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += weights[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])
        # Sample
        trace = pm.sample(**kwargs)
    return model, trace
Example #23
    def create_model(self,
                     x=None,
                     aD=None,
                     bD=None,
                     aA=None,
                     bA=None,
                     aN=None,
                     bN=None,
                     delta_t=None,
                     N=None):
        with pm.Model() as model:
            D = pm.InverseGamma('D', alpha=aD, beta=bD)
            A = pm.Gamma('A', alpha=aA, beta=bA)
            sN = pm.InverseGamma('sN', alpha=aN, beta=bN)

            B = pm.Deterministic('B', pm.math.exp(-delta_t * D / A))

            path = Ornstein_Uhlenbeck('path', A=A, B=B, shape=(N, ))

            X_obs = pm.Normal('X_obs', mu=path, sd=sN, observed=x)

        return model
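Ornstein_Uhlenbeck is a custom distribution defined elsewhere in the project; one plausible definition, treating A as the stationary variance and B as the lag-one autocorrelation (an assumption consistent with B = exp(-delta_t * D / A) above), is:

import pymc3 as pm
import theano.tensor as tt

class Ornstein_Uhlenbeck(pm.Continuous):
    # Hypothetical OU path: x[0] has variance A, and each increment
    # x[t] | x[t-1] is Normal with mean B * x[t-1] and variance A * (1 - B**2).
    def __init__(self, A=None, B=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.A = A
        self.B = B
        self.mean = tt.as_tensor_variable(0.)

    def logp(self, x):
        A, B = self.A, self.B
        boundary = pm.Normal.dist(mu=0., sd=tt.sqrt(A)).logp(x[0])
        innov = pm.Normal.dist(mu=B * x[:-1],
                               sd=tt.sqrt(A * (1 - B ** 2))).logp(x[1:])
        return boundary + tt.sum(innov)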
Example #24
File: shlm.py Project: dirmeier/shm
    def _set_simple_model(self):
        with pm.Model() as model:
            logger.info("Using tau_g_alpha: {}".format(self.tau_g_alpha))
            tau_g = pm.InverseGamma("tau_g",
                                    alpha=self.tau_g_alpha,
                                    beta=1.,
                                    shape=1)
            mean_g = pm.Normal("mu_g", mu=0, sd=1, shape=1)
            gamma = pm.Normal("gamma", mean_g, tau_g, shape=self.n_genes)
        param_hlm = self._hlm(model, gamma)

        self._set_steps(model, None, tau_g, mean_g, gamma, *param_hlm)
        return self
Example #25
    def create_model(self,
                     x=None,
                     aB=None,
                     bB=None,
                     aA=None,
                     bA=None,
                     delta_t=None,
                     N=None):
        with pm.Model() as model:
            B = pm.Beta('B', alpha=aB, beta=bB)
            A = pm.InverseGamma('A', alpha=aA, beta=bA)

            path = Ornstein_Uhlenbeck('path', B=B, A=A, observed=x)
        return model
Example #26
    def sample(self, y, locations, X=None, approximation=False, **kwargs):
        self.X = X
        self.y = y
        self.locations = locations

        with self.model:
            if self.kernel_type is None:
                self.kernel_type = Matern52
                self.kernel_parameter = {
                    'ls':
                    pm.InverseGamma(name='rho: spatial correlation',
                                    alpha=1,
                                    beta=1),
                    'input_dim':
                    self.locations.shape[1]
                }

            gp_kernel = self.kernel_type(**self.kernel_parameter)

            if self.scale_distribution_for_kernel is None:
                self.scale_distribution_for_kernel = InverseGamma
                self.scale_distribution_params = {
                    'name': 'kernel scale multiplier',
                    'alpha': 1,
                    'beta': 1
                }

            scale_for_kernel = pm.math.sqr(
                self.scale_distribution_for_kernel(
                    **self.scale_distribution_params))

            if self.error_distribution is None:
                self.error_distribution = Normal
                self.error_parameter = {
                    # A distribution parameter must be a random variable, not a
                    # .dist() object; 'sigma_noise' is a hypothetical variable name.
                    'sigma': InverseGamma('sigma_noise', alpha=1, beta=1)
                }

            cov_kernel_func = scale_for_kernel * gp_kernel

            self.gp = pm.gp.Latent(cov_func=cov_kernel_func)
            gaussian_process_mean = self.gp.prior('f', X=self.locations)
            y_ = self.error_distribution(name='y',
                                         mu=gaussian_process_mean,
                                         observed=self.y,
                                         **self.error_parameter)
            if approximation:
                self.trace = pm.fit(method='advi', n=10_000).sample()
            else:
                self.trace = pm.sample(**kwargs)
Example #27
    def create_model(self,
                     x=None,
                     aD=None,
                     bD=None,
                     aA=None,
                     bA=None,
                     delta_t=None,
                     N=None):
        with pm.Model() as model:
            D = pm.Gamma('D', alpha=aD, beta=bD)
            A = pm.InverseGamma('A', alpha=aA, beta=bA)

            B = pm.Deterministic('B', pm.math.exp(-delta_t * D / A))

            path = Ornstein_Uhlenbeck('path', A=A, B=B, observed=x)
        return model
Example #28
def model_factory(x_2_data, x_3_data, x_4_data, x_5_data, x_6_data, x_7_data,
                  x_8_data, y_data, x_1_data):

    with pm.Model() as varying_intercept_slope_noncentered:

        # Priors
        mu_a = pm.Normal('mu_a', mu=0.05, sd=2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)

        mu_b_1 = pm.InverseGamma('mu_b_1', mu=0.05, sigma=2)
        sigma_b_1 = pm.HalfCauchy('sigma_b_1', 5)
        mu_b_2 = pm.InverseGamma('mu_b_2', mu=0.05, sigma=2)
        sigma_b_2 = pm.HalfCauchy('sigma_b_2', 5)
        mu_b_3 = pm.InverseGamma('mu_b_3', mu=0.05, sigma=2)
        sigma_b_3 = pm.HalfCauchy('sigma_b_3', 5)
        mu_b_4 = pm.InverseGamma('mu_b_4', mu=0.05, sigma=2)
        sigma_b_4 = pm.HalfCauchy('sigma_b_4', 5)
        mu_b_5 = pm.InverseGamma('mu_b_5', mu=0.05, sigma=2)
        sigma_b_5 = pm.HalfCauchy('sigma_b_5', 5)
        mu_b_6 = pm.InverseGamma('mu_b_6', mu=0.05, sigma=2)
        sigma_b_6 = pm.HalfCauchy('sigma_b_6', 5)
        mu_b_7 = pm.InverseGamma('mu_b_7', mu=0.05, sigma=2)
        sigma_b_7 = pm.HalfCauchy('sigma_b_7', 5)

        # Non-centered random intercepts (hierachical_type is assumed to be a
        # module-level global)
        u = pm.Normal('u', mu=0, sd=2, shape=len(hierachical_type))
        a = mu_a + u * sigma_a

        # Random slopes
        b_1 = mu_b_1 + u * sigma_b_1
        b_2 = mu_b_2 + u * sigma_b_2
        b_3 = mu_b_3 + u * sigma_b_3
        b_4 = mu_b_4 + u * sigma_b_4
        b_5 = mu_b_5 + u * sigma_b_5
        b_6 = mu_b_6 + u * sigma_b_6
        b_7 = mu_b_7 + u * sigma_b_7

        # Expected value
        y_hat = (a[x_1_data] + b_1[x_1_data] * x_2_data + b_2[x_1_data] * x_3_data +
                 b_3[x_1_data] * x_4_data + b_4[x_1_data] * x_5_data +
                 b_5[x_1_data] * x_6_data + b_6[x_1_data] * x_7_data +
                 b_7[x_1_data] * x_8_data)

        # Data likelihood
        pm.Bernoulli('y_like', logit_p=y_hat, observed=y_data)

    # dump the model (BASE_DIR_OUTPUT and output_file_name_2 come from module scope)
    joblib.dump(varying_intercept_slope_noncentered,
                os.path.sep.join([BASE_DIR_OUTPUT, output_file_name_2]))

    return varying_intercept_slope_noncentered
Example #29
def minicube_pymc_fit(xax, data, guesses, ncomps=1, sample=True,
                      fmin=opt.fmin_bfgs, fmin_kwargs={}, **sampler_kwargs):
    '''
    pymc fitting of a set of single Gaussians.
    '''

    basic_model = pm.Model()

    with basic_model:

        params_dict = {}

        for i in range(ncomps):
            model_i, params_dict_i = \
                spatial_gaussian_model(xax, data, guesses, comp_num=i)
            if i == 0:
                model = model_i
            else:
                model += model_i

            params_dict.update(params_dict_i)

        sigma_n = pm.InverseGamma('sigma_n', alpha=1, beta=1)
        Y_obs = pm.Normal('Y_obs', mu=model, sd=sigma_n, observed=data)

        start = pm.find_MAP(fmin=fmin, **fmin_kwargs)
        # Use the initial guesses for the Bernoulli parameters
        for i in range(ncomps):
            start['on{}'.format(i)] = guesses['on{}'.format(i)]

        if sample:
            # An attempt to use variational inference b/c it would be
            # way faster. This fails terribly in every case I've tried
            # trace = pm.fit(500, start=start, method='svgd',
            #                inf_kwargs=dict(n_particles=100,
            #                                temperature=1e-4),
            #                ).sample(500)
            trace = pm.sample(start=start, **sampler_kwargs)

    if sample:
        medians = parameter_medians(trace)
        stddevs = parameter_stddevs(trace)

        return medians, stddevs, trace, basic_model
    else:
        return start, basic_model
Example #30
    def train(self, niter=1000, random_seed=123, tune=500, cores=4):
        # model training
        with self.scallop_model:
            # hyperparameter priors
            l = pm.InverseGamma("l", 5, 5, shape=self.dim)
            sigma_f = pm.HalfNormal("sigma_f", 1)

            # covariance function and marginal GP
            K = sigma_f ** 2 * pm.gp.cov.ExpQuad(self.dim, ls=l)

            self.gp = pm.gp.Marginal(cov_func=K)

            # observation noise and marginal likelihood
            sigma_n = pm.HalfNormal("sigma_n", 1)
            tot_catch = self.gp.marginal_likelihood("tot_catch", X=self.x,
                                                    y=self.y, noise=sigma_n)

            # model fitting
            self.trace = pm.sample(niter, random_seed=random_seed,
                                   progressbar=True, tune=tune, cores=cores)
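A self-contained sketch of the same Marginal-GP pattern with out-of-sample prediction via gp.predict; the data are synthetic placeholders:

import numpy as np
import pymc3 as pm

X = np.random.uniform(0, 1, size=(30, 1))
y = np.sin(6 * X[:, 0]) + np.random.normal(0, 0.1, size=30)

with pm.Model() as gp_model:
    l = pm.InverseGamma("l", 5, 5)
    sigma_f = pm.HalfNormal("sigma_f", 1)
    K = sigma_f ** 2 * pm.gp.cov.ExpQuad(1, ls=l)
    gp = pm.gp.Marginal(cov_func=K)
    sigma_n = pm.HalfNormal("sigma_n", 1)
    y_ = gp.marginal_likelihood("y", X=X, y=y, noise=sigma_n)
    mp = pm.find_MAP()
    X_new = np.linspace(0, 1, 50)[:, None]
    # posterior predictive mean and variance at new inputs
    mu, var = gp.predict(X_new, point=mp, diag=True, pred_noise=True)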