Example #1
def test_logistic_regression(crossed_data):
    # Test that passing link="logit" is equivalent to using tt.nnet.sigmoid
    model0 = Model(
        "threecats['b'] ~ continuous + dummy", crossed_data, family="bernoulli", link="logit"
    )
    fitted0 = model0.fit(
        tune=0,
        draws=1000,
    )

    # build the same model, passing the theano link function directly
    model1 = Model(
        "threecats['b'] ~ continuous + dummy",
        crossed_data,
        family="bernoulli",
        link=tt.nnet.sigmoid,
    )
    fitted1 = model1.fit(
        tune=0,
        draws=1000,
    )

    # check that using a theano link function works
    assert np.allclose(az.summary(fitted0)["mean"], az.summary(fitted1)["mean"], atol=0.2)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)

    # check that common effect design matrices are the same,
    # even if term names / level names / order of columns is different
    X0 = {
        tuple(t.data[:, lev])
        for t in model0.common_terms.values()
        for lev in range(len(t.levels))
    }
    X1 = {
        tuple(t.data[:, lev])
        for t in model1.common_terms.values()
        for lev in range(len(t.levels))
    }

    assert X0 == X1

    # check that models have same priors for common effects
    priors0 = {x.name: x.prior.args for x in model0.terms.values() if not x.group_specific}
    priors1 = {x.name: x.prior.args for x in model1.terms.values() if not x.group_specific}
    # check dictionary keys
    assert set(priors0) == set(priors1)
    # check dictionary values
    def dicts_close(a, b):
        if set(a) != set(b):
            return False
        return all(np.allclose(a[x], b[x], atol=0, rtol=0.01) for x in a)

    assert all(dicts_close(priors0[x], priors1[x]) for x in priors0)
Example #2
def exercise4():
    with pm.Model() as basic_model:
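        # candidate response probabilities for the three groups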
        probabilities = [0.3, 0.7, 0.95]

        likelihood_params = np.array(
            [np.divide(1, 3) * (1 + 2 * prob) for prob in probabilities])

        group = pm.Categorical('group', p=np.array([1, 1, 1]))

        p = pm.Deterministic('p', theano.shared(likelihood_params)[group])

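        # observed number of positive answers out of num_questions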
        positive_answers = pm.Binomial('positive_answers',
                                       n=num_questions,
                                       p=p,
                                       observed=[7])

        trace = pm.sample(4000, progressbar=True)

        az.plot_trace(trace)

        plt.show()

        az.plot_posterior(trace)

        plt.show()

        az.summary(trace)
        return trace
Example #3
    def test_disaster_model_missing(self):
        model = build_disaster_model(masked=True)
        with model:
            # Initial values for stochastic nodes
            start = {"early_mean": 2.0, "late_mean": 3.0}
            # Use slice sampler for means (other variables auto-selected)
            step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
            tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
            az.summary(tr)
Example #4
def get_params(n, input_params, input_traces=False):
    """
    Helper function to extract parameters from fit to polynomial of degree n using pymc3 traces

    Parameters
    ----------
    n: integer
        indicates the power of the polynomial fit
    input_params : can be either best_fit or traces

        best_fit : dict with 1-D numpy arrays of floats
            parameter values from the model and covariance matrix
        traces : pymc3 MultiTrace object
            Traces generated from MCMC sampling
    input_traces : bool
        indicates whether input is best_fit or traces

    Returns
    -------
    params, param_errors: 1-D numpy arrays of floats
        parameter values from the model
        standard deviations of each parameter

    """

    # extract parameters and uncertainties using arviz
    if input_traces:
        summary = az.summary(input_params, round_to=9)
        params_list = []
        params_uncert = []
        for parameter in ['C_{}'.format(i) for i in range(n + 1)]:
            params_list.append(summary['mean'][parameter])
            params_uncert.append(summary['sd'][parameter])
        params = np.array(params_list)
        params_errors = np.array(params_uncert)

    else:
        best_fit_list = []
        best_fit_errors_list = []

        parameters = []
        errors = {}
        for i in range(n + 1):
            parameters = parameters + ['C_{}'.format(i)]
            errors['C_{}'.format(i)] = np.sqrt(
                input_params['covariance matrix'][i][i])
        for parameter in parameters:
            best_fit_list.append(input_params[parameter])
            best_fit_errors_list.append(errors[parameter])

        params = np.array(best_fit_list)
        params_errors = np.array(best_fit_errors_list)

    return params, params_errors
Example #5
    def test_disaster_model(self):
        model = build_disaster_model(masked=False)
        with model:
            # Initial values for stochastic nodes
            start = {"early_mean": 2, "late_mean": 3.0}
            # Use slice sampler for means (other variables auto-selected)
            step = pm.Slice(
                [model["early_mean_log__"], model["late_mean_log__"]])
            idata = pm.sample(500, tune=50, start=start, step=step, chains=2)
            az.summary(idata)
Example #6
def test_bayes_nonparametric():

    data = pd.DataFrame({'x':[0,5], 'y':[0,0], 'z':[0,1]})

    # 2 connected segments, non-parametric changepoint
    model = sgmt.bayes(['y~1+x', '0+x'], data=data)

    # 3 connected segments, static changepoints
    model = sgmt.bayes(['y~1+x'] + 2*['0+x'], data=data)

    # 2 disconnected segments, static changepoint
    model = sgmt.bayes(['y~1+x', '1+x'], data=data)

    # 2 connected segments, covariates, and parametric changepoints
    # since income is also a covariate, it is ambiguous which variable
    # the changepoint operates over, so we specify it explicitly
    data = pd.DataFrame({'score':[0,0], 'time':[0,5], 'income':[12,33], 'IQ':[0,1]})
    model = sgmt.bayes(['score~1+time+income', '1+time+income'],
                        x_var='time',
                        data=data)

    # focus on estimation
    data = pd.DataFrame({'y':rng.random(size=100), 'x':rng.random(size=100)})
    model = sgmt.bayes(['y~1+x', '0+x'], data=data)
    # fit model
    trace = model.fit()
    # summarize the model estimation
    az.summary(trace)

    # 3 connected segments, static but explicit changepoints
    model = sgmt.bayes(['y~1+x'] + 2*['0+x'], changepoints=2*['1'], data=data)

    # 2 disconnected segments, static but explicit changepoint
    model = sgmt.bayes(['y~1+x', '1+x'], changepoints=['1'], data=data)


    # we also allow the outcome variable to be indicated explicitly,
    # which can ease the writing of segment specifications
    data = pd.DataFrame({'y':[0,0], 'x':[0,5]})
    sgmt.bayes(5 * ['1+x'], changepoints=4*['1'], y_var='y', data=data)

    # at some point we can permit these sorts of implicit specs
    # fill in changepoints with intercepts
    #sgmt.bayes(['y', '1+x', '1+x'], data=data)
    # equivalent to:
    # sgmt.bayes(['y', '1+x', '1+x'], changepoints=['1', '1','1'], data=data)

    # focus on estimation
    data = pd.DataFrame({'y':rng.random(size=100), 'x':rng.random(size=100)})
    model = sgmt.bayes(['y~1+x', '0+x'], changepoints=['1'], data=data)
    # fit model
    idata = model.fit()
    # summarize the model estimation
    az.summary(idata)
Example #7
def basic_test():

    # Initialize model
    with pm.Model() as model:

        # E.g., to define a flat prior
        # with some limits
        #z = pm.Uniform('z', lower=0.0, upper=3.0)

        # prior
        mu = pm.Normal('mu', mu=0, sigma=1)

        # Observed data
        obs = pm.Normal('obs', mu=mu, sigma=1, observed=np.random.randn(1000))

        # Run sampler
        idata = pm.sample(2000, tune=1500, return_inferencedata=True)

    print(idata.posterior.dims)

    az.plot_trace(idata)

    summary = az.summary(idata)

    print("Summary:")
    print(summary)

    plt.show()

    return None
Example #8
def skew_normal_prog():
    sample_b = poisson.rvs(lambda_b, size=2500)
    sample_u = poisson.rvs(mu=lambda_u, size=2500)

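    # fit skew-normal parameters to each simulated Poisson sample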
    a_b, loc_b, scale_b = stats.skewnorm.fit(sample_b)
    a_u, loc_u, scale_u = stats.skewnorm.fit(sample_u)

    basic_model = pm.Model()
    with basic_model:
        x1 = pm.SkewNormal('x1', mu=loc_b, sigma=scale_b, alpha=a_b)
        x2 = pm.SkewNormal('x2', mu=loc_b, sigma=scale_b, alpha=a_b)
        u = pm.SkewNormal('u',
                          mu=loc_u,
                          sigma=scale_u,
                          alpha=a_u,
                          observed=x1 + x2)

    with basic_model:
        trace = pm.sample(5000)

    # the two posterior means are numerically slightly different,
    # so we average them
    skew_mean = np.mean(az.summary(trace)["mean"])
    neg_x1 = np.mean(trace.get_values('x1') < 0)
    neg_x2 = np.mean(trace.get_values('x2') < 0)
    skew_prob = np.mean([neg_x1, neg_x2])
    return (skew_mean, skew_prob)
Example #9
    def model_summary(self):
        """Compute and cache an az.summary of the trace, excluding the chol and vals variables."""
        if self.summary is None:
            self.summary = az.summary(self.trace, var_names=["~chol", "~vals"], round_to=2)
Example #10
def samplePosterior(model, N, fit_intercept=False, fit_slope=True):
    """
    Monte Carlo for the posterior. Sample posterior predictive
    """
    RANDOM_SEED = 58
    with model:
        step = pm.NUTS()
        trace = pm.sample(N, step)

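        # choose which variables to draw in the posterior predictive and report in the summary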
        if fit_intercept and not fit_slope:
            var_names = ["Intercept", "Y_obs"]
            summary_names = ["Intercept"]
        elif not fit_intercept and fit_slope:
            var_names = ["slope", "Y_obs"]
            summary_names = ["slope"]
        else:
            var_names = ["Intercept", "slope", "Y_obs"]
            summary_names = ["Intercept", "slope"]

        ppc = pm.sample_posterior_predictive(trace,
                                             var_names=var_names,
                                             random_seed=RANDOM_SEED)

    summary = az.summary(trace, var_names=summary_names, round_to=3)
    print(summary)

    params = {}
    for name in summary_names:
        params[name] = {}
        params[name]['hpd_3%'] = summary['hpd_3%'][name]
        params[name]['hpd_mean'] = summary['mean'][name]
        params[name]['hpd_97%'] = summary['hpd_97%'][name]

    return params, ppc['Y_obs']
Example #11
    def fit(self, steps=1000, tune=1000, summarise=False):
        """ Fit the model to infer the correlation coefficient

        Parameters
        ----------
        steps : int, optional, default 1000
            Number of MCMC steps per chain after burn-in
        tune : int, optional, default 1000
            Number of steps per chain for burn-in
        summarise : bool, default False
            Whether to produce the table summary (also available through summarise())

        """
        with self.model:
            self.trace = pm.sample(
                steps, tune=tune, target_accept=0.9,
                compute_convergence_checks=False, return_inferencedata=True
            )
            self.fitted = True
        if summarise:
            self.summary = az.summary(self.trace, var_names=["~chol"], round_to=2)
            #self.rho = [self.summary['hdi_3%'][chol_corr[1,0]],self.summary['mean'][chol_corr[1,0]],self.summary['hdi_97%'][chol_corr[1,0]]]
            print(self.summary)
            return self.trace, self.summary

        return self.trace
Example #12
def poisson_prog_monthly():
    basic_model = pm.Model()
    with basic_model:
        # twelve latent monthly counts, each Poisson with rate lambda_b
        x1 = pm.Poisson('x1', mu=lambda_b)
        x2 = pm.Poisson('x2', mu=lambda_b)
        x3 = pm.Poisson('x3', mu=lambda_b)
        x4 = pm.Poisson('x4', mu=lambda_b)
        x5 = pm.Poisson('x5', mu=lambda_b)
        x6 = pm.Poisson('x6', mu=lambda_b)
        x7 = pm.Poisson('x7', mu=lambda_b)
        x8 = pm.Poisson('x8', mu=lambda_b)
        x9 = pm.Poisson('x9', mu=lambda_b)
        x10 = pm.Poisson('x10', mu=lambda_b)
        x11 = pm.Poisson('x11', mu=lambda_b)
        x12 = pm.Poisson('x12', mu=lambda_b)
        # observed aggregates over the monthly counts
        q1 = pm.Poisson('q1', mu=3 * lambda_b, observed=x1 + x2 + x3 + x4)
        q2 = pm.Poisson('q2', mu=3 * lambda_b, observed=x5 + x6 + x7 + x8)
        q3 = pm.Poisson('q3', mu=3 * lambda_b, observed=x9 + x10 + x11 + x12)
        s1 = pm.Poisson('s1', mu=5 * lambda_b, observed=x1 + x2 + x3 + x4 + x5 + x6)
        s2 = pm.Poisson('s2', mu=8 * lambda_b, observed=x7 + x8 + x9 + x10 + x11 + x12)
        y = pm.Poisson('y', mu=14 * lambda_b, observed=x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12)

    with basic_model:
        trace = pm.sample(5000)

    # the posterior means are numerically slightly different, so we average them
    pois_mean = np.mean(az.summary(trace)["mean"])
    return pois_mean
Example #13
def test_categorical_term():
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=6),
            "x1": np.random.normal(size=6),
            "x2": [1, 1, 0, 0, 1, 1],
            "g1": ["a"] * 3 + ["b"] * 3,
            "g2": ["x", "x", "z", "z", "y", "y"],
        }
    )
    model = Model("y ~ x1 + x2 + g1 + (g1|g2) + (x2|g2)", data)
    fitted = model.fit(draws=10)
    df = az.summary(fitted)
    names = [
        "Intercept",
        "x1",
        "x2",
        "g1[b]",
        "1|g2_sigma",
        "1|g2[x]",
        "1|g2[y]",
        "1|g2[z]",
        "g1|g2_sigma[b]",
        "g1|g2[b, x]",
        "g1|g2[b, y]",
        "g1|g2[b, z]",
        "x2|g2_sigma",
        "x2|g2[x]",
        "x2|g2[y]",
        "x2|g2[z]",
        "y_sigma",
    ]
    assert list(df.index) == names
Example #14
def infer_nonrobust_model():
    with pm.Model() as model_0:
        α = pm.Normal('α', mu=0, sd=10)
        β = pm.Normal('β', mu=0, sd=10)

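        # logistic regression: linear predictor pushed through a sigmoid link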
        μ = α + pm.math.dot(x_c, β)
        θ = pm.Deterministic('θ', pm.math.sigmoid(μ))
        bd = pm.Deterministic('bd', -α / β)  # decision boundary

        yl = pm.Bernoulli('yl', p=θ, observed=y_0)

        trace = pm.sample(1000, cores=1, chains=2)

    varnames = ['α', 'β', 'bd']
    az.summary(trace, varnames)
    return trace
Example #15
def test_fit_hmc_m32():
    """Generate samples from the posterior distribution"""
    n_cpu = 1
    np.random.seed(1)
    N = 50
    t = np.linspace(0, 1, N)
    y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(N) * 0.1
    df = pd.DataFrame(index=t, data=y, columns=['y'])

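    # kernel hyperparameters: initial values, bounds, and priors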
    par = [
        dict(name='mscale', value=9.313e-01, bounds=(0, None), prior=Gamma(4, 4)),
        dict(name='lscale', value=1.291e-01, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
        dict(name='sigv', value=9.241e-02, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(df=df, outputs='y', options={'init': 'fixed', 'n_cpu': n_cpu})
    # return df, reg, fit
    diagnostic = fit.diagnostic
    assert isinstance(diagnostic, pd.DataFrame)
    assert np.all(diagnostic['ebfmi'] > 0.8)
    assert np.all(diagnostic['mean accept_prob'] > 0.7)
    assert np.sum(diagnostic['sum diverging']) == 0
    assert np.sum(diagnostic['sum max_tree_depth']) == 0

    summary = az.summary(fit.posterior, round_to='none')
    assert isinstance(summary, pd.DataFrame)
    assert np.all(summary['r_hat'] < 1.01)
    assert np.all(summary[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    # mcse for ess_mean = 1000
    assert summary['mean']['mscale'] == pytest.approx(1.107023, abs=3 * 0.009261)
    assert summary['mean']['lscale'] == pytest.approx(0.146614, abs=3 * 0.001074)
    assert summary['mean']['sigv'] == pytest.approx(0.096477, abs=3 * 0.000515)
    assert summary['mean']['lp_'] == pytest.approx(2.919439, abs=3 * 0.038186)

    xm, xsd = reg.posterior_state_distribution(
        trace=fit.posterior, df=df, outputs='y', smooth=True, n_cpu=n_cpu
    )
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    assert xm.shape == (4000, len(df), reg.ss.nx)
    assert xsd.shape == (4000, len(df), reg.ss.nx)
    assert np.mean(np.mean((df['y'].values - xm[:, :, 0]) ** 2, axis=1) ** 0.5) == pytest.approx(
        5.839e-2, abs=1e-2
    )

    ym, ysd = reg.posterior_predictive(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    assert ym.shape == (4000, len(df))
    assert ysd.shape == (4000, len(df))
    assert np.mean(np.mean((df['y'].values - ym) ** 2, axis=1) ** 0.5) == pytest.approx(
        3.728e-2, abs=1e-2
    )

    pw_loglik = reg.pointwise_log_likelihood(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(pw_loglik, dict)
    assert pw_loglik['log_likelihood'].shape == (4, 1000, len(df))
    # 0.026 ~ pw_loglik['log_likelihood'].sum(axis=2).std() / np.sqrt(1000)
    assert pw_loglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(-1.394, abs=3.256e-2)
Example #16
def bms(L, hdi_prob=0.95, **sample_kwargs):
    """This function computes the exceedance probabilities (xp)
    and expected relative frequencies (r) from an array of log-evidences.

    Args:
        L (numpy.ndarray): Array of model log-evidences (higher is better fit).
            Array shape should be (K models; N subjects)

        hdi_prob (float): Probability mass of the highest density interval
            reported by az.summary.

        **sample_kwargs: Additional arguments to the pymc.sample function.
            Currently `cores=1` seems to be necessary.

    Returns:
        dict: Dictionary with values xp and r.

    Reference:
        Stephan, K. E., Penny, W. D., Daunizeau, J., Moran, R. J., & Friston, K. J. (2009). Bayesian model selection for group studies. Neuroimage, 46(4), 1004-1017.
    """

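    # K candidate models, N subjects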
    K, N = L.shape

    with pm.Model() as bms:

        def lookup_L(L, N):
            """This function looks up the log-evidences for all N subjects,
            given the current model labels m.
            """
            return L[tt.cast(m, dtype="int32"),
                     tt.cast(tt.arange(N), dtype="int32")]

        # Priors
        alpha = pm.Uniform("alpha", 0, N, shape=K, testval=np.ones(K))

        # Model
        r = pm.Dirichlet("r", a=alpha, testval=np.ones(K) / K)
        m = pm.Categorical("m", p=r, shape=N, testval=0)

        # Look up log evidence
        ll = pm.DensityDist("ll", logp=lookup_L, observed=dict(L=L, N=N))

        # Sample
        inferencedata = pm.sample(return_inferencedata=True, **sample_kwargs)

    # Build results
    result = {}
    result["summary"] = az.summary(inferencedata,
                                   hdi_prob=hdi_prob,
                                   var_names=["alpha", "r"])
    result["xp"] = np.array([
        np.mean(inferencedata.posterior["r"].data[:, :, k] ==
                inferencedata.posterior["r"].data.max(axis=-1))
        for k in range(K)
    ])
    r_unscaled = np.array([
        np.mean(inferencedata.posterior["r"].data[:, :, k]) for k in range(K)
    ])
    result["r"] = r_unscaled / r_unscaled.sum()

    return result
Example #17
def get_summary(RB_model, trace, hdi_prob=.94, kind='all'):
    with RB_model:
        #  (hdi_prob=.94 is default)
        az_summary = az.summary(trace,
                                round_to=4,
                                hdi_prob=hdi_prob,
                                kind=kind)

    return az_summary
Example #18
    def data_summary(self, printout=True):
        """

        """
        #if self.summary is None:
        self.summary_data = az.summary(self.trace, var_names=["vals"], filter_vars="like", round_to=2)
        if printout:
            print(self.summary_data)
        return self.summary_data
Example #19
    def summary(self):
        """Return a summary of the sample statistics."""
        try:
            summary = az.summary(self.samples, credible_interval=0.9)
        except ModuleNotFoundError:
            print("calculating ess relies on arviz and arviz is not installed")
            summary = None
        return summary
Example #20
def plot_ppc_and_score(trace, data, ax=None, title='PPC', paras=None):

    # Sample PPC
    ppc_trace = pm.sample_posterior_predictive(trace=trace, var_names=['y'])

    # Calculate LOO score
    loo = az.loo(trace).loo
    loo_text = "LOO = %.2f"%loo

    # Aggregate binary responses
    new_trace = []
    for soa in sorted(set((data.SOA_IN_FRAMES))):
        new_trace.append(ppc_trace['y'][:,(data.SOA_IN_FRAMES==soa) & 
                                        (data.PROBE_SALIENT==0)].mean(axis=1))
        new_trace.append(ppc_trace['y'][:,(data.SOA_IN_FRAMES==soa) & 
                                        (data.PROBE_SALIENT==1)].mean(axis=1))
    ppc_trace = {'y': np.array(new_trace).T}
        
    # Prepare axes if none provided
    if ax is None:
        f, ax = plt.subplots()

    # Get SOAs and condition mask from data
    SOAs = sorted(set(data['SOA_IN_MS']))
    cond = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT'])['PROBE_SALIENT'].min().values

    # Plot
    az.plot_hdi(y=ppc_trace['y'][:,cond==0],x=SOAs, color='k', ax=ax, 
                hdi_prob=0.95, fill_kwargs={'alpha' : 0.23})  
    az.plot_hdi(y=ppc_trace['y'][:,cond==1],x=SOAs, color='g', ax=ax, 
                hdi_prob=0.95, fill_kwargs={'alpha' : 0.23})  
    ax.plot(SOAs, np.mean(ppc_trace['y'][:,cond==0],axis=0), color='k')  
    ax.plot(SOAs, np.mean(ppc_trace['y'][:,cond==1],axis=0), color='g')  
    pf_mean = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).mean().PROBE_FIRST_RESPONSE
    pf_count = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).sum().PROBE_FIRST_RESPONSE
    pf_obs =  data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).count().PROBE_FIRST_RESPONSE
    pf_ci = abs(np.array(prop_ci(pf_count.values, pf_obs.values)) - pf_mean.values)

    ax.plot(SOAs, pf_mean.values[::2], 'k.')   
    ax.errorbar(np.array(SOAs)-0.5, pf_mean.values[::2],
                pf_ci[:,::2], fmt='none', color='k', alpha=0.5)
    ax.plot(SOAs, pf_mean.values[1::2], 'g.')   
    ax.errorbar(np.array(SOAs)+0.5, pf_mean.values[1::2],
                pf_ci[:,1::2], fmt='none', color='g', alpha=0.5)
    ax.axvline(0, linestyle='dashed')
    ax.axhline(0.5, linestyle='dashed')
    ax.text(-20,0, loo_text)

    if paras is not None:
        for i, varname in enumerate(paras):
            stats = az.summary(trace, var_names=[varname], hdi_prob=.95)  
            for j, s in enumerate(stats['mean']):
                text = r'$' + varname + r'$: %.2f [%.2f, %.2f]'
                text = text%(s, stats['hdi_2.5%'][j], stats['hdi_97.5%'][j])
                posx, posy = .1 + .5 - (1 - j) * .5, 0.95 - (.05*i) - ((1-j)*.5)
                ax.text(posx, posy, text, transform = ax.transAxes, color=['k','g'][j])
    ax.set_title(title)
Example #21
    def summarise(self):
        """ Summarise the results of the model

        Parameters
        ----------
        None
        """
        self.summary = az.summary(self.trace, var_names=["~chol"], round_to=2)
        print(self.summary)
        return self.summary
Example #22
def compute_and_save_summary(output_dir, var_names, traces, **kwargs):
    summary = arviz.summary(traces, var_names=var_names)
    summary_dict = summary.to_dict()
    summary_dict.update(kwargs)
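    # keep the final per-chain value of any *_calls counters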
    for key, value in traces.items():
        if key.endswith("_calls"):
            summary_dict["per_chain_" + key] = [int(v[-1]) for v in value]
    with open(os.path.join(output_dir, "summary.json"), mode="w") as f:
        json.dump(summary_dict, f, ensure_ascii=False, indent=2)
    return summary, summary_dict
Example #23
    def plot_traces(self,
                    burnin: int = 200,
                    show_plot: bool = False) -> Optional[plt]:
        """
        Convenience function to plot the traces with overlaid means and values.
        :param burnin: the number of initial steps to discard, so that the retained samples are representative
               of the distributions being approximated and the random starting point does not skew the results.
        :param show_plot: whether to display the plot. Default: False.
        :return: plt: the updated matplotlib.pyplot status
        """
        if self.trace is None:
            logger.warning(
                "trace has not yet been created. Call find_map_and_sample before attempting to plot the trace"
            )
            return None
        else:
            with warnings.catch_warnings():
                warnings.simplefilter(
                    'ignore',
                    category=FutureWarning)  #  disables a range of warnings
                summary = az.summary(self.trace[burnin:])
                ax = az.plot_trace(
                    self.trace[burnin:],
                    figsize=(12, len(self.trace.varnames) * 1.5),
                    lines={k: v['mean'] for k, v in summary.iterrows()})

                for i, mn in enumerate(summary['mean']):
                    ax[i, 0].annotate('{:.2f}'.format(mn),
                                      xy=(mn, 0),
                                      xycoords='data',
                                      xytext=(5, 10),
                                      textcoords='offset points',
                                      rotation=90,
                                      va='bottom',
                                      fontsize='large',
                                      color='#AA0022')
                if show_plot:
                    plt.show()
                return plt
Example #24
def summary(trace, burn_in=0):
    """Summary of the random variables in `trace`.

    The `burn_in` period discards the initial, noisy samples.
    """
    num_samples = count_samples(trace)  # count the samples in trace
    if burn_in > 0:  # if there is burn-in period, filter out those samples
        trace = get_last_n_from_trace(trace, num_samples - burn_in)
    trace, _ = disentangle_trace(trace)  # get disentangle_trace for summary
    return az.summary(trace)  # use arviz's summary function
Example #25
def infer_robust_model():
    with pm.Model() as model_0:
        α = pm.Normal('α', mu=0, sd=10)
        β = pm.Normal('β', mu=0, sd=10)

        μ = α + pm.math.dot(x_c, β)
        θ = pm.Deterministic('θ', pm.math.sigmoid(μ))
        bd = pm.Deterministic('bd', -α / β)  # decision boundary

        #yl = pm.Bernoulli('yl', p=θ, observed=y_0)
        π = pm.Beta('π', 1., 1.)  # probability of contamination
        p = π * 0.5 + (1 - π) * θ  # true prob or 0.5
        yl = pm.Bernoulli('yl', p=p, observed=y_0)

        trace = pm.sample(1000, cores=1, chains=2)

    varnames = ['α', 'β', 'bd', 'π']
    az.summary(trace, varnames)
    return trace
Example #26
def crude_mixedMLbayse(df_merged,
                       x_feature,
                       y_feature,
                       covars='False',
                       logit=False):

    #TODO: Replace covars variable with actual selection of individual features

    df_merged = df_merged.replace(-9, np.nan).replace('-9', np.nan).replace(
        999, np.nan).replace(888, np.nan)

    if covars == 'False':

        data = df_merged[[x_feature, y_feature,
                          'CohortType']].dropna(how='any', axis='rows')

        fit_string = y_feature + '~' + x_feature

    if covars == 'True':

        data = add_confound(df_merged, x_feature, y_feature)

        ## create the model string
        fit_string = y_feature + '~'

        cnt = 0
        ## filter out the target, at-birth, and reference dummy variables from the model
        excluded = {'birthWt', 'Outcome_weeks', 'Outcome', 'PIN_Patient', 'SGA', 'LGA',
                    'birthLen', 'CohortType', 'race', 'race_1', 'smoking', 'smoking_3',
                    'education_5', 'education'}
        for x in data.columns:

            #data.drop(['education'], inplace = True, axis = 0)

            if x not in excluded:

                if cnt == 0:
                    fit_string += ' ' + x + ' '
                else:
                    fit_string += ' + ' + x + ' '
                cnt += 1

    print('mixedML string:')
    print(fit_string)
    fit_string += '+ (1|CohortType)'
    if not logit:
        model = bmb.Model(data)
        results = model.fit(fit_string)
    else:
        model = bmb.Model(data)
        results = model.fit(fit_string, family='bernoulli', link='logit')

    ## mixed linear model with group variable = CohortType
    mdf = az.summary(results)
    return mdf
Example #27
def az_v_sigma2_plot(stan_fit, var_list=['v', 'sigma2']):
    """
        Function to demonstrate pystan v convergence result through R_hat table, autocorrelation (3 chians), and trace plot
        """

    #        print(az.summary(stan_fit, var_names=["v","sigma2",'W'], filter_vars="like"))
    print(az.summary(stan_fit, var_names=var_list + ['W']))
    #        az.plot_trace(stan_fit, var_names=['v','sigma2'], filter_vars="like")
    az.plot_trace(stan_fit, var_names=var_list)
    az.plot_autocorr(stan_fit, var_names=var_list)

    az.plot_pair(stan_fit, var_names=var_list, divergences=True)
Example #28
    def summary(self):
        """Return summary statistics of posterior parameter samples.

        Default statistics are: ``mean``, ``sd``, ``hdi_3%``, ``hdi_97%``,
        ``mcse_mean``, ``mcse_sd``, ``ess_bulk``, ``ess_tail``, and ``r_hat``.
        ``r_hat`` is only computed for traces with 2 or more chains.

        Returns
        -------
        pandas.DataFrame
            A dataframe of the summary.
        """
        return az.summary(self.data)
Example #29
def toy_model(v_samp,
              z_samp,
              logp_prior,
              size=500,
              samples=50,
              steps=1000,
              tune=1000,
              a_true=1.2,
              b_true=-0.5,
              width_true=0.05,
              extratext='_true'):
    '''The pymc3 linear model z(v) = a*v + b with a natural width log_width in log space.
    The prior contribution to the likelihood is passed in as logp_prior.'''
    with pm.Model() as model:
        a = pm.Normal("a", mu=0, sigma=10, testval=a_true)
        b = pm.Normal("b", mu=0, sigma=10, testval=b_true)
        log_width = pm.Normal("log_width",
                              mu=np.log(width_true),
                              sigma=2.0,
                              testval=np.log(width_true))

        mu = a * v_samp + b

        # The line has some width: we're calling it a Gaussian in n
        logp_hyper = -0.5 * (z_samp - mu)**2 * pm.math.exp(
            -2 * log_width) - log_width
        # Here we account for the intermediate prior
        logp = logp_hyper - logp_prior

        # Compute the marginalized likelihood
        max_logp = tt.max(logp, axis=1)
        # max_logp = np.zeros(len(logM_samp))
        marg_logp = max_logp + pm.math.log(
            pm.math.sum(pm.math.exp(logp - max_logp[:, None]), axis=1))
        pm.Potential('marg_logp', marg_logp)

        trace = pm.sample(draws=steps,
                          tune=tune,
                          target_accept=0.9,
                          init='adapt_full',
                          return_inferencedata=False)
        # az.plot_trace(trace)
        print(az.summary(trace, round_to=2))
        print(a_true, b_true, np.log(width_true))
        corner.corner(pm.trace_to_dataframe(trace),
                      truths=[a_true] + [b_true] +
                      [np.log(width_true)])  # Corner plot!
        plt.savefig("PriorToy/Corner_N1000_vfcomplex_prior_samp_mixed%s.png" %
                    (extratext),
                    bbox_inches='tight',
                    dpi=150)
    return
Example #30
def poisson_prog():
    basic_model = pm.Model()
    with basic_model:
        x1 = pm.Poisson('x1', mu=lambda_b)
        x2 = pm.Poisson('x2', mu=lambda_b)
        u = pm.Poisson('u', mu=lambda_u, observed=x1 + x2)

    with basic_model:
        trace = pm.sample(5000)

    # the two posterior means are numerically slightly different, so we average them
    pois_mean = np.mean(az.summary(trace)["mean"])
    return pois_mean