Exemplo n.º 1
0
def main():
    """Fit a Beta-Binomial model with Metropolis sampling and run the
    standard PyMC3 convergence diagnostics on the resulting trace.

    NOTE(review): relies on the legacy PyMC3 API (pm.traceplot,
    pm.df_summary, pm.gelman_rubin) -- confirm the installed version.
    """

    with pm.Model() as model:
        # Beta(0.5, 3) is a strong prior: its mass is pulled towards 0
        # rather than towards 1.
        prior = pm.Beta('prior', 0.5, 3)

        # Likelihood: 50 successes observed out of n=100 trials.
        output = pm.Binomial('output', n=100, observed=50, p=prior)

        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
        pm.traceplot(trace)

    pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True,
                      rope=[0.45, 0.55])  # ROPE is an interval that you define
    # The ROPE is the region of values you expect. You can check
    # whether the ROPE falls inside the HPD or not. If it does, our
    # expected value is within the HPD, and increasing the sample
    # size may make our mean estimate better.

    # Gelman-Rubin statistic (R-hat): values near 1 suggest convergence.
    pm.gelman_rubin(trace)

    # Forest plot of the credible interval for 'prior'.
    pm.forestplot(trace, varnames=['prior'])

    # summary [look at mc error here. This is the std error, should be low]
    pm.df_summary(trace)

    # Autocorrelation plot: high autocorrelation indicates poor mixing.
    pm.autocorrplot(trace)

    # Effective sample size for the 'prior' variable.
    pm.effective_n(trace)['prior']
def plot_traces_pymc(trcs, varnames=None):
    '''Convenience fn: plot traces with overlaid means and values.

    Handles nested traces for hierarchical models: vector-valued
    variables appear in the summary as ``name__0``, ``name__1``, ...,
    and only scalars (no ``__k`` suffix) get a text label.

    Parameters
    ----------
    trcs : pymc3 MultiTrace
        The sampled trace to plot.
    varnames : list of str or None, optional
        Variables to plot; None (default) plots every variable.
    '''

    # One panel row per plotted variable.
    nrows = len(trcs.varnames)
    if varnames is not None:
        nrows = len(varnames)

    # Traceplot with a vertical line at each variable's posterior mean.
    ax = pm.traceplot(trcs, varnames=varnames, figsize=(12, nrows*1.4),
                      lines={k: v['mean'] for k, v in 
                                pm.df_summary(trcs,varnames=varnames).iterrows()},
                      combined=True)

    # don't label the nested traces (a bit clumsy this: consider tidying)
    dfmns = pm.df_summary(trcs, varnames=varnames)['mean'].reset_index()
    dfmns.rename(columns={'index':'featval'}, inplace=True)
    # Drop rows for vector elements other than index 0 (names like x__1, x__12).
    dfmns = dfmns.loc[dfmns['featval'].apply(lambda x: re.search('__[1-9]{1,}', x) is None)]
    # 'draw' is True only for scalar variables (no trailing __0 in the name).
    dfmns['draw'] = dfmns['featval'].apply(lambda x: re.search('__0{1}$', x) is None)
    dfmns['pos'] = np.arange(dfmns.shape[0])
    dfmns.set_index('pos', inplace=True)

    # Annotate each labelled panel with its mean, rotated beside the line.
    for i, r in dfmns.iterrows():
        if r['draw']:
            ax[i,0].annotate('{:.2f}'.format(r['mean']), xy=(r['mean'],0)
                    ,xycoords='data', xytext=(5,10)
                    ,textcoords='offset points', rotation=90
                    ,va='bottom', fontsize='large', color='#AA0022') 
Exemplo n.º 3
0
def main():
    """OLS vs Bayesian GLM demo: fit the same linear model to synthetic
    data with statsmodels OLS and with PyMC3 NUTS, then compare fits.
    """

    df = generateData(a=5, b=2, latent_error_y=30)
    # True parameters beta are [5, 2]
    # Latent noise parameter is 30

    g = sns.lmplot(x='x', y='y', data=df, fit_reg=True, size=6,
                   scatter_kws={'alpha':0.8, 's':60})


    # Encode model specification as design matrices (patsy)
    fml = 'y ~ 1 + x'
    (mx_en, mx_ex) = pt.dmatrices(fml, df, return_type='dataframe', NA_action='raise')

    # Fit OLS model as the frequentist baseline
    smfit = sm.OLS(endog=mx_en,exog=mx_ex, hasconst=True).fit()
    print(smfit.summary())

    # Model specifications are wrapped in a with-statement
    with pm.Model() as mdl_ols:

        # Use GLM submodule for simplified patsy-like model spec
        # Use Normal family - normal distribution likelihood, HalfCauchy distribution priors
        pm.glm.GLM.from_formula('y ~ 1 + x', df, family=pm.glm.families.Normal())

        # Find MAP (maximum a posteriori) estimate using Powell optimization;
        # this is the mode of the posterior, used as the sampler start point
        start_MAP = pm.find_MAP(fmin=fmin_powell, disp=True)

        # Take samples using NUTS from the joint probability distribution
        trc_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS())


    # Plot the last 1000 draws with each posterior mean overlaid as a line
    ax = pm.traceplot(trc_ols[-1000:], figsize=(12, len(trc_ols.varnames)*1.5),
                      lines = {k: v['mean'] for k, v in pm.df_summary(trc_ols[-1000:]).iterrows()})

    print(pm.df_summary(trc_ols[-1000:]))

    # x-axis limits padded by 10% of the data range on each side
    xlims = (df['x'].min() - np.ptp(df['x']) / 10
             , df['x'].max() + np.ptp(df['x']) / 10)

    plotPosteriorCr(mdl_ols, trc_ols, df, xlims)



    plt.show()

    print('x')
Exemplo n.º 4
0
def fitFlat(x, y):
    """Fit the three-parameter curve ``Model`` to (x, y) using flat
    (improper uniform) priors and return a posterior summary table.
    """
    with pm.Model():
        # Improper flat priors on every unknown parameter.
        coef_a = pm.Flat('a')
        coef_b = pm.Flat('b')
        coef_c = pm.Flat('c')

        # Expected outcome for the given inputs.
        expected = Model(x, coef_a, coef_b, coef_c)

        # Gaussian likelihood with a fixed, tight observation noise.
        pm.Normal('Like', mu=expected, sd=0.01 * np.ones_like(y), observed=y)

        # Draw posterior samples with NUTS, initialised via ADVI.
        posterior = pm.sample(1000,
                              progressbar=False,
                              init='ADVI',
                              step=pm.NUTS(),
                              njobs=1)

        # Summary statistics of the posterior draws.
        return pm.df_summary(posterior)
Exemplo n.º 5
0
def fit(x, y, lowerVec, upperVec):
    """Fit ``Model`` to (x, y) with independent uniform priors on the
    three parameters, bounded elementwise by lowerVec/upperVec, and
    return a posterior summary table.
    """
    low_a, low_b, low_c = lowerVec
    up_a, up_b, up_c = upperVec

    with pm.Model():
        # Box-constrained uniform priors for the unknown parameters.
        par_a = pm.Uniform('a', lower=low_a, upper=up_a)
        par_b = pm.Uniform('b', lower=low_b, upper=up_b)
        par_c = pm.Uniform('c', lower=low_c, upper=up_c)

        # Expected value of the outcome.
        expected = Model(x, par_a, par_b, par_c)

        # Gaussian likelihood with fixed observation noise of 0.1.
        pm.Normal('Like', mu=expected, sd=0.1 * np.ones_like(y), observed=y)

        # NUTS sampling, initialised via ADVI.
        posterior = pm.sample(1000,
                              progressbar=False,
                              init='ADVI',
                              step=pm.NUTS(),
                              njobs=1)

        # Summary statistics of the posterior draws.
        return pm.df_summary(posterior)
Exemplo n.º 6
0
def fit(x, y, meanVec, stdVec, errors):
    """Fit ``Model`` to (x, y) with independent normal priors.

    Parameters
    ----------
    x, y : array-like data to fit.
    meanVec, stdVec : 3-sequences with the prior mean/std of (a, b, c).
    errors : array-like per-point observation noise (standard deviation).

    Returns
    -------
    pandas.DataFrame with posterior summary statistics.
    """
    aMu, bMu, cMu = meanVec
    aStd, bStd, cStd = stdVec

    model = pm.Model()

    if False:  # debugging aid: dump the raw data as a table
        df = pd.DataFrame(np.transpose([x, y, errors]),
                          columns=['x', 'y', 'error'])
        print(df)  # fixed: Python 2 `print df` is a SyntaxError on Python 3

    with model:
        # Priors for unknown model parameters
        a = pm.Normal('a', mu=aMu, sd=aStd)
        b = pm.Normal('b', mu=bMu, sd=bStd)
        c = pm.Normal('c', mu=cMu, sd=cStd)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)

        # do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)

        # give summary
        summary = pm.df_summary(trace)

        return summary
Exemplo n.º 7
0
def fit(x, y, errors, signA):
    """Fit ``Model`` with uniform priors; the sign of parameter `a` is
    constrained by ``signA`` (+1 -> [0, 0.1], -1 -> [-0.1, 0]).

    Parameters
    ----------
    x, y : array-like data to fit.
    errors : array-like per-point observation noise (standard deviation).
    signA : float, -1.0 selects the negative range for `a`.

    Returns
    -------
    pandas.DataFrame with posterior summary statistics.
    """
    model = pm.Model()

    if False:  # debugging aid: dump the raw data as a table
        df = pd.DataFrame(np.transpose([x, y, errors]),
                          columns=['x', 'y', 'error'])
        print(df)  # fixed: Python 2 `print df` is a SyntaxError on Python 3

    with model:
        # Priors for unknown model parameters; bound `a` on the side
        # selected by signA.
        LowerA = 0.
        UpperA = 0.1
        if signA == -1.0:
            UpperA = 0.
            LowerA = -0.1

        a = pm.Uniform('a', lower=LowerA, upper=UpperA)
        b = pm.Uniform('b', lower=0., upper=1.0)
        c = pm.Uniform('c', lower=0., upper=1.0)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)

        # do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)

        # give summary
        summary = pm.df_summary(trace)

        return summary
    def plot_traces_pymc(self, trcs, varnames=None):
        """Convenience method: plot traces with each variable's posterior
        mean drawn as a vertical line and annotated on the left column.

        Code adapted from:
            https://github.com/jonsedar/pymc3_vs_pystan/blob/master/convenience_functions.py
        """
        # One row of panels per plotted variable.
        if varnames is None:
            n_panels = len(trcs.varnames)
        else:
            n_panels = len(varnames)

        # Posterior mean per variable, reused for both lines and labels.
        post_means = pm.df_summary(trcs, varnames=varnames)['mean']

        axes = pm.traceplot(trcs, varnames=varnames,
                            figsize=(12, n_panels * 1.4),
                            lines=post_means.to_dict())

        for row, mean_val in enumerate(post_means):
            axes[row, 0].annotate('{:.2f}'.format(mean_val),
                                  xy=(mean_val, 0), xycoords='data',
                                  xytext=(5, 10),
                                  textcoords='offset points',
                                  rotation=90, va='bottom',
                                  fontsize='large', color='#AA0022')
def plot_traces(trcs, varnames=None):
    '''
    Convenience fn: plot traces with overlaid means and values
    INPUT: pymc trace
    OUTPUT: display of model coefficient distributions

    Vector-valued (hierarchical) variables appear in the summary as
    ``name__0``, ``name__1``, ...; only scalars get a text label.
    '''

    # One panel row per plotted variable.
    nrows = len(trcs.varnames)
    if varnames is not None:
        nrows = len(varnames)

    # Traceplot with a vertical line at each variable's posterior mean.
    ax = pm.traceplot(
        trcs,
        varnames=varnames,
        figsize=(12, nrows * 1.4),
        lines={
            k: v['mean']
            for k, v in pm.df_summary(trcs, varnames=varnames).iterrows()
        },
        combined=True)

    # don't label the nested traces (a bit clumsy this: consider tidying)
    dfmns = pm.df_summary(trcs, varnames=varnames)['mean'].reset_index()
    dfmns.rename(columns={'index': 'featval'}, inplace=True)
    # Keep only names without a non-zero vector suffix (x__1, x__23, ...).
    dfmns = dfmns.loc[dfmns['featval'].apply(
        lambda x: re.search('__[1-9]{1,}', x) is None)]
    # 'draw' marks scalar variables (no trailing __0 in the name).
    dfmns['draw'] = dfmns['featval'].apply(
        lambda x: re.search('__0{1}$', x) is None)
    dfmns['pos'] = np.arange(dfmns.shape[0])
    dfmns.set_index('pos', inplace=True)

    # Annotate the mean value next to its vertical line on each panel.
    for i, r in dfmns.iterrows():
        if r['draw']:
            ax[i, 0].annotate('{:.2f}'.format(r['mean']),
                              xy=(r['mean'], 0),
                              xycoords='data',
                              xytext=(5, 10),
                              textcoords='offset points',
                              rotation=90,
                              va='bottom',
                              fontsize='large',
                              color='#AA0022')
Exemplo n.º 10
0
    def summary(self):
        """Summarise the stored trace with custom statistics: mean, sd,
        and a fixed set of percentiles (1, 5, 25, 50, 75, 95, 99).
        """
        def trace_mean(samples):
            # The Series name becomes the summary column name.
            return pd.Series(np.mean(samples, 0), name='mean')

        def trace_sd(samples):
            return pd.Series(np.std(samples, 0), name='sd')

        def trace_quantiles(samples):
            return pd.DataFrame(
                pm.quantiles(samples, [1, 5, 25, 50, 75, 95, 99]))

        return pm.df_summary(
            self.trace,
            stat_funcs=[trace_mean, trace_sd, trace_quantiles])
Exemplo n.º 11
0
def plot_trace(trace):
    """Traceplot with a vertical line at each variable's posterior mean,
    plus a rotated text label showing that mean value.
    """
    # Posterior mean per variable, reused for both lines and labels.
    summary_means = pm.df_summary(trace)['mean']

    ax = pm.traceplot(trace,
                      figsize=(14, len(trace.varnames) * 1.8),
                      lines=summary_means.to_dict())

    matplotlib.rcParams['font.size'] = 16

    # Label each left-column panel with its mean value.
    for row, mean_val in enumerate(summary_means):
        ax[row, 0].annotate('{:0.2f}'.format(mean_val),
                            xy=(mean_val, 0),
                            xycoords='data',
                            size=8,
                            xytext=(-18, 18),
                            textcoords='offset points',
                            rotation=90,
                            va='bottom',
                            fontsize='large',
                            color='red')
Exemplo n.º 12
0
    def _nuts_inference(self, inference_args):
        """
        Draw posterior samples with the NUTS sampler and cache both the
        trace and its summary on the instance.

        Parameters
        ----------
        inference_args : dict, arguments to be passed to the PyMC3 sample method. See PyMC3 doc for permissible values.
        """
        with self.cached_model:
            sampled = pm.sample(step=pm.NUTS(), **inference_args)

        self.trace = sampled
        self.summary = pm.df_summary(self.trace)
Exemplo n.º 13
0
Arquivo: dic.py Projeto: srodney/snsed
def plot_traces(traces, retain=1000):
    '''
	Convenience function:
	Plot the last ``retain`` samples of each trace with the posterior
	mean overlaid as a line and annotated on the left-hand axis
	'''
    # Only summarise/plot the retained tail of the trace.
    kept = traces[-retain:]
    mean_by_var = pm.df_summary(kept)['mean']

    ax = pm.traceplot(kept,
                      figsize=(12, len(traces.varnames) * 1.5),
                      lines=mean_by_var.to_dict())

    for row, mean_val in enumerate(mean_by_var):
        ax[row, 0].annotate('{:.2f}'.format(mean_val),
                            xy=(mean_val, 0),
                            xycoords='data',
                            xytext=(5, 10),
                            textcoords='offset points',
                            rotation=90,
                            va='bottom',
                            fontsize='large',
                            color='#AA0022')
Exemplo n.º 14
0
    def _advi_inference(self, inference_args):
        """
        Fit the model with ADVI, then draw posterior samples from the
        variational approximation; cache approx, trace, summary and the
        ELBO history on the instance.

        Parameters
        ----------
        inference_args : dict, arguments to be passed to the PyMC3 fit method. See PyMC3 doc for permissible values.
        """
        with self.cached_model:
            advi = pm.ADVI()
            fitted = pm.fit(method=advi, **inference_args)

        self.approx = fitted
        self.trace = fitted.sample(draws=self.default_advi_sample_draws)
        self.summary = pm.df_summary(self.trace)
        self.advi_hist = advi.hist  # ELBO history recorded during fitting
Exemplo n.º 15
0
    def summary(self, varnames=None):
        """Generate summary statistics for model as Pandas dataframe.

           Parameters
           ----------
           varnames : iterable of str or None, optional
               The model variables to generate summaries for (default None).
               If None, defaults to all variables.

          Returns
          -------
          summary : pandas.DataFrame
              The dataframe with summary statistics.
        """
        # Fixed: `varnames or self.model_variables` also replaced an
        # explicitly-passed *empty* iterable with all variables; per the
        # docstring only None should trigger the default.
        if varnames is None:
            varnames = self.model_variables
        return pm.df_summary(self.trace, varnames=varnames)
def predict_test(trc, X_test, X_train, hyper=0):
    '''
    Calculate mean prediction values for test data using mean coefficient values
    INPUT: pymc trace, df test, df train, number of hyperpriors
    OUTPUT: np array of mean prediction values for test data
    '''
    coeff = pm.df_summary(trc[-500:])
    X_test_std = standardize_2sd_test(X_test[fts_num], X_train[fts_num])
    # Slope coefficients are shared across reservoirs; only the intercept
    # row differs by Reservoir_Code. (`.iloc` replaces the removed
    # pandas `.ix`, which resolved positionally here.)
    slopes = coeff.iloc[hyper + 3:-1 - hyper, 0].values
    preds = []
    for i in range(len(X_test)):
        code = X_test.iloc[i, :]['Reservoir_Code']
        # Fixed: the original used `if / if / else`, so for code == 0 the
        # final `else` also ran and overwrote the prediction with the
        # code-2 intercept. The branches must be chained with elif.
        if code == 0:
            intercept = coeff.iloc[0 + hyper, 0]
        elif code == 1:
            intercept = coeff.iloc[1 + hyper, 0]
        else:
            intercept = coeff.iloc[2 + hyper, 0]
        preds.append(intercept + np.dot(X_test_std.iloc[i, :].values, slopes))
    return np.array(preds)
Exemplo n.º 17
0
# plt.show()
print(pm.dic(trace2, unpooled_model))  # DIC model-comparison score

# x_shared.set_value([6, 6, 7])
# x_shared1.set_value([20, 40, 40])
# y_shared.set_value([0, 0, 0])
# Replace the first six year values with 2..7 before re-sampling.
elec_year1 = np.delete(elec_year, np.s_[:6])
elec_year1 = np.append([2, 3, 4, 5, 6, 7], elec_year1)
x_shared.set_value(elec_year1)
with unpooled_model:
    trace3 = pm.sample(3000)
    post_pred = pm.sample_ppc(trace3)  # posterior predictive draws
abc = post_pred['Observed'].mean(axis=0)
print(abc)

print(pm.df_summary(trace2, varnames2))
# Read the posterior credible intervals; .mean() on a 1-row slice
# converts it to a plain numpy scalar for easier computation.
aaa = pm.df_summary(trace2, varnames2)
bbb = pd.DataFrame(aaa)
hpd2_5 = bbb['hpd_2.5']
hpd97_5 = bbb['hpd_97.5']

# NOTE(review): rows 0..2 presumably correspond to beta, early_rate,
# late_rate (matching the variable names below) -- verify varnames2.
hpd25_beta = hpd2_5[:1].mean()
hpd975_beta = hpd97_5[:1].mean()

hpd25_early_rate = hpd2_5[1:2].mean()
hpd975_early_rate = hpd97_5[1:2].mean()

hpd25_late_rate = hpd2_5[2:3].mean()
hpd975_late_rate = hpd97_5[2:3].mean()
Exemplo n.º 18
0
    # start = pm.find_MAP()
    step1 = pm.Slice([tau1, a_0])  # Slice sampler for tau1 and a_0
    trace2 = pm.sample(1000, tune=500, step=step1)
chain2 = trace2
varnames1 = [ 'a0', 'δ', 'sigma', 'tau1']
pm.plot_posterior(chain2, varnames1, kde_plot=True)
plt.show()

pm.energyplot(chain2)  # Energy plot: the more the two curves overlap, the better the model
plt.show()
# Plot the autocorrelation curves
varnames1 = [ 'a0', 'δ', 'sigma', 'tau1']
pm.autocorrplot(chain2, varnames1)
plt.show()
print(pm.df_summary(chain2, varnames1))

print(pm.waic(trace=trace2, model=partial_model))  # WAIC model score
# ======================================================================
# Posterior analysis:
# compare the posterior predictive draws against the original data
#
# ======================================================================
# Bx_.set_value([7,8] , [5,6])
with partial_model:
    pp_trace = pm.sample_ppc(trace2, 1000)

# pp_trace['Observed'].mean(axis=0)

fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(x_plot, spline(x_plot), c='k', label="True function")
Exemplo n.º 19
0
    xp = elec_year2[ip * 7:(ip + 1) * 7, :]  # original data
    yp = elec_faults2[ip * 7:(ip + 1) * 7, :]
    ax.plot(xp, yp, marker='o', alpha=.8)

    # Reshape the predicted means into 7 rows of 6 yearly values each.
    yipred_yplot = np.array([
        yipred_mean[i * 6:(i + 1) * 6]
        for i in np.arange(7 * ip, (ip + 1) * 7)
    ])
    xipred = np.array([np.arange(6) + 1 for i in np.arange(7)])
    ax.plot(xipred, yipred_yplot[:], 'k+-', color='r')

plt.tight_layout()
plt.show()

# Posterior-mean estimates extracted from the summary table.
varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u']
tmp = pm.df_summary(chain, varnames2)
betaMAP = tmp['mean'][0]
# NOTE(review): the index arithmetic assumes the summary rows are ordered
# beta, beta1[0..], beta2[0..], beta3[0..], beta4[0..], u -- verify.
beta1MAP = tmp['mean'][np.arange(companiesABC) + 1]
beta2MAP = tmp['mean'][np.arange(companiesABC) + 1 * companiesABC + 1]
beta3MAP = tmp['mean'][np.arange(companiesABC) + 2 * companiesABC + 1]
beta4MAP = tmp['mean'][np.arange(companiesABC) + 3 * companiesABC + 1]
uMAP = tmp['mean'][4 * companiesABC + 1]
# am0MAP = tmp['mean'][4*companiesABC+2]
# am1MAP = tmp['mean'][4*companiesABC+3]
# print(am0MAP)
# print(beta1MAP)
# print(tmp)
# print(beta2MAP)
# print(beta3MAP)

ppcsamples = 500
Exemplo n.º 20
0
def model():
    """Build and sample the Dirichlet-Multinomial response model,
    returning the trace and its summary.

    Fixed for Python 3 compatibility: Python 2 `print` statements were
    converted to function calls, and the lazy `map(...)` over
    participants was materialised as a list comprehension (under
    Python 3, `map` returns an iterator, which `tt.concatenate` would
    not receive as the intended list of tensors).
    """
    global data
    alpha_prior = 0.1
    beta_prior = 1.
    alpha_init = np.ones((N_GROUPS, 1))
    noise_init = np.ones((N_GROUPS, 1)) * 1e-2

    parts_ones = np.ones((TOTAL_PARTS))
    data_ones = np.ones(len(data[0]))

    hds = store_hds_old(paren_lst, filt)
    ns = np.sum(data, axis=1)  # total response count per participant row

    smooth = np.ones((TOTAL_PARTS, N_ALGS)) * beta_prior

    # bias in choice of starting parenthesis
    start_p = store_start_p(paren_lst, n=TOTAL_PARTS, lst=["("])
    start_np = 1 - start_p

    init_beta = np.ones(N_ALGS) * beta_prior

    print("Starting MCMC....")
    with pm.Model() as m:
        # Concentration of the participant-level Dirichlet.
        alpha = pm.Exponential('alpha', alpha_prior, shape=1, testval=2)

        # Population-level mixture weights over algorithms.
        beta = pm.Dirichlet(
            'beta',
            init_beta,
            shape=N_ALGS)

        # Per-participant mixture weights, centred on alpha * beta.
        theta = pm.Dirichlet('theta',
                             alpha * beta,
                             shape=(TOTAL_PARTS, N_ALGS))

        nw1 = 1
        nw2 = 9

        noise = pm.Beta("noise", nw1, nw2, shape=TOTAL_PARTS, testval=0.05)

        # Mix each participant's algorithm weights with their noise level.
        new_algs = [theta[p].dot(format_algs_theano(hds, noise[p]))
                    for p in np.arange(TOTAL_PARTS)]

        theta_resp = tt.concatenate([new_algs], axis=0)

        bias = pm.Beta("bias", 1, 1, shape=(TOTAL_PARTS, 1))

        # Apply the starting-parenthesis bias and renormalise each row.
        biased_theta_resps = start_p * bias * theta_resp + start_np * (
            1. - bias) * theta_resp
        sum_norm = biased_theta_resps.sum(axis=1).reshape((TOTAL_PARTS, 1))
        biased_theta_resps = biased_theta_resps / sum_norm

        pm.Multinomial('resp',
                       n=ns,
                       p=biased_theta_resps,
                       shape=(TOTAL_PARTS, N_RESPS),
                       observed=data)

        step = pm.NUTS()

        trace = pm.sample(MCMC_STEPS,
                          step=step,
                          tune=BURNIN,
                          target_accept=0.9,
                          thin=MCMC_THIN)
        print_star("Model Finished!")

    # R-hat convergence check only makes sense with multiple chains.
    if MCMC_CHAINS > 1:
        print(pm.gelman_rubin(trace))

    summary = pm.df_summary(trace)

    print(summary)

    return trace, summary
Exemplo n.º 21
0
    # theta = beta + beta1 * elec_year + beta2 * elec_tem1 + beta3 * elec_hPa1 + beta4 * elec_RH1
    theta = beta + beta1 * elec_year1 + beta2 * elec_tem1
    Observed = pm.StudentT("Observed",
                           mu=theta,
                           sd=sigma,
                           nu=nu,
                           observed=elec_faults1)  # observed values

    start = pm.find_MAP()
    # step = pm.Metropolis()
    trace1 = pm.sample(4000, start=start)
chain1 = trace1[1000:]  # discard the first 1000 draws as burn-in
varnames1 = ['beta', 'beta1', 'beta2']
pm.traceplot(chain1, varnames1)
plt.show()
print(pm.df_summary(trace1, varnames1))
# Plot the autocorrelation curves
pm.autocorrplot(chain1)
plt.show()

# Mean / standard deviation of each raw data series.
faults_m = np.mean(elec_faults)
faults_sd = np.std(elec_faults)
year_m = np.mean(elec_year)
year_std = np.std(elec_year)
tem_m = np.mean(elec_tem)
tem_std = np.std(elec_tem)
hPa_m = np.mean(elec_hPa)
hPa_std = np.std(elec_hPa)
RH_m = np.mean(elec_RH)
RH_std = np.std(elec_RH)
Exemplo n.º 22
0
    u = pm.Normal('u', 0, 0.01)

    # Weibull scale: exp of a linear predictor in year, two covariates
    # and a quadratic year term, with coefficients indexed by Num_shared.
    beta_mu = pm.Deterministic('beta_mu', tt.exp(u + beta + \
                                             (beta1[Num_shared] * xs_year + beta2[Num_shared] * xs_char1 +\
                                              beta3[Num_shared] * xs_char2 + beta4[Num_shared] * xs_year * xs_year)))

    Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu, observed=ys_faults)  # observed values
    trace_1 = pm.sample(3000,  init='advi+adapt_diag' )
pm.traceplot(trace_1)
plt.show()

burnin = 2000
chain = trace_1[burnin:]
# get MAP estimate
varnames2 = ['beta', 'beta1', 'beta2', 'beta3','beta4', 'u']
tmp = pm.df_summary(chain, varnames2)
betaMAP = tmp['mean'][0]
# NOTE(review): the index arithmetic assumes the summary rows are ordered
# beta, beta1[0..], beta2[0..], beta3[0..], beta4[0..], u -- verify.
beta1MAP = tmp['mean'][np.arange(companiesABC) + 1]
beta2MAP = tmp['mean'][np.arange(companiesABC) + 1*companiesABC+1]
beta3MAP = tmp['mean'][np.arange(companiesABC) + 2*companiesABC+1]
beta4MAP = tmp['mean'][np.arange(companiesABC) + 3*companiesABC+1]
uMAP = tmp['mean'][4*companiesABC+1]

# Figure for the model-fit comparison plots
ppcsamples = 500
ppcsize = 100
# ppc = defaultdict(list)
burnin = 2000
fig = plt.figure(figsize=(16, 8))
fig.text(0.5, -0.02, 'Test Interval (ms)', ha='center', fontsize=20)
fig.text(-0.02, 0.5, 'Proportion of Long Responses', va='center', rotation='vertical', fontsize=20)
Exemplo n.º 23
0
    ax.text(x=0.7, y=1.2, s="Pr(Lab > Class) = %.3f" % p)
    ax.legend()
    plt.savefig(save_filebase + feature + 'Comparison.pdf')
    plt.close()


# Load previously pickled posterior samples for the three model variants.
sample_directory = "C:/Users/robsc/Documents/GitHub/MultiModalAnalysis/REFLECT/saved_samples/"
with open(sample_directory+"pooled_5000.pkl", 'rb') as buff:
    pool_samps = pickle.load(buff)
with open(sample_directory+"individual_5000.pkl", 'rb') as buff:
    ind_samps = pickle.load(buff)
with open(sample_directory+"hierarchical_5000.pkl", 'rb') as buff:
    hier_samps = pickle.load(buff)

# Summary tables, re-indexed with human-readable coefficient names.
index_list = ["Intercept"] + features + ["Uncertainty"]
ps_df = pm.df_summary(pool_samps)
ps_df.index = index_list
hs_df = pm.df_summary(hier_samps)
in_df = pm.df_summary(ind_samps)

# Select the rows whose variable name mentions 'reflect' from the
# hierarchical and individual summaries, relabelled with index_list.
reflect_indices = [e for e in hs_df.index if 'reflect' in e]
rh_df = hs_df.loc[reflect_indices, :]
rh_df.index = ["%s" % e for e in index_list]
ri_df = in_df.loc[reflect_indices, :]
ri_df.index = ["%s" % e for e in index_list]

# Same selection for rows mentioning 'leads'.
leads_indices = [e for e in hs_df.index if 'leads' in e]
lh_df = hs_df.loc[leads_indices, :]
lh_df.index = ["%s" % e for e in index_list]
li_df = in_df.loc[leads_indices, :]
li_df.index = ["%s" % e for e in index_list]
    # y ~ Normal(m[g] * p, s)
    mu_est = pm.Deterministic("mu_est", T.sum(effects[g] * predictors, 1))
    yd = pm.Normal('y', mu_est, s[g]**-2, observed=y)

    start = pm.find_MAP()
    #h = find_hessian(start)

    # Scale NUTS using the MAP point as starting scaling.
    step = pm.NUTS(model.vars, scaling=start)

with model:
    trace = pm.sample(3000, step, start)

#%%
pm.traceplot(trace)
dftmp = pm.df_summary(trace, varnames=['group_effects'])
print(dftmp['mean'])
# Frequentist cross-check: fit the same data with a mixed-effects model.
import statsmodels.formula.api as smf
# from patsy import dmatrices
import pandas as pd
tbl = pd.DataFrame(predictors, columns=['C1', 'C2', 'C3'])
tbl['group'] = pd.Series(group, dtype="category")
tbl['yd'] = y
md2 = smf.mixedlm("yd ~ -1 + C1 + C2 + C3", tbl, groups=tbl["group"])
mdf2 = md2.fit()
print(mdf2.summary())
#%%
# Ordinary least squares on the group-scaled design matrix.
X = np.tile(group_predictors[group], (1, 3)) * predictors
beta0 = np.linalg.lstsq(X, y)
fitted = np.dot(X, beta0[0])
Exemplo n.º 25
0
        }
        for p in pathways
    }
    y_bmp = {}
    g = {}

    def logp_f(f, b, eps):
        if f in evidence:
            return T.log(1 - math.e**(-1 * b) + epsilon)
        if f in metfrag_evidence:
            a_p = (1.0 / (1 - metfrag_evidence[f])) - 1
            return a_p * T.log(1 - math.e**(-1 * b) + epsilon) - b
        return T.log(eps) - b

    psi = {}
    for feat, pathways in reverse_path_dict.items():
        y_bmp[feat] = sum([bmp[pname][feat] for pname in pathways])
        g[feat] = Bernoulli('g_' + feat, 1 - math.e**(-y_bmp[feat]))
        psi[feat] = pymc3.Potential('psi_' + feat,
                                    logp_f(feat, y_bmp[feat], eps))
if __name__ == '__main__':
    n = 1000
    with model:
        # Draw a short run and a 10x longer run, then compare the two.
        trace = pymc3.sample(n)
        t1 = trace
        print(pymc3.df_summary(trace))
        trace = pymc3.sample(10 * n)
        t2 = trace
        print(pymc3.df_summary(trace))
        print(pymc3.stats.compare([t1, t2]))
Exemplo n.º 26
0
                    sd=std / n_hidden**.5,
                    shape=[n_hidden, K],
                    testval=W1_init)
        b1 = Normal('b1',
                    mu=0,
                    sd=std / n_hidden**.5,
                    shape=[K],
                    testval=b1_init)

    # Building NN likelihood
    h1 = tt.nnet.softplus(tt.dot(X_shared, W0) + b0)
    mu_est = tt.dot(h1, W1) + b1

    # Regression likelihood
    Normal('y_hat', mu=mu_est, sd=std_out, observed=Y_shared)

# Inference
with neural_network:
    # Sample from posterior
    # NOTE(review): pm.advi / pm.sample_vp is the legacy (pre-3.2)
    # variational API -- confirm the installed PyMC3 version.
    v_params = pm.advi(n=n_iter)
    trace = pm.sample_vp(v_params, draws=5000)

print(pm.df_summary(trace))
pm.traceplot(trace)

# Posterior predictive samples
ppc = pm.sample_ppc(trace, samples=500)

pred = ppc['y_hat']
mse = np.mean((pred - Y_train)**2)  # mean squared error vs training targets
print('MC test MSE: ', mse)
Exemplo n.º 27
0
def mixed_effects():
    """Hierarchical negative-binomial model of days-to-first-price-update.

    Observations are grouped by `classes` (FF49 industry by default) with a
    per-class Gamma intercept; every covariate gets a Normal(0, 20) prior.
    The dependent variable is modelled with a NegativeBinomial bounded
    below at 1.  Side effects: writes trace, forest and posterior-predictive
    figures to PNG files.

    Relies on module-level globals: `df` (the data), `pm`, `tt`, `np`,
    `plt`, `preprocessing`, the colour `red`, and the plotting helpers
    `entity_plotA` / `entity_plotB`.
    """

    le = preprocessing.LabelEncoder()
    # Convert categorical variables to integer
    # participants_idx = le.fit_transform(messages['prev_sender'])

    classes = 'FF49_industry'
    # classes = 'underwriter_tier'
    # classes = 'amends'

    print("Grouping by: {}".format(classes))

    FF49_industry = le.fit_transform(df['FF49_industry'])
    # NOTE(review): `le` is refit here, so le.classes_ below reflects
    # `classes` (identical when classes == 'FF49_industry').
    class_idx = le.fit_transform(df[classes])
    n_classes = len(le.classes_)

    NSamples = 50000
    # BUGFIX: floor division.  `NSamples / 10` is a float on Python 3, and
    # the slice `trace[-burn::thin]` below then raises
    # "TypeError: slice indices must be integers".
    burn = NSamples // 10
    thin = 2

    # Display names of the sampled parameters (order matters for the
    # posterior plots further down).
    covariates = [
            'Intercept',
            '#Syndicate Members',
            '#Lead Underwriters',
            'Underwriter Rank',
            # 'FF49 Industry',
            'Amends Down',
            '#S1A Amendments',
            'Share Overhang',
            'log(1+Sales)',
            'log(Proceeds)',
            'CASI',
            # 'media_1st_pricing',
            # 'VC',
            'IPO Market Returns',
            'Industry Returns',
            'BAA Spread',
            ]

    y = df['days_to_first_price_update'].values
    # y = np.ma.masked_values(list(df.days_to_first_price_update), value=-999)

    with pm.Model() as model:

        # Parameters:
        # Per-class intercept (one per group level).
        intercept = pm.Gamma('Intercept', alpha=.1, beta=.1, shape=n_classes)

        # Weakly informative Normal(0, 20) priors for the fixed effects.
        beta_underwriter_syndicate_size = pm.Normal('#Syndicate Members', mu=0, sd=20)
        beta_underwriter_num_leads = pm.Normal('#Lead Underwriters', mu=0, sd=20)
        beta_underwriter_rank_avg = pm.Normal('Underwriter Rank', mu=0, sd=20)
        beta_num_SEC_amendments = pm.Normal('#S1A Amendments', mu=0, sd=20)
        # beta_FF49_industry = pm.Normal('FF49 Industry', mu=0, sd=20)
        beta_amends_down = pm.Normal('Amends Down', mu=0, sd=20)
        beta_share_overhang = pm.Normal('Share Overhang', mu=0, sd=20)
        beta_log_sales = pm.Normal('log(1+Sales)', mu=0, sd=20)
        beta_log_proceeds = pm.Normal('log(Proceeds)', mu=0, sd=20)
        beta_CASI = pm.Normal('CASI', mu=0, sd=20)
        # beta_media_1st_pricing = pm.Normal('media_1st_pricing', mu=0, sd=20)
        # beta_VC = pm.Normal('VC', mu=0, sd=20)
        beta_BAA_spread = pm.Normal('BAA Spread', mu=0, sd=20)
        beta_M3_initial_returns = pm.Normal('IPO Market Returns', mu=0, sd=20)
        beta_M3_indust_rets = pm.Normal('Industry Returns', mu=0, sd=20)

        # Hyperparameters
        ## alpha: hyperparameters for neg-binom distribution
        alpha = pm.Gamma('alpha', alpha=.1, beta=.1)

        # #Poisson Model Formula
        # Log-link linear predictor, shifted by 1 to respect the lower bound.
        mu = 1 + tt.exp(
                intercept[class_idx]
                + beta_underwriter_syndicate_size * df.underwriter_syndicate_size
                + beta_underwriter_num_leads * df.underwriter_num_leads
                + beta_underwriter_rank_avg * df.underwriter_rank_avg
                # + beta_FF49_industry * FF49_industry
                + beta_amends_down * df['Amends Down']
                + beta_num_SEC_amendments * df.num_SEC_amendments
                + beta_share_overhang * df['Share Overhang']
                + beta_log_sales * df['log(1+Sales)']
                + beta_CASI * df['CASI']
                + beta_log_proceeds * df['log(Proceeds)']
                # + beta_media_1st_pricing * df.media_1st_pricing
                # + beta_VC * df.VC
                + beta_BAA_spread * df['BAA Spread']
                + beta_M3_initial_returns * df.M3_initial_returns
                + beta_M3_indust_rets * df.M3_indust_rets
                    )

        # Dependent Variable
        # Bounded below at 1: a price update cannot happen in < 1 day here.
        BoundedNegativeBinomial = pm.Bound(pm.NegativeBinomial, lower=1)
        y_est = BoundedNegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        # Unobserved copy used for posterior predictive checks.
        y_pred = BoundedNegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        # y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.Poisson('y_est', mu=mu, observed=data)
        # y_pred = pm.Poisson('y_pred', mu=mu, shape=data.shape)

        start = pm.find_MAP()
        step = pm.Metropolis(start=start)
        # step = pm.NUTS()
        # backend = pm.backends.Text('test')
        # trace = pm.sample(NSamples, step, start=start, chain=1, njobs=2, progressbar=True, trace=backend)
        trace = pm.sample(NSamples, step, start=start, njobs=1, progressbar=True)

        trace2 = trace
        # Drop burn-in and thin the chain (burn is an int -- see fix above).
        trace = trace[-burn::thin]

        # waic = pm.waic(trace)
        # dic = pm.dic(trace)

    # with pm.Model() as model:
    #     trace_loaded = pm.backends.sqlite.load('FF49_industry.sqlite')
        # y_pred.dump('FF49_industry_missing/y_pred')

    ## POSTERIOR PREDICTIVE CHECKS
    y_pred = trace.get_values('y_pred')
    pm.summary(trace, vars=covariates)

    # PARAMETER POSTERIORS
    anno_kwargs = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 90, 'va': 'bottom', 'fontsize': 'large'}
    anno_kwargs2 = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 0, 'va': 'bottom', 'fontsize': 'large'}

    n0, n1, n2, n3 = 1, 5, 9, 14 # numbering for posterior plots
    # intercepts
    # mn = pm.df_summary(trace)['mean']['Intercept_log__0']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=blue, **anno_kwargs2)
    # mn = pm.df_summary(trace)['mean']['Intercept_log__1']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=purple, **anno_kwargs2)
    # coeffs
    # mn = pm.df_summary(trace)['mean'][2]
    # ax[1,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5, 10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][3]
    # ax[2,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][4]
    # ax[3,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # plt.savefig('figure1_mixed.png')

    # Trace plots in three batches, each annotated with its posterior mean.
    ax = pm.traceplot(trace, vars=['Intercept']+trace.varnames[n0:n1],
            lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()}
            )

    for i, mn in enumerate(pm.df_summary(trace)['mean'][n0:n1]): # +1 because up and down intercept
        ax[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure1_mixed.png')

    ax2 = pm.traceplot(trace, trace.varnames[n1:n2],
            lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()}
            )
    for i, mn in enumerate(pm.df_summary(trace)['mean'][n1:n2]): # +1 because up and down intercept
        ax2[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure2_mixed.png')

    ax3 = pm.traceplot(trace, trace.varnames[n2:n3],
            lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()}
            )
    for i, mn in enumerate(pm.df_summary(trace)['mean'][n2:n3]): # +1 because up and down intercept
        ax3[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure3_mixed.png')

    # _ = plt.figure(figsize=(5, 6))
    _ = pm.forestplot(trace, vars=['Intercept'], ylabels=le.classes_)
    plt.savefig('forestplot_intercepts.png')
    _ = pm.forestplot(trace, vars=covariates[1:], ylabels=covariates[1:])
    plt.savefig('forestplot_mixed.png')

    # pm.traceplot(trace, vars=['alpha', 'y_pred'])

    # def participant_y_pred(entity_name, burn=1000, hierarchical_trace=trace):
    #     """Return posterior predictive for person"""
    #     ix = np.where(le.classes_ == entity_name)[0][0]
    #     return hierarchical_trace['y_pred'][burn:, ix]

    def participant_y_pred(entity_name, burn=1000, ypred=y_pred):
        """Return posterior predictive for person"""
        ix = np.where(le.classes_ == entity_name)[0][0]
        return ypred[burn:, ix]

    days = 7

    # 2x2 posterior-predictive panel for the 'Up' and 'Down' groups.
    fig = plt.figure(figsize=(16,10))
    fig.add_subplot(221)
    entity_plotA('Up', days=days)
    fig.add_subplot(222)
    entity_plotB('Up')

    fig.add_subplot(223)
    entity_plotA('Down', days=days)
    fig.add_subplot(224)
    entity_plotB('Down')
    plt.savefig("figure4-postpreddist-updown")
Exemplo n.º 28
0
    Observed = pm.Weibull("Observed",
                          alpha=alpha,
                          beta=beta_mu,
                          observed=ys_faults)  # observed values
    trace_1 = pm.sample(10000, init='advi+adapt_diag')

pm.traceplot(trace_1,
             varnames=['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u'])
plt.show()

# Keep only the last 1000 of 10000 draws (burn-in discard).
burnin = 9000
chain = trace_1[burnin:]
# get MAP estimate
varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u']
tmp = pm.df_summary(chain, varnames2)
# Posterior means unpacked by row position: scalar beta first, then the
# per-company coefficient vectors, then u.
# NOTE(review): the index arithmetic assumes df_summary rows are ordered
# beta, beta1[0..C-1], beta2[...], beta3[...], beta4[...], u -- confirm.
betaMAP = tmp['mean'][0]
beta1MAP = tmp['mean'][np.arange(companiesABC) + 1]
beta2MAP = tmp['mean'][np.arange(companiesABC) + 1 * companiesABC + 1]
beta3MAP = tmp['mean'][np.arange(companiesABC) + 2 * companiesABC + 1]
beta4MAP = tmp['mean'][np.arange(companiesABC) + 3 * companiesABC + 1]
uMAP = tmp['mean'][4 * companiesABC + 1]
# am0MAP = tmp['mean'][4*companiesABC+2]
# am1MAP = tmp['mean'][4*companiesABC+3]
# print(am0MAP)
# print(beta1MAP)
# print(tmp)
# print(beta2MAP)
# print(beta3MAP)
# Model-fit visualisation
ppcsamples = 500
        t2 = time.time()
        print("Found MAP, took %f seconds" % (t2 - t1))

        ## take samples
        t1 = time.time()
        # NUTS from the MAP starting point; timings are printed for profiling.
        traces_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS(), progressbar=True)
        print()
        t2 = time.time()
        print("Done sampling, took %f seconds" % (t2 - t1))

    pm.summary(traces_ols)
    ## plot the samples and the marginal distributions
    # Overlay each parameter's posterior mean on its trace plot.
    _ = pm.traceplot(
        traces_ols,
        figsize=(12, len(traces_ols.varnames) * 1.5),
        lines={k: v["mean"] for k, v in pm.df_summary(traces_ols).iterrows()},
    )
    plt.show()


# Toggle for the (slower) robust Student-t regression variant below.
do_tstudent = False

if do_tstudent:

    print("Robust Student-t analysis...")

    t1 = time.time()
    with pm.Model() as mdl_studentt:

        ## Define weakly informative Normal priors to give Ridge regression
        b1 = pm.Normal("b", mu=0, sd=100)
    # Linear predictor plus Student-t degrees of freedom (Exponential prior
    # with mean ~29, the usual "robust regression" choice).
    MU = ALPHA + dot(X_INPUT, BETA)
    NU = Deterministic('NU', Exponential('nu_', 1 / 29))

    # Likelihood (sampling distribution) of observations
    #     Y_OBS = Normal('Y_OBS', mu=mu, sigma=sigma, observed=Y_OUTPUT)
    Y_OBS = StudentT('Y_OBS', mu=MU, sigma=SIGMA, observed=Y_OUTPUT, nu=NU)

with cost_model:
    TRACE = sample(SAMPLES, tune=TUNE, cores=6)
    traceplot(TRACE)

with cost_model:
    # Posterior-predictive mean on the log scale, back-transformed via exp.
    Y_PRED = sample_posterior_predictive(TRACE, 1000, cost_model)
    Y_ = Y_PRED['Y_OBS'].mean(axis=0)
    PP['model_cost'] = exp(Y_)  # depends on input/output
    SUMMARY = df_summary(TRACE)

# Persist the model and trace for later reuse.
with open('Time_and_Material_cost_model.pkl', 'wb') as f:
    dump({'model': cost_model, 'TRACE': TRACE}, f)

PROMPTS['F_BASENAME'] = F_BASENAME
# Store inputs, predictions and the fit summary alongside run metadata.
with HDFStore('Time_and_Material_pricing_version_fp_2.h5') as store:
    store['PP'] = PP
    store['X'] = X
    store['Y'] = PP[MEASURE]
    store['MRR'] = MRR
    store['PROMPTS'] = DataFrame(PROMPTS, index=[1])
    store['SUMMARY'] = SUMMARY

# Percentage gap between predicted total cost and the observed total.
_DELTA = 100 * (1 - (exp(Y_).sum() / PP[MEASURE].sum()))
print('*' * 80 + '\n' + '*' * 80)
Exemplo n.º 31
0
    #temp = 35.+15.*Uniform('temp', lower=-1, upper=1)
    #alpha = 3.45+0.75*Uniform('alpha', lower=-1, upper=1)
    # Power-law normalisation, reparameterised: 0.3 + 0.2 * Normal(0, 0.5).
    plnorm = 0.3+0.2*Normal('plnorm', 0., 0.5)

    #src.sed.setBB(temp=temp)
    src.sed.setPL(turnover=tp,plnorm=plnorm)
    modflux = pho.getFlux(src)

    def logp(obs):
        # Gaussian log-likelihood (up to a constant) of the observed fluxes.
        return -0.5*((modflux-obs)/sigma)**2.

    Y_obs = DensityDist('Y_obs', logp, observed=Y)

    trace = sample(1000, n_init=50000)

    # obtain starting values via MAP
    #start = find_MAP(fmin=optimize.fmin_powell)

    # instantiate sampler
    #step = NUTS(scaling=start)

    # draw 2000 posterior samples
    #trace = sample(5000, step, start=start)

# Undo the reparameterisation to recover physical parameter values.
out = np.array([35.+15.*trace['tp'], 0.3+0.2*trace['plnorm']])
import corner
# NOTE(review): Python 2 print statement -- this snippet predates Python 3.
print df_summary(trace)
labels = ['TP', 'plnorm']
fig = corner.corner(out.T,labels=labels, plot_density=False, plot_contours=False)
fig.savefig("out.pdf")
Exemplo n.º 32
0
        sd3 = (-4*sigma + mu, 4*sigma + mu)
        x = np.linspace(sd3[0], sd3[1], 300)
        y = stats.norm(mu, sigma).pdf(x)
        ax.plot(x, y)
        if trace[var].ndim > 1:
            t = trace[var][i]
        else:
            t = trace[var]
        sns.distplot(t, kde=False, norm_hist=True, ax=ax)
fig.tight_layout()
#%%
pm.traceplot(trace, combined=True)
plt.show()

# Posterior means of the fixed (w) and random (z) effect parameters.
burnin = 0
df_summary1 = pm.df_summary(trace[burnin:],varnames=['w'])
wpymc = np.asarray(df_summary1['mean'])
df_summary2 = pm.df_summary(trace[burnin:],varnames=['z'])
zpymc = np.asarray(df_summary2['mean'])

# Frequentist benchmark: fit the same design with statsmodels mixedlm.
import statsmodels.formula.api as smf
tbltest['Pheno'] = Pheno
md  = smf.mixedlm("Pheno ~ Condi1*Condi2", tbltest, groups=tbltest["subj"])
mdf = md.fit()
fixed = np.asarray(mdf.fe_params).flatten()

# Compare true weights (w0), PyMC posterior means and LME estimates.
plt.figure()
plt.plot(w0,'r')
plt.plot(wpymc,'b')
plt.plot(fixed,'g')
plt.legend(['real','PyMC','LME'])
Exemplo n.º 33
0
    # Zero-inflation probability via a logistic regression on year.
    psi = pm.Deterministic('psi', Invlogit(alpha + alpha1 * elec_year[0:84]))
    Observed = pm.ZeroInflatedNegativeBinomial(
        'Observed', psi=psi, mu=theta, alpha=sdd,
        observed=elec_faults[0:84])  # observed values

    #     step1 = pm.Slice([theta1, Δ_a])
    start = pm.find_MAP()
    trace_1 = pm.sample(1000, start=start, njobs=1)
    #     ,  init='advi+adapt_diag'

pm.traceplot(trace_1)
plt.show()

# Posterior analysis
varnames2 = ['theta']
tmp = pm.df_summary(trace_1, varnames2)
# Posterior mean of theta at each of the 12 time points.
betaMAP = tmp['mean'][np.arange(12)]
print(betaMAP)

with model_1:
    pp_trace = pm.sample_ppc(trace_1, 1000)
ip = 0
fig, ax = plt.subplots(figsize=(8, 6))
x_plot = np.linspace(0.9, 12.1, 12)
# 5th/95th percentile band of the posterior predictive distribution.
low, high = np.percentile(pp_trace['Observed'], [5, 95], axis=0)

xp = elec_year2[ip * 7:(ip + 1) * 7, :]  # raw data
yp = elec_faults2[ip * 7:(ip + 1) * 7, :]
ax.plot(xp, yp, marker='o', alpha=.8)
ax.plot(x_plot, betaMAP[:], marker='*', alpha=.8, label="Fitting estimate")
ax.fill_between(x_plot, low[:12], high[:12], alpha=0.5)