Example #1
def bayesian_random_effects(data, labels, group, n_samples=2000, n_burnin=500):
    import pymc as pm
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import percentileofscore

    # prepare the data
    donors = data[group].unique()
    donors_lookup = dict(zip(donors, range(len(donors))))
    data['donor_code'] = data[group].replace(donors_lookup).values
    n_donors = len(data[group].unique())
    donor_idx = data['donor_code'].values
    
    #setting up the model
    with pm.Model() as hierarchical_model:
        # Hyperpriors for group nodes
        group_intercept_mean = pm.Normal('group intercept (mean)', mu=0., sd=100**2)
        group_intercept_variance = pm.Uniform('group intercept (variance)', lower=0, upper=100)
        group_slope_mean = pm.Normal('group slope (mean)', mu=0., sd=100**2)
        group_slope_variance = pm.Uniform('group slope (variance)', lower=0, upper=100)
        
        individual_intercepts = pm.Normal('individual intercepts', mu=group_intercept_mean, sd=group_intercept_variance, shape=n_donors)
        individual_slopes = pm.Normal('individual slopes', mu=group_slope_mean, sd=group_slope_variance, shape=n_donors)
        
        # Model error
        residuals = pm.Uniform('residuals', lower=0, upper=100)
        
        expression_est =  individual_slopes[donor_idx] * data[labels[0]].values + individual_intercepts[donor_idx]
        
        # Data likelihood
        expression_like = pm.Normal('expression_like', mu=expression_est, sd=residuals, observed=data[labels[1]])

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        hierarchical_trace = pm.sample(n_samples, step, start=start, progressbar=True)
        
    mean_slope = hierarchical_trace['group slope (mean)'][n_burnin:].mean()
    zero_percentile = percentileofscore(hierarchical_trace['group slope (mean)'][n_burnin:], 0)
    print "Mean group level slope was %g (zero was %g percentile of the posterior distribution)"%(mean_slope, zero_percentile)
    
    pm.summary(hierarchical_trace[n_burnin:], vars=['group slope (mean)'])
        
    pm.traceplot(hierarchical_trace[n_burnin:])
    
    selection = donors
    fig, axis = plt.subplots(2, 3, figsize=(12, 6), sharey=True, sharex=True)
    axis = axis.ravel()
    xvals = np.linspace(data[labels[0]].min(), data[labels[0]].max())
    for i, c in enumerate(selection):
        c_data = data.loc[data[group] == c]
        c_data = c_data.reset_index(drop=True)
        z = list(c_data['donor_code'])[0]
        for a_val, b_val in zip(hierarchical_trace['individual intercepts'][n_burnin::10, z], hierarchical_trace['individual slopes'][n_burnin::10, z]):
            axis[i].plot(xvals, a_val + b_val * xvals, 'g', alpha=.1)
        axis[i].plot(xvals, hierarchical_trace['individual intercepts'][n_burnin:, z].mean() + hierarchical_trace['individual slopes'][n_burnin:, z].mean() * xvals,
                     'g', alpha=1, lw=2.)
        axis[i].hexbin(c_data[labels[0]], c_data[labels[1]], mincnt=1, cmap=plt.cm.YlOrRd_r)
        axis[i].set_title(c)
        axis[i].set_xlabel(labels[0])
        axis[i].set_ylabel(labels[1])
        
    plt.show()
        
    return mean_slope, zero_percentile
Example #2
def run_banova():
    y = 10 + np.hstack((np.random.randn(100), np.random.randn(100) + 1,
                        np.random.randn(100) + 2))
    y = y - y.mean()
    y = y / y.std()
    x = np.concatenate(([1.0] * 100, [0.0] * 200))
    X = np.vstack((x, np.roll(x, 100), np.roll(x, 200)))
    m = oneway_banova(y.astype(float), X.astype(float))
    start = {'offset': 0.0,
             'alphas': np.array([0, 1, 2.])}
    with m:
        step = pm.Metropolis()
        #step = pm.NUTS()
        trace = pm.sample(150000, step, start, tune=1500, njobs=1, progressbar=True)
        pm.traceplot(trace[::2])
    show()
Example #3
def bayesian_regression():
    import warnings
    warnings.simplefilter("ignore")
    import pymc as pm
    import numpy as np
    np.random.seed(1000)
    import matplotlib.pyplot as plt
    # %matplotlib inline  (IPython magic; only valid inside a notebook cell)
    
    #PYMC3
    x = np.linspace(0, 10, 500)
    y = 4 + 2 * x + np.random.standard_normal(len(x)) * 2
    reg = np.polyfit(x, y, 1)
    
    plt.figure(figsize=(8, 4))
    plt.scatter(x, y, c=y, marker="v")
    plt.plot(x, reg[1] + reg[0] * x, lw=2.0)
    plt.colorbar()
    plt.grid(True)
    plt.xlabel("x")
    plt.ylabel("y")
    
    reg
    
    with pm.Model() as model:
        # model specifications in PyMC3
        # are wrapped in a with statement
        # define priors
        alpha = pm.Normal("alpha", mu=0, sd=20)
        beta = pm.Normal("beta", mu=0, sd=20)
        sigma = pm.Uniform("sigma", lower=0, upper=10)
        # define linear regression
        y_est = alpha + beta * x
        # define likelihood
        likelihood = pm.Normal("y", mu=y_est, sd=sigma, observed=y)
        # inference
        start = pm.find_MAP()
        # find starting value by optimization
        step = pm.NUTS(scaling=start)
        # instantiate MCMC sampling algorithm
        trace = pm.sample(100, step, start=start, progressbar=False)
        # draw 100 posterior samples using NUTS sampling
    
    trace[0]
    
    fig = pm.traceplot(trace, lines={"alpha": 4, "beta": 2, "sigma": 2})
    plt.figure(figsize=(8, 8))
    plt.figure(figsize=(8, 4))
    plt.scatter(x, y, c=y, marker="v")
    plt.colorbar()
    plt.grid(True)
    plt.xlabel("x")
    plt.ylabel("y")
    for i in range(len(trace)):
        plt.plot(x, trace["alpha"][i] + trace["beta"][i] * x)
        
    pass
Пример #4
0
    def foo(self, discrete):
        student_ids  = []
        timestep_ids = []
        y = []

        ids = collections.defaultdict(itertools.count().__next__)
        for t in range(0, len(self)):
            student_ids  +=  [ids[o.id] for o in self[t]]
            timestep_ids +=  [t         for o in self[t]]
            y            +=  [o.value   for o in self[t]]

        n_students  =  len(set(student_ids))
        n_timesteps = len(self)

        print(student_ids, "!", n_students)

        with pm.Model() as hierarchical_model:
            # Hyperpriors for group nodes
            mu_student = pm.Normal('mu_student', mu=0., sd=100**2)
            sigma_student = pm.Uniform('sigma_student', lower=0, upper=100)

            #mu_timestep = pm.Normal('mu_beta', mu=0., sd=100**2)
            #sigma_timestep = pm.Uniform('sigma_beta', lower=0, upper=100)

            student  = pm.Normal('student', mu=mu_student, sd=sigma_student, shape=n_students) #random effect
            timestep = pm.Normal('timestep', mu=0, sd=100**2, shape=n_timesteps)  #fixed effect

            # Model error
            eps = pm.Uniform('eps', lower=0, upper=100)


            theta = student[student_ids] + timestep[timestep_ids]

            # Data likelihood
            if discrete:
                ll = pm.Bernoulli('theta', p=self.invlogit(theta), observed=y)
            else:
                ll = pm.Normal('theta', mu=theta, sd=eps, observed=y)

        with hierarchical_model:
            print("Find MAP...")
            start = pm.find_MAP()
            #if discrete:
            #     step = pm.BinaryMetropolis(scaling=start)
            # else:
            print("NUTS...")
            step = pm.NUTS(scaling=start)
            print("Samples...")
            hierarchical_trace = pm.sample(2000, step, start=start, progressbar=False)
        print("done...")
        print("Plot...")

        pl.figure(figsize=(10,10))
        f = pm.traceplot(hierarchical_trace[500:])
        f.savefig("a.png")
        return hierarchical_trace
Example #5
def run_best():
    a = np.random.randn(10)
    b = np.random.randn(10)+2
    print('A:', a.mean(), a.std())
    print('B:', b.mean(), b.std())
    x_eval = np.linspace(-10,10,100)

    m = best('A', a, 'B', b)
    start = {'A Mean': b.mean(),
        'A Std': a.std(),
        'B Mean': a.mean(),
        'B Std': b.std(),
        'Nu-1': 100}
    with m:
        step = pm.Metropolis(blocked=False)
        trace = pm.sample(10000, step, start, tune=1000, njobs=3, progressbar=False)
        pm.traceplot(trace)
    show()
    return m, trace
Example #6
import numpy as np
import pymc as pm

ndims = 2
nobs = 20

xtrue = np.random.normal(scale=2., size=1)
ytrue = np.random.normal(loc=np.exp(xtrue), scale=1, size=(ndims, 1))
zdata = np.random.normal(loc=xtrue + ytrue, scale=.75, size=(ndims, nobs))
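# Simulated data: a scalar latent xtrue, an (ndims, 1) latent ytrue centred at exp(xtrue),
# and nobs noisy observations of xtrue + ytrue in each dimension.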

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1)
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1)) # here, shape is telling us it's a vector rather than a scalar.
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata) # shape is inferred from zdata
    
with model:
    start = pm.find_MAP()
    
print("MAP found:")
print("x:", start['x'])
print("y:", start['y'])

print("Compare with true values:")
print("ytrue", ytrue)
print("xtrue", xtrue)

with model:
    step = pm.NUTS()
    
with model: 
    trace = pm.sample(3000, step, start)
    pm.traceplot(trace);
Example #7
ndims = 2  # as in the previous example
nobs = 20
xtrue = np.random.normal(scale=2., size=1)
ytrue = np.random.normal(loc=np.exp(xtrue), scale=1, size=(ndims, 1))
zdata = np.random.normal(loc=xtrue + ytrue, scale=.75, size=(ndims, nobs))



with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1)
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1)) # here, shape is telling us it's a vector rather than a scalar.
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata) # shape is inferred from zdata

with model:
    start = pm.find_MAP()

print("MAP found:")
print("x:", start['x'])
print("y:", start['y'])

print("Compare with true values:")
print("ytrue", ytrue)
print("xtrue", xtrue)

with model:
    step = pm.NUTS()

with model: 
    trace = pm.sample(3000, step, start)


pm.traceplot(trace)
Example #8
def realdata():
    import warnings
    warnings.simplefilter("ignore")
    import zipline
    import pytz
    import datetime as dt
    
    data = zipline.data.load_from_yahoo(stocks=["GLD", "GDX"],
                                        end=dt.datetime(2014, 3, 15, 0, 0, 0, 0, pytz.utc)).dropna()
    data.info()
    data.plot(figsize=(8, 4))
    
    data.iloc[-1] / data.iloc[0] - 1
    data.corr()
    data.index
    
    
    import matplotlib as mpl
    mpl_dates = mpl.dates.date2num(data.index)
    mpl_dates
    plt.figure(figsize=(8, 4))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter("%d %b %y"))
    
    with pm.Model() as model:
        alpha = pm.Normal("alpha", mu=0, sd=20)
        beta = pm.Normal("beta", mu=0, sd=20)
        sigma = pm.Uniform("sigma", lower=0, upper=50)
        y_est = alpha + beta * data["GDX"].values
        likelihood = pm.Normal("GLD", mu=y_est, sd=sigma,
        observed=data["GLD"].values)
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, start=start, progressbar=False)
        
    fig = pm.traceplot(trace)
    plt.figure(figsize=(8, 8))    
        
    plt.figure(figsize=(8, 4))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    for i in range(len(trace)):
        plt.plot(data["GDX"], trace["alpha"][i] + trace["beta"][i] * data
        ["GDX"])
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
    format=mpl.dates.DateFormatter("%d %b %y"))    
        
    model_randomwalk = pm.Model()
    with model_randomwalk:
        # std of random walk best sampled in log space
        sigma_alpha, log_sigma_alpha = \
            model_randomwalk.TransformedVar("sigma_alpha",
                                            pm.Exponential.dist(1. / .02, testval=.1),
                                            pm.logtransform)
        sigma_beta, log_sigma_beta = \
            model_randomwalk.TransformedVar("sigma_beta",
                                            pm.Exponential.dist(1. / .02, testval=.1),
                                            pm.logtransform)
    
    
    from pymc.distributions.timeseries import GaussianRandomWalk
    # to make the model simpler, we will apply the same coefficients
    # to 50 data points at a time
    subsample_alpha = 50
    subsample_beta = 50
    with model_randomwalk:
        alpha = GaussianRandomWalk("alpha", sigma_alpha**-2,
                                   shape=len(data) // subsample_alpha)
        beta = GaussianRandomWalk("beta", sigma_beta**-2,
                                  shape=len(data) // subsample_beta)
        # make coefficients have the same length as prices
        alpha_r = np.repeat(alpha, subsample_alpha)
        beta_r = np.repeat(beta, subsample_beta)
    
    len(data.dropna().GDX.values)
        
        
    with model_randomwalk:
        # define regression
        regression = alpha_r + beta_r * data.GDX.values[:1950]
        # assume prices are normally distributed
        # the mean comes from the regression
        sd = pm.Uniform("sd", 0, 20)
        likelihood = pm.Normal("GLD",
        mu=regression,
        sd=sd,
        observed=data.GLD.values[:1950])
    
    import scipy.optimize as sco
    with model_randomwalk:
        # first optimize random walk
        start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)
        # sampling
        step = pm.NUTS(scaling=start)
        trace_rw = pm.sample(100, step, start=start, progressbar=False)
    
    np.shape(trace_rw["alpha"])
    part_dates = np.linspace(min(mpl_dates), max(mpl_dates), 39)
    
    fig, ax1 = plt.subplots(figsize=(10, 5))
    plt.plot(part_dates, np.mean(trace_rw["alpha"], axis=0), "b", lw=2.5, label="alpha")
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw["alpha"][i], "b-.", lw=0.75)
    plt.xlabel("date")
    plt.ylabel("alpha")
    plt.axis("tight")
    plt.grid(True)
    plt.legend(loc=2)
    ax1.xaxis.set_major_formatter(mpl.dates.DateFormatter("%d %b %y") )
    ax2 = ax1.twinx()
    plt.plot(part_dates, np.mean(trace_rw["beta"], axis=0), "r", lw=2.5, label="beta")
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw["beta"][i], "r-.", lw=0.75)
    plt.ylabel("beta")
    plt.legend(loc=4)
    fig.autofmt_xdate()
    
    
    
    plt.figure(figsize=(10, 5))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter("%d %b %y"))
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    x = np.linspace(min(data["GDX"]), max(data["GDX"]))
    for i in range(39):
        alpha_rw = np.mean(trace_rw["alpha"].T[i])
        beta_rw = np.mean(trace_rw["beta"].T[i])
        plt.plot(x, alpha_rw + beta_rw * x, color=plt.cm.jet(256 * i // 39))
    
    
        
    pass
Example #9
def run_appc_rt_model():
    '''
    Run appc model and load data for it...
    '''
    return sample_model_appc(get_appc_rt_model(), 500000, observed=['Data_context', 'Data_nocontext', ])


def get_multi_trace(model, data, chains=1):
    sd, traces = [], []
    if chains == 1:
        sd += [dict((k, data[k][:]) for k in data.keys())]
    for chain in range(chains):
        sd += [dict((k, data[k]['chain%i' % chain][:]) for k in data.keys())]

    for i, s in enumerate(sd):
        t = pm.backends.NDArray('', model)
        t.samples = s
        t.chain = i+1
        traces += [t]
    return pm.backends.base.MultiTrace(traces)


if __name__ == '__main__':
    import sys
    filename = sys.argv[1]
    # t = run_test_case() #run_fixdur()
    t = run_appc_subject_model()
    save(t, filename)
    pm.traceplot(t, vars=[v for v in t.varnames if not v.startswith('DNP')])
    plt.show()
Example #10
        discriminability = Normal('Discriminability', mu=md, tau=taud, shape=num_observers)
        bias = Normal('Bias', mu=mc, tau=tauc, shape=num_observers)

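        # Per-observer hit and false-alarm probabilities from an equal-variance
        # signal-detection parameterisation (phi is assumed to be the standard
        # normal CDF in the source this fragment comes from).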
        hi = phi( 0.5*(discriminability-bias))
        fi = phi(-0.5*(discriminability-bias))

        counts_signal = Binomial('Signal trials', num_trials, hi, observed=signal_responses)
        counts_noise  = Binomial('Noise trials',  num_trials, fi, observed=noise_responses)
    return model


def run_sig():
    signal_responses = binom.rvs(100, 0.69, size=1)
    noise_responses  = binom.rvs(100, 0.30, size=1)
    m = sig_detect(signal_responses, noise_responses, 1, 100)
    with m:
        #step = pm.Metropolis(blocked=False)
        step = pm.HamiltonianMC()
        start = pm.find_MAP()
        #start = {'Pr. mean discrim.':0.0, 'Pr. mean bias':0.0,
        #         'taud':0.001, 'tauc':0.001}
        trace = pm.sample(5000, step, start, tune=500, njobs=2)
    return trace[1000:]

if __name__ == '__main__':
    t = run_fixdur()
    save(t, 'fixdur_trace.hdf5')
    pm.traceplot(t)
    show()

Example #11
# Entire Set

# <markdowncell>

# **(Run as individual)**

# <codecell>

## run sampling
traces_ind_all = OrderedDict()
traces_ind_all['all'] = get_traces_individual(df[xy['x']], df[xy['y']], max_iter=10000)

# <codecell>

## view parameters
p = pm.traceplot(traces_ind_all['all'],figsize=(18,1.5*3))
plt.show(p)

# <markdowncell>

# ### Plot regression

# <codecell>

plot_reg_bayes(df,xy,traces_ind_all,None,burn_ind=2000)

# <markdowncell>

# **Try Quadratic**

# <codecell>
Example #12
# The trace object can be indexed by the variables in the model, returning an array with the first index being the sample index
# and the other indexes the shape of the parameter. Thus for this example:

# In[8]:

trace[y].shape

# Out[8]:

#     (3000, 2, 1)

# `traceplot` is a summary plotting function for a trace.

# In[9]:

pm.traceplot(trace)

# Out[9]:

# image file:

# ## PyMC Internals
#
# ### Model
#
# The `Model` class has very simple internals: just a list of unobserved variables (`Model.vars`) and a list of factors which go into computing the posterior density (`Model.factors`) (see model.py for more).
#
# A Python "`with model:`" block has `model` as the current model. Many functions, like `find_MAP` and `sample`, must be in such a block to work correctly by default. They look in the current context for a model to use. You may also explicitly specify the model for them to use. This allows us to treat the current model as an implicit parameter to these functions.
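#
# A minimal sketch of both options described above, assuming the `model=` keyword of
# `find_MAP`: rely on the context manager, or name the model explicitly.

with pm.Model() as demo_model:
    p = pm.Normal('p', mu=0., sd=1.)
    map_ctx = pm.find_MAP()                      # uses demo_model from the with-block

map_explicit = pm.find_MAP(model=demo_model)     # same call, model passed explicitly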
#
# ### Distribution Classes
#
Example #13
    ### Now we calculate the expected flux from SED Model
    y_hat = FluxFromTheory(lTs,lMs,ldMenvbyMs,lRenv,ThetaCav,lMdbyMs,lRdo,lRdi,Zdisc,Bdisc,lalphad,lroCav,lroAmp,Inc,Av)/distance**2

    #Data likelihood
    y_like = pm.Normal('y_like',mu= y_hat, sd=ObservedDataSigma, observed=ObservedData)
#    y_like = pm.T('y_like',mu= y_hat, nu=T_nu, lam=T_lam, observed=ObservedData)  # T distribution for robustness

    # Inference...
#    start = pm.find_MAP() # Find starting value by optimization
    start = {'lTs':0.5,'lMs':0.5,'ldMenvbyMs':0.8,'lRenv':0.5,'ThetaCav':0.5,'lMdbyMs':0.8,'lRdo':0.5,'lRdi':0.5,'Zdisc':0.5,'Bdisc':0.5,'lalphad':0.5,'lroCav':0.5,'lroAmp':0.5,'Inc':1,'Av':2.5,'distance':0.9} 

#    step = pm.NUTS(state=start) # Instantiate MCMC sampling algorithm
    step = pm.Metropolis([lTs,lMs,ldMenvbyMs,lRenv,ThetaCav,lMdbyMs,lRdo,lRdi,Zdisc,Bdisc,lalphad,lroCav,lroAmp,Inc,Av,distance])
#    step1 = pm.Slice([x,w,z])
#    step2 = pm.Metropolis([z])    
    trace = pm.sample(1000, step, start=start, progressbar=True) # draw 1000 posterior samples using Sampling
#    trace = pm.sample(10000, [step1,step2], start=start, progressbar=True) # draw 1000 posterior samples using Sampling

print('The trace plot')
fig = pm.traceplot(trace, model.vars[:-1]);
fig.show()
input('enter to close..')

# fig = pm.traceplot(trace, lines={'x': 16, 'w': 12, 'z':3.6})
# fig.show()
# print TESTFindFromGrid(start['x'],start['w'],start['z'])
# print ydata
# print TESTFindFromGrid(np.mean(trace['x']),np.mean(trace['w']),np.mean(trace['z']))


Example #14
import matplotlib.pyplot as plt 
import numpy as np

import pandas as pd
data=pd.read_csv("http://hosho.ees.hokudai.ac.jp/~kubo/stat/iwanamibook/fig/hbm/data7a.csv")

# Tally the data by seed count y and display it as a bar chart
plt.bar(range(9),data.groupby('y').sum().id)
data.groupby('y').sum().T

# Restrict the data
Y=np.array(data.y)[:6]

import pymc as pm
import theano.tensor as T
def invlogit(v):
	return T.exp(v)/(T.exp(v)+1)

with pm.Model() as model_hier: 
	s=pm.Uniform('s',0,1.0E+2) 
	beta=pm.Normal('beta',0,1.0E+2)
	r=pm.Normal('r',0,s,shape=len(Y)) 
	q=invlogit(beta+r) 
	y=pm.Binomial('y',8,q,observed=Y)
	
	step = pm.Slice([s,beta,r])
	trace_hier = pm.sample(10000, step)

with model_hier:
	pm.traceplot(trace_hier, model_hier.vars)
Example #15
    alpha = pm.Normal('alpha', mu=0, sd=20)
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    # define linear regression
    y_est = alpha + beta * x
    # define likelihood
    likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=y)
    # inference
    start = pm.find_MAP()
    # find starting value by optimization
    step = pm.NUTS(scaling=start)
    # instantiate MCMC sampling algorithm
    trace = pm.sample(100, step, start=start, progressbar=False)
    # draw 100 posterior samples using NUTS sampling

fig = pm.traceplot(trace, lines={'alpha': 4, 'beta': 2, 'sigma': 2})
plt.figure(figsize=(8, 8))

#  GUI
# A NotImplementedError is raised because the backend being called is not implemented; PyQt4 is needed
# Newer Anaconda releases force-install PyQt5; if it is incompatible, uninstall it and reinstall PyQt4
import numpy as np
import traits.api as trapi
import traitsui.api as trui


class short_rate(trapi.HasTraits):
    name = trapi.Str
    rate = trapi.Float
    time_list = trapi.Array(dtype=np.float, shape=(1, 5))
    disc_list = trapi.Array(dtype=np.float, shape=(1, 5))
Example #16
import pymc as pm
from pymc.distributions.timeseries import GaussianRandomWalk
from scipy.sparse import csc_matrix
from scipy import optimize

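# Stochastic-volatility-style model: `s` is a latent log-volatility Gaussian random walk
# and `r` gives the returns a Student-t likelihood with scale exp(s); `nreturns` is
# assumed to be defined earlier in the source this snippet comes from.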
with pm.Model() as model:
    sigma, log_sigma = model.TransformedVar('sigma', pm.Exponential.dist(1./.02, testval=.1),
                                            pm.logtransform)

    nu = pm.Exponential('nu', 1./10)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(nreturns))
    r = pm.T('r', nu, lam=pm.exp(-2*s), observed=nreturns)

with model:
    start = pm.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)
    step  = pm.NUTS(scaling=start)
    trace = pm.sample(2000, step, start, progressbar=False)

plt.plot(trace[s][::10].T,'b', alpha=.03)
plt.title('log volatility')


with model:
    pm.traceplot(trace, model.vars[:2])

exps=np.exp(trace[s][::10].T)
plt.plot(returns[:600][::-1])
plt.plot( exps, 'r', alpha=.03);
plt.plot(-exps, 'r', alpha=.03);
plt.show()
Example #17
	alpha = pm.Normal('alpha', mu=0, sd=20)
	beta = pm.Normal('beta', mu=0, sd=20)
	sigma = pm.Uniform('sigma', lower=0)
	y = pm.Normal('y', mu=beta*X + alpha, sd=sigma, observed=Y)
	start = pm.find_MAP()
	step = pm.NUTS(scaling=start)

with model:
	if(multicore):
		trace = pm.sample(itenum, step, start=start,
		                  njobs=chainnum, random_seed=range(chainnum), progressbar=False)
	else:
		ts=[pm.sample(itenum, step, chain=i, progressbar=False) for i in range(chainnum)] 
		trace=merge_traces(ts)
	if(saveimage): 
		pm.traceplot(trace).savefig("simple_linear_trace.png")
	print "Rhat="+str(pm.gelman_rubin(trace))
		
t1 = time.perf_counter()
print("elapsed time=" + str(t1 - t0))

#trace
if(not multicore):
	trace=ts[0] 
with model:
	pm.traceplot(trace,model.vars)

pm.forestplot(trace)

import pickle as pkl
with open("simplelinearregression_model.pkl","w") as fpw:
Example #18
#    trace = pm.sample(5000, [step1, step2], start=start, progressbar=False)

## Check the results.
burnin = 2000  # posterior samples to discard
thin = 10  # posterior samples to discard

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], vars=[mu, kappa])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin::thin])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:,0][burnin::thin]
theta2_sample = trace['theta'][:,1][burnin::thin]
theta3_sample = trace['theta'][:,2][burnin::thin]
mu_sample = trace['mu'][burnin::thin]
kappa_sample = trace['kappa'][burnin::thin]

fig = plt.figure(figsize=(12,12))

# Scatter plot hyper-parameters
plt.subplot(4, 3, 1)
plt.scatter(mu_sample, kappa_sample, marker='o')
plt.xlim(0,1)
Example #19
                                     getParameters=True,
                                     getEnergy=True)


class Step(object):
    def __init__(self, var):
        self.var = var.name

    def step(self, point):
        new = point.copy()
        #new[self.var] = 10 + np.random.rand() # Normal samples
        state = point['state']
        sigma = point['sigma']
        new[self.var] = propagate(simulation, state, temperature, sigma,
                                  epsilon)

        return new


with pymc.Model() as model:
    sigma = pymc.Uniform("sigma", 0.535, 0.565)
    state = pymc.Flat('state')

    step1 = pymc.step_methods.NUTS(vars=[sigma])
    step2 = Step(state)  # not sure how to limit this to one variable

    trace = pymc.sample(10, [step1, step2])

pymc.traceplot(trace[:])
show()
Example #20
    def foo(self, discrete):
        student_ids = []
        timestep_ids = []
        y = []

        ids = collections.defaultdict(itertools.count().__next__)
        for t in range(0, len(self)):
            student_ids += [ids[o.id] for o in self[t]]
            timestep_ids += [t for o in self[t]]
            y += [o.value for o in self[t]]

        n_students = len(set(student_ids))
        n_timesteps = len(self)

        print(student_ids, "!", n_students)

        with pm.Model() as hierarchical_model:
            # Hyperpriors for group nodes
            mu_student = pm.Normal('mu_student', mu=0., sd=100**2)
            sigma_student = pm.Uniform('sigma_student', lower=0, upper=100)

            #mu_timestep = pm.Normal('mu_beta', mu=0., sd=100**2)
            #sigma_timestep = pm.Uniform('sigma_beta', lower=0, upper=100)

            student = pm.Normal('student',
                                mu=mu_student,
                                sd=sigma_student,
                                shape=n_students)  #random effect
            timestep = pm.Normal('timestep',
                                 mu=0,
                                 sd=100**2,
                                 shape=n_timesteps)  #fixed effect

            # Model error
            eps = pm.Uniform('eps', lower=0, upper=100)

            theta = student[student_ids] + timestep[timestep_ids]

            # Data likelihood
            if discrete:
                ll = pm.Bernoulli('theta', p=self.invlogit(theta), observed=y)
            else:
                ll = pm.Normal('theta', mu=theta, sd=eps, observed=y)

        with hierarchical_model:
            print("Find MAP...")
            start = pm.find_MAP()
            #if discrete:
            #     step = pm.BinaryMetropolis(scaling=start)
            # else:
            print("NUTS...")
            step = pm.NUTS(scaling=start)
            print("Samples...")
            hierarchical_trace = pm.sample(2000,
                                           step,
                                           start=start,
                                           progressbar=False)
        print("done...")
        print("Plot...")

        pl.figure(figsize=(10, 10))
        f = pm.traceplot(hierarchical_trace[500:])
        f.savefig("a.png")
        return hierarchical_trace
Example #21
#    trace = pm.sample(5000, [step1, step2], start=start, progressbar=False)

## Check the results.
burnin = 2000  # posterior samples to discard
thin = 10  # posterior samples to discard

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], vars=[mu, kappa])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin::thin])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin::thin]
theta2_sample = trace['theta'][:, 1][burnin::thin]
theta3_sample = trace['theta'][:, 2][burnin::thin]
mu_sample = trace['mu'][burnin::thin]
kappa_sample = trace['kappa'][burnin::thin]

fig = plt.figure(figsize=(12, 12))

# Scatter plot hyper-parameters
plt.subplot(4, 3, 1)
plt.scatter(mu_sample, kappa_sample, marker='o')
plt.xlim(0, 1)
Example #22
                                                      s['p'],
                                                      map_est['p'],
                                                      model.logp(map_est)))

# By default `basinhopping` uses a gradient-based minimizer, `fmin_bfgs`, which often
# gives inaccurate results here. If we force `basinhopping` to use a non-gradient
# method we get much better results.

with model:
    for i in range(n+1):
        s = {'p': 0.5, 'surv_sim': i}
        map_est = mc.find_MAP(start=s, vars=model.vars,
                              fmin=bh, minimizer_kwargs={"method": "Powell"})
        print('surv_sim: %i->%i, p: %f->%f, LogP:%f'%(s['surv_sim'],
                                                      map_est['surv_sim'],
                                                      s['p'],
                                                      map_est['p'],
                                                      model.logp(map_est)))

# Confident in our MAP estimate we can sample from the posterior, making sure
# we use the `Metropolis` method for our discrete variables.

with model:
    step1 = mc.step_methods.HamiltonianMC(vars=[p])
    step2 = mc.step_methods.Metropolis(vars=[surv_sim])

with model:
    trace = mc.sample(25000, [step1, step2], start=map_est)

mc.traceplot(trace);
Example #23
        print('surv_sim: %i->%i, p: %f->%f, LogP:%f' %
              (s['surv_sim'], map_est['surv_sim'], s['p'], map_est['p'],
               model.logp(map_est)))

# By default `basinhopping` uses a gradient-based minimizer, `fmin_bfgs`, which often
# gives inaccurate results here. If we force `basinhopping` to use a non-gradient
# method we get much better results.

with model:
    for i in range(n + 1):
        s = {'p': 0.5, 'surv_sim': i}
        map_est = mc.find_MAP(start=s,
                              vars=model.vars,
                              fmin=bh,
                              minimizer_kwargs={"method": "Powell"})
        print('surv_sim: %i->%i, p: %f->%f, LogP:%f' %
              (s['surv_sim'], map_est['surv_sim'], s['p'], map_est['p'],
               model.logp(map_est)))

# Confident in our MAP estimate we can sample from the posterior, making sure
# we use the `Metropolis` method for our discrete variables.

with model:
    step1 = mc.step_methods.HamiltonianMC(vars=[p])
    step2 = mc.step_methods.Metropolis(vars=[surv_sim])

with model:
    trace = mc.sample(25000, [step1, step2], start=map_est)

mc.traceplot(trace)
Example #24
# The trace object can be indexed by the variables in the model, returning an array with the first index being the sample index
# and the other indexes the shape of the parameter. Thus for this example:

# In[8]:

trace[y].shape


# Out[8]:

#     (3000, 2, 1)

# `traceplot` is a summary plotting function for a trace.

# In[9]:

pm.traceplot(trace);


# Out[9]:

# image file:

# ## PyMC Internals
#
# ### Model
#
# The `Model` class has very simple internals: just a list of unobserved variables (`Model.vars`) and a list of factors which go into computing the posterior density (`Model.factors`) (see model.py for more).
#
# A Python "`with model:`" block has `model` as the current model. Many functions, like `find_MAP` and `sample`, must be in such a block to work correctly by default. They look in the current context for a model to use. You may also explicitly specify the model for them to use. This allows us to treat the current model as an implicit parameter to these functions.
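#
# A minimal sketch of inspecting the unobserved variables mentioned above via `Model.vars`:

with pm.Model() as tiny_model:
    a = pm.Normal('a', mu=0., sd=1.)
    b = pm.Uniform('b', lower=0., upper=1.)

print(tiny_model.vars)   # the model's list of unobserved (free) variables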
#
# ### Distribution Classes
Example #25
import pymc as pm
from pymc.distributions.timeseries import GaussianRandomWalk
from scipy.sparse import csc_matrix
from scipy import optimize

with pm.Model() as model:
    sigma, log_sigma = model.TransformedVar(
        'sigma', pm.Exponential.dist(1. / .02, testval=.1), pm.logtransform)

    nu = pm.Exponential('nu', 1. / 10)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(nreturns))
    r = pm.T('r', nu, lam=pm.exp(-2 * s), observed=nreturns)

with model:
    start = pm.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)
    step = pm.NUTS(scaling=start)
    trace = pm.sample(2000, step, start, progressbar=False)

plt.plot(trace[s][::10].T, 'b', alpha=.03)
plt.title('log volatility')

with model:
    pm.traceplot(trace, model.vars[:2])

exps = np.exp(trace[s][::10].T)
plt.plot(returns[:600][::-1])
plt.plot(exps, 'r', alpha=.03)
plt.plot(-exps, 'r', alpha=.03)
plt.show()