def MCMC(model):
    import time
    with model:
        n = 6000
        START = time.time()
        try:
            start = pm.find_MAP()
        except AssertionError:
            return model, {'error': 'AssertionError in pm.find_MAP()'}
        init_time = time.time() - START
        print('Time to initialize: %ds' % init_time)

        START = time.time()
        trace = pm.sample(n, pm.Metropolis(), start)
        duration = time.time() - START
        print('Time to sample (MH): %ds' % duration)

        # START = time.time()
        # trace = pm.sample(n, pm.Slice(), start)
        # print('Time to sample (Slice): %ds' % (time.time() - START))

        # START = time.time()
        # trace = pm.sample(n, pm.HamiltonianMC(), start)
        # print('Time to sample (HMC): %ds' % (time.time() - START))

        # error_b, error_x, output = error(trace, model.data.A, model.data.x_true,
        #                                  model.data.b_obs, model.data.scaling)

        # fig = pm.traceplot(trace)
        # plot(error_b, error_x)
        # plt.show()
    return model, trace, init_time, duration
Example #2
def batch_sample(ws):
    n = 20
    data = 2*np.random.normal(size=(1, n)) + 3.0
    nsamples = 50000

    model = Model()
    with model:
        theta = Normal('mean', 0., 1.)
        sg = Uniform('standard_deviation', 0.5, 10, testval=2.)
        Normal('x', theta, sg ** -2, shape=n, observed=data)

        # start sampling at the MAP
        start = find_MAP()
        step = NUTS(scaling=start)

        trace = iter_sample(nsamples, step, start=start)

    for i in range(nsamples // 100):
        with model:
            for j in range(99):
                next(trace)
            t = next(trace)
            rvars = {k: t[k][-50:].tolist()
                     for k in t.varnames}
            ws.send(json.dumps(
                {"rvars": rvars,
                 "current_sample": len(t),
                 "total_samples": nsamples}
            ))
Example #3
def poisson_regression(targets, predictors, iters=2000):
    """ Return the posterior of a Bayesian Poisson regression model.

        This function takes the targets and predictors and builds a Poisson
        regression model. The predictor coefficients are found by sampling
        from the posterior using PyMC (NUTS in particular).

        The posterior is returned as an MxN array, where M is the number of
        samples and N is the number of predictors. The first column is the 
        coefficient for the first predictor and so on.

        Requires PyMC3

    """
    with pm.Model() as poisson_model:
        # priors for coefficients
        coeffs = pm.Uniform('coeffs', -10, 10, shape=(1, predictors.shape[1]))

        # Poisson rate: exponentiated linear predictor
        # (assumes `import theano.tensor as t` in the original module)
        p = t.exp(t.sum(coeffs * predictors.values, axis=1))

        obs = pm.Poisson('obs', p, observed=targets)

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        poisson_trace = pm.sample(iters, step, start=start, progressbar=False)

    return poisson_trace['coeffs'].squeeze()
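# A hedged usage sketch (fabricated data; assumes `predictors` is a pandas
# DataFrame, since the function reads `.values` and `.shape`):
import numpy as np
import pandas as pd

predictors = pd.DataFrame({'x1': np.random.rand(100),
                           'x2': np.random.rand(100)})
targets = np.random.poisson(np.exp(1.0 + 2.0 * predictors['x1']))
samples = poisson_regression(targets, predictors, iters=500)
print(samples.shape)  # (500, 2): one posterior column per predictor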
Example #4
def batch_sample(ws):
    d = np.random.normal(size=(3, 30))
    d1 = d[0] + 4
    d2 = d[1] + 4
    yd = .2 * d1 + .3 * d2 + d[2]
    nsamples = 50000

    with Model() as model:
        s = Exponential('s', 1)
        m1 = Laplace('m1', 0, 100)
        m2 = Laplace('m2', 0, 100)
        p = d1 * m1 + d2 * m2

        Normal('y', p, s ** -2, observed=yd)

    with model:
        start = find_MAP()
        step1 = Metropolis([m1, m2])
        step2 = Metropolis([s], proposal_dist=LaplaceProposal)

        trace = iter_sample(nsamples, [step1, step2], start)

    for i in range(nsamples // 100):
        with model:
            for j in range(99):
                next(trace)
            t = next(trace)
            rvars = {k: t[k][-50:].tolist()
                     for k in t.varnames}
            ws.send(json.dumps(
                {"rvars": rvars,
                 "current_sample": len(t),
                 "total_samples": nsamples}
            ))
Example #5
def bayesian_random_effects(data, labels, group, n_samples=2000, n_burnin=500):
    import pymc as pm
    from scipy.stats import percentileofscore

    # preparing the data
    donors = data[group].unique()
    donors_lookup = dict(zip(donors, range(len(donors))))
    data['donor_code'] = data[group].replace(donors_lookup).values
    n_donors = len(data[group].unique())
    donor_idx = data['donor_code'].values
    
    #setting up the model
    with pm.Model() as hierarchical_model:
        # Hyperpriors for group nodes (the Uniform priors are on the
        # standard deviation, passed below as sd=)
        group_intercept_mean = pm.Normal('group intercept (mean)', mu=0., sd=100**2)
        group_intercept_sd = pm.Uniform('group intercept (sd)', lower=0, upper=100)
        group_slope_mean = pm.Normal('group slope (mean)', mu=0., sd=100**2)
        group_slope_sd = pm.Uniform('group slope (sd)', lower=0, upper=100)

        individual_intercepts = pm.Normal('individual intercepts', mu=group_intercept_mean, sd=group_intercept_sd, shape=n_donors)
        individual_slopes = pm.Normal('individual slopes', mu=group_slope_mean, sd=group_slope_sd, shape=n_donors)

        # Model error (standard deviation of the residuals)
        residuals = pm.Uniform('residuals', lower=0, upper=100)
        
        expression_est =  individual_slopes[donor_idx] * data[labels[0]].values + individual_intercepts[donor_idx]
        
        # Data likelihood
        expression_like = pm.Normal('expression_like', mu=expression_est, sd=residuals, observed=data[labels[1]])

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        hierarchical_trace = pm.sample(n_samples, step, start=start, progressbar=True)
        
    mean_slope = hierarchical_trace['group slope (mean)'][n_burnin:].mean()
    zero_percentile = percentileofscore(hierarchical_trace['group slope (mean)'][n_burnin:], 0)
    #print "Mean group level slope was %g (zero was %g percentile of the posterior distribution)"%(mean_slope, zero_percentile)
    
    #pm.summary(hierarchical_trace[n_burnin:], vars=['group slope (mean)'])
        
    #pm.traceplot(hierarchical_trace[n_burnin:])
    
    #selection = donors
    #fig, axis = plt.subplots(2, 3, figsize=(12, 6), sharey=True, sharex=True)
    #axis = axis.ravel()
    #xvals = np.linspace(data[labels[0]].min(), data[labels[0]].max())
    #for i, c in enumerate(selection):
    #    c_data = data.ix[data[group] == c]
    #    c_data = c_data.reset_index(drop = True)
    #    z = list(c_data['donor_code'])[0]
    #    for a_val, b_val in zip(hierarchical_trace['individual intercepts'][n_burnin::10][z], hierarchical_trace['individual slopes'][n_burnin::10][z]):
    #        axis[i].plot(xvals, a_val + b_val * xvals, 'g', alpha=.1)
    #    axis[i].plot(xvals, hierarchical_trace['individual intercepts'][n_burnin:][z].mean() + hierarchical_trace['individual slopes'][n_burnin:][z].mean() * xvals, 
    #                 'g', alpha=1, lw=2.)
    #    axis[i].hexbin(c_data[labels[0]], c_data[labels[1]], mincnt=1, cmap=plt.cm.YlOrRd_r)
    #    axis[i].set_title(c)
    #    axis[i].set_xlabel(labels[0])
    #    axis[i].set_ylabel(labels[1])
    #    
    #plt.show()
        
    return mean_slope, zero_percentile
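# A hedged usage sketch (fabricated DataFrame; `labels` gives the predictor and
# response column names, and `group` names the grouping column):
import numpy as np
import pandas as pd

df = pd.DataFrame({'age': np.random.randn(60),
                   'expression': np.random.randn(60),
                   'donor': np.repeat(['A', 'B', 'C'], 20)})
slope, pct = bayesian_random_effects(df, labels=['age', 'expression'],
                                     group='donor', n_samples=500, n_burnin=100)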
Example #6
def get_traces_hierarchical(x, y, idxs, max_iter=100000):
    """ sample hierarchical model """
    
    idx_size = len(np.unique(idxs))
    with pm.Model() as hierarchical_model:

        # hyperpriors for group nodes, all uninformative
        alpha_mu = pm.Normal('alpha_mu', mu=0., sd=100**2)
        alpha_sigma = pm.Uniform('alpha_sigma', lower=0, upper=100)
        beta_mu = pm.Normal('beta_mu', mu=0., sd=100**2)
        beta_sigma = pm.Uniform('beta_sigma', lower=0, upper=100)

        # Intercept for each testtype, distributed around the group mean alpha_mu.
        # Above, mu and sd are fixed values, while below we plug in common
        # group distributions for alpha and beta (vectors of length idx_size).

        # priors for alpha, beta and model error, uninformative
        alpha = pm.Normal('alpha', mu=alpha_mu, sd=alpha_sigma, shape=idx_size)
        beta = pm.Normal('beta', mu=beta_mu, sd=beta_sigma, shape=idx_size)
        epsilon = pm.Uniform('epsilon', lower=0, upper=100)

        # hierarchical linear model
        y_est = alpha[idxs] + beta[idxs] * x
        likelihood = pm.Normal('likelihood', mu=y_est, sd=epsilon, observed=y)
        traces = pm.sample(max_iter, step=pm.Metropolis(),
                           start=pm.find_MAP(), progressbar=True)
    return traces
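# A hedged usage sketch (synthetic data; `idxs` assigns each observation to a
# group index 0..K-1, which selects that group's alpha and beta):
import numpy as np

idxs = np.repeat(np.arange(3), 50)              # 3 groups, 50 points each
x = np.random.randn(150)
y = 1.0 + 2.0 * x + 0.1 * np.random.randn(150)
traces = get_traces_hierarchical(x, y, idxs, max_iter=5000)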
Example #7
    def foo(self, discrete):
        student_ids  = []
        timestep_ids = []
        y = []

        ids = collections.defaultdict(itertools.count().__next__)
        for t in range(len(self)):
            student_ids  += [ids[o.id] for o in self[t]]
            timestep_ids += [t         for o in self[t]]
            y            += [o.value   for o in self[t]]

        n_students  =  len(set(student_ids))
        n_timesteps = len(self)

        print(student_ids, "!", n_students)

        with pm.Model() as hierarchical_model:
            # Hyperpriors for group nodes
            mu_student = pm.Normal('mu_student', mu=0., sd=100**2)
            sigma_student = pm.Uniform('sigma_student', lower=0, upper=100)

            #mu_timestep = pm.Normal('mu_beta', mu=0., sd=100**2)
            #sigma_timestep = pm.Uniform('sigma_beta', lower=0, upper=100)

            student  = pm.Normal('student', mu=mu_student, sd=sigma_student, shape=n_students) #random effect
            timestep = pm.Normal('timestep', mu=0, sd=100**2, shape=n_timesteps)  #fixed effect

            # Model error
            eps = pm.Uniform('eps', lower=0, upper=100)


            theta = student[student_ids] + timestep[timestep_ids]

            # Data likelihood
            if discrete:
                ll = pm.Bernoulli('theta', p=self.invlogit(theta), observed=y)
            else:
                ll = pm.Normal('theta', mu=theta, sd=eps, observed=y)

        with hierarchical_model:
            print("Find MAP...")
            start = pm.find_MAP()
            # if discrete:
            #     step = pm.BinaryMetropolis(scaling=start)
            # else:
            print("NUTS...")
            step = pm.NUTS(scaling=start)
            print("Samples...")
            hierarchical_trace = pm.sample(2000, step, start=start, progressbar=False)
        print("done...")
        print("Plot...")

        pl.figure(figsize=(10,10))
        f = pm.traceplot(hierarchical_trace[500:])
        f.savefig("a.png")
        return hierarchical_trace
Example #8
def run_sig():
    signal_responses = binom.rvs(100, 0.69, size=1)
    noise_responses  = binom.rvs(100, 0.30, size=1)
    m = sig_detect(signal_responses, noise_responses, 1, 100)
    with m:
        #step = pm.Metropolis(blocked=False)
        step = pm.HamiltonianMC()
        start = pm.find_MAP()
        #start = {'Pr. mean discrim.':0.0, 'Pr. mean bias':0.0,
        #         'taud':0.001, 'tauc':0.001}
        trace = pm.sample(5000, step, start, tune=500, njobs=2)
    return trace[1000:]
Example #9
def test_errors():
    _, model, _ = exponential_beta(2)

    with model:
        try:
            newstart = find_MAP(Point(x=[-.5, .01], y=[.5, 4.4]))
        except ValueError as e:
            msg = str(e)
            assert "x.logp" in msg, msg
            assert "x.value" not in msg, msg
        else:
            assert False, newstart
Example #10
def run_fixdur():
    import pickle
    dur, fa, obs = pickle.load(open('durtest.pickle', 'rb'))
    m = piecewise_durations(dur, fa, obs - 1)
    with m:
        start = pm.find_MAP()  # pickle.load(open('fixdur_map.pickle', 'rb'))
        step = pm.Metropolis(vars=[m.named_vars['Mean_offset'],
            m.named_vars['Mean_slope1'], m.named_vars['Mean_slope2'], 
            m.named_vars['Mean_split']], blocked=False)
        step2 = pm.Metropolis(vars=[m.named_vars['Slope1'], 
            m.named_vars['Slope2'], m.named_vars['Offsets'], m.named_vars['Breakpoint']])
        trace = pm.sample(5000, [step, step2], start, tune=1000, njobs=1,
                progressbar=True)
    return trace
Example #11
def sample_model_appc(model, steps, tune=None, njobs=4, observed=['Data']):
    if tune is None:
        tune = steps // 2
    with model:
        start = pm.find_MAP()
        non_blocked_step = pm.Metropolis(
            vars=[v for k, v in model.named_vars.items()
                  if (('Obs_SD' in k) or ('Mean_' in k))
                  and not (k in set(observed)) and not k.startswith('DNS')],
            blocked=False)
        blocked = pm.Metropolis(
            vars=[v for k, v in model.named_vars.items()
                  if not (('Obs_SD' in k) or ('Mean_' in k))
                  and not (k in set(observed)) and not k.startswith('DNS')],
            blocked=True)
        trace = pm.sample(
            steps, [non_blocked_step, blocked], start,
            tune=tune, njobs=njobs, progressbar=True)
    return trace
Example #12
def sample_model(model, steps, tune=None, njobs=1, observed=['Data']):
    if tune is None:
        tune = steps // 2
    with model:
        start = pm.find_MAP()  # cPickle.load(open('fixdur_map.pickle'))
        non_blocked_step = pm.Metropolis(
            vars=[v for k, v in model.named_vars.items()
                  if (('Obs_SD' in k) or ('Mean_' in k))
                  and not (k in set(observed))],
            blocked=False)
        blocked = pm.Metropolis(
            vars=[v for k, v in model.named_vars.items()
                  if not (('Obs_SD' in k) or ('Mean_' in k))
                  and not (k in set(observed))],
            blocked=True)
        trace = pm.sample(
            steps, [non_blocked_step, blocked], start,
            tune=tune, njobs=njobs, progressbar=True)
    return trace
Example #13
    def __init__(self, sim, params, database=None, overwrite=False):
        raise ValueError('database not implemented yet')

        self.sim = sim
        self.params = params
        self.model = self.make_model(self.sim, dict(self.params))
        with self.model:
            self.MAP = pymc.find_MAP()
            self.step = pymc.NUTS()

        self.dbname = database
        if self.dbname is None:
            pass
        else:
            raise ValueError('database not implemented yet')

        self.MCMCparams = {}

        self.number_of_bins = 200
Example #14
def get_traces_individual(x, y, max_iter=10000, quad=False):
    """ sample individual model """
    
    with pm.Model() as individual_model:

        # priors for alpha, beta and model error, uninformative  
        alpha = pm.Normal('alpha', mu=0, sd=100**2)
        beta = pm.Normal('beta', mu=0, sd=100**2)
        epsilon = pm.Uniform('epsilon', lower=0, upper=100)
        
        # configure model
        y_est = alpha + beta * x
        
        if quad:
            gamma = pm.Normal('gamma', mu=0, sd=100**2)
            y_est = alpha + beta * x + gamma * x ** 2

        # calc likelihood and do sampling
        likelihood = pm.Normal('likelihood', mu=y_est, sd=epsilon, observed=y)
        traces = pm.sample(max_iter, step=pm.NUTS(), start=pm.find_MAP(),
                           progressbar=True)
    
    return traces
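# A hedged usage sketch (synthetic quadratic data, so the quad=True branch is
# exercised):
import numpy as np

x = np.linspace(-1, 1, 100)
y = 0.5 + 1.5 * x - 2.0 * x**2 + 0.1 * np.random.randn(100)
traces = get_traces_individual(x, y, max_iter=5000, quad=True)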
Example #15
import numpy as np
import matplotlib.pyplot as plt
import pymc as pm
from pymc.distributions.timeseries import GaussianRandomWalk
from scipy.sparse import csc_matrix
from scipy import optimize
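# `nreturns` and `returns` (the log-return series used below) are assumed to be
# defined elsewhere in the original script; they are not part of this snippet.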

with pm.Model() as model:
    sigma, log_sigma = model.TransformedVar(
        'sigma', pm.Exponential.dist(1. / .02, testval=.1), pm.logtransform)

    nu = pm.Exponential('nu', 1. / 10)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(nreturns))
    r = pm.T('r', nu, lam=pm.exp(-2 * s), observed=nreturns)

with model:
    start = pm.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)
    step = pm.NUTS(scaling=start)
    trace = pm.sample(2000, step, start, progressbar=False)

plt.plot(trace[s][::10].T, 'b', alpha=.03)
plt.title('log volatility')

with model:
    pm.traceplot(trace, model.vars[:2])

exps = np.exp(trace[s][::10].T)
plt.plot(returns[:600][::-1])
plt.plot(exps, 'r', alpha=.03)
plt.plot(-exps, 'r', alpha=.03)
plt.show()
Example #16
# distribution over the survey parameter.

alpha = 4
beta = 4
n = 20
yes = 15

with mc.Model() as model:
    p = mc.Beta('p', alpha, beta)
    surv_sim = mc.Binomial('surv_sim', n=n, p=p)
    surv = mc.Binomial('surv', n=n, p=p, observed=yes)

# First let's try and use `find_MAP`.

with model:
    print(mc.find_MAP())

# `find_MAP` defaults to finding the MAP for only the continuous variables; we
# have to specify if we would like to use the discrete variables.

with model:
    print(mc.find_MAP(vars=model.vars, disp=True))

# We set the `disp` variable to display a warning that we are using a
# non-gradient minimization technique, as discrete variables do not give much
# gradient information. To demonstrate this, if we use a gradient-based
# minimization, `fmin_bfgs`, with various starting points, we see that the MAP
# does not converge.

with model:
    for i in range(n+1):
        # The source snippet is truncated here. A plausible body, matching the
        # text above (and assuming `from scipy import optimize`), retries the
        # gradient-based minimizer from each discrete starting point:
        print(mc.find_MAP(start={'p': 0.5, 'surv_sim': i}, vars=model.vars,
                          fmin=optimize.fmin_bfgs))
Example #18
ydata = theta_true[0] + theta_true[1] * xdata

# add scatter to points
xdata = np.random.normal(xdata, 10)
ydata = np.random.normal(ydata, 10)
data = {'x': xdata, 'y': ydata}

with pymc.Model() as model:
    alpha = pymc.Uniform('intercept', -100, 100)
    # Create custom densities
    beta = pymc.DensityDist('slope', lambda value: -1.5 * T.log(1 + value**2), testval=0)
    sigma = pymc.DensityDist('sigma', lambda value: -T.log(T.abs_(value)), testval=1)
    # Create likelihood
    like = pymc.Normal('y_est', mu=alpha + beta * xdata, sd=sigma, observed=ydata)

    start = pymc.find_MAP()
    step = pymc.NUTS(scaling=start) # Instantiate sampler
    trace = pymc.sample(10000, step, start=start)


#################################################
# Create some convenience routines for plotting
# All functions below written by Jake Vanderplas

def compute_sigma_level(trace1, trace2, nbins=20):
    """From a set of traces, bin by number of standard deviations"""
    L, xbins, ybins = np.histogram2d(trace1, trace2, nbins)
    L[L == 0] = 1E-16
    logL = np.log(L)

    shape = L.shape
Example #19
# Generate the data
y1 = np.array([1, 1, 1, 1, 1, 0, 0])  # 5 heads and 2 tails
y2 = np.array([1, 1, 0, 0, 0, 0, 0])  # 2 heads and 5 tails


with pm.Model() as model:
    # define the prior
    theta1 = pm.Beta('theta1', 3, 3)  # prior
    theta2 = pm.Beta('theta2', 3, 3)  # prior
    # define the likelihood
    y1 = pm.Bernoulli('y1', p=theta1, observed=y1)
    y2 = pm.Bernoulli('y2', p=theta2, observed=y2)

    # Generate an MCMC chain
    start = pm.find_MAP()  # Find starting value by optimization
    trace = pm.sample(10000, pm.Metropolis(), start=start,
                      progressbar=False)  # Use Metropolis sampling
#    trace = pm.sample(10000, pm.NUTS(), start=start, progressbar=False) # Use NUTS sampling

# create an array with the posterior sample
theta1_sample = trace['theta1']
theta2_sample = trace['theta2']

# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[-500:], theta2_sample[-500:], marker='o')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$\theta1$')
plt.ylabel(r'$\theta2$')
Example #20
def test_accuracy_normal():
    _, model, (mu, _) = simple_model()

    with model:
        newstart = find_MAP(Point(x=[-10.5, 100.5]))
        close_to(newstart['x'], [mu, mu], 1e-5)
Example #21
def test_accuracy_non_normal():
    _, model, (mu, _) = non_normal(4)

    with model:
        newstart = find_MAP(Point(x=[.5, .01, .95, .99]))
        close_to(newstart['x'], mu, 1e-5)
Example #22
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1)) # here, shape is telling us it's a vector rather than a scalar.
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata) # shape is inferred from zdata


# A parenthetical note on the parameters for the normal: the spread can be encoded as `tau`, the precision, which is simply the inverse variance (so $\tau=\sigma^{-2}$). This parameterization is used because the gamma distribution is the conjugate prior for the precision, and a gamma draw would otherwise have to be inverted to get the variance. Encoding in terms of precision saves that inversion step in cases where the variance is actually modeled with a gamma prior.
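#
# For example, under this parameterization the following two lines declare the
# same distribution (a sketch; assumes an active `pm.Model` context):
#
#     a = pm.Normal('a', mu=0., tau=0.25)  # precision tau = 1 / sigma**2
#     b = pm.Normal('b', mu=0., sd=2.)     # sigma = 2, hence tau = 0.25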

# Fit Model
# ---------
# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori (MAP) point, which is often a good starting point. `find_MAP` uses an optimization algorithm (`scipy.optimize.fmin_l_bfgs_b`, or [BFGS](http://en.wikipedia.org/wiki/BFGS_method), by default) to find the local maximum of the log posterior.
#
# Note that this `with` construction is used again. Functions like `find_MAP` and `HamiltonianMC` need to have a model in their context. `with` activates the context of a particular model within its block.

# In[4]:

with model:
    start = pm.find_MAP()


# Points in parameter space are represented by dictionaries with parameter names as the keys and the values of the parameters as the values.

# In[5]:

print("MAP found:")
print("x:", start['x'])
print("y:", start['y'])

print("Compare with true values:")
print("ytrue", ytrue)
print("xtrue", xtrue)

Example #23
    kappa1 = pm.Gamma('kappa1', shape_Gamma, rate_Gamma, shape=n_cond)
    a_Beta1 = mu[cond_of_subj] * kappa1[cond_of_subj]
    b_Beta1 = (1 - mu[cond_of_subj]) * kappa1[cond_of_subj]

    #Prior on theta
    theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj)
    theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj)
    # if model_index == 0 then sample from theta1 else sample from theta0
    theta = pm.switch(pm.eq(model_index, 0), theta1, theta0)

    # Likelihood:
    y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj)

    # Sampling
    start = pm.find_MAP()
    steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]]
    steps.append(pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1]))
    trace = pm.sample(50000, steps, start=start, progressbar=False)


# EXAMINE THE RESULTS.
burnin = 1000
thin = 1
model_idx_sample = trace['model_index'][burnin::thin]

pM1 = sum(model_idx_sample == 1) / len(model_idx_sample)
pM2 = 1 - pM1

plt.figure(figsize=(15, 15))
plt.subplot2grid((5,4), (0,0), colspan=4)