Example #1
 def test_sample(self):
     test_cores = [1]
     with self.model:
         for cores in test_cores:
             for steps in [1, 10, 300]:
                 pm.sample(steps, tune=0, step=self.step, cores=cores,
                           random_seed=self.random_seed)
Example #2
    def _fit_time_series_model(self, signal, target, samples):
        
        model_randomwalk = pm.Model()
        with model_randomwalk:

            sigma_alpha = pm.Exponential('sigma_alpha', 1. / .02, testval=.1)
            sigma_beta = pm.Exponential('sigma_beta', 1. / .02, testval=.1)

            # random-walk regression coefficients (one per observation in `target`)
            alpha = GaussianRandomWalk('alpha', sigma_alpha ** -2, shape=len(target))
            beta = GaussianRandomWalk('beta', sigma_beta ** -2, shape=len(target))

            # Define regression on the `signal` series
            regression = alpha + beta * signal.values

            # Assume prices are Normally distributed, the mean comes from the regression.
            sd = pm.Uniform('sd', 0, 20)
            likelihood = pm.Normal('y',
                                   mu=regression,
                                   sd=sd,
                                   observed=target.values)
        
            # First optimize random walk
            start = pm.find_MAP(vars=[alpha, beta], fmin=optimize.fmin_l_bfgs_b)
            step = pm.NUTS(scaling=start)
            trace = pm.sample(10, step, start)

            # Sample
            start2 = trace.point(-1)
            step = pm.NUTS(scaling=start2)
            trace_rw = pm.sample(samples, step, start=start2)
            
            
Example #3
def test_log_post_trace():
    with pm.Model() as model:
        pm.Normal('y')
        trace = pm.sample(10, tune=10, chains=1)

    logp = pmstats._log_post_trace(trace, model)
    assert logp.shape == (len(trace), 0)

    with pm.Model() as model:
        pm.Normal('a')
        pm.Normal('y', observed=np.zeros((2, 3)))
        trace = pm.sample(10, tune=10, chains=1)

    logp = pmstats._log_post_trace(trace, model)
    assert logp.shape == (len(trace), 6)
    npt.assert_allclose(logp, -0.5 * np.log(2 * np.pi), atol=1e-7)

    with pm.Model() as model:
        pm.Normal('a')
        pm.Normal('y', observed=np.zeros((2, 3)))
        data = pd.DataFrame(np.zeros((3, 4)))
        data.values[1, 1] = np.nan
        pm.Normal('y2', observed=data)
        data = data.copy()
        data.values[:] = np.nan
        pm.Normal('y3', observed=data)
        trace = pm.sample(10, tune=10, chains=1)

    logp = pmstats._log_post_trace(trace, model)
    assert logp.shape == (len(trace), 17)
    npt.assert_allclose(logp, -0.5 * np.log(2 * np.pi), atol=1e-7)
Example #4
 def test_run(self):
     model = self.build_model()
     with model:
         start = {'psi': 0.5, 'z': (self.y > 0).astype(int), 'theta': 5}
         step_one = pm.Metropolis([model.theta_interval_, model.psi_logodds_])
         step_two = pm.BinaryMetropolis([model.z])
         pm.sample(50, [step_one, step_two], start)
Example #5
 def test_sample(self):
     test_njobs = [1]
     with self.model:
         for njobs in test_njobs:
             for steps in [1, 10, 300]:
                 pm.sample(steps, tune=0, step=self.step, njobs=njobs,
                           random_seed=self.random_seed)
Example #6
 def test_sample_does_not_set_seed(self):
     random_numbers = []
     for _ in range(2):
         np.random.seed(1)
         with self.model:
             pm.sample(1)
             random_numbers.append(np.random.random())
     self.assertEqual(random_numbers[0], random_numbers[1])
Example #7
 def test_sample_tune_len(self):
     with self.model:
         trace = pm.sample(draws=100, tune=50, cores=1)
         assert len(trace) == 100
         trace = pm.sample(draws=100, tune=50, cores=1, discard_tuned_samples=False)
         assert len(trace) == 150
         trace = pm.sample(draws=100, tune=50, cores=4)
         assert len(trace) == 100
Example #8
 def test_sample_does_not_set_seed(self):
     random_numbers = []
     for _ in range(2):
         np.random.seed(1)
         with self.model:
             pm.sample(1, tune=0, chains=1)
             random_numbers.append(np.random.random())
     assert random_numbers[0] == random_numbers[1]
Example #9
def test_empirical_from_trace(another_simple_model):
    with another_simple_model:
        step = pm.Metropolis()
        trace = pm.sample(100, step=step, chains=1)
        emp = Empirical(trace)
        assert emp.histogram.shape[0].eval() == 100
        trace = pm.sample(100, step=step, chains=4)
        emp = Empirical(trace)
        assert emp.histogram.shape[0].eval() == 400
Example #10
 def test_sample_init(self):
     with self.model:
         for init in ("advi", "advi_map", "map", "nuts"):
             pm.sample(
                 init=init,
                 tune=0,
                 n_init=1000,
                 draws=50,
                 random_seed=self.random_seed,
             )
Example #11
    def test_run(self):
        model = self.build_model()
        with model:
            # move the chain to the MAP which should be a good starting point
            start = pm.find_MAP()
            H = model.fastd2logp()  # find a good orientation using the hessian at the MAP
            h = H(start)

            step = pm.HamiltonianMC(model.vars, h)
            pm.sample(50, step, start)
Example #12
 def test_run(self):
     model = self.build_model()
     with model:
         start = {
             'psi': np.array(0.5, dtype='f'),
             'z': (self.y > 0).astype('int16'),
             'theta': np.array(5, dtype='f'),
         }
         step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__])
         step_two = pm.BinaryMetropolis([model.z])
         pm.sample(50, step=[step_one, step_two], start=start, chains=1)
Example #13
def test_compare():
    np.random.seed(42)
    x_obs = np.random.normal(0, 1, size=100)

    with pm.Model() as model0:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
        trace0 = pm.sample(1000)

    with pm.Model() as model1:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
        trace1 = pm.sample(1000)

    with pm.Model() as model2:
        mu = pm.Normal('mu', 0, 1)
        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
        trace2 = pm.sample(1000)

    traces = [trace0, copy.copy(trace0)]
    models = [model0, copy.copy(model0)]

    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert_almost_equal(w_st[0], w_st[1])
    assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
    assert_almost_equal(w_bma[0], w_bma[1])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)

    traces = [trace0, trace1, trace2]
    models = [model0, model1, model2]

    model_dict = dict(zip(models, traces))
    
    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert(w_st[0] > w_st[1] > w_st[2])
    assert(w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2])
    assert(w_bma[0] > w_bma[1] > w_bma[2])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)
Example #14
 def too_slow(self):
     model = self.build_model()
     start = {'groupmean': self.obs_means.mean(),
              'groupsd_interval_': 0,
              'sd_interval_': 0,
              'means': self.obs_means,
              'floor_m': 0.,
              }
     with model:
         start = pm.find_MAP(start=start,
                             vars=[model['groupmean'], model['sd_interval_'], model['floor_m']])
         step = pm.NUTS(model.vars, scaling=start)
         pm.sample(50, step, start)
Example #15
def sample_pymc3(d, samples=2000, njobs=2):
    with pm.Model() as model:
        dfc = pm.Normal(mu=0.0, sd=d['sigma_fc'], name='dfc')
        Q = pm.Gamma(mu=d['mu_Q'], sd=d['sigma_Q'], name='Q')
        Pdet = pm.Gamma(mu=d['mu_Pdet'], sd=d['sigma_Pdet'], name='Pdet')
        kc = pm.Gamma(mu=d['mu_kc'], sd=d['sigma_kc'], name='kc')

        M = d['M']
        T = d['T']
        scale=d['scale']
        mu_fc = d['mu_fc']
        f = d['f']


        like = pm.Gamma(alpha=M, beta=(M/(((2 * 1.381e-5 * T) / (np.pi * Q * kc)) / scale * (dfc + mu_fc)**3 /
                    ((f * f - (dfc + mu_fc)**2) * (f * f - (dfc + mu_fc)**2) + f * f * (dfc + mu_fc)**2 / Q**2)
                    + Pdet)),
                            observed=d['y'],
                            name='like')

        start = pm.find_MAP()
        step = pm.NUTS(state=start)
        
        trace = pm.sample(samples, step=step, start=start, progressbar=True, njobs=njobs)
    return trace
Example #16
def run( n=100000 ):
    with model:
        # initialize NUTS() with ADVI under the hood
        trace = pm.sample( n )

    # drop some first samples as burnin
    pm.traceplot( trace[1000:] )
Example #17
def lin_fit(t, y, yerr=None, samples=10000, sampler="NUTS", alphalims=[-100,100]):
    """
    Bayesian linear fitting function.
    See Jake Vanderplas' blog post on how to be a
    Bayesian in Python for more details

    uses pymc3 MCMC sampling

    inputs:
        t    ::    Vector of values at which the function is evaluated ("x" values)
        y    ::    Vector of dependent values (observed y(t))
        yerr (optional = None) :: Errors on y values.  If not provided, errors are taken to be the same for each data point,
            with a 1/sigma (Jeffreys) prior.
        samples (optional = 10000)  :: Number of samples to draw from MCMC
        sampler (optional = "NUTS")  :: Type of MCMC sampler to use.  "NUTS" or "Metropolis"
        alphalims (optional = [-100,100])  ::  Length 2 vector of endpoints for uniform prior on intercept of the line
    """
    with pm.Model() as model:
            #Use uninformative priors on slope/intercept of line
            alpha = pm.Uniform('alpha',alphalims[0],alphalims[1])
            #this defines an uninformative prior on slope.  See Jake's blog post
            beta = pm.DensityDist('beta',lambda value: -1.5 * T.log(1 + value**2.),testval=0)
            #if yerr not given, assume all values have same errorbar
            if yerr is None:
                sigma = pm.DensityDist('sigma', lambda value: -T.log(T.abs_(value)),testval=1)
            else:
                sigma = yerr
            like = pm.Normal('likelihood',mu=alpha+beta*t, sd=sigma, observed=y)
            #start the sampler at the maximum a-posteriori value
            start = pm.find_MAP()
            step = select_sampler(sampler,start)
            trace = pm.sample(draws=samples,start=start,step=step)
    return trace
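
For orientation, a minimal usage sketch (not part of the original snippet) might call lin_fit on synthetic data; it assumes numpy is imported as np and that select_sampler returns a NUTS or Metropolis step method as described in the docstring.

# hypothetical usage sketch for lin_fit (assumes numpy as np)
t = np.linspace(0, 10, 50)
y = 2.0 * t + 1.0 + np.random.randn(50)   # noisy line: slope 2, intercept 1
trace = lin_fit(t, y, samples=2000, sampler="NUTS")
print(trace['alpha'].mean(), trace['beta'].mean())
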
Example #18
def test_minibatch():
    draws = 3000
    mu0 = 1
    sd0 = 1
    
    def f(x, a, b, c):
        return a*x**2 + b*x + c
    
    a, b, c = 1, 2, 3

    batch_size = 50
    total_size = batch_size*500
    x_train = np.random.uniform(-10, 10, size=(total_size,)).astype('float32')
    x_obs = pm.data.Minibatch(x_train, batch_size=batch_size)

    y_train = f(x_train, a, b, c) + np.random.normal(size=x_train.shape).astype('float32')
    y_obs = pm.data.Minibatch(y_train, batch_size=batch_size)

    with Model():
        abc = Normal('abc', mu=mu0, sd=sd0, shape=(3,))
        x = x_obs
        x2 = x**2
        o = tt.ones_like(x)
        X = tt.stack([x2, x, o]).T
        y = X.dot(abc)
        pm.Normal('y', mu=y, observed=y_obs)

        step_method = pm.SGFS(batch_size=batch_size, step_size=1., total_size=total_size)
        trace = pm.sample(draws=draws, step=step_method, init=None)

    np.testing.assert_allclose(np.mean(trace['abc'], axis=0), np.asarray([a, b, c]), rtol=0.1)
Example #19
def run(n = 3000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n, step, start)

    pm.traceplot(trace);
Example #20
    def test_deterministic_of_observed(self):
        meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
        meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))
        with pm.Model() as model:
            mu_in_1 = pm.Normal("mu_in_1", 0, 1)
            sigma_in_1 = pm.HalfNormal("sd_in_1", 1)
            mu_in_2 = pm.Normal("mu_in_2", 0, 1)
            sigma_in_2 = pm.HalfNormal("sd__in_2", 1)

            in_1 = pm.Normal("in_1", mu_in_1, sigma_in_1, observed=meas_in_1)
            in_2 = pm.Normal("in_2", mu_in_2, sigma_in_2, observed=meas_in_2)
            out_diff = in_1 + in_2
            pm.Deterministic("out", out_diff)

            trace = pm.sample(100)
            ppc_trace = pm.trace_to_dataframe(
                trace, varnames=[n for n in trace.varnames if n != "out"]
            ).to_dict("records")
            ppc = pm.sample_posterior_predictive(
                model=model,
                trace=ppc_trace,
                samples=len(ppc_trace),
                vars=(model.deterministics + model.basic_RVs),
            )

            rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
            assert np.allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
Example #21
def model_returns_t(data, samples=500):
    """Run Bayesian model assuming returns are normally distributed.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    """

    with pm.Model():
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        returns = pm.T('returns', nu=nu + 2, mu=mu, sd=sigma, observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))

        pm.Deterministic('sharpe', returns.distribution.mean /
                         returns.distribution.variance**.5 *
                         np.sqrt(252))

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)
    return trace
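
As a quick illustration (not from the source), model_returns_t could be driven with a synthetic return series; this sketch assumes pandas as pd and numpy as np.

# hypothetical usage sketch for model_returns_t
returns_series = pd.Series(np.random.normal(0.0005, 0.01, size=252))  # fake daily returns
trace = model_returns_t(returns_series, samples=500)
print(trace['mean returns'].mean(), trace['annual volatility'].mean())
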
Example #22
	def fit(self,xdata,ydata,yerr,arange=[-100.,100],brange=[-100.,100]):
		trace = None
		with pm.Model() as model:
		    # alpha = pm.Normal('alpha', mu=1.0e7, sd=1.0e6)
		    # beta  = pm.Normal('beta', mu=1.0e7, sd=1.0e6)
		    # sigma = pm.Uniform('sigma', lower=0, upper=20)
		    alpha = pm.Uniform('alpha', lower=arange[0], upper=arange[1])
		    beta  = pm.Uniform('beta',  lower=brange[0], upper=brange[1])
		    sigma = yerr
		    
		    y_est = alpha + beta * xdata
		    
		    likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=ydata)
		    
		    # obtain starting values via MAP
		    start = pm.find_MAP()
		    step  = pm.NUTS(state=start)
		    trace = pm.sample(2000, step, start=start, progressbar=False)
		    
		    # pm.traceplot(trace)

		# plt.show()
		# pprint(trace['alpha'].mean())
		# pprint(trace['alpha'].std())
		# print pm.summary(trace)
		# print pm.summary(trace, ['alpha'])
		# print pm.stats()
		# print(trace.__dict__)

		# Return the traces
		return [trace['alpha'], trace['beta']]
Example #23
    def fit(self, x, y, mcmc_samples=1000):
        t = x.shape[0] - 1  # number of additive components
        varnames = ["xc", "w", "decay", "sigma", "b", "lam"]

        with pm.Model() as model:
            # Priors for additive predictor
            w = pm.Normal("w", mu=0, sd=1, shape=t)
            decay = pm.HalfNormal("decay", sd=200, shape=t)
            # Prior for likelihood
            sigma = pm.Uniform("sigma", 0, 0.3)
            b = pm.Normal("b", mu=0, sd=20)
            lam = pm.Uniform("lam", 0, 0.3)

            # Building linear predictor
            lin_pred = 0
            for ii in range(1, t + 1):
                lin_pred += self.bias(w[ii - 1], decay[ii - 1])(x[ii, :])

            phi2 = pm.Deterministic("phi2", 0.5 * lam + (1 - lam) * phi(b + lin_pred + x[0, :] / sigma))
            y = pm.Bernoulli("y", p=phi2, observed=y)

        with model:
            # Inference
            start = pm.find_MAP()  # Find starting value by optimization
            print("MAP found:")
            # step = pm.NUTS(scaling = start)
            # step = pm.Slice()
            step = pm.NUTS(scaling=start)
            trace = pm.sample(mcmc_samples, step, start=start, progressbar=True)  # draw posterior samples

        return trace, model
Example #24
    def run(self, samples=1000, find_map=True, verbose=True, step='nuts',
            burn=0.5, **kwargs):
        ''' Run the model.
        Args:
            samples (int): Number of MCMC samples to generate
            find_map (bool): if True, use pm.find_MAP() to obtain the sampler's starting point
            verbose (bool): if True, prints additional information
            step (str or PyMC3 Sampler): either an instantiated PyMC3 sampler,
                or the name of the sampler to use (either 'nuts' or
                'metropolis').
            start: Optional starting point to pass onto sampler.
            burn (int or float): Number or proportion of samples to treat as
                burn-in; passed onto the BayesianModelResults instance returned
                by this method.
            kwargs (dict): optional keyword arguments passed on to the sampler.

        Returns: an instance of class BayesianModelResults.

        '''
        with self.model:
            njobs = kwargs.pop('njobs', 1)
            start = kwargs.pop('start', pm.find_MAP() if find_map else None)
            chain = kwargs.pop('chain', 0)
            if isinstance(step, string_types):
                step = {
                    'nuts': pm.NUTS,
                    'metropolis': pm.Metropolis
                }[step.lower()](**kwargs)

            self.start = start
            trace = pm.sample(
                samples, start=start, step=step, progressbar=verbose, njobs=njobs, chain=chain)
            self.last_trace = trace  # for convenience
            return BayesianModelResults(trace)
Example #25
def run(n=5000):
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        trace = pm.sample(n, xstep, xstart, random_seed=123, progressbar=True)

        pm.summary(trace)
Example #26
 def test_value_n_eff_rhat(self):
     mu = -2.1
     tau = 1.3
     with Model():
         Normal('x0', mu, tau, testval=floatX_array(.1)) # 0d
         Normal('x1', mu, tau, shape=2, testval=floatX_array([.1, .1]))# 1d
         Normal('x2', mu, tau, shape=(2, 2),
                testval=floatX_array(np.tile(.1, (2, 2))))# 2d
         Normal('x3', mu, tau, shape=(2, 2, 3),
                testval=floatX_array(np.tile(.1, (2, 2, 3))))# 3d
         trace = pm.sample(100, step=pm.Metropolis())
     for varname in trace.varnames:
         # test effective_n value
         n_eff = pm.effective_n(trace, varnames=[varname])[varname]
         n_eff_df = np.asarray(
                 pm.summary(trace, varnames=[varname])['n_eff']
                              ).reshape(n_eff.shape)
         npt.assert_equal(n_eff, n_eff_df)
         
         # test Rhat value
         rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
         rhat_df = np.asarray(
                 pm.summary(trace, varnames=[varname])['Rhat']
                              ).reshape(rhat.shape)
         npt.assert_equal(rhat, rhat_df)
Example #27
def run(n=1000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n)
    pm.traceplot(trace, varnames=['mu', 'r'],
                 lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]})
Example #28
    def test_model_shared_variable(self):
        x = np.random.randn(100)
        y = x > 0
        x_shared = theano.shared(x)
        y_shared = theano.shared(y)
        with pm.Model() as model:
            coeff = pm.Normal("x", mu=0, sd=1)
            logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))

            obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
            trace = pm.sample(100)

        x_shared.set_value([-1, 0, 1.0])
        y_shared.set_value([0, 0, 0])

        samples = 100
        with model:
            post_pred = pm.sample_posterior_predictive(
                trace, samples=samples, vars=[logistic, obs]
            )

        expected_p = np.array(
            [logistic.eval({coeff: val}) for val in trace["x"][:samples]]
        )
        assert post_pred["obs"].shape == (samples, 3)
        assert np.allclose(post_pred["p"], expected_p)
Example #29
    def test_linear_component(self):
        vars_to_create = {
            'sigma',
            'sigma_interval__',
            'y_obs',
            'lm_x0',
            'lm_Intercept'
        }
        with Model() as model:
            lm = LinearComponent(
                self.data_linear['x'],
                self.data_linear['y'],
                name='lm'
            )   # yields lm_x0, lm_Intercept
            sigma = Uniform('sigma', 0, 20)     # yields sigma_interval__
            Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear)  # yields y_obs
            start = find_MAP(vars=[sigma])
            step = Slice(model.vars)
            trace = sample(500, tune=0, step=step, start=start,
                           progressbar=False, random_seed=self.random_seed)

            assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0
            assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0
            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
        assert vars_to_create == set(model.named_vars.keys())
Example #30
def main():

    #load data    
    returns = data.get_data_google('SPY', start='2008-5-1', end='2009-12-1')['Close'].pct_change()
    returns.plot()
    plt.ylabel('daily returns in %');
    
    with pm.Model() as sp500_model:
        
        nu = pm.Exponential('nu', 1./10, testval=5.0)
        sigma = pm.Exponential('sigma', 1./0.02, testval=0.1)
        
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))                
        r = pm.StudentT('r', nu, lam=pm.math.exp(-2*s), observed=returns)
        
    
    with sp500_model:
        trace = pm.sample(2000)

    pm.traceplot(trace, [nu, sigma]);
    plt.show()
    
    plt.figure()
    returns.plot()
    plt.plot(returns.index, np.exp(trace['s',::5].T), 'r', alpha=.03)
    plt.legend(['S&P500', 'stochastic volatility process'])
    plt.show()
Example #31
# Data from https://www.lixinger.com/ ; the data copyright belongs to that site

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import theano.tensor as tt
import warnings
import csv

temp = []

filey = 'yunnanbaiyao.csv'
value_of_y = []
daily_exp_y = []

with open(filey) as y:
    reader = csv.reader(y)
    temp = list(reader)          # read all rows; row 0 is the header
    for i in range(101):
        value_of_y.append(float(temp[i + 1][4]))

for i in range(100):
    daily_exp_y.append((value_of_y[i + 1] - value_of_y[i]) / value_of_y[i])

with pm.Model() as model_y:
    mu = pm.Normal("mu", mu=0, sigma=1)
    obs = pm.Normal("obs", mu=mu, sigma=0.001, observed=daily_exp_y)

    sample_y = pm.sample(10000, tune=2500)
Example #32
    plt.ylabel("relative flux")
    plt.xlabel("time [days]")
    plt.legend(fontsize=10)
    _ = plt.title("map model")
    fig = plt.gcf()
    savefig(fig, '../results/test_results/test_{}_MAP.png'.format(modelid), writepdf=0)

    # sample from the posterior defined by this model. As usual, there are strong
    # covariances between some of the parameters so we’ll use
    # exoplanet.get_dense_nuts_step().
    np.random.seed(42)
    with model:
        trace = pm.sample(
            tune=3000,
            draws=3000,
            start=map_estimate,
            cores=16,
            chains=4,
            step=xo.get_dense_nuts_step(target_accept=0.9),
        )

    with open(pklpath, 'wb') as buff:
        pickle.dump({'model': model, 'trace': trace,
                     'map_estimate': map_estimate}, buff)

    samples = pm.trace_to_dataframe(trace, varnames=["period", "r"])
    truth = np.concatenate(
        xo.eval_in_model([period, r], model.test_point, model=model)
    )
    fig = corner.corner(
        samples,
        truths=truth,
Example #33
                          shape=2,
                          transform=pm.distributions.transforms.ordered)

    # Likelihood
    y_obs = pm.OrderedLogistic("y_obs",
                               eta=lp,
                               cutpoints=cutpoints,
                               observed=y - 1)

# ### Sampling

# In[153]:

with mod:
    # draw posterior samples
    trace = pm.sample(5000, tune=5000, nuts_kwargs=dict(target_accept=.85))

# ### Parameter estimates

# In[154]:

pm.summary(trace).round(2)

# ### Predictions

# In[155]:

ppc = pm.sample_ppc(trace, samples=5000, model=mod, size=1)

# In[192]:
Example #34
returns = pd.read_csv(pm.get_data('SP500.csv'), parse_dates=True, index_col=0)
dates=returns.index.strftime("%Y/%m/%d").tolist()

#model-inference
fileName='stochastic_volatility_PyMC3'
samples=2000
tune=2000
chains=2
coords = {"date": dates}
with pm.Model(coords=coords) as model:
    step_size = pm.Exponential('step_size', 10)
    volatility = pm.GaussianRandomWalk('volatility', sigma=step_size, dims='date')
    nu = pm.Exponential('nu', 0.1)
    returns = pm.StudentT('returns', nu=nu, lam=np.exp(-2*volatility), observed=data["change"], dims='date')
    # inference
    trace = pm.sample(draws=samples, chains=chains, tune=tune)
    prior = pm.sample_prior_predictive(samples=samples)
    posterior_predictive = pm.sample_posterior_predictive(trace, samples=samples)

## STEP 1
# will also capture all the sampler statistics
data = az.from_pymc3(trace=trace, prior=prior, posterior_predictive=posterior_predictive)

## STEP 2
#dag
dag = get_dag(stochastic_vol_model)
# insert dag into sampler stat attributes
data.sample_stats.attrs["graph"] = str(dag)

## STEP 3   
# save data      
Example #35
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:

    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])
    # Use Metropolis for switchpoint, since it accommodates discrete variables
    step2 = pm.Metropolis([switchpoint])

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], cores=2)
    pm.traceplot(tr)
Example #36
    # Create custom densities
    beta = pymc3.DensityDist('slope',
                             lambda value: -1.5 * tt.log(1 + value**2),
                             testval=0)
    sigma = pymc3.DensityDist('sigma',
                              lambda value: -tt.log(tt.abs_(value)),
                              testval=1)
    # Create likelihood
    like = pymc3.Normal('y_est',
                        mu=alpha + beta * xdata,
                        sd=sigma,
                        observed=ydata)

    start = pymc3.find_MAP()
    step = pymc3.NUTS(scaling=start)  # Instantiate sampler
    trace = pymc3.sample(10000, step, start=start)

#################################################
# Create some convenience routines for plotting
# All functions below written by Jake Vanderplas


def compute_sigma_level(trace1, trace2, nbins=20):
    """From a set of traces, bin by number of standard deviations"""
    L, xbins, ybins = np.histogram2d(trace1, trace2, nbins)
    L[L == 0] = 1E-16
    logL = np.log(L)

    shape = L.shape
    L = L.ravel()
Example #37
def init_nuts(init='auto',
              chains=1,
              n_init=500000,
              model=None,
              random_seed=None,
              progressbar=True,
              **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(chains):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init,
                        method='advi',
                        model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init,
               method=pm.KLqp(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init,
                               step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
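
In user code this initializer is normally reached through the `init` argument of pm.sample rather than called directly; a minimal sketch (assuming a trivial one-parameter model) would be:

# sketch: init_nuts is driven by pm.sample's `init` argument
with pm.Model():
    pm.Normal('x', mu=0, sd=1)
    trace = pm.sample(500, tune=500, init='jitter+adapt_diag', chains=2)
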
Example #38
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)


def model_factory(df_in, features):
    with pm.Model() as logistic_model:
        pm.glm.GLM.from_formula(
            "value ~ {} * {} + {}".format(*features),
            df_in,
            family=pm.glm.families.Binomial(),
        )
        return logistic_model


with model_factory(df_in, features):
    trace = pm.sample(1000, tune=1000, init="adapt_diag")  # ,
    # return_inferencedata=True) #

with model_factory(df_test, features):
    ppc = pm.sample_posterior_predictive(trace)  # or whatever

pred_mean = np.apply_along_axis(np.mean, 0, ppc["y"])
pred_median = np.apply_along_axis(np.median, 0, ppc["y"])

df_test["prediction"] = pred_median

fig = plt.figure()
plt.scatter(df_test["v_x"], df_test["v_y"], c=pred_mean, s=0.01)
fig.savefig(os.path.join(plot_dir, "prediction_bayes_mean.png"))

fig = plt.figure()
Example #39
    plt.scatter(x[0], x[1])

    plt.xlabel("$x_{}$".format(idx - 1))
    plt.ylabel("$x_{}$".format(idx), rotation=0)


scatter_plot(X, y)
plt.savefig("confounding_data.png")

# now create the model

with pm.Model() as model_mlr:
    alpha = pm.Normal("alpha", mu=0, sd=10)
    beta = pm.Normal("beta", mu=0, sd=1, shape=2)
    # beta = pm.Normal("beta", mu=0, sd=1)
    epsilon = pm.HalfCauchy("epsilon", 5)

    mu = alpha + pm.math.dot(beta, X)
    # mu = alpha + beta * x_2

    y_pred = pm.Normal("y_pred", mu=mu, sd=epsilon, observed=y)

    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace_red = pm.sample(5000, step=step, start=start)

pm.traceplot(trace_red)
plt.savefig("confounding_traceplot.png")
plt.close()
print(pm.summary(trace_red))
Example #40
#print(data)

#print(np.asarray(data).T)
data = np.asarray(data).T
plt.scatter(data[0], data[1])
plt.savefig('img4ex1.png')

with pm.Model() as model_ex:
    alpha = pm.Normal('alpha', mu=0, sd=200)
    beta = pm.Normal('beta', mu=0, sd=200)
    epsilon = pm.HalfCauchy('epsilon', 5)
    mu = alpha + pm.math.dot(beta, data[0])
    nu = pm.Deterministic('nu', pm.Exponential('nu_', 1 / 15))
    y_pred = pm.StudentT('y_pred', mu=mu, nu=nu, sd=epsilon, observed=data[1])

    trace_ex = pm.sample(5000)

pm.traceplot(trace_ex[500:])
plt.savefig('img4ex2.png')
pm.summary(trace_ex[500:])
pm.autocorrplot(trace_ex[500:])
plt.savefig('img4ex3.png')

pm.forestplot(trace_ex)
plt.savefig('img4ex4.png')

plt.figure()
y_temp = stats.linregress(data[0], data[1])[:2]
plt.plot(data[0], y_temp[0] * data[0] + y_temp[1], alpha=0.5)
alpha_m = trace_ex['alpha'][500:].mean()
beta_m = trace_ex['beta'][500:].mean()
Example #41
def model_best(y1, y2, samples=1000, progressbar=True):
    """
    Bayesian Estimation Supersedes the T-Test

    This model runs a Bayesian hypothesis test of whether y1 and y2 come
    from the same distribution. Returns are assumed to be T-distributed.

    In addition, computes annual volatility and Sharpe of in and
    out-of-sample periods.

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample)
    y2 : array-like
        Array of returns (e.g. out-of-sample)
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    y = np.concatenate((y1, y2))

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / np.std(y)**2

    sigma_low = np.std(y) / 1000
    sigma_high = np.std(y) * 1000
    with pm.Model() as model:
        group1_mean = pm.Normal('group1_mean',
                                mu=mu_m,
                                tau=mu_p,
                                testval=y1.mean())
        group2_mean = pm.Normal('group2_mean',
                                mu=mu_m,
                                tau=mu_p,
                                testval=y2.mean())
        group1_std = pm.Uniform('group1_std',
                                lower=sigma_low,
                                upper=sigma_high,
                                testval=y1.std())
        group2_std = pm.Uniform('group2_std',
                                lower=sigma_low,
                                upper=sigma_high,
                                testval=y2.std())
        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = pm.StudentT('group1',
                                     nu=nu,
                                     mu=group1_mean,
                                     lam=group1_std**-2,
                                     observed=y1)
        returns_group2 = pm.StudentT('group2',
                                     nu=nu,
                                     mu=group2_mean,
                                     lam=group2_std**-2,
                                     observed=y2)

        diff_of_means = pm.Deterministic('difference of means',
                                         group2_mean - group1_mean)
        pm.Deterministic('difference of stds', group2_std - group1_std)
        pm.Deterministic(
            'effect size', diff_of_means / pm.math.sqrt(
                (group1_std**2 + group2_std**2) / 2))

        pm.Deterministic(
            'group1_annual_volatility',
            returns_group1.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'group2_annual_volatility',
            returns_group2.distribution.variance**.5 * np.sqrt(252))

        pm.Deterministic(
            'group1_sharpe', returns_group1.distribution.mean /
            returns_group1.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'group2_sharpe', returns_group2.distribution.mean /
            returns_group2.distribution.variance**.5 * np.sqrt(252))

        trace = pm.sample(samples, progressbar=progressbar)
    return model, trace
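
A hypothetical call (not part of the original) with two synthetic return arrays, assuming numpy as np, could look like:

# hypothetical usage sketch for model_best
y1 = np.random.normal(0.001, 0.01, size=250)   # e.g. in-sample returns
y2 = np.random.normal(0.000, 0.02, size=250)   # e.g. out-of-sample returns
model, trace = model_best(y1, y2, samples=1000)
print(trace['difference of means'].mean(), trace['effect size'].mean())
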
Example #42
n1 = 10
n2 = 10

model = pm.Model()

with model:
    # Prior on Rates
    theta1 = pm.Beta('theta1', alpha=1, beta=1)
    theta2 = pm.Beta('theta2', alpha=1, beta=1)
    # Observed Counts
    k1 = pm.Binomial('k1', p=theta1, n=n1, observed=k1)
    k2 = pm.Binomial('k2', p=theta2, n=n2, observed=k2)
    # Difference between the two rates
    delta = pm.Deterministic('delta', theta1 - theta2)
    # instantiate Metropolis-Hastings sampler
    stepFunc = pm.Metropolis()
    # draw 5,000 posterior samples (in 4 parallel chains)
    Nsample, Nchains = 5000, 4
    traces = pm.sample(Nsample, step=stepFunc, njobs=Nchains)

axs = pm.traceplot(traces, vars=['theta1','theta2','delta'], combined=False)
axs[0][0].set_xlim([0,1]) # manually set x-limits for comparisons

# mean of delta:
np.mean(traces['delta'])
# median of delta:
np.median(traces['delta'])
# mode of delta
mstats.mode(traces['delta']) #FIXME! apply to SMOOTHED histogram
# 95% credible interval for delta:
mstats.mquantiles(traces['delta'], (0.025, 0.975))
Example #43
        alpha = pm.Gamma('alpha', mu=hyper_alpha_mu, sd=hyper_alpha_sd)
        mu = pm.Gamma('mu', mu=hyper_mu_mu, sd=hyper_mu_sd)

        y_obs = data[data.Hour == h]['Connected'].values

        y_est = pm.NegativeBinomial('y_est',
                                    mu=mu,
                                    alpha=alpha,
                                    observed=y_obs)

        y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha)

        trace = pm.sample(smpls,
                          tune=tunes,
                          chains=4,
                          progressbar=False,
                          nuts={"target_accept": 0.9})

        # Export traceplot
        #trarr = pm.traceplot()
        #fig = plt.gcf()
        #fig.savefig("out_tracePlt"+ str(int(h)) +".png")

        #trace24[h] = list(trace)
        trace24[h] = pm.save_trace(trace)
        ess = pm.diagnostics.effective_n(trace)

    print('- ESS: ', ess)
    obs = np.mean(data[data.Hour == h]['Connected'].values)
    print('- Observed: ', obs)
Example #44
    elif args.model == 1:
        with cm_effect.models.CMCombined_Final_V3(data, None) as model:
            model.build_model()

    elif args.model == 2:
        with cm_effect.models.CMCombined_Final_NoNoise(data, None) as model:
            model.build_model()

    elif args.model == 3:
        with cm_effect.models.CMCombined_Final_ICL(data, None) as model:
            model.build_model()

    elif args.model == 4:
        with cm_effect.models.CMCombined_ICL_NoNoise(data, None) as model:
            model.build_model()

    elif args.model == 5:
        with cm_effect.models.CMCombined_Final_DifEffects(data, None) as model:
            model.build_model()

    elif args.model == 6:
        with cm_effect.models.CMCombined_Additive(data, None) as model:
            model.build_model()

    with model.model:
        model.trace = pm.sample(args.nS, chains=args.nC, target_accept=0.95)

    results_obj = ResultsObject(r_is, model.trace)
    pickle.dump(results_obj,
                open(f"cv/model_{args.model}_fold_{args.fold}.pkl", "wb"))
Example #45
t = np.linspace(0, 10, 2)
x = np.random.uniform(0, 10, 50)
y = x * true_params[0] + true_params[1]
y_obs = y + np.exp(true_params[-1]) * np.random.randn(N)

plt.plot(x, y_obs, ".k", label="observations")
plt.plot(t, true_params[0] * t + true_params[1], label="truth")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(fontsize=14)

import pymc3 as pm
import theano.tensor as tt

with pm.Model() as model:
    logs = pm.Uniform("logs", lower=-10, upper=10)
    alphaperp = pm.Uniform("alphaperp", lower=-10, upper=10)
    theta = pm.Uniform("theta", -2 * np.pi, 2 * np.pi, testval=0.0)

    # alpha_perp = alpha * cos(theta)
    alpha = pm.Deterministic("alpha", alphaperp / tt.cos(theta))

    # beta = tan(theta)
    beta = pm.Deterministic("beta", tt.tan(theta))

    # The observation model
    mu = alpha * x + beta
    pm.Normal("obs", mu=mu, sd=tt.exp(logs), observed=y_obs)

    trace = pm.sample(draws=2000, tune=2000)
Example #46
    'cosi': np.cos(incl_),
    'phi': phi_,
    'g0': widths[0],
    'g1': widths[1],
    'g2': widths[2],
    'a0': amps[0],
    'a1': amps[1],
    'a2': amps[2]
}

# In[29]:

with pm_model:
    trace = pm.sample(chains=4,
                      target_accept=.99,
                      start=start,
                      init='advi+adapt_diag',
                      progressbar=True)

# In[ ]:

pm.summary(trace)

# In[ ]:

labels = ['xsplit', 'cosi', 'split', 'i']
chain = np.array([trace[label] for label in labels])
truths = [init[9] * np.sin(init[10]), np.cos(init[10]), init[9], init[10]]
corner.corner(chain.T,
              labels=labels,
              truths=truths,
Example #47
# builds our model
basic_model = pm.Model()  # creates a container for the model

with basic_model:  # everything in here is added to the model behind the screens

    # priors for unknown parameters
    alpha = pm.Normal(
        'alpha', mu=0,
        sd=10)  # first arg is name of RV; match the name of the var
    # it is assigned to; the rest are the hyperparameters of the model (Beta, Exponential, Categorical)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma', sd=1)

    # Expected values of outcome
    mu = alpha + beta[0] * X1 + beta[1] * X2
    # RVs can be added, subtracted, divided, multiplied, and indexed into to create new RVs

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma,
                      observed=Y)  # the Y says it is an observed stochastic
    # this is apparently a sampling distribution of outcomes

# runs a maximum a posteriori methods
map_estimate = pm.find_MAP(model=basic_model, fmin=optimize.fmin_powell)
print(map_estimate)

# runs an MCMC
with basic_model:
    # draw 500 posterior samples
    trace = pm.sample(500)
print(pm.summary(trace))
Example #48
with basic_model:
    #prior belief
    theta = pymc3.Beta("theta", alpha=alpha, beta=beta)
    #Bernoulli likelihood
    y = pymc3.Binomial("y", n=n, p=theta, observed=z)
    #carry out MCMC analysis with the Metropolis algorithm
    #using Maximum A Posteriori optimisation as initial values
    start = pymc3.find_MAP()

    #use the metropolis algorithm
    step = pymc3.Metropolis()

    #calculate the trace
    trace = pymc3.sample(iterations,
                         step,
                         start,
                         random_seed=1,
                         progressbar=True)

    #plot the posterior histogram from MCMC analysis
    bins = 50
    plt.hist(trace["theta"],
             bins,
             histtype="step",
             normed=True,
             label="posterior(MCMC)",
             color="red")

    #plot the analytic prior and posterior beta distributions
    x = np.linspace(0, 1, 100)
    plt.plot(x,
Example #49
def run(n=1000):
    if n == "short":
        n = 50
    with garch:
        tr = sample(n)
Example #50
    plt.show()

    with pm.Model() as model:
        l_ = pm.Gamma("l", alpha=2, beta=1)
        eta = pm.HalfCauchy("eta", beta=1)

        cov = eta ** 2 * pm.gp.cov.Matern52(1, l_)
        gp = pm.gp.Latent(cov_func=cov)

        f = gp.prior("f", X=X)

        sigma = pm.HalfCauchy("sigma", beta=5)
        nu = pm.Gamma("nu", alpha=2, beta=0.1)
        y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=nu, observed=y)

        trace = pm.sample(200, n_init=100, tune=100, chains=2, cores=2, return_inferencedata=True)
        az.to_netcdf(trace, 'src/experiments/results/lat_gp_trace')

    # check Rhat, values above 1 may indicate convergence issues
    n_nonconverged = int(np.sum(az.rhat(trace)[["eta", "l", "f_rotated_"]].to_array() > 1.03).values)
    print("%i variables MCMC chains appear not to have converged." % n_nonconverged)

    # plot the results
    fig = plt.figure(figsize=(12, 5))
    ax = fig.gca()

    # plot the samples from the gp posterior with samples and shading
    from pymc3.gp.util import plot_gp_dist

    plot_gp_dist(ax, trace.posterior["f"][0, :, :], X)
Example #51
import pymc3 as pm
import numpy as np
import tushare as ts
import matplotlib.pyplot as plt

gdp_year = ts.get_gdp_year()
gdp_year = gdp_year.set_index('year')
gdp_year = gdp_year[::-1]

gdp_year['gdp'] = gdp_year['gdp'].apply(lambda x: x/1000)

gdp_year['lag'] = gdp_year['gdp'].shift()

gdp_year.dropna(inplace=True)
with pm.Model() as model:
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    nu = pm.Exponential('nu', 1. / 10)
    beta = pm.GaussianRandomWalk('beta', sigma ** -2, shape=len(gdp_year['gdp']))
    observed = pm.Normal('observed', mu=beta * gdp_year['lag'], sd=1 / nu, observed=gdp_year['gdp'])

    trace = pm.sample(1000, tune=1000, cores=2)

plt.plot(gdp_year.index,trace['beta'].T, 'b', alpha=.03)
plt.plot(gdp_year.index, 1 + (np.log(gdp_year['gdp']) - np.log(gdp_year['lag'])), 'r', label='True Growth Rate')
plt.show()
Example #52
#%% Hierarchical GP Energy Model

with pm.Model() as marginal_gp_model:
    # Specify the covariance function.
    input_dim = 1; T = 96; ls1 = 96; ls2 = 4;
    cov_func = ( pm.gp.cov.Periodic(input_dim, period=T, ls=ls1)
                + pm.gp.cov.ExpQuad(input_dim, ls=ls2) ) 

    # Specify the GP.  The default mean function is `Zero`.
    gp = pm.gp.Marginal(cov_func=cov_func)

    # The scale of the white noise term can be provided,
    sigma = pm.HalfCauchy("sigma", beta=5)
    y_ = gp.marginal_likelihood("y", X=hrs_idx, y=y_obs, noise=sigma)
    
    trace = pm.sample(1000, chains=4, cores=1)

pm.plot_posterior(trace) 

#%% Hierarchical Poisson Count Model
with pm.Model() as arrivalModel:
    
    # Hyper-Priors
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=10)
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=10) 
    
    # Priors   
    mu = pm.Gamma('mu', mu=hyper_mu_mu, 
                        sigma=hyper_mu_sd,
                        shape=n_hrs)    
    
Example #53
        z = pm.Uniform("z", lower=0.0, upper=3.0)

        # Make sure model is not older than the Universe
        # Allowing at least 100 Myr for the first galaxies to form after Big Bang
        #age_at_z = get_age_at_z(z)
        #age_lim = age_at_z - 0.1  # in Gyr

        ms = pm.Uniform("ms", lower=9.0, upper=12.5)
        age = pm.Uniform("age", lower=0.01, upper=10.0)
        logtau = pm.Uniform("logtau", lower=-3.0, upper=2.0)
        av = pm.Uniform("av", lower=0.0, upper=5.0)

        # ----------------

        # convert inputs to a tensor vector
        theta = tt.as_tensor_variable([z, ms, age, logtau, av])

        # use a DensityDist (use a lamdba function to "call" the Op)
        #pm.DensityDist("likelihood", my_logl, observed={"v": theta})
        like = pm.Potential("like", logl(theta))

    with model:
        trace = pm.sample(ndraws,
                          cores=ncores,
                          chains=nchains,
                          tune=nburn,
                          discard_tuned_samples=True)
        print(pm.summary(trace).to_string())

    sys.exit(0)
Example #54
def mixture_model(
        data_2d,
        N,  # noqa: N803
        M,
        std,
        lam_backg,
        nsteps,
        nchains
):
    """Define the mixture model and sample from it.

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg: integer
        Expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : dataframe
        Samples converted into a dataframe object

    """
    # x-pixel locations for one lattice site
    x = np.arange(-M/2, M/2)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # in future gen instead of passing N, use
    # opticalLatticeShape = tuple((np.array(pixel_grid.shape)/M).astype(int))

    with pm.Model() as mixture_model:  # noqa: F841

        # Priors

        # Boolean numbers characterizing if lattice sites is filled or not.
        q = pm.Uniform('q', lower=0, upper=1, shape=(N, N))

        # Amplitude of the Gaussian signal for the atoms
        aa = pm.Uniform('Aa', lower=0.5*np.max(data_2d), upper=np.max(data_2d))

        # Amplitude of the uniform background signal
        ab = pm.Uniform('Ab', lower=0, upper=lam_backg / (M * M * N))

        # Width of the point spread function
        atom_std = pm.Normal('std', mu=std, sd=0.2)

        # Width of the Gaussian likelihood for the atoms
        sigma_a = pm.Uniform('sigma_a', lower=0, upper=10)

        # Width of the Gaussian likelihood for the background
        sigma_b = pm.Uniform('sigma_b', lower=0, upper=10)

        # Model (gaussian + uniform)

        # Gaussian with amplitude Aa modelling the PSF
        single_atom = aa * np.exp(-(X**2 + Y**2) / (2 * atom_std**2)) + ab

        # Place a PSF on each lattice site scale it by q
        atom = tt.slinalg.kron(q, single_atom)

        # Constant background with amplitude, Ab, drawn from a
        # Uniform distribution, modelling the background
        background = ab * np.ones((N*M, N*M))

        # Log-likelihood
        # log-likelihood for the counts to come from atoms
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)

        # log-likelihood for the counts to come from the background
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        pm.Potential('logp', log_like.sum())

        # Sample
        # sample from the log-likelihood
        traces = pm.sample(tune=nsteps, draws=nsteps, chains=nchains)

    # convert the PymC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
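A usage sketch (not from the original source) showing how mixture_model might be called on a small synthetic image; the lattice size, PSF width, background level and sampler settings below are illustrative assumptions.

import numpy as np

N, M = 4, 10  # assumed: 4x4 lattice, 10 pixels per site
np.random.seed(0)
synthetic_image = np.random.poisson(lam=2.0, size=(N * M, N * M)).astype(float)

traces, df = mixture_model(
    data_2d=synthetic_image,
    N=N,
    M=M,
    std=1.5,        # assumed PSF width in pixels
    lam_backg=50,   # assumed background level
    nsteps=500,
    nchains=2,
)
print(df.describe())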
Exemplo n.º 55
0
def mixture_model_boolean_vnm(
        data_2d,
        N,  # noqa: N803
        M,
        std,
        lam_backg,
        nsteps,
        nchains
):
    """Define the mixture model and sample from it.

    This version of the model was contributed by
    V N Manoharan

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg : integer
        Expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : dataframe
        Samples converted into a dataframe object

    """
    # x-pixel locations for one lattice site
    x = np.arange(-M/2, M/2)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # in a future version, instead of passing N, use
    # opticalLatticeShape = tuple((np.array(pixel_grid.shape)/M).astype(int))

    with pm.Model() as mixture_model:  # noqa: F841

        # Prior
        # Use an informative prior for P based on what
        # you would know in a real experiment.
        # A Uniform(0,1) prior causes severe problems
        # and probably doesn't represent your
        # true state of knowledge prior to the experiment.
        # I use a Gamma distribution (rather than a Normal)
        # so that P stays positive and the sampler doesn't diverge.
        # You can adjust the width to match what you would
        # know in a typical experiment.

        P = pm.Gamma('P', mu=0.5, sd=0.05)  # noqa: N806
        q = pm.Bernoulli('q', p=P, shape=(N, N), testval=np.ones((N, N)))

        # Here again you need more informative priors.
        # Previously these were Uniform, with limits determined by the data.
        # But priors should not be based on the data.
        # They should be based on what you know prior to the experiment.
        # I use a Gamma distribution for both
        # because that constrains the values to be positive.
        # Adjust mu and sd to match what you
        # would know before a typical experiment.
        aa = pm.Gamma('Aa', mu=3, sd=0.5)
        ab = pm.Gamma('Ab', mu=0.5, sd=0.1)

        # Again, replaced Uniform priors by Gamma priors.
        # Adjust mu and sd to match what you
        # would know before a typical experiment
        sigma_a = pm.Gamma('sigma_a', mu=1, sd=0.1)
        sigma_b = pm.Gamma('sigma_b', mu=1, sd=0.1)

        # Replaced Normal by Gamma distribution to keep atom_std positive
        # atom_std = pm.Normal('std', mu = std, sd = 0.2)
        atom_std = pm.Gamma('std', mu=std, sd=0.1)

        # Removed atom_back as a parameter and
        # assumed background in presence of atom is the
        # same as that without the atom.
        # If you want to keep this, don't use a Uniform prior.
        # atom_back = pm.Uniform('A_back', lower=0, upper=20)

        # Model (gaussian + uniform)
        single_background = ab * np.ones((M, M))
        # Replaced background with Ab rather than atom_back.
        single_atom = aa * tt.exp(
            -((X - 0)**2 + (Y - 0)**2) / (2 * atom_std**2)
        ) + ab * np.ones((M, M))

        atom = tt.slinalg.kron(q, single_atom)
        background = tt.slinalg.kron(1-q, single_background)
        # Log-likelihood
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        # Here I added a binomial log-likelihood term.
        # I used the normal approximation to the
        # binomial (please check my math).
        # This term accounts for deviations from the expected
        # occupancy fraction. If the mean of the q_i are
        # significantly different from P, the
        # configuration is penalized.
        # This is why you shouldn't put a uniform prior on P.
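        # Concretely: if the q_i are i.i.d. Bernoulli(P), their mean over the
        # N*N sites is approximately Normal with mean P and variance
        # P*(1-P)/(N*N), i.e. precision tau = N*N / (P*(1-P)), which is the
        # term used in log_add below.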

        log_add = pm.Normal.dist(mu=P, tau=N*N/(P*(1-P))).logp(q.mean())
        pm.Potential('logp', log_like.sum() + log_add)

        # Sample
        # We'll explicitly set the two sampling steps
        # (rather than let pymc3 do it for us), so that
        # we can tune each step.
        # We use binary Gibbs Metropolis for the q and NUTS for everything
        # else.  Note that if you add a variable to the model,
        # you should explicitly add it to the
        # sampling step below.
        steps = [
            pm.BinaryGibbsMetropolis([q], transit_p=0.8),
            pm.NUTS(
                [atom_std, sigma_b, sigma_a, ab, aa, P],
                target_accept=0.8
            )
        ]

        # Sample
        # sample from the log-likelihood
        traces = pm.sample(tune=nsteps, draws=nsteps, step=steps, chains=nchains)

    # convert the PymC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
Exemplo n.º 56
0
    zbeta0 = pm.Normal('zbeta0', mu=float(n_cat)/2., tau=1./n_cat**2)
    zbeta = pm.Normal('zbeta', mu=0., tau=1./n_cat**2, shape=Z_data.shape[1])
    zsigma = pm.Uniform('zsigma', n_cat/1000., n_cat*10.)
    # Linear model
    mu = pm.Deterministic('mu', zbeta0 + pm.math.dot(zbeta, Z_data.T))
    # Link function
    pr = outcome_probabilities(theta, mu, zsigma)
    # For the *robust* version of the ordered "probit" regression
    # comment the previous line and uncomment the following lines
    ##nu = pm.Exponential('nu', lam=1./30.)
    ##pr = outcome_probabilities_robust(theta, mu, zsigma, nu)
    # Likelihood
    y = pm.Categorical('y', pr, observed=y_train_log)
    # MCMC (it is not possible to use gradient-based samplers)
    step_M = pm.DEMetropolis()  # experimental sampler!
    chain = pm.sample(draws=32000, tune=4000, step=step_M, chains=4, parallelize=True)


# In[30]:


burnin = 2000
thin = 6
# Trace after burn-in and thinning
trace = chain[burnin::thin]


# In[31]:


pm.gelman_rubin(chain, varnames=['theta_missing', 'zbeta0', 'zbeta', 'zsigma'])
Exemplo n.º 57
0
def mixture_model_mobile_centers(
        data_2d,
        N,  # noqa: N803
        M,
        std,
        lam_backg,
        nsteps,
        nchains
):
    """Define the mixture model and sample from it.

    This mobile-centers model
    extends the above mixture model by allowing the center position of
    each atom to vary slightly from the center of its lattice site. This should
    help in cases of lattice inhomogeneity.

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg : integer
        Expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : dataframe
        Samples converted into a dataframe object

    """
    # x-pixel locations for the entire image
    x = np.arange(0, N*M)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # atom center locations are explicitly supplied as the centers of
    # the lattice sites
    centers = np.linspace(0, (N-1)*M, N)+M/2
    Xcent_mu, Ycent_mu = np.meshgrid(centers, centers)  # noqa: N806

    with pm.Model() as mixture_model:  # noqa: F841

        # Priors

        # Continuous values in [0, 1] indicating whether each lattice site
        # is filled or not.
        q = pm.Uniform('q', lower=0, upper=1, shape=(N, N))

        # Allow centers to move but we expect them to be
        # pretty near their lattice centers
        Xcent = pm.Normal(  # noqa: N806
            'Xcent',
            mu=Xcent_mu,
            sigma=Xcent_mu/10,
            shape=(N, N)
        )

        Ycent = pm.Normal(  # noqa: N806
            'Ycent',
            mu=Ycent_mu,
            sigma=Ycent_mu/10,
            shape=(N, N)
        )

        # Amplitude of the Gaussian signal for the atoms
        aa = pm.Gamma('Aa', mu=3, sd=0.5)
        # Amplitude of the uniform background signal
        ab = pm.Gamma('Ab', mu=0.5, sd=0.1)

        # Width of the Gaussian likelihood for the atoms
        sigma_a = pm.Gamma('sigma_a', mu=1, sd=0.1)

        # Width of the Gaussian likelihood for the background
        sigma_b = pm.Gamma('sigma_b', mu=1, sd=0.1)

        # Width of the point spread function
        atom_std = pm.Gamma('std', mu=std, sd=0.1)

        # Instead of tiling a single_atom PSF with kronecker, use
        # broadcasting and summing along appropriate axis
        # to allow for spill over of one atom to neighboring sites.
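        # Shapes: X[:, :, None, None] and Y[:, :, None, None] are
        # (N*M, N*M, 1, 1) while Xcent and Ycent are (N, N), so the
        # differences broadcast to (N*M, N*M, N, N); multiplying by q*aa
        # (shape (N, N)) and summing twice over axis 2 leaves an
        # (N*M, N*M) image.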
        atom = tt.sum(
                tt.sum(
                    q*aa * tt.exp(
                        -((X[:, :, None, None] - Xcent)**2 +
                          (Y[:, :, None, None] - Ycent)**2) / (2 * atom_std**2)
                    ),
                    axis=2
                ),
                axis=2
            )
        atom += ab

        # background is just flat
        background = ab*np.ones((N*M, N*M))
        # Log-likelihood
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        pm.Potential('logp', log_like.sum())

        # Sample
        traces = pm.sample(tune=nsteps, draws=nsteps, chains=nchains)

    # convert the PymC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
Exemplo n.º 58
0
                          p=0.35)

        flight_time = \
            pmc.Exponential("Flight Time",
                            lam=0.5 - (0.1 * rough_weather))
        arrival_traffic_delay = \
            pmc.Wald("Arrival Traffic Delay",
                     mu=0.1, lam=0.2)

        arrival_time = \
            pm.Deterministic("Arrival time",
                             departure_time +
                             flight_time +
                             arrival_traffic_delay)

    # Sample from the model
    with model:
        samples = pm.sample(draws=nb_samples, random_seed=1000)

    # Show the summary
    with pd.option_context('display.max_rows', None, 'display.max_columns',
                           None):
        print(pm.summary(samples))

    # Show the diagrams
    fig = plt.figure()

    pm.traceplot(samples, figsize=(14, 18))

    plt.show()
Exemplo n.º 59
0
x_0 = df[x_n].values
x_c = x_0 - x_0.mean()

# logistic regression
with pm.Model() as model_0:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10)

    mu = alpha + pm.math.dot(x_c, beta)
    theta = pm.Deterministic('theta', pm.math.sigmoid(mu))

    bd = pm.Deterministic('bd', -alpha / beta)
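    # bd is the decision boundary: the x value at which theta = 0.5, i.e. x = -alpha/beta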

    yl = pm.Bernoulli('yl', p=theta, observed=y_0)

    trace_0 = pm.sample(1000)

varnames = ['alpha', 'beta', 'bd']
pm.summary(trace_0, varnames)
pm.plot_trace(trace_0, varnames)

#######################
# multi variable logit
#######################

df = iris.query("species == ('setosa', 'versicolor')")
y_1 = pd.Categorical(df['species']).codes
x_n = ['sepal_length', 'sepal_width']
# note: not centering this time
x_1 = df[x_n].values
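The snippet ends before the multi-variable model itself; a sketch of how it might continue, following the same pattern as model_0 but with a coefficient vector (the names and prior widths below are assumptions, not the original code):

with pm.Model() as model_1:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=2, shape=len(x_n))

    mu = alpha + pm.math.dot(x_1, beta)
    theta = pm.Deterministic('theta', pm.math.sigmoid(mu))

    # decision boundary as a function of the first predictor
    bd = pm.Deterministic('bd', -alpha / beta[1] - beta[0] / beta[1] * x_1[:, 0])

    yl = pm.Bernoulli('yl', p=theta, observed=y_1)

    trace_1 = pm.sample(1000)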
Exemplo n.º 60
0
# ======================================================================
# Model construction:
# Model 1: build the model with the pymc3 GLM module (the Normal family works better)
# Model 2: hand-built model
# ======================================================================
data = dict(x=elec_year, z1=elec_tem, y=elec_faults)

with pm.Model() as mdl_ols_glm:
    # family = pm.glm.families.StudentT()
    pm.glm.GLM.from_formula('y ~ 1+x + z1',
                            data,
                            family=pm.glm.families.Normal())
    # pm.glm.GLM.from_formula('y ~ 1 + x + z1', data, family=family)

    traces_ols_glm = pm.sample(3000)
pm.traceplot(traces_ols_glm)
plt.show()

with pm.Model() as pooled_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 5)
    beta = pm.Normal('beta', 0, 1000)
    beta1 = pm.Normal('beta1', 0, 10000)
    beta2 = pm.Normal('beta2', 0, 1000)

    # define likelihood: build the time-dependent model
    # out_pai = pm.Deterministic('out_pai',)
    theta = beta + beta1 * elec_year + beta2 * elec_tem1
    Observed = pm.Normal("Observed", theta, sd=sigma,
                         observed=elec_faults1)  # observed values