def test_AR():
    # AR1
    data = np.array([0.3, 1, 2, 3, 4])
    phi = np.array([0.99])
    with Model() as t:
        y = AR("y", phi, sigma=1, shape=len(data))
        z = Normal("z", mu=phi * data[:-1], sigma=1, shape=len(data) - 1)
    ar_like = t["y"].logp({"z": data[1:], "y": data})
    reg_like = t["z"].logp({"z": data[1:], "y": data})
    np.testing.assert_allclose(ar_like, reg_like)

    # AR1 and AR(1)
    with Model() as t:
        rho = Normal("rho", 0.0, 1.0)
        y1 = AR1("y1", rho, 1.0, observed=data)
        y2 = AR("y2", rho, 1.0, init=Normal.dist(0, 1), observed=data)
    np.testing.assert_allclose(y1.logp(t.initial_point), y2.logp(t.initial_point))

    # AR1 + constant
    with Model() as t:
        y = AR("y", np.hstack((0.3, phi)), sigma=1, shape=len(data), constant=True)
        z = Normal("z", mu=0.3 + phi * data[:-1], sigma=1, shape=len(data) - 1)
    ar_like = t["y"].logp({"z": data[1:], "y": data})
    reg_like = t["z"].logp({"z": data[1:], "y": data})
    np.testing.assert_allclose(ar_like, reg_like)

    # AR2
    phi = np.array([0.84, 0.10])
    with Model() as t:
        y = AR("y", phi, sigma=1, shape=len(data))
        z = Normal("z", mu=phi[0] * data[1:-1] + phi[1] * data[:-2], sigma=1, shape=len(data) - 2)
    ar_like = t["y"].logp({"z": data[2:], "y": data})
    reg_like = t["z"].logp({"z": data[2:], "y": data})
    np.testing.assert_allclose(ar_like, reg_like)
Example #2
def run_MCMC_ARp(x, y, draws, p, resmdl):
    phi_means = resmdl.params[:p]
    phi_sd = resmdl.bse[:p]

    with Model() as model8:
        alpha = Normal('alpha', mu=0, sd=10)
        beta = Normal('beta', mu=0, sd=10)
        sd = HalfNormal('sd', sd=10)
        phi = Normal('phi', mu=phi_means, sd=phi_sd, shape=p)
        y = tt.as_tensor(y)
        x = tt.as_tensor(x)
        y_r = y[p:]
        x_r = x[p:]
        resids = y - beta * x - alpha

        u = tt.add(*[phi[i] * resids[p - (i + 1):-(i + 1)] for i in range(p)])
        mu = alpha + beta * x_r + u
        data = Normal('y_r', mu=mu, sd=sd, observed=y_r)

    with model8:
        if p == 1:
            step = None
        else:
            step = Metropolis([phi])
        tune = int(draws / 5)
        trace = sample(draws, tune=tune, step=step, progressbar=False)

    print(summary(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    #plt.show(forestplot(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    #traceplot(trace, varnames=['alpha', 'beta', 'sd', 'phi'])
    return trace
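The snippet above never shows how resmdl is built. Below is a minimal, hypothetical sketch that assumes resmdl is a statsmodels AutoReg fit (with no trend term) of the OLS residuals, so that resmdl.params[:p] and resmdl.bse[:p] hold the AR coefficients and their standard errors; the synthetic x and y are placeholders.

import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg

np.random.seed(0)
x = np.linspace(0.0, 10.0, 200)
y = 1.5 + 0.3 * x + 0.5 * np.random.randn(x.size)

p = 2
# Fit a straight line first, then an AR(p) model (no trend term) to its
# residuals, so that params/bse contain only the AR coefficients.
ols_fit = sm.OLS(y, sm.add_constant(x)).fit()
resmdl = AutoReg(ols_fit.resid, lags=p, trend='n').fit()

trace = run_MCMC_ARp(x, y, draws=2000, p=p, resmdl=resmdl)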
Example #3
def test_missing_logp():
    with Model() as m:
        theta1 = Normal("theta1", 0, 5, observed=[0, 1, 2, 3, 4])
        theta2 = Normal("theta2", mu=theta1, observed=[0, 1, 2, 3, 4])
    m_logp = m.logp()

    with Model() as m_missing:
        theta1 = Normal("theta1", 0, 5, observed=np.array([0, 1, np.nan, 3, np.nan]))
        theta2 = Normal("theta2", mu=theta1, observed=np.array([np.nan, np.nan, 2, np.nan, 4]))
    m_missing_logp = m_missing.logp({"theta1_missing": [2, 4], "theta2_missing": [0, 1, 3]})

    assert m_logp == m_missing_logp

def test_AR_nd():
    # AR2 multidimensional
    p, T, n = 3, 100, 5
    beta_tp = np.random.randn(p, n)
    y_tp = np.random.randn(T, n)
    with Model() as t0:
        beta = Normal("beta", 0.0, 1.0, shape=(p, n), initval=beta_tp)
        AR("y", beta, sigma=1.0, shape=(T, n), initval=y_tp)

    with Model() as t1:
        beta = Normal("beta", 0.0, 1.0, shape=(p, n), initval=beta_tp)
        for i in range(n):
            AR("y_%d" % i, beta[:, i], sigma=1.0, shape=T, initval=y_tp[:, i])

    np.testing.assert_allclose(t0.logp(t0.initial_point), t1.logp(t1.initial_point))
Example #5
def test_missing(data):

    with Model() as model:
        x = Normal("x", 1, 1)
        with pytest.warns(ImputationWarning):
            y = Normal("y", x, 1, observed=data)

    assert "y_missing" in model.named_vars

    test_point = model.initial_point
    assert not np.isnan(model.logp(test_point))

    with model:
        prior_trace = sample_prior_predictive()
    assert {"x", "y"} <= set(prior_trace.keys())
Example #6
def test_logpt_basic():
    """Make sure we can compute a log-likelihood for a hierarchical model with transforms."""

    with Model() as m:
        a = Uniform("a", 0.0, 1.0)
        c = Normal("c")
        b_l = c * a + 2.0
        b = Uniform("b", b_l, b_l + 1.0)

    a_value_var = m.rvs_to_values[a]
    assert a_value_var.tag.transform

    b_value_var = m.rvs_to_values[b]
    assert b_value_var.tag.transform

    c_value_var = m.rvs_to_values[c]

    b_logp = logpt(b, b_value_var)

    res_ancestors = list(walk_model((b_logp, ), walk_past_rvs=True))
    res_rv_ancestors = [
        v for v in res_ancestors
        if v.owner and isinstance(v.owner.op, RandomVariable)
    ]

    # There shouldn't be any `RandomVariable`s in the resulting graph
    assert len(res_rv_ancestors) == 0
    assert b_value_var in res_ancestors
    assert c_value_var in res_ancestors
    assert a_value_var in res_ancestors

def test_linear():
    lam = -0.78
    sig2 = 5e-3
    N = 300
    dt = 1e-1
    sde = lambda x, lam: (lam * x, sig2)
    x = floatX(_gen_sde_path(sde, (lam, ), dt, N, 5.0))
    z = x + np.random.randn(x.size) * sig2
    # build model
    with Model() as model:
        lamh = Flat("lamh")
        xh = EulerMaruyama("xh", dt, sde, (lamh, ), shape=N + 1, testval=x)
        Normal("zh", mu=xh, sigma=sig2, observed=z)
    # invert
    with model:
        trace = sample(init="advi+adapt_diag", chains=1)

    ppc = sample_posterior_predictive(trace, model=model)
    ppcf = fast_sample_posterior_predictive(trace, model=model)
    # test
    p95 = [2.5, 97.5]
    lo, hi = np.percentile(trace[lamh], p95, axis=0)
    assert (lo < lam) and (lam < hi)
    lo, hi = np.percentile(ppc["zh"], p95, axis=0)
    assert ((lo < z) * (z < hi)).mean() > 0.95
    lo, hi = np.percentile(ppcf["zh"], p95, axis=0)
    assert ((lo < z) * (z < hi)).mean() > 0.95
Example #8
def test_missing_dual_observations():
    with Model() as model:
        obs1 = ma.masked_values([1, 2, -1, 4, -1], value=-1)
        obs2 = ma.masked_values([-1, -1, 6, -1, 8], value=-1)
        beta1 = Normal("beta1", 1, 1)
        beta2 = Normal("beta2", 2, 1)
        latent = Normal("theta", size=5)
        with pytest.warns(ImputationWarning):
            ovar1 = Normal("o1", mu=beta1 * latent, observed=obs1)
        with pytest.warns(ImputationWarning):
            ovar2 = Normal("o2", mu=beta2 * latent, observed=obs2)

        prior_trace = sample_prior_predictive()
        assert {"beta1", "beta2", "theta", "o1", "o2"} <= set(prior_trace.keys())
        # TODO: Assert something
        trace = sample(chains=1, draws=50)
Example #9
def test_missing_with_predictors():
    predictors = array([0.5, 1, 0.5, 2, 0.3])
    data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
    with Model() as model:
        x = Normal("x", 1, 1)
        with pytest.warns(ImputationWarning):
            y = Normal("y", x * predictors, 1, observed=data)

    assert "y_missing" in model.named_vars

    test_point = model.initial_point
    assert not np.isnan(model.logp(test_point))

    with model:
        prior_trace = sample_prior_predictive()
    assert {"x", "y"} <= set(prior_trace.keys())
Example #10
def test_interval_missing_observations():
    with Model() as model:
        obs1 = ma.masked_values([1, 2, -1, 4, -1], value=-1)
        obs2 = ma.masked_values([-1, -1, 6, -1, 8], value=-1)

        rng = aesara.shared(np.random.RandomState(2323), borrow=True)

        with pytest.warns(ImputationWarning):
            theta1 = Uniform("theta1", 0, 5, observed=obs1, rng=rng)
        with pytest.warns(ImputationWarning):
            theta2 = Normal("theta2", mu=theta1, observed=obs2, rng=rng)

        assert "theta1_observed_interval__" in model.named_vars
        assert "theta1_missing_interval__" in model.named_vars
        assert isinstance(
            model.rvs_to_values[model.named_vars["theta1_observed"]].tag.transform, Interval
        )

        prior_trace = sample_prior_predictive()

        # Make sure the observed + missing combined deterministics have the
        # same shape as the original observation vectors
        assert prior_trace["theta1"].shape[-1] == obs1.shape[0]
        assert prior_trace["theta2"].shape[-1] == obs2.shape[0]

        # Make sure that the observed values are newly generated samples
        assert np.all(np.var(prior_trace["theta1_observed"], 0) > 0.0)
        assert np.all(np.var(prior_trace["theta2_observed"], 0) > 0.0)

        # Make sure the missing parts of the combined deterministic match the
        # sampled missing and observed variable values
        assert np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_missing"]) == 0.0
        assert np.mean(prior_trace["theta1"][:, ~obs1.mask] - prior_trace["theta1_observed"]) == 0.0
        assert np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_missing"]) == 0.0
        assert np.mean(prior_trace["theta2"][:, ~obs2.mask] - prior_trace["theta2_observed"]) == 0.0

        assert {"theta1", "theta2"} <= set(prior_trace.keys())

        trace = sample(chains=1, draws=50, compute_convergence_checks=False)

        assert np.all(0 < trace["theta1_missing"].mean(0))
        assert np.all(0 < trace["theta2_missing"].mean(0))
        assert "theta1" not in trace.varnames
        assert "theta2" not in trace.varnames

        # Make sure that the observed values are newly generated samples and that
        # the observed and deterministic values match
        pp_trace = sample_posterior_predictive(trace)
        assert np.all(np.var(pp_trace["theta1"], 0) > 0.0)
        assert np.all(np.var(pp_trace["theta2"], 0) > 0.0)
        assert np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]) == 0.0
        assert np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]) == 0.0

def test_GARCH11():
    # test data ~ N(0, 1)
    data = np.array(
        [
            -1.35078362,
            -0.81254164,
            0.28918551,
            -2.87043544,
            -0.94353337,
            0.83660719,
            -0.23336562,
            -0.58586298,
            -1.36856736,
            -1.60832975,
            -1.31403141,
            0.05446936,
            -0.97213128,
            -0.18928725,
            1.62011258,
            -0.95978616,
            -2.06536047,
            0.6556103,
            -0.27816645,
            -1.26413397,
        ]
    )
    omega = 0.6
    alpha_1 = 0.4
    beta_1 = 0.5
    initial_vol = np.float64(0.9)
    vol = np.empty_like(data)
    vol[0] = initial_vol
    for i in range(len(data) - 1):
        vol[i + 1] = np.sqrt(omega + beta_1 * vol[i] ** 2 + alpha_1 * data[i] ** 2)

    with Model() as t:
        y = GARCH11(
            "y",
            omega=omega,
            alpha_1=alpha_1,
            beta_1=beta_1,
            initial_vol=initial_vol,
            shape=data.shape,
        )
        z = Normal("z", mu=0, sigma=vol, shape=data.shape)
    garch_like = t["y"].logp({"z": data, "y": data})
    reg_like = t["z"].logp({"z": data, "y": data})
    decimal = select_by_precision(float64=7, float32=4)
    np.testing.assert_allclose(garch_like, reg_like, 10 ** (-decimal))
Example #12
def mcmc(model_prior_params, data_prior_params, N, epsilon, Z, sensitivity,
         num_samples):

    data_dim = Z['XX'].shape[0] - 1

    if data_dim > 1:
        raise ValueError(f'MCMC only works for data dim 1! ({data_dim})')

    Z = Z.copy()
    Z['X'] = Z['XX'][:, -1][:, None]

    import pymc3 as pm
    from pymc3.distributions.continuous import InverseGamma
    from pymc3.distributions.continuous import Normal
    from pymc3.distributions.multivariate import MvNormal
    from pymc3.distributions.continuous import Laplace
    from pymc3 import Deterministic
    import theano.tensor as T

    num_tune_samples = 500
    max_treedepth = 12
    target_accept = .95

    with pm.Model():
        # data prior
        tau_squared = InverseGamma('ts',
                                   alpha=data_prior_params[2],
                                   beta=data_prior_params[3][0, 0])

        mu_x_offset = Normal('mu_x_offset', mu=0, sd=1)
        mu_x = Deterministic(
            'mu', data_prior_params[0][0, 0] + mu_x_offset *
            pm.math.sqrt(tau_squared / data_prior_params[1][0, 0]))

        x_offset = Normal('x_offset', mu=0, sd=1, shape=N)
        x_temp = Deterministic('X',
                               mu_x + x_offset * pm.math.sqrt(tau_squared))

        ones = T.shape_padright(pm.math.ones_like(x_temp))
        x = pm.math.concatenate((T.shape_padright(x_temp), ones), axis=1)

        # regression model
        sigma_squared = InverseGamma('ss',
                                     alpha=model_prior_params[2],
                                     beta=model_prior_params[3])

        L = pm.math.sqrt(sigma_squared) * np.linalg.cholesky(
            np.linalg.inv(model_prior_params[1]))
        theta_offset = MvNormal('theta_offset',
                                mu=[0] * (data_dim + 1),
                                cov=np.diag([1] * (data_dim + 1)),
                                shape=data_dim + 1)
        thetas = Deterministic(
            't',
            model_prior_params[0].flatten() + pm.math.dot(L, theta_offset))

        # response data
        y_offset = Normal('y_offset', mu=0, sd=1, shape=N)
        y = Deterministic(
            'y',
            pm.math.flatten(pm.math.dot(thetas, x.T)) +
            y_offset * pm.math.sqrt(sigma_squared))

        # noisy sufficient statistics
        noise_scale = sensitivity / epsilon
        Laplace('z-X', mu=pm.math.sum(x), b=noise_scale, observed=Z['X'])
        Laplace('z-XX',
                mu=pm.math.sum(pm.math.sqr(x)),
                b=noise_scale,
                observed=Z['XX'].flatten())
        Laplace('z-Xy',
                mu=pm.math.sum(x.T * y),
                b=noise_scale,
                observed=Z['Xy'])
        Laplace('z-yy',
                mu=pm.math.sum(pm.math.sqr(y)),
                b=noise_scale,
                observed=Z['yy'])

        trace = pm.sampling.sample(draws=num_samples,
                                   tune=num_tune_samples,
                                   nuts_kwargs={
                                       'max_treedepth': max_treedepth,
                                       'target_accept': target_accept,
                                   })

    theta = trace.get_values('t')
    sigma_squared = trace.get_values('ss')

    return theta.squeeze(), sigma_squared
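The expected layout of Z and the prior-parameter tuples is only implied by how the function indexes them; the sketch below is a hypothetical reconstruction with one-dimensional data, placeholder priors and a hand-rolled Laplace mechanism, not the original calling code.

import numpy as np

N, epsilon, sensitivity = 100, 1.0, 1.0
x = np.random.randn(N, 1)
X = np.hstack([x, np.ones((N, 1))])                  # [x, 1] design matrix
y = X @ np.array([2.0, 0.5]) + 0.3 * np.random.randn(N)

def noisy(stat):
    # Laplace mechanism: perturb each released sufficient statistic.
    return stat + np.random.laplace(scale=sensitivity / epsilon, size=np.shape(stat))

Z = {
    'XX': noisy(X.T @ X),    # (data_dim + 1) x (data_dim + 1)
    'Xy': noisy(X.T @ y),
    'yy': noisy(y @ y),
}

# Placeholder prior hyperparameters, shaped the way mcmc() indexes them.
model_prior_params = (np.zeros((2, 1)), np.eye(2), 2.0, 2.0)
data_prior_params = (np.zeros((1, 1)), np.eye(1), 2.0, np.eye(1) * 2.0)

# theta, sigma_squared = mcmc(model_prior_params, data_prior_params, N,
#                             epsilon, Z, sensitivity, num_samples=500)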
Example #13
def run_MCMC_ARMApq(x, y, draws, model):
    """Derive slope and intercept for ARMA(p, q) model with known p nd q.
    We initially fit a model to the residuals using statsmodels.tsa.api.ARMA.
    Details of this model (as produced by ARMA_select_models.py) are provided as a parameter
    to the present function to allow derivation of reasonably accurate prior distributions for phi and theta.
    If these priors are too broad, the MCMC will not converge in a reasonable time."""

    p = model['order'][0]
    q = model['order'][1]
    phi_means = model['tab']['params'].to_numpy()[:p]
    phi_sd = model['tab']['bse'].to_numpy()[:p]
    print(phi_means, phi_sd)
    theta_means = model['tab']['params'].to_numpy()[-q:]
    theta_sd = model['tab']['bse'].to_numpy()[-q:]

    # NaN values can occur in std err (see e.g. stackoverflow.com/questions/35675693 & 210228).
    # We therefore conservatively replace any NaNs by 0.1.
    phi_sd = np.nan_to_num(phi_sd) + np.isnan(phi_sd) * 0.1
    theta_sd = np.nan_to_num(theta_sd) + np.isnan(theta_sd) * 0.1
    m = p + q
    with Model() as model9:
        alpha = Normal('alpha', mu=0, sd=10)
        beta = Normal('beta', mu=0, sd=10)
        sd = HalfNormal('sd', sd=10)
        if p == 1:
            phi = Normal('phi', mu=phi_means[0], sd=phi_sd[0])
        else:
            phi = Normal('phi', mu=phi_means, sd=phi_sd, shape=p)
        if q == 1:
            theta = Normal('theta', mu=theta_means[0], sd=theta_sd[0])
        else:
            theta = Normal('theta', mu=theta_means, sd=theta_sd, shape=q)
        y = tt.as_tensor(y)
        x = tt.as_tensor(x)
        y_r = y[m:]
        x_r = x[m:]
        resids = y - beta * x - alpha

        if p == 1:
            u = phi * resids[p - 1:-1]
        else:
            u = tt.add(
                *[phi[i] * resids[p - (i + 1):-(i + 1)] for i in range(p)])
        eps = resids[p:] - u
        if q == 1:
            v = theta * eps[q - 1:-1]
        else:
            v = tt.add(
                *[theta[i] * eps[q - (i + 1):-(i + 1)] for i in range(q)])
        mu = alpha + beta * x_r + u[q:] + v
        data = Normal('y_r', mu=mu, sd=sd, observed=y_r)

    with model9:
        if q == 1:
            step = Metropolis([phi])
        else:
            step = Metropolis([phi, theta])
        tune = int(draws / 5)
        trace = sample(draws, tune=tune, step=step, progressbar=False)

    print(summary(trace, varnames=['alpha', 'beta', 'sd', 'phi', 'theta']))
    #plt.show(forestplot(trace, varnames=['alpha', 'beta', 'sd', 'phi', 'theta']))
    #traceplot(trace, varnames=['alpha', 'beta', 'sd', 'phi', 'theta'])
    return trace
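ARMA_select_models.py itself is not included here, so the layout of the model argument below is only inferred from how run_MCMC_ARMApq indexes it; treat it as a hypothetical illustration rather than the script's actual output.

import pandas as pd

arma_model = {
    'order': (2, 1),  # (p, q)
    'tab': pd.DataFrame({
        # first p rows: AR estimates (phi); last q rows: MA estimates (theta)
        'params': [0.6, 0.2, -0.3],
        'bse': [0.05, 0.05, 0.08],
    }),
}
# trace = run_MCMC_ARMApq(x, y, draws=2000, model=arma_model)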
Example #14
def run_MCMC_ARMA_multi(x, y, draws, models):
    """Derive slope and intercept for ARMA model across multiple chromosomes. The slope and intercept are held constant
    across the chromosomes, while the ARMA model for residuals can vary across chromosomes. The details of
    these models are created by ARMS_select_models.py.
    If q=0, i.e. the model is pure AR(p) a separate function should be used."""
    num_chroms = len(models)

    with Model() as model9:
        alpha = Normal('alpha', mu=0, sd=10)
        beta = Normal('beta', mu=0, sd=10)
        sd = HalfNormal('sd', sd=10)
        steps = list()
        var_names1 = list()

        for c in range(num_chroms):
            p = models[c]['order'][0]
            q = models[c]['order'][1]
            m = p + q
            phi_means = models[c]['tab']['params'].to_numpy()[:p]
            phi_sd = models[c]['tab']['bse'].to_numpy()[:p]
            theta_means = models[c]['tab']['params'].to_numpy()[-q:]
            theta_sd = models[c]['tab']['bse'].to_numpy()[-q:]
            # NaN values can occur in std err (see e.g. stackoverflow.com/questions/35675693 & 210228).
            # We therefore conservatively replace any NaNs by 0.1.
            phi_sd = np.nan_to_num(phi_sd) + np.isnan(phi_sd) * 0.1
            theta_sd = np.nan_to_num(theta_sd) + np.isnan(theta_sd) * 0.1

            if p == 1:
                phi = Normal('phi_%i' % c, mu=phi_means[0], sd=phi_sd[0])
            else:
                phi = Normal('phi_%i' % c, mu=phi_means, sd=phi_sd, shape=p)
            if q == 1:
                theta = Normal('theta_%i' % c,
                               mu=theta_means[0],
                               sd=theta_sd[0])
            else:
                theta = Normal('theta_%i' % c,
                               mu=theta_means,
                               sd=theta_sd,
                               shape=q)
            y[c] = tt.as_tensor(y[c])
            x[c] = tt.as_tensor(x[c])
            y_r = y[c][m:]
            x_r = x[c][m:]
            resids = y[c] - beta * x[c] - alpha
            if p == 1:
                u = phi * resids[p - 1:-1]
            else:
                u = tt.add(
                    *[phi[i] * resids[p - (i + 1):-(i + 1)] for i in range(p)])
            eps = resids[p:] - u
            if q == 1:
                v = theta * eps[q - 1:-1]
            else:
                v = tt.add(
                    *[theta[i] * eps[q - (i + 1):-(i + 1)] for i in range(q)])
            mu = alpha + beta * x_r + u[q:] + v

            data = Normal('y_r_%i' % c, mu=mu, sd=sd, observed=y_r)
            step = Metropolis([phi, theta])  # See pymc3 #1304.
            steps.append(step)
            var_names1.append('phi_%i' % c)
            var_names1.append('theta_%i' % c)

    with model9:
        tune = int(draws / 5)
        trace = sample(draws, tune=tune, step=steps, progressbar=False)

    print(summary(trace, varnames=['alpha', 'beta'] + var_names1))
    #plt.show(forestplot(trace, varnames=['alpha', 'beta']))
    #traceplot(trace, varnames=['alpha', 'beta'])
    return trace
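A short, hypothetical call sketch for the multi-chromosome variant: x, y, and models are parallel per-chromosome sequences, and each models entry follows the same dict layout sketched after run_MCMC_ARMApq above; the data here are placeholders.

import numpy as np
import pandas as pd

num_chroms = 2
x = [np.linspace(0.0, 10.0, 300) for _ in range(num_chroms)]
y = [1.5 + 0.3 * xc + 0.5 * np.random.randn(xc.size) for xc in x]
models = [
    {'order': (1, 1),
     'tab': pd.DataFrame({'params': [0.5, -0.2], 'bse': [0.05, 0.05]})}
    for _ in range(num_chroms)
]
# trace = run_MCMC_ARMA_multi(x, y, draws=2000, models=models)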