示例#1
0
 def test_zeroinflatedpoisson(self):
     """Prior-predictive draws from a ZIP model must have the expected shapes."""
     n_draws = 5000
     n_suppliers = 20
     with pm.Model():
         theta = pm.Beta("theta", alpha=1, beta=1)
         psi = pm.HalfNormal("psi", sd=1)
         pm.ZeroInflatedPoisson(
             "suppliers", psi=psi, theta=theta, shape=n_suppliers
         )
         gen_data = pm.sample_prior_predictive(samples=n_draws)

     # Scalar priors yield one value per draw; the likelihood yields one
     # row of ``n_suppliers`` values per draw.
     expected_shapes = (
         ("theta", (n_draws,)),
         ("psi", (n_draws,)),
         ("suppliers", (n_draws, n_suppliers)),
     )
     for var_name, shape in expected_shapes:
         assert gen_data[var_name].shape == shape
示例#2
0
 def test_zeroinflatedpoisson(self):
     """Verify the shapes returned by prior-predictive sampling of a ZIP model."""
     samples = 5000
     size = 20
     with pm.Model():
         theta = pm.Beta('theta', alpha=1, beta=1)
         psi = pm.HalfNormal('psi', sd=1)
         pm.ZeroInflatedPoisson('suppliers', psi=psi, theta=theta, shape=size)
         gen_data = pm.sample_prior_predictive(samples=samples)

     # One entry per draw for the scalar priors ...
     theta_shape = gen_data['theta'].shape
     assert theta_shape == (samples, )
     psi_shape = gen_data['psi'].shape
     assert psi_shape == (samples, )
     # ... and a (draws, size) matrix for the vector-valued likelihood.
     suppliers_shape = gen_data['suppliers'].shape
     assert suppliers_shape == (samples, size)
示例#3
0
 def build_model(self):
     """Build the zero-inflated Poisson occupancy model for ``self.y``.

     The model contains a Beta(1, 1) occupancy probability ``psi``, a latent
     Bernoulli occupancy indicator ``z`` with the same shape as ``self.y``,
     a Uniform(0, 100) mean count ``theta`` and a zero-inflated Poisson
     likelihood observed at ``self.y``.

     Returns:
         The constructed ``pm.Model``.
     """
     with pm.Model() as model:
         # Estimated occupancy
         psi = pm.Beta('psi', 1, 1)
         # Latent variable for occupancy. ``shape`` is passed by keyword:
         # the positional slot after ``p`` is not ``shape`` in current
         # PyMC3 releases.
         pm.Bernoulli('z', psi, shape=self.y.shape)
         # Estimated mean count
         theta = pm.Uniform('theta', 0, 100)
         # Poisson likelihood. Keywords keep psi/theta bound correctly
         # regardless of the positional order of the installed release.
         pm.ZeroInflatedPoisson('y', psi=psi, theta=theta, observed=self.y)
     return model
示例#4
0
 def build_model(self):
     """Assemble and return the occupancy model observed at ``self.y``.

     Priors: ``psi`` ~ Beta(1, 1) for occupancy and ``theta`` ~
     Uniform(0, 100) for the mean count. ``z`` is a latent Bernoulli
     occupancy indicator shaped like ``self.y``; ``y`` is the zero-inflated
     Poisson likelihood.
     """
     observed_counts = self.y
     with pm.Model() as occupancy_model:
         # Occupancy probability
         psi = pm.Beta("psi", alpha=1, beta=1)
         # Per-observation latent occupancy state
         pm.Bernoulli("z", psi, shape=observed_counts.shape)
         # Mean count
         theta = pm.Uniform("theta", lower=0, upper=100)
         # Zero-inflated Poisson likelihood
         pm.ZeroInflatedPoisson("y", psi, theta, observed=observed_counts)
     return occupancy_model
def get_model(dist, data) -> pm.Model:
    """Build a count model of the requested family with ``data`` as observations.

    Every model has one set of parameters per column of ``data``
    (parameter shape ``(1, data.shape[1])``), and an Exponential prior named
    ``"lam"`` whose ``lam`` argument is the column-wise mean of ``data``.

    Args:
        dist: One of ``"Poisson"``, ``"ZeroInflatedPoisson"``,
            ``"NegativeBinomial"`` or ``"ZeroInflatedNegativeBinomial"``.
        data: 2-D array-like supporting ``.mean(0)`` and ``.shape[1]``.

    Returns:
        The constructed ``pm.Model``.

    Raises:
        ValueError: If ``dist`` is not one of the supported names
            (previously the function fell through and returned ``None``).
    """
    # NOTE(review): ``lam=means`` sets the Exponential *rate* to the data
    # mean, so the prior mean is 1/means -- confirm this is intended.
    means = data.mean(0)
    n_exp = data.shape[1]
    param_shape = (1, n_exp)

    if dist == "Poisson":
        with pm.Model() as poi_model:
            lam = pm.Exponential("lam", lam=means, shape=param_shape)
            pm.Poisson("poi", mu=lam, observed=data)
        return poi_model

    if dist == "ZeroInflatedPoisson":
        with pm.Model() as zip_model:
            psi = pm.Uniform("psi", shape=param_shape)
            lam = pm.Exponential("lam", lam=means, shape=param_shape)
            pm.ZeroInflatedPoisson("zip", psi=psi, theta=lam, observed=data)
        return zip_model

    if dist == "NegativeBinomial":
        with pm.Model() as nb_model:
            gamma = pm.Gamma("gm", 0.01, 0.01, shape=param_shape)
            lam = pm.Exponential("lam", lam=means, shape=param_shape)
            pm.NegativeBinomial("nb", alpha=gamma, mu=lam, observed=data)
        return nb_model

    if dist == "ZeroInflatedNegativeBinomial":
        with pm.Model() as zinb_model:
            gamma = pm.Gamma("gm", 0.01, 0.01, shape=param_shape)
            lam = pm.Exponential("lam", lam=means, shape=param_shape)
            psi = pm.Uniform("psi", shape=param_shape)
            pm.ZeroInflatedNegativeBinomial(
                "zinb",
                psi=psi,
                alpha=gamma,
                mu=lam,
                observed=data,
            )
        return zinb_model

    raise ValueError(
        "Unknown dist {!r}; expected 'Poisson', 'ZeroInflatedPoisson', "
        "'NegativeBinomial' or 'ZeroInflatedNegativeBinomial'".format(dist)
    )
示例#6
0
def run_model(month=7,
              n_samples=1000,
              interp_type='ncs',
              binary=True,
              spike=0.9,
              hdi_prob=0.95,
              zero_inf=0.7):
    """Fit the zero-inflated Poisson COVID-19 death model for one month.

    Reads ``../data/<interp_type>-pop-deaths-and-<binary|nonbinary>-mandates.csv``,
    keeps the rows whose ``Month`` equals ``month``, fits the model with
    NUTS and writes posterior summaries (CSV), forest plots, ESS plots and
    a posterior-predictive plot under ``../images/``.

    Args:
        month: Value of the ``Month`` column to fit.
        n_samples: Number of posterior draws; the same number is used for
            tuning.
        interp_type: Prefix of the input CSV name, also used in output
            file names.
        binary: Selects the ``binary`` or ``nonbinary`` mandates CSV.
        spike: Inclusion probability of the Bernoulli indicators ``xi``
            that multiply the mandate coefficients.
        hdi_prob: HDI probability passed to ``az.summary``.
        zero_inf: Fixed ``psi`` of the zero-inflated Poisson likelihood.

    Returns:
        The trace returned by ``pm.sample``.
    """
    # preprocessing
    binary_str = 'binary' if binary else 'nonbinary'
    # Shared "<interp_type>_<binary_str>" fragment of every output file name.
    run_tag = interp_type + '_' + binary_str
    df = pd.read_csv('../data/' + interp_type + '-pop-deaths-and-' +
                     binary_str + '-mandates.csv',
                     index_col=0)
    df = df.rename(columns={
        "Age Group": "Age_Group",
        "COVID-19 Deaths": "covid_19_deaths"
    })
    test_df = df[df["Month"] == month]
    # takes all of the 4 mandate columns that currently exist
    mandates_df = test_df.iloc[:, -4:]
    covid_deaths = test_df["covid_19_deaths"]
    # makes the population in units of millions
    population = test_df["Population"] / 1000000

    # "Under 1 year" and the first sex level are dropped from the dummies.
    age_data = pd.get_dummies(test_df["Age_Group"]).drop("Under 1 year",
                                                         axis=1)
    sex_data = pd.get_dummies(test_df["Sex"], drop_first=True)

    n_mandates = len(mandates_df.columns)
    # Used for every age-related shape so the priors stay consistent with
    # the dummy matrix (the original hard-coded 10 in some places).
    n_age = len(age_data.columns)

    # run the model

    with pm.Model() as model:

        # spike and slab prior
        tau = pm.InverseGamma('tau', alpha=20, beta=20)
        xi = pm.Bernoulli('xi', p=spike, shape=n_mandates)
        beta_mandates = pm.MvNormal('beta_mandate',
                                    mu=0,
                                    cov=tau * np.eye(n_mandates),
                                    shape=n_mandates)

        # age prior
        mu_age_mean = np.linspace(-5, 5, n_age)
        cov = pm.HalfNormal('cov', sigma=2)
        mu_age = pm.MvNormal('mu_age',
                             mu=mu_age_mean,
                             cov=np.identity(n_age),
                             shape=(1, n_age))
        beta_age = pm.MvNormal('beta_age',
                               mu=mu_age,
                               cov=(cov**2) * np.identity(n_age),
                               shape=(1, n_age))

        # sex prior
        # NOTE: the 'simga_*' variable names are misspelled but are kept
        # because they are the names stored in the returned trace.
        mu_sex = pm.Normal('mu_sex', mu=0, sigma=1)
        sigma_sex = pm.HalfNormal('simga_sex', sigma=2)
        beta_sex = pm.Normal('beta_sex', mu=mu_sex, sigma=sigma_sex)

        # intercept prior
        mu_intercept = pm.Normal('mu_intercept', mu=0, sigma=1)
        sigma_intercept = pm.HalfNormal('simga_intercept', sigma=2)
        beta_intercept = pm.Normal('beta_intercept',
                                   mu=mu_intercept,
                                   sigma=sigma_intercept)

        # mean setup for likelihood
        float_x = theano.config.floatX
        mandate_matrix = np.array(mandates_df).astype(float_x)
        population_arr = np.array(population).astype(float_x)
        sex_matrix = np.array(sex_data).astype(float_x)
        age_matrix = np.array(age_data).astype(float_x)
        w_mandates = theano.shared(mandate_matrix, 'w_mandate')
        w_sex = theano.shared(sex_matrix, 'w_sex')
        w_age = theano.shared(age_matrix, 'w_age')
        mean = beta_intercept + pm.math.matrix_dot(w_mandates, xi*beta_mandates) \
                            + pm.math.matrix_dot(w_sex, beta_sex).T \
                            + pm.math.matrix_dot(w_age, beta_age.T).T

        # likelihood: rate is population (in millions) times exp(mean)
        pm.ZeroInflatedPoisson('y_obs',
                               psi=zero_inf,
                               theta=population_arr * tt.exp(mean),
                               observed=covid_deaths)

        # sample from posterior
        trace = pm.sample(n_samples,
                          tune=n_samples,
                          nuts={'target_accept': 0.98})

    # posterior hdis: (trace variable, file label, row labels or None)
    summary_specs = (
        ("beta_mandate", 'mandate', mandates_df.columns),
        ("beta_sex", 'sex', sex_data.columns),
        ("beta_age", 'age', age_data.columns),
        ("beta_intercept", 'intercept', None),
    )
    for var_name, label, row_labels in summary_specs:
        summary = az.summary(trace, var_names=[var_name], hdi_prob=hdi_prob)
        if row_labels is not None:
            summary.index = row_labels
        summary.to_csv('../images/posteriors/' + label + '_' + run_tag +
                       '_summary.csv')

    # posterior distributions: (trace variable, file label, tick labels, title)
    forest_specs = (
        ("beta_intercept", 'intercept', None,
         r'Posterior Distribution of $\beta_0$'),
        ("beta_age", 'age', age_data.columns,
         r'Posterior Distribution of $\beta_{age}$'),
        ("beta_sex", 'sex', sex_data.columns,
         r'Posterior Distribution of $\beta_{sex}$'),
        ("beta_mandate", 'mandate', mandates_df.columns,
         r'Posterior Distribution of $\beta_{mandate}$'),
    )
    for var_name, label, tick_labels, title in forest_specs:
        ax = az.plot_forest(trace,
                            'ridgeplot',
                            var_names=[var_name],
                            combined=True,
                            hdi_prob=0.99999)
        if tick_labels is not None:
            ax[0].set_yticklabels(reversed(tick_labels))
        ax[0].set_title(title)
        plt.savefig('../images/posteriors/' + label + '_posteriors_' +
                    run_tag + '.png')

    # ESS Plots
    ess_prefix = '../images/ess/' + run_tag

    ax = az.plot_ess(trace, var_names=["beta_intercept"])
    ax.set_title(r'$\beta_0$  ESS')
    plt.savefig(ess_prefix + '_interceptESS.png')

    # Titles are assigned row by row on a grid indexed as ax[row, col] with
    # three columns; the labels below are fixed and assume ten age groups.
    age_labels = ('1-4', '15-24', '25-34', '35-44', '45-54', '5-14', '55-64',
                  '65-74', '75-84', '85+')
    ax = az.plot_ess(trace, var_names=["beta_age"])
    for position, age_label in enumerate(age_labels):
        ax[divmod(position, 3)].set_title(
            r'$\beta_{age[' + age_label + r']}$  ESS', fontsize=18)
    plt.savefig(ess_prefix + '_ageESS.png')

    ax = az.plot_ess(trace, var_names=["beta_sex"])
    ax.set_title(r'$\beta_{sex}$  ESS')
    plt.savefig(ess_prefix + '_sexESS.png')

    mandate_labels = ('April', 'May', 'June', 'July')
    ax = az.plot_ess(trace, var_names=["beta_mandate"])
    for position, mandate_label in enumerate(mandate_labels):
        ax[position].set_title(
            r'$\beta_{mandate[' + mandate_label + r']}$  ESS', fontsize=18)
    plt.savefig(ess_prefix + '_mandateESS.png')

    # posterior predictive checking
    with model:
        ppc = pm.sample_posterior_predictive(trace, var_names=["y_obs"])
    az.plot_ppc(az.from_pymc3(posterior_predictive=ppc, model=model))
    plt.savefig('../images/posterior_predictive/' + run_tag + '.png')

    # return trace so that user can work with posterior data directly
    return trace
示例#7
0
# Zero-inflated Poisson regression of fish caught on group covariates.
df = pd.read_csv(FISHFILE)
# This dataset includes data collected from a survey of 250 visitors who
# visited the park. The group level data consists of:
#  - The number of fish they caught (count)
#  - The number of children in the group (child)
#  - If they took a camper to the park (camper)

with pm.Model() as ZIP_reg:
    # Flat Beta(1, 1) prior on the zero-inflation parameter.
    psi = pm.Beta("psi", 1, 1)

    # Intercept and the two slopes (index 0: child, index 1: camper).
    alpha = pm.Normal("alpha", 0, 10)
    beta = pm.Normal("beta", 0, 10, shape=2)

    # Log link: the Poisson rate is the exponential of the linear predictor.
    lam = pm.math.exp(alpha + beta[0] * df["child"] + beta[1] * df["camper"])

    # Keyword arguments make the psi/theta binding explicit.
    y = pm.ZeroInflatedPoisson("y", theta=lam, psi=psi, observed=df["count"])

    trace_ZIP_reg = pm.sample(2000)

# Drop the first 100 draws before plotting.
chain_ZIP_reg = trace_ZIP_reg[100:]
pm.traceplot(chain_ZIP_reg)
plt.savefig("fish_traceplot.png")
plt.close()

# Inputs for the prediction loop that follows: numbers of children to
# evaluate and accumulators for the two prediction series.
children = [0, 1, 2, 3, 4]
fish_count_pred_0 = []
fish_count_pred_1 = []
# NOTE(review): presumably the stride used to thin the chain in the loop
# below -- the loop is truncated in this view, confirm.
thin = 5

for n in children:
    without_camper = chain_ZIP_reg.alpha[::
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

# Simulate zero-inflated Poisson counts and recover the parameters with PyMC3.
np.random.seed(42)
n = 100
theta = 2.5  # Poisson rate
pi = 0.1  # probability of extra-zeros (pi = 1-psi)

# Simulate some data: each draw is a Poisson count kept with probability
# 1 - pi and replaced by zero otherwise.
counts = np.array([(np.random.random() > pi) * np.random.poisson(theta)
                   for i in range(n)])

with pm.Model() as ZIP:
    # The random variable keeps its original name 'p' in the trace.
    psi = pm.Beta('p', 1, 1)
    lam = pm.Gamma('lam', 2, 0.1)

    # Keywords are required here: the documented positional order is
    # (psi, theta), so the previous positional call ``lam, psi`` would bind
    # the Gamma rate to psi and the Beta probability to theta.
    y = pm.ZeroInflatedPoisson('y', theta=lam, psi=psi, observed=counts)
    trace = pm.sample(5000)
# Drop the first 100 draws before plotting.
pm.traceplot(trace[100:])
plt.show()
示例#9
0
# Synthetic zero-inflated Poisson data: sample size, true rate and mixing weight.
n = 100
theta_real = 2.5
psi = 0.1

# Simulate some data: a Poisson(theta_real) count is kept with probability
# psi and replaced by zero otherwise.
counts = np.array([
    (np.random.random() > (1 - psi)) * np.random.poisson(theta_real)
    for _ in range(n)
])

# In[33]:

with pm.Model() as ZIP:
    # From here on ``psi`` refers to the model's random variable, not the
    # simulation constant above.
    psi = pm.Beta('psi', alpha=1, beta=1)
    theta = pm.Gamma('theta', alpha=2, beta=0.1)
    y = pm.ZeroInflatedPoisson('y', psi, theta, observed=counts)
    trace = pm.sample(1000)

# In[34]:

az.plot_trace(trace)
plt.savefig('B11197_04_11.png', dpi=300)

# In[35]:

#az.summary(trace)

# ## Poisson regression and ZIP regression

# In[36]:
示例#10
0
# Simulate zero-inflated Poisson counts, fit a ZIP model and save the trace plot.
plt.figure()

np.random.seed(42)
n = 100
theta = 2.5  # Poisson rate used for the simulation
pi = 0.1  # probability that a draw is forced to zero

# Each draw is a Poisson count kept with probability 1 - pi.
counts = np.array([(np.random.random() > pi) * np.random.poisson(theta)
                   for i in range(n)])

with pm.Model() as ZIP:
    psi = pm.Beta('psi', 1, 1)
    lam = pm.Gamma('lam', 2, 0.1)

    y = pm.ZeroInflatedPoisson('y', psi, lam, observed=counts)
    # NOTE(review): ``njobs`` was renamed ``cores`` in later PyMC3
    # releases; left unchanged to match the installed version -- confirm.
    trace_ZIP = pm.sample(5000, njobs=1)

# Drop the first 100 draws before plotting.
chain_ZIP = trace_ZIP[100:]
pm.traceplot(chain_ZIP)
# ``figsize`` is not a savefig option (older Matplotlib ignores it, newer
# releases reject it), so it is no longer passed.
plt.savefig('img708.png', dpi=300)

# Start of the fish-count regression example.
plt.figure()

# Data source: https://stats.idre.ucla.edu/stat/data/fish.csv
fish_data = pd.read_csv('fish.csv')
# At script level the result of head() is discarded (only displayed in a
# notebook cell).
fish_data.head()

# NOTE(review): only the psi prior is visible; the rest of this model
# definition appears to be cut off in this view.
with pm.Model() as ZIP_reg:
    psi = pm.Beta('psi', 1, 1)
plt.bar(0.0, drink_zeros, width=1.0, bottom=work_zeros, color="C1", alpha=0.5)

plt.xticks(bins + 0.5)
plt.xlabel("manuscripts completed")

plt.ylabel("Frequency")

# %%
# Zero-inflated Poisson model with both parameters defined on unconstrained
# scales; ``y`` is defined outside this view.
with pm.Model() as m11_4:
    # ``p`` is the sigmoid of ``ap``, so it lies in (0, 1).
    ap = pm.Normal("ap", 0.0, 1.0)
    p = pm.math.sigmoid(ap)

    # ``lambda_`` is the exponential of ``al``, so it is positive.
    al = pm.Normal("al", 0.0, 10.0)
    lambda_ = pm.math.exp(al)

    # The first positional parameter (psi in the documented PyMC3
    # signature) receives 1 - p, the second receives the rate.
    y_obs = pm.ZeroInflatedPoisson("y_obs", 1.0 - p, lambda_, observed=y)

# %%
# Maximum a posteriori point estimate.
with m11_4:
    map_11_4 = pm.find_MAP()

# %%
# Bare expression: displays the MAP result when run as a notebook cell.
map_11_4

# %%
# Map the MAP value of ``ap`` back to the probability scale.
sp.special.expit(map_11_4["ap"])

# %%
# Map the MAP value of ``al`` back to the rate scale.
np.exp(map_11_4["al"])

# %%
示例#12
0
# Zero-inflated Poisson regression of fish counts on children and camper.
# Response and covariates as NumPy arrays (``data`` is defined outside this view).
fishes = data["count"].values
children = data["child"].values
camper = data["camper"].values
  
# --------------------------- specify a probabilistic model ----------------------------------- # 

with pm.Model() as zip_regression:
    # priors on the intercept and the two slopes (index 0: children, index 1: camper)
    alpha = pm.Normal("alpha", mu = 0, sd = 10)
    beta = pm.Normal("beta", mu = 0, sd = 10, shape = 2)
    # flat Beta(1, 1) prior on the inflation coefficient
    psi = pm.Beta("psi", 1 , 1)
    # Poisson rate via a log link; wrapped in Deterministic so it is stored in the trace
    theta = pm.Deterministic("theta", pm.math.exp(alpha + beta[0] * children + beta[1] * camper))
    # specify the likelihood of the data (positional order: psi, then theta)
    y_obs = pm.ZeroInflatedPoisson("y_obs", psi, theta, observed = fishes)
    # inference step
    trace = pm.sample(1500)
    
# -------------------------- analyse the posterior --------------------------------------- # 
    
with zip_regression:
    # "theta" is left out of var_names in both the summary and the trace plot
    log.info("The summary of the trace is as follows: %s", az.summary(trace,var_names = ["alpha","beta","psi"]))
    az.plot_trace(trace,var_names = ["alpha","beta","psi"])
    
# -------------------------- plot --------------------------------------- # 

plt.figure()
# initialize data to plot
# NOTE: ``children`` is rebound here from the covariate array to the grid of
# values to predict for; the model above has already been built.
children = [0,1,2,3,4]
fish_count_pred_0 = []
# NOTE(review): the prediction code that would fill the list above is not
# present in this view -- the snippet appears truncated before plt.show().
plt.show()

# -------------------- generate synthetic data --------------------- # 

# Number of synthetic observations
n = 1000
# Poisson rate used for the simulation
theta_real = 2.5
# Probability that an observation keeps its Poisson draw
psi_true = 0.5
# Each observation is a Poisson(theta_real) count kept with probability
# psi_true and replaced by zero otherwise.
counts = np.array([
    (np.random.random() > 1 - psi_true) * np.random.poisson(theta_real)
    for _ in range(n)
])

# --------------------- probabilistic method ---------------------- # 

with pm.Model() as zip_model:
    # Priors of the zero-inflated Poisson model
    psi = pm.Beta("psi", alpha=1, beta=1)
    theta = pm.Gamma("theta", alpha=2, beta=0.1)
    # Likelihood of the simulated counts
    y = pm.ZeroInflatedPoisson("y", psi, theta, observed=counts)
    # Posterior sampling
    trace = pm.sample(1500)

# ---------------------- analyse the posterior -------------- # 

with zip_model:
    az.plot_trace(trace)