示例#1
0
    def test_mixture_list_of_poissons(self):
        """Recover the weights and rates of a two-component Poisson mixture.

        The mixture components are handed to ``Mixture`` as a Python list of
        ``Poisson.dist`` objects. One Metropolis chain of 5000 draws is
        sampled, then the posterior means of ``w`` and ``mu`` are compared
        with ``self.pois_w`` and ``self.pois_mu`` using ``rtol=0.1`` and
        ``atol=0.1``.
        """
        with Model():
            # Dirichlet prior with all-ones concentration, cast via floatX.
            concentration = floatX(np.ones_like(self.pois_w))
            w = Dirichlet("w", concentration, shape=self.pois_w.shape)

            # One Gamma(1, 1) rate per mixture component.
            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)

            components = [Poisson.dist(mu[0]), Poisson.dist(mu[1])]
            Mixture("x_obs", w, components, observed=self.pois_x)

            trace = sample(
                5000,
                Metropolis(),
                random_seed=self.random_seed,
                progressbar=False,
                chains=1,
            )

        # Both sides are sorted before comparing, so the check does not
        # depend on the order in which the components were recovered.
        for var_name, expected in (("w", self.pois_w), ("mu", self.pois_mu)):
            posterior_mean = trace[var_name].mean(axis=0)
            assert_allclose(np.sort(posterior_mean),
                            np.sort(expected),
                            rtol=0.1,
                            atol=0.1)
示例#2
0
    def test_mixture_list_of_poissons(self):
        """Recover the weights and rates of a two-component Poisson mixture.

        The components are given to ``Mixture`` as a list of
        ``Poisson.dist`` objects. After 5000 Metropolis draws the posterior
        means of ``w`` and ``mu`` are compared with ``self.pois_w`` and
        ``self.pois_mu`` using ``rtol=0.1`` and ``atol=0.1``.
        """
        with Model():
            # All-ones Dirichlet concentration over the mixture weights.
            w = Dirichlet('w', np.ones_like(self.pois_w))
            # One Gamma(1, 1) rate per mixture component.
            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)

            poisson_components = [Poisson.dist(mu[0]), Poisson.dist(mu[1])]
            Mixture('x_obs', w, poisson_components, observed=self.pois_x)

            trace = sample(
                5000,
                Metropolis(),
                random_seed=self.random_seed,
                progressbar=False,
            )

        # Both sides are sorted before comparing, so the check does not
        # depend on the order in which the components were recovered.
        for var_name, expected in (('w', self.pois_w), ('mu', self.pois_mu)):
            posterior_mean = trace[var_name].mean(axis=0)
            assert_allclose(np.sort(posterior_mean),
                            np.sort(expected),
                            rtol=0.1,
                            atol=0.1)
示例#3
0
def test_check_discrete_minibatch():
    """Check that ``advi_minibatch`` raises ValueError for the disaster model.

    The model contains the discrete ``switchpoint`` variable
    (``DiscreteUniform``); the call to ``advi_minibatch`` on it is expected
    to raise ``ValueError``. The observed data are fed through a symbolic
    vector so that they can be replaced by minibatches.
    """
    # Symbolic placeholder for the observations, with a zero test value of
    # the same length as the real data.
    disaster_data_t = tt.vector()
    disaster_data_t.tag.test_value = np.zeros(len(disaster_data))

    with Model() as disaster_model:
        # Discrete change-point, bounded by the first and last year.
        switchpoint = DiscreteUniform('switchpoint',
                                      lower=year.min(),
                                      upper=year.max(),
                                      testval=1900)

        # Priors for the disaster rates before and after the switchpoint.
        early_rate = Exponential('early_rate', 1)
        late_rate = Exponential('late_rate', 1)

        # Years up to and including the switchpoint use the early rate,
        # later years use the late rate.
        rate = switch(switchpoint >= year, early_rate, late_rate)

        disasters = Poisson('disasters', rate, observed=disaster_data_t)

    def create_minibatch():
        # Endless generator of minibatches, one array per minibatch tensor.
        # (Previously a ``return`` inside the loop made this a plain
        # function returning a single tuple, leaving the loop dead.)
        while True:
            yield (disaster_data, )

    # This should raise ValueError
    assert_raises(ValueError,
                  advi_minibatch,
                  model=disaster_model,
                  n=10,
                  minibatch_RVs=[disasters],
                  minibatch_tensors=[disaster_data_t],
                  minibatches=create_minibatch(),
                  verbose=False)
示例#4
0
    def test_mixture_list_of_poissons(self):
        """Recover the weights and rates of a two-component Poisson mixture.

        The components are given to ``Mixture`` as a list of
        ``Poisson.dist`` objects. After 5000 Metropolis draws the posterior
        means of ``w`` and ``mu`` are compared with ``self.pois_w`` and
        ``self.pois_mu`` using ``rtol=0.1`` and ``atol=0.1``.
        """
        n_components = self.pois_w.size

        with Model():
            # All-ones Dirichlet concentration over the mixture weights.
            w = Dirichlet('w', np.ones_like(self.pois_w))
            # One Gamma(1, 1) rate per mixture component.
            mu = Gamma('mu', 1., 1., shape=n_components)

            comp_dists = [Poisson.dist(mu[0]), Poisson.dist(mu[1])]
            Mixture('x_obs', w, comp_dists, observed=self.pois_x)

            sampler = Metropolis()
            trace = sample(5000,
                           sampler,
                           random_seed=self.random_seed,
                           progressbar=False)

        # Both sides are sorted before comparing, so the check does not
        # depend on the order in which the components were recovered.
        tolerances = dict(rtol=0.1, atol=0.1)
        w_mean = trace['w'].mean(axis=0)
        assert_allclose(np.sort(w_mean), np.sort(self.pois_w), **tolerances)
        mu_mean = trace['mu'].mean(axis=0)
        assert_allclose(np.sort(mu_mean), np.sort(self.pois_mu), **tolerances)
示例#5
0
    def test_check_discrete_minibatch(self):
        """Check that ``advi_minibatch`` raises ValueError for the disaster model.

        The model contains the discrete ``switchpoint`` variable
        (``DiscreteUniform``); calling ``advi_minibatch`` inside the model
        context is expected to raise ``ValueError``. The observed data are
        fed through a symbolic vector so they can be replaced by minibatches.
        """
        # Symbolic placeholder for the observations, with a zero test value
        # of the same length as the real data.
        disaster_data_t = tt.vector()
        disaster_data_t.tag.test_value = np.zeros(len(self.disaster_data))

        def create_minibatches():
            # Endless generator of minibatches, one array per minibatch
            # tensor. (Previously a ``return`` inside the loop made this a
            # plain function returning a single tuple, leaving the loop dead.)
            while True:
                yield (self.disaster_data, )

        with Model():
            # Discrete change-point, bounded by the first and last year.
            switchpoint = DiscreteUniform('switchpoint',
                                          lower=self.year.min(),
                                          upper=self.year.max(),
                                          testval=1900)

            # Priors for the disaster rates before and after the switchpoint.
            early_rate = Exponential('early_rate', 1)
            late_rate = Exponential('late_rate', 1)

            # Years up to and including the switchpoint use the early rate,
            # later years use the late rate.
            rate = tt.switch(switchpoint >= self.year, early_rate, late_rate)
            disasters = Poisson('disasters', rate, observed=disaster_data_t)

            with self.assertRaises(ValueError):
                advi_minibatch(n=10,
                               minibatch_RVs=[disasters],
                               minibatch_tensors=[disaster_data_t],
                               minibatches=create_minibatches())
示例#6
0
    def test_check_discrete(self):
        """Check that ``advi`` raises ValueError for the disaster model.

        The model contains the discrete ``switchpoint`` variable
        (``DiscreteUniform``); running ``advi(n=10)`` inside the model
        context is expected to raise ``ValueError``.
        """
        with Model():
            # Discrete change-point, bounded by the first and last year.
            first_year = self.year.min()
            last_year = self.year.max()
            switchpoint = DiscreteUniform('switchpoint',
                                          lower=first_year,
                                          upper=last_year,
                                          testval=1900)

            # Exponential(1) priors on the rates before/after the switch.
            early_rate = Exponential('early_rate', 1)
            late_rate = Exponential('late_rate', 1)

            # Years up to and including the switchpoint use the early rate,
            # later years use the late rate.
            uses_early_rate = switchpoint >= self.year
            rate = tt.switch(uses_early_rate, early_rate, late_rate)
            Poisson('disasters', rate, observed=self.disaster_data)

            # The fit itself is the statement under test.
            with self.assertRaises(ValueError):
                advi(n=10)
示例#7
0
def test_check_discrete():
    """Check that ``advi`` raises ValueError for the disaster model.

    The model contains the discrete ``switchpoint`` variable
    (``DiscreteUniform``); ``advi(model=disaster_model, n=10)`` is expected
    to raise ``ValueError``.
    """
    with Model() as disaster_model:
        # Discrete change-point, bounded by the first and last year.
        first_year = year.min()
        last_year = year.max()
        switchpoint = DiscreteUniform('switchpoint',
                                      lower=first_year,
                                      upper=last_year,
                                      testval=1900)

        # Exponential(1) priors on the rates before/after the switch.
        early_rate = Exponential('early_rate', 1)
        late_rate = Exponential('late_rate', 1)

        # Years up to and including the switchpoint use the early rate,
        # later years use the late rate.
        uses_early_rate = switchpoint >= year
        rate = switch(uses_early_rate, early_rate, late_rate)

        Poisson('disasters', rate, observed=disaster_data)

    # The fit is invoked by assert_raises with the model passed explicitly.
    fit_kwargs = dict(model=disaster_model, n=10)
    assert_raises(ValueError, advi, **fit_kwargs)
示例#8
0
# Label and display the figure that is currently active.
# NOTE(review): the plotting call that draws the data is not visible in this
# excerpt; presumably it plots disaster_data against year -- confirm.
plt.ylabel("Disaster count")
plt.xlabel("Year")

plt.show()

from pymc3 import DiscreteUniform, Poisson, switch, Model, Exponential, NUTS, Metropolis, sample, traceplot

# Change-point model: a Poisson rate that switches from `early_rate` to
# `late_rate` at an unknown year `switchpoint`.
with Model() as disaster_model:

    # Discrete change-point year, bounded by the first and last year in
    # `year`; 1900 is used as the test value.
    switchpoint = DiscreteUniform('switchpoint',
                                  lower=year.min(),
                                  upper=year.max(),
                                  testval=1900)

    # Exponential(1) priors for the disaster rates before and after the switch
    early_rate = Exponential('early_rate', 1)
    late_rate = Exponential('late_rate', 1)

    # Years up to and including the switchpoint get early_rate, later years
    # get late_rate
    rate = switch(switchpoint >= year, early_rate, late_rate)

    # Poisson likelihood of the observed yearly counts.
    disasters = Poisson('disasters', rate, observed=disaster_data)

    # NUTS is assigned to the two continuous rate parameters.
    step1 = NUTS([early_rate, late_rate])

    # Use Metropolis for switchpoint, and missing values since it accommodates discrete variables
    # NOTE(review): `disasters.missing_values` presumably exists because
    # disaster_data contains masked entries -- confirm against the data setup.
    step2 = Metropolis([switchpoint, disasters.missing_values[0]])

    # Draw 10000 samples, combining both step methods.
    trace = sample(10000, step=[step1, step2])

# Plot the sampled traces.
traceplot(trace)
示例#9
0
def mcmc_changepoint(dates,
                     ratings,
                     mcmc_iter=1000,
                     discrete=0,
                     plot_result=1):
    """Fit a single-switchpoint model to a sequence of ratings with MCMC.

    Ratings whose index is at or before an unknown switchpoint are modelled
    as draws from one distribution, and later ratings as draws from a second
    one. With ``discrete=0`` both are Normal distributions sharing one
    ``sigma`` (intended for ratings normalised to each user's average); with
    ``discrete=1`` both are Poisson distributions (intended for raw 1-5 star
    ratings). The posterior is sampled with ``sample(mcmc_iter)``; the step
    methods are whatever ``sample`` assigns by default.

    Parameters
    ----------
    dates : sequence
        Dates when the reviews were posted. ``len(dates)`` is the upper bound
        of the switchpoint prior, and the values are the x-axis of the plot.
    ratings : sequence
        Rating given by each review, in the same order as ``dates``.
    mcmc_iter : int
        Number of MCMC iterations passed to ``sample``.
    discrete : {0, 1}
        0 selects the Normal model, 1 selects the Poisson model.
    plot_result : int
        When 1, show the trace plot and a plot of the ratings together with
        the posterior expected intensity.

    Returns
    -------
    tuple
        ``(differential, modal_switch, expected_stars, sigma_est,
        switch_posterior)`` where ``differential`` is the modal "before"
        intensity minus the modal "after" intensity, ``modal_switch`` is the
        modal switchpoint sample, ``expected_stars`` is the posterior
        expected intensity for each rating index, ``sigma_est`` is the modal
        ``sigma`` sample (``None`` for the Poisson model, which has no
        ``sigma``), and ``switch_posterior`` holds the switchpoint samples.

    Raises
    ------
    ValueError
        If ``discrete`` is neither 0 nor 1.
    """
    # Fail early and clearly; previously an unsupported value surfaced later
    # as an unbound-variable error.
    if discrete not in (0, 1):
        raise ValueError(
            "discrete must be 0 (Normal model) or 1 (Poisson model), "
            "got {!r}".format(discrete))

    number_of_ratings = np.arange(0, len(ratings))

    with Model():
        switchpoint = DiscreteUniform('switchpoint',
                                      lower=0,
                                      upper=len(dates))

        if discrete == 0:
            before_intensity = Normal('before_intensity', mu=0, sd=1)
            after_intensity = Normal('after_intensity', mu=0, sd=1)

            intensity = switch(switchpoint >= number_of_ratings,
                               before_intensity, after_intensity)
            sigma = HalfNormal('sigma', sd=1)

            Normal('rating', mu=intensity, sd=sigma, observed=ratings)
        else:
            before_intensity = Exponential('before_intensity', 1)
            after_intensity = Exponential('after_intensity', 1)

            intensity = switch(switchpoint >= number_of_ratings,
                               before_intensity, after_intensity)

            Poisson('rating', intensity, observed=ratings)

        trace = sample(mcmc_iter)

    if plot_result == 1:
        traceplot(trace)
        plt.show()

    switch_posterior = trace['switchpoint']
    N_MCs = switch_posterior.shape[0]

    before_intensity_posterior = trace['before_intensity']
    after_intensity_posterior = trace['after_intensity']

    # Posterior expected intensity per rating index: average, over samples,
    # of whichever intensity applies to that index in that sample.
    expected_stars = np.zeros(len(ratings))
    for a_rating in number_of_ratings:
        # Same condition as the model's switch(): an index equal to the
        # sampled switchpoint still belongs to the "before" regime.
        where_before = switch_posterior >= a_rating
        expected_stars[a_rating] = (
            before_intensity_posterior[where_before].sum() +
            after_intensity_posterior[~where_before].sum()) / N_MCs

    if plot_result == 1:
        plt.plot(dates, ratings, 'o')
        plt.plot(dates, expected_stars, 'b-')
        plt.show()

    # Summarise each posterior by its mode; the accompanying counts are
    # not returned.
    b_mean, _ = scipy.stats.mode(before_intensity_posterior)
    a_mean, _ = scipy.stats.mode(after_intensity_posterior)
    modal_switch, _ = scipy.stats.mode(switch_posterior)
    if discrete == 0:
        sigma_est, _ = scipy.stats.mode(trace['sigma'])
    else:
        # The Poisson model defines no 'sigma' variable to summarise.
        sigma_est = None
    differential = b_mean - a_mean
    return differential, modal_switch, expected_stars, sigma_est, switch_posterior
示例#10
0
# Model container; all random variables below are registered on it.
basic_model = Model()

# Specify model components
with basic_model:
    # Priors for unknown model parameters (Stochastic random vars):
    # five scale factors, each Uniform on [0, 200000], one per pdf_* template
    # used below.
    s = Uniform('s', lower=0, upper=200000, shape=5)  # one scale per component
    # Expected value of outcome (Deterministic var): each template is
    # multiplied by its own scale factor.
    # NOTE(review): the pdf_*_y arrays are defined outside this excerpt;
    # presumably they share one binning with data_y -- confirm.
    sim_2v_y = s[0] * pdf_2v_y  # scaled 2v template
    sim_Th_y = s[1] * pdf_Th_y
    sim_U_y = s[2] * pdf_U_y
    sim_K_y = s[3] * pdf_K_y
    sim_Co_y = s[4] * pdf_Co_y
    # Total expectation is the sum of the five scaled templates.
    model_y = sim_2v_y + sim_Th_y + sim_U_y + sim_K_y + sim_Co_y
    # Likelihood (sampling distribution) of observations (Observed Stochastic var):
    # Poisson counts, restricted to the slice fit_index_lo:fit_index_hi of
    # both the expectation and the data.
    L = Poisson('L',
                mu=model_y[fit_index_lo:fit_index_hi],
                observed=data_y[fit_index_lo:fit_index_hi])

###############
# Fit model and get posterior estimates for parameters
###############

# Import a sampler
from pymc3 import Metropolis, HamiltonianMC, sample

# Setup sampler within the context of the model
with basic_model:
    # Set some starting value guesses
    start = {'s': [0., 0., 0., 0., 0.]}
    # Instantiate sampler
    step = HamiltonianMC([s])