示例#1
0
    def sample_mod(
        self,
        posterior_draws = 2000, # author's note: this is not enough
        post_pred_draws = 1000,
        prior_pred_draws = 1000,
        random_seed = 42,
        chains = 2):
        """Sample the posterior, the posterior/prior predictive, and test-set predictions.

        The results are stored on the instance as ``self.trace``,
        ``self.post_pred``, ``self.prior_pred`` and ``self.m_idata``. After the
        training-data sampling, the model's shared data is switched to the test
        arrays and predictions for them are added to ``self.m_idata`` in place.

        ``random_seed`` is forwarded to every sampling call (posterior,
        posterior predictive, prior predictive and test-set predictions) so
        that the complete run is reproducible, not only the posterior draws.

        Args:
            posterior_draws (int, optional): Number of draws for the posterior. Defaults to 2000.
            post_pred_draws (int, optional): Number of draws from the posterior predictive distribution. Defaults to 1000.
            prior_pred_draws (int, optional): Number of draws for the prior predictive distribution. Defaults to 1000.
            random_seed (int, optional): Random seed for ensuring reproducibility. Defaults to 42.
            chains (int, optional): Number of chains used for sampling the posterior. Defaults to 2.

        Example:
            Pc.sample_mod(posterior_draws = 3000, post_pred_draws = 1500, prior_pred_draws = 55, random_seed = 13, chains = 4)
        """

        # we need these for later
        self.posterior_draws = posterior_draws
        self.post_pred_draws = post_pred_draws
        self.prior_pred_draws = prior_pred_draws

        with self.model:
            self.trace = pm.sample(
                return_inferencedata = False,
                draws = posterior_draws,
                target_accept = .99,
                random_seed = random_seed,
                chains = chains)
            # Seed the predictive draws as well; otherwise only the posterior
            # would be reproducible.
            self.post_pred = pm.sample_posterior_predictive(
                self.trace,
                samples = post_pred_draws,
                random_seed = random_seed)
            self.prior_pred = pm.sample_prior_predictive(
                samples = prior_pred_draws,
                random_seed = random_seed)
            self.m_idata = az.from_pymc3(
                trace = self.trace,
                posterior_predictive = self.post_pred,
                prior = self.prior_pred)

        with self.model:
            # Swap the shared inputs to the test data before predicting.
            pm.set_data({
                "t1_shared": self.t1_test,
                "t2_shared": self.t2_test,
                "idx_shared": self.idx_test,
                "t3_shared": np.array(self.t3_test),
            })
            predictions = pm.fast_sample_posterior_predictive(
                self.m_idata.posterior,
                random_seed = random_seed
            )
            # inplace = True: the predictions are added to self.m_idata itself.
            az.from_pymc3_predictions(
                predictions,
                idata_orig = self.m_idata,
                coords = {'idx': self.test[self.index].values},
                inplace = True)
示例#2
0
 def predict(self):  # TODO: make this work for only one.
     """Sample predictions for the test data and add them to ``self.m_idata``.

     The model's shared inputs are replaced by the test arrays, predictions
     are drawn from the stored posterior, and the result is attached to
     ``self.m_idata`` in place.
     """
     with self.model:
         # Point every shared input at its test-set counterpart.
         test_inputs = {
             "t1_shared": self.t1_test,
             "t2_shared": self.t2_test,
             "idx_shared": self.idx_test,
             "t3_shared": np.array(self.t3_test),
         }
         pm.set_data(test_inputs)

         posterior = self.m_idata.posterior
         predictions = pm.fast_sample_posterior_predictive(posterior)

         # Label the prediction rows with the test-set index values.
         idx_coords = {'idx': self.test[self.index].values}
         az.from_pymc3_predictions(
             predictions,
             idata_orig=self.m_idata,
             coords=idx_coords,
             inplace=True,
         )
示例#3
0
    def test_predictions_constant_data(self):
        """Check the groups produced by ``from_pymc3`` and ``from_pymc3_predictions``.

        A model is first sampled on three data points and converted with
        ``from_pymc3``. A second model with two data points is then used to
        draw posterior predictive samples from that trace, which are converted
        with ``from_pymc3_predictions``.
        """
        # Stage 1: sample the model on the original three data points.
        with pm.Model():
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            fit_trace = pm.sample(100, tune=100)

            fit_idata = from_pymc3(trace=fit_trace)
        expected_fit = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        assert not check_multiple_attrs(expected_fit, fit_idata)

        # Stage 2: same model structure with two data points, used for predictions.
        with pm.Model():
            x = pm.Data("x", [1.0, 2.0])
            y = pm.Data("y", [1.0, 2.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            pred_samples = pm.sample_posterior_predictive(fit_trace)
            assert set(pred_samples.keys()) == {"obs"}
            # The shape of pred_samples["obs"] is deliberately not asserted:
            # it should be (400, 2), i.e. four chains of 100 samples, but the
            # shape seems to vary between pymc3 versions.
            pred_idata = from_pymc3_predictions(pred_samples, posterior_trace=fit_trace)

        # Each entry pairs the expected attributes with its failure message.
        group_checks = (
            (
                {"posterior": ["beta"], "~observed_data": ""},
                "Posterior data not copied over as expected.",
            ),
            (
                {"predictions": ["obs"]},
                "Predictions not instantiated as expected.",
            ),
            (
                {"predictions_constant_data": ["x"]},
                "Predictions constant data not instantiated as expected.",
            ),
        )
        for expected, message in group_checks:
            assert not check_multiple_attrs(expected, pred_idata), message
示例#4
0
    def forecast_election(self,
                          idata: arviz.InferenceData) -> arviz.InferenceData:
        """
        Generate out-of-sample predictions for ``election_to_predict`` specified in ``__init__``.

        Parameters
        ----------
        idata: arviz.InferenceData
            Posterior trace generated by ``self.sample_all`` on the training dataset.
            The dataset used for predictions is generated automatically: one observation for each
            of the days in ``self.coords["countdown"]``. The corresponding values of predictors are
            handled automatically.

        Returns
        -------
        arviz.InferenceData
            The object returned by ``arviz.from_pymc3_predictions`` when called with
            ``idata_orig=idata`` and ``inplace=False``.
        """
        new_dates, oos_data = self._generate_oos_data(idata)
        oos_data = self._join_with_continuous_predictors(oos_data)

        # Frame indexed by the out-of-sample rows, with one zero-filled
        # placeholder column per party; flattened back into ordinary columns.
        placeholder_frame = pd.DataFrame(
            data=0,
            index=pd.MultiIndex.from_frame(oos_data),
            columns=self.parties_complete,
        )
        forecast_data = placeholder_frame.reset_index()

        prediction_coords = {"observations": new_dates}
        prediction_dims = {
            var_name: ["observations", "parties_complete"]
            for var_name in ("latent_popularity", "noisy_popularity", "N_approve")
        }

        # The same frame is passed for both the polls and the predictors.
        forecast_model = self.build_model(
            polls=forecast_data,
            continuous_predictors=forecast_data,
        )
        sampled_vars = [
            "party_intercept",
            "latent_popularity",
            "noisy_popularity",
            "N_approve",
            "latent_pop_t0",
            "R",
        ]
        with forecast_model:
            raw_draws = pm.fast_sample_posterior_predictive(
                idata,
                var_names=sampled_vars,
            )
            forecast = arviz.from_pymc3_predictions(
                raw_draws,
                idata_orig=idata,
                inplace=False,
                coords=prediction_coords,
                dims=prediction_dims,
            )

        return forecast
示例#5
0
 def make_predictions_inference_data(
     self, data, eight_schools_params
 ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
     """Build an InferenceData from posterior predictive draws of ``data.model``.

     Returns the converted InferenceData together with the raw posterior
     predictive samples it was built from.
     """
     with data.model:
         pp_samples = pm.sample_posterior_predictive(data.obj)
         # One "school" coordinate per entry, 0 .. J-1.
         school_coords = {"school": np.arange(eight_schools_params["J"])}
         school_dims = {"theta": ["school"], "eta": ["school"]}
         idata = from_pymc3_predictions(
             pp_samples,
             posterior_trace=data.obj,
             coords=school_coords,
             dims=school_dims,
         )
         assert isinstance(idata, InferenceData)
     return idata, pp_samples
示例#6
0
    def get_predictions_inference_data(
        self, data, eight_schools_params, inplace
    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
        """Convert a trace to InferenceData, then extend it with predictions.

        ``inplace`` is forwarded to ``from_pymc3_predictions``; the method
        asserts that the returned object is the original InferenceData exactly
        when ``inplace`` is true. Returns the extended InferenceData and the
        raw posterior predictive samples.
        """
        with data.model:
            # Prior predictive is drawn first, then the posterior predictive.
            prior_samples = pm.sample_prior_predictive()
            pp_samples = pm.sample_posterior_predictive(data.obj)

            school_coords = {"school": np.arange(eight_schools_params["J"])}
            base_idata = from_pymc3(
                trace=data.obj,
                prior=prior_samples,
                coords=school_coords,
                dims={"theta": ["school"], "eta": ["school"]},
            )
            assert isinstance(base_idata, InferenceData)

            extended = from_pymc3_predictions(
                pp_samples,
                idata_orig=base_idata,
                inplace=inplace,
            )
            assert isinstance(extended, InferenceData)
            # Same object if and only if the extension was requested in place.
            assert (base_idata is extended) == inplace
        return (extended, pp_samples)
n_idx_test = len(idx_unique_test)

# Coordinates attached to the predictions added below.
prediction_coords = {"idx": idx_unique_test, "t": t_unique_test}

# Reshape each test column to (n_idx_test, n_time_test).
t_test = test.t.values.reshape(n_idx_test, n_time_test)
y_test = test.y.values.reshape(n_idx_test, n_time_test)
idx_test = test.idx.values.reshape(n_idx_test, n_time_test)

with m:
    # Swap the shared inputs to the test data, then sample predictions
    # from the existing posterior.
    pm.set_data({"t_shared": t_test, "idx_shared": idx_test})
    stl_pred = pm.fast_sample_posterior_predictive(
        m_idata.posterior,
        random_seed=RANDOM_SEED,
    )
    # inplace=True: the predictions are added to m_idata itself.
    az.from_pymc3_predictions(
        stl_pred,
        idata_orig=m_idata,
        inplace=True,
        coords=prediction_coords,
    )

# Model and prior labels passed to the plot below.
model_type = "covariation"
prior_level = "generic"

# plot hdi for prediction
fh.plot_hdi(
    t=t_test,
    y=y_test,
    n_idx=n_idx_test,
    m_idata=m_idata,
    model_type=model_type,
    prior_level=prior_level,
    kind="predictions",
)

# plot hdi for individual aliens
示例#8
0
# One generated sample per entry of the trace, stacked into a single array.
y_ar = [generate_harmonic_sample(tdaynew, omega, tt) for tt in trace]

a0_pred = np.array(y_ar)
    
#######
# Save the samples
#######
print('Saving the output to ', outputnc)

# Convert the predictions to an arviz structure.
dims = ('chain', 'draw', 'time')
ds = az.from_pymc3_predictions(
    {'a0': a0_pred},
    coords={'time': predtime, 'chain': np.array([1])},
    dims={'a0': dims},
)

# Convert the posterior trace.
ds2 = az.from_pymc3(trace=trace)

# Overwrite the observed data because it comes out as a theano.tensor in the
# way our particular model is specified.
ds2.observed_data['X_obs'] = xr.DataArray(
    X,
    dims=('time',),
    coords={'time': timein},
)

# Merge the predictions into the posterior data set.
ds2.extend(ds)

# Write everything to disk.
ds2.to_netcdf(outputnc)

print(ds2)
    'idx': idx_unique_test,
    't': t_unique_test
}

# Reshape each test column to (n_idx_test, n_time_test).
t_test = test.t.values.reshape(n_idx_test, n_time_test)
y_test = test.y.values.reshape(n_idx_test, n_time_test)
idx_test = test.idx.values.reshape(n_idx_test, n_time_test)

with m_covariation:
    # Swap the shared inputs to the test data, then sample predictions
    # from the existing posterior.
    pm.set_data({"t_shared": t_test, "idx_shared": idx_test})
    stl_pred = pm.fast_sample_posterior_predictive(
        idata_covariation.posterior,
        random_seed=RANDOM_SEED,
    )
    # inplace=True: the predictions are added to idata_covariation itself.
    az.from_pymc3_predictions(
        stl_pred,
        idata_orig=idata_covariation,
        inplace=True,
        coords=prediction_coords,
    )
    
### python: plot hdi (full uncertainty) ###
# take posterior predictive out of idata for convenience
ppc = idata_covariation.posterior_predictive

# take out predictions (mean over axis 0, the chain axis per the original note).
y_pred = ppc["y_pred"].mean(axis = 0).values

# calculate mean y predicted (mean over draws and idx)
y_mean = y_pred.mean(axis = (0, 1))

# THE DIFFERENCE: base it on the actual predictions of the full model
# Collapse the leading (draw) axis and the idx axis into rows. The number of
# draws is read from y_pred itself instead of being hard-coded as 4000
# (2000 draws * 2 chains), so this keeps working when the sampler settings
# change; n_idx and n_time are still checked by the reshape.
outcome = y_pred.reshape((y_pred.shape[0] * n_idx, n_time))