def create_model(self, prior: R.SdPrior, data: pd.Series):
    """
    Build the state space model for 'data' and attach posterior samplers to
    both the observation model and the state space model itself.

    Args:
      prior: An R.SdPrior object describing the prior distribution on the
        residual variance parameter.  If None, a default prior is built
        from the standard deviation of 'data'.
      data: The time series of observations as a pandas Series.  Entries
        that are NA are flagged as unobserved.

    Returns:
      The boom.StateSpaceModel object, which is also stored in
      self._model.
    """
    self._model = boom.StateSpaceModel(
        boom.Vector(data.values), ~data.isna())

    if prior is None:
        # Default prior: centered on the sample standard deviation, capped
        # at 1.2 times that value.
        sdy = np.std(data)
        prior = R.SdPrior(sigma_guess=sdy, upper_limit=sdy * 1.2)

    # Sampler for the residual variance of the observation model.
    residual_sampler = boom.ZeroMeanGaussianConjSampler(
        self._model.observation_model,
        boom.ChisqModel(prior.sample_size, prior.sigma_guess))
    residual_sampler.set_sigma_upper_limit(prior.upper_limit)
    self._model.observation_model.set_method(residual_sampler)

    # Sampler for the model as a whole.
    self._model.set_method(
        boom.StateSpacePosteriorSampler(self._model, boom.GlobalRng.rng))

    self._original_series = data
    return self._model
def __init__(self, y, sigma_prior=None, initial_state_prior=None,
             sdy=None, initial_y=None):
    """
    Build a local level state model and attach its posterior sampler.

    Args:
      y: The data to be modeled.  Only consulted when 'sdy' or 'initial_y'
        is needed to build a default prior and was not supplied.
      sigma_prior: An R.SdPrior describing the prior on the standard
        deviation of the innovation terms.  If None then 'sdy' will be
        used to choose a default.
      initial_state_prior: An object of class boom.GaussianModel serving
        as the prior distribution on the value of the state at time 0
        (the time of the first observation).  If None then initial_y and
        sdy will be used to choose a default.
      sdy: The standard deviation of y.  If None then this will be
        computed from y.  This argument is primarily intended to handle
        unusual cases where 'y' is unavailable.
      initial_y: The first element of y.  If None then this will be
        taken from y.  This argument is primarily intended to handle
        unusual cases where 'y' is unavailable.

    Raises:
      TypeError: If sigma_prior is not an R.SdPrior, or initial_state_prior
        is not a boom.GaussianModel.
    """
    if sigma_prior is None:
        if sdy is None:
            # nanstd so that missing observations do not turn the default
            # prior into NaN.
            sdy = np.nanstd(y)
        sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                sample_size=.01,
                                upper_limit=sdy)
    if not isinstance(sigma_prior, R.SdPrior):
        raise TypeError("sigma_prior should be an R.SdPrior.")

    if initial_state_prior is None:
        if initial_y is None:
            initial_y = y[0]
        if sdy is None:
            sdy = np.nanstd(y)
        initial_y = float(initial_y)
        sdy = float(sdy)
        initial_state_prior = boom.GaussianModel(initial_y, sdy**2)
    if not isinstance(initial_state_prior, boom.GaussianModel):
        raise TypeError(
            "initial_state_prior should be a boom.GaussianModel.")

    self._state_model = boom.LocalLevelStateModel()
    self._state_model.set_initial_state_mean(initial_state_prior.mu)
    self._state_model.set_initial_state_variance(
        initial_state_prior.sigsq)

    # ChisqModel's arguments are (df, sigma_estimate): the prior sample
    # size comes first, then the guess at sigma.
    innovation_precision_prior = boom.ChisqModel(
        sigma_prior.sample_size,
        sigma_prior.sigma_guess)
    state_model_sampler = self._state_model.set_posterior_sampler(
        innovation_precision_prior)
    state_model_sampler.set_sigma_upper_limit(sigma_prior.upper_limit)

    self._state_contribution = None
def _assign_posterior_sampler(self, innovation_sd_prior: R.SdPrior):
    """
    Create the posterior sampler for the innovation variance of
    self._state_model and install it on that model.

    Args:
      innovation_sd_prior: Prior on the standard deviation of the
        innovation terms.  Its sample_size, sigma_guess and upper_limit
        attributes are read.
    """
    # ChisqModel's arguments are (df, sigma_estimate): the prior sample
    # size comes first, then the guess at sigma.
    innovation_precision_prior = boom.ChisqModel(
        innovation_sd_prior.sample_size,
        innovation_sd_prior.sigma_guess)
    state_model_sampler = boom.ZeroMeanGaussianConjSampler(
        self._state_model,
        innovation_precision_prior,
        seeding_rng=boom.GlobalRng.rng)
    state_model_sampler.set_sigma_upper_limit(
        innovation_sd_prior.upper_limit)
    self._state_model.set_method(state_model_sampler)
def _build_state_model(self):
    """
    (Re)create self._state_model as a boom.LocalLevelStateModel configured
    from self._initial_state_prior and self._sigma_prior, and attach its
    posterior sampler.
    """
    self._state_model = boom.LocalLevelStateModel()
    self._state_model.set_initial_state_mean(
        self._initial_state_prior.mean)
    self._state_model.set_initial_state_variance(
        self._initial_state_prior.variance)

    # ChisqModel's arguments are (df, sigma_estimate): the prior sample
    # size comes first, then the guess at sigma.
    innovation_precision_prior = boom.ChisqModel(
        self._sigma_prior.sample_size,
        self._sigma_prior.sigma_guess)
    state_model_sampler = self._state_model.set_posterior_sampler(
        innovation_precision_prior)
    state_model_sampler.set_sigma_upper_limit(
        self._sigma_prior.upper_limit)
def test_mcmc(self):
    """
    Smoke test for ZeroMeanGaussianConjSampler: after running the sampler,
    the first and last draws of sigma should differ.
    """
    sigma_truth = 2.3
    observations = np.random.randn(100) * sigma_truth
    precision_prior = boom.ChisqModel(1.0, 1.0)

    self.model.set_data(boom.Vector(observations))
    self.model.set_method(
        boom.ZeroMeanGaussianConjSampler(self.model, precision_prior))

    sigma_draws = []
    for _ in range(1000):
        self.model.sample_posterior()
        sigma_draws.append(self.model.sigma)

    # If the sampler is moving at all, the two end points will not agree.
    self.assertNotAlmostEqual(sigma_draws[0], sigma_draws[-1])
def test_mcmc():
    """
    Smoke test: a GaussianModel with a GaussianConjugateSampler can take
    100 posterior draws on simulated data without raising.
    """
    true_mean = -16
    true_sd = 7
    sample = np.random.randn(10000) * true_sd + true_mean

    gaussian = boom.GaussianModel()
    gaussian.set_data(boom.Vector(sample))

    prior_on_mean = boom.GaussianModelGivenSigma(
        gaussian.sigsq_parameter, true_mean, 1.0)
    prior_on_sigsq = boom.ChisqModel(1.0, true_sd)
    gaussian.set_method(
        boom.GaussianConjugateSampler(
            gaussian, prior_on_mean, prior_on_sigsq))

    num_draws = 100
    for _ in range(num_draws):
        gaussian.sample_posterior()
def __init__(self,
             x,
             y=None,
             expected_r2=.5,
             prior_df=.01,
             expected_model_size=1,
             prior_information_weight=.01,
             diagonal_shrinkage=.5,
             optional_coefficient_estimate=None,
             max_flips=-1,
             mean_y=None,
             sdy=None,
             prior_inclusion_probabilities=None,
             sigma_upper_limit=np.inf):
    """
    Computes information that is shared by the different implementation of
    spike and slab priors.  Currently, the only difference between the
    different priors is the prior variance on the regression coefficients.
    When that changes, change this class accordingly, and change all the
    classes that inherit from it.

    Args:
      x: The design matrix for the regression being modeled, as either a
        2-dimensional np.array or a boom.Matrix.  Its number of columns
        is the maximum size of the coefficient vector.
      y: The response variable.  Only needed when 'mean_y' or 'sdy' is
        None, in which case the missing value is computed from y.
      expected_r2: The R^2 statistic that the model is expected to
        achieve.  Used along with 'sdy' to derive a prior distribution
        for the residual variance.
      prior_df: The number of observations worth of weight to give to the
        guess at the residual variance.
      expected_model_size: The expected number of nonzero coefficients in
        the model.  Used to set prior_inclusion_probabilities to
        expected_model_size / (number of columns of x), clipped to the
        interval [0, 1].
      prior_information_weight: Multiplier applied to x'x / nrow(x) when
        forming the unscaled prior precision.
      diagonal_shrinkage: The off-diagonal elements of the unscaled prior
        precision are multiplied by (1 - diagonal_shrinkage); the
        diagonal elements are left unshrunk.
      optional_coefficient_estimate: A vector with one element per column
        of x to use as the prior mean of the regression coefficients.
        This can also be None, in which case the prior mean for the
        intercept (element 0) will be set to mean_y, and the prior mean
        for all slopes will be 0.
      max_flips: Accepted for interface compatibility.  Not read by this
        constructor.
      mean_y: The mean of the response variable.  Used to create a
        sensible default prior mean for the regression coefficients when
        optional_coefficient_estimate is None.
      sdy: The standard deviation of the response.  Used along with
        expected_r2 to create a prior guess at the residual variance.
      prior_inclusion_probabilities: A vector with one element per column
        of x giving the prior inclusion probability of each coefficient.
        Each element must be between 0 and 1, inclusive.  If left as None
        then a default value will be created from expected_model_size as
        described above.
      sigma_upper_limit: The largest acceptable value for the residual
        standard deviation.  Not read by this constructor.

    Raises:
      TypeError: If x is neither an np.ndarray nor a boom.Matrix.
      ValueError: If y is None but is needed to compute mean_y or sdy.
    """
    if isinstance(x, np.ndarray):
        x = boom.Matrix(x)
    if not isinstance(x, boom.Matrix):
        raise TypeError(
            "x should either be a 2-dimensional np.array or a boom.Matrix."
        )

    # y is only needed to fill in whichever of mean_y / sdy was omitted.
    # Validate and convert it once, up front, so that both computations
    # see a boom.Vector.
    if mean_y is None or sdy is None:
        if y is None:
            if mean_y is None:
                raise ValueError(
                    "Either 'y' or 'mean_y' must be specified.")
            raise ValueError("Either 'y' or 'sdy' must be specified.")
        if isinstance(y, np.ndarray):
            y = boom.Vector(y)
    if mean_y is None:
        mean_y = boom.mean(y)

    if optional_coefficient_estimate is None:
        # Default prior mean: intercept at mean_y, all slopes at zero.
        optional_coefficient_estimate = np.zeros(x.ncol)
        optional_coefficient_estimate[0] = mean_y
    self._mean = boom.Vector(optional_coefficient_estimate)

    sample_size = x.nrow
    ods = 1. - diagonal_shrinkage
    scale_factor = prior_information_weight * ods / sample_size
    self._unscaled_prior_precision = x.inner() * scale_factor
    # Undo the 'ods' factor on the diagonal only.  This relies on diag()
    # returning a view into the matrix -- TODO confirm.
    diag_view = self._unscaled_prior_precision.diag()
    diag_view /= ods

    if prior_inclusion_probabilities is None:
        potential_nvars = x.ncol
        prob = expected_model_size / potential_nvars
        if prob > 1:
            prob = 1
        if prob < 0:
            prob = 0
        self._prior_inclusion_probabilities = boom.Vector(
            potential_nvars, prob)
    else:
        self._prior_inclusion_probabilities = boom.Vector(
            prior_inclusion_probabilities)

    if sdy is None:
        sdy = boom.sd(y)
    sample_variance = sdy**2
    expected_residual_variance = (1 - expected_r2) * sample_variance
    self._residual_precision_prior = boom.ChisqModel(
        prior_df,
        np.sqrt(expected_residual_variance))
def boom(self):
    """
    Build the boom.ChisqModel described by this object's 'sample_size' and
    'sigma_guess' attributes.
    """
    # Imported locally under a private alias so it does not shadow this
    # method's own name.
    import BayesBoom.boom as _boom

    chisq_model = _boom.ChisqModel(self.sample_size, self.sigma_guess)
    return chisq_model
def create_chisq_model(self):
    """
    Return a new boom.ChisqModel built from this object's 'sample_size'
    and 'sigma_guess' attributes.
    """
    import BayesBoom.boom as boom_module

    make_chisq = boom_module.ChisqModel
    return make_chisq(self.sample_size, self.sigma_guess)
def __setstate__(self, payload):
    """
    Restore the object's attributes from 'payload', then rebuild the
    residual precision prior from the 'prior_df' and 'prior_ss' entries.

    Those two entries are only carriers for the prior's parameters, so
    they are removed from the object once the prior has been rebuilt.
    """
    self.__dict__.update(payload)

    df = self.prior_df
    sigma_guess = np.sqrt(self.prior_ss / self.prior_df)
    self._residual_precision_prior = boom.ChisqModel(df, sigma_guess)

    for carrier in ("prior_df", "prior_ss"):
        delattr(self, carrier)
def __init__(self,
             y,
             nseasons: int,
             season_duration: int = 1,
             initial_state_prior: boom.MvnModel = None,
             innovation_sd_prior: R.SdPrior = None,
             sdy: float = None):
    """
    Build a seasonal state model and attach its posterior sampler.

    Args:
      y: The time series being modeled.  Only consulted when 'sdy' is
        None and a default prior has to be built; it may be None if
        'sdy' is supplied, or if both priors are supplied.
      nseasons: The number of seasons in a cycle.
      season_duration: The number of time periods each season.
      initial_state_prior: A multivariate normal distribution of dimension
        nseasons - 1.  This is a distribution on the seasonal value at
        time 0 and on the nseasons-2 previous values.  If None is passed
        then a default prior will be assumed.
      innovation_sd_prior: Prior distribution on the standard deviation of
        the innovation terms.  If None, then a default prior will be
        assumed.
      sdy: The standard deviation of the time series being modeled.  If
        None it is computed from y (ignoring NaN's) when needed.

    Raises:
      ValueError: If a default prior is needed but neither 'y' nor 'sdy'
        was supplied.
    """
    self._state_model = boom.SeasonalStateModel(
        nseasons=nseasons, season_duration=season_duration)

    if initial_state_prior is None:
        if sdy is None:
            if y is None:
                raise ValueError("One of 'y', 'sdy', or "
                                 "'initial_state_prior' must be supplied.")
            sdy = np.nanstd(y)
        initial_state_prior = self._default_initial_state_prior(sdy)

    if innovation_sd_prior is None:
        if sdy is None:
            if y is None:
                raise ValueError("One of 'y', 'sdy', or "
                                 "'innovation_sd_prior' must be supplied.")
            sdy = np.nanstd(y)
        innovation_sd_prior = self._default_sigma_prior(sdy)

    self._state_model.set_initial_state_mean(
        initial_state_prior.mu)
    self._state_model.set_initial_state_variance(
        initial_state_prior.Sigma)

    # ChisqModel's arguments are (df, sigma_estimate): the prior sample
    # size comes first, then the guess at sigma.
    innovation_precision_prior = boom.ChisqModel(
        innovation_sd_prior.sample_size,
        innovation_sd_prior.sigma_guess)
    state_model_sampler = boom.ZeroMeanGaussianConjSampler(
        self._state_model,
        innovation_precision_prior,
        seeding_rng=boom.GlobalRng.rng)
    state_model_sampler.set_sigma_upper_limit(
        innovation_sd_prior.upper_limit)
    self._state_model.set_method(state_model_sampler)

    self._state_contribution = None