Exemplos de Matrix em Python, exemplos de BayesBoom.boom.Matrix em Python

Exemplo n.º 1

0

Exibir arquivo

    def _restore_parameters(self, iteration: int):
        """
        Load the parameter values stored for one MCMC iteration into the
        underlying model object.

        Args:
          iteration: Index (along the first axis of the stored draws) of the
            MCMC iteration to restore.
        """
        model = self._model
        model.set_coefficients(
            boom.Matrix(self.coefficients[iteration, :, :]))
        model.set_residual_variance(
            boom.SpdMatrix(self.residual_variance[iteration, :, :]))

        for cluster in range(self.nclusters):
            # Numeric variables: atom probabilities (a vector) and atom error
            # probabilities (a matrix) for this cluster.
            for col, vname in enumerate(self._numeric_colnames):
                atom_draw = self.atom_probs[vname][iteration, cluster, :]
                model.set_atom_probs(cluster, col, boom.Vector(atom_draw))

                error_draw = self.atom_error_probs[vname][
                    iteration, cluster, :, :]
                model.set_atom_error_probs(
                    cluster, col, boom.Matrix(error_draw))

            # Categorical variables: level probabilities (a vector) and level
            # observation probabilities (a matrix) for this cluster.
            for col, vname in enumerate(self._categorical_colnames):
                level_draw = self.level_probs[vname][iteration, cluster, :]
                model.set_level_probs(cluster, col, boom.Vector(level_draw))

                observation_draw = self.level_observation_probs[vname][
                    iteration, cluster, :, :]
                model.set_level_observation_probs(
                    cluster, col, boom.Matrix(observation_draw))

Exemplo n.º 2

0

Exibir arquivo

    def _restore_parameters(self, i):
        """
        Load the stored draws from MCMC iteration 'i' into the model.

        Args:
          i: Index (along the first axis of the stored draws) of the
            iteration to restore.
        """
        coefficients = boom.Matrix(self.coefficient_draws[i, :, :])
        self._model.set_coefficients(coefficients)

        residual_variance = boom.Matrix(self.residual_variance_draws[i, :, :])
        self._model.set_residual_variance(residual_variance)

        for cluster in range(self.nclusters):
            for col, name in enumerate(self._numeric_colnames):
                # Atom probabilities are a vector per (cluster, variable).
                self._model.set_atom_probs(
                    cluster, col,
                    boom.Vector(self.atom_probs[name][i, cluster, :]))

                # Atom error probabilities are a matrix per (cluster,
                # variable).
                self._model.set_atom_error_probs(
                    cluster, col,
                    boom.Matrix(self.atom_error_probs[name][i, cluster, :, :]))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: moments_test.py Projeto: steve-the-bayesian/BOOM

    def test_matrix(self):
        """
        Simulate correlated Gaussian rows, then check that mean, var, and cor
        applied to a boom.Matrix are close to the values used in the
        simulation.
        """
        draws = np.random.randn(10000, 3)
        Sigma = boom.SpdMatrix(
            np.array([[1, .8, -.6], [.8, 2, -.8], [-.6, -.8, 4]]))
        # Post-multiply the white noise by the factor returned by getLT(),
        # presumably so that each row has variance Sigma.
        factor = boom.Cholesky(Sigma).getLT()
        draws = draws @ factor.to_numpy()
        true_mean = np.array([1, 2, -3])
        draws = draws + true_mean
        true_mean = boom.Vector(true_mean)
        draws = boom.Matrix(draws)

        sample_mean = mean(draws)
        self.assertLess((sample_mean - true_mean).normsq(), .01)

        sample_variance = var(draws)
        self.assertLess(
            (sample_variance.diag() - Sigma.diag()).normsq(), .05)

        sample_cor = cor(draws)
        # Build the true correlation matrix element by element from Sigma.
        true_cor = Sigma.to_numpy()
        for i, j in np.ndindex(3, 3):
            scale = np.sqrt(Sigma[i, i] * Sigma[j, j])
            true_cor[i, j] = Sigma[i, j] / scale
        true_cor = boom.SpdMatrix(true_cor)

        self.assertLess((true_cor - sample_cor).max_abs(), .01)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: dynreg_test.py Projeto: steve-the-bayesian/BOOM

    def simulate_data_from_model(time_dimension: int, typical_sample_size: int,
                                 xdim: int, residual_sd: float,
                                 unscaled_innovation_sd: np.ndarray,
                                 p00: np.ndarray, p11: np.ndarray):
        """
        Simulate regression data whose coefficients evolve over time and are
        switched on and off by per-coefficient Markov chains.

        Args:
          time_dimension: The number of time points to simulate.
          typical_sample_size: The Poisson mean of the number of observations
            at each time point.
          xdim: The number of predictor variables (including the intercept,
            which is stored in column 0 of each predictor matrix).
          residual_sd: The standard deviation of the regression errors.
          unscaled_innovation_sd: One value per coefficient.  The innovation
            standard deviation for coefficient j is
            unscaled_innovation_sd[j] * residual_sd.
          p00: One value per coefficient; entry [0, 0] of that coefficient's
            2x2 transition matrix for the inclusion indicator.
          p11: One value per coefficient; entry [1, 1] of that coefficient's
            2x2 transition matrix for the inclusion indicator.

        Returns:
          A tuple (data, coefficients, inclusion) where
          * data is a list with one boom.RegressionDataTimePoint per time
            point,
          * coefficients is an (xdim, time_dimension) array of the simulated
            coefficients,
          * inclusion is an (xdim, time_dimension) array holding the values
            returned by rmarkov for each coefficient.
        """
        from BayesBoom.R import rmarkov
        inclusion = np.full((xdim, time_dimension), -1)
        p00 = p00.ravel()
        p11 = p11.ravel()
        for j in range(xdim):
            P = np.array([[p00[j], 1 - p00[j]], [1 - p11[j], p11[j]]])
            inclusion[j, :] = rmarkov(time_dimension, P)

        coefficients = np.zeros((xdim, time_dimension))
        for j in range(xdim):
            sd = unscaled_innovation_sd[j] * residual_sd
            for t in range(time_dimension):
                prev = 0 if t == 0 else coefficients[j, t - 1]
                # Draw a scalar (not a length-1 array): storing a 1-element
                # array into a single array slot is deprecated in recent
                # NumPy releases.  np.random.randn() consumes the same single
                # draw from the generator as np.random.randn(1).
                innovation = np.random.randn() * sd
                coefficients[j, t] = inclusion[j, t] * (prev + innovation)

        data = []
        for t in range(time_dimension):
            sample_size = np.random.poisson(typical_sample_size, 1)[0]
            X = np.random.randn(sample_size, xdim)
            X[:, 0] = 1.0  # Intercept column.
            yhat = X @ coefficients[:, t]
            y = yhat + residual_sd * np.random.randn(sample_size)
            data.append(
                boom.RegressionDataTimePoint(boom.Matrix(X), boom.Vector(y)))
        return data, coefficients, inclusion

Exemplo n.º 5

0

Exibir arquivo

    def create_model(self, prior, data):
        """Build the boom state space regression model and store it.

        Args:
          prior: A spikeslab.RegressionSpikeSlabPrior describing the prior
            distribution for the observation (regression) model.
          data: The data object handed to patsy.dmatrices together with
            self._formula (e.g. a pandas DataFrame containing the variables
            appearing in the formula).

        Returns:
          The newly created model, which is also stored in self._model.

        Raises:
          Exception: If 'prior' is not a spikeslab.RegressionSpikeSlabPrior.

        Effects:
          self._model is created, the posterior sampler for its observation
          model is set from 'prior', and the response produced by the formula
          is stored in self._original_series.
        """
        prior_is_valid = isinstance(prior, spikeslab.RegressionSpikeSlabPrior)
        if not prior_is_valid:
            raise Exception("Unexpected type for prior.")

        response, predictors = patsy.dmatrices(self._formula, data)
        # Observations whose response is NaN are flagged as unobserved.
        is_observed = ~np.isnan(response)

        boom_response = boom.Vector(response)
        boom_predictors = boom.Matrix(predictors)
        self._model = boom.StateSpaceRegressionModel(
            boom_response, boom_predictors, is_observed)

        spikeslab.set_posterior_sampler(self._model.observation_model, prior)
        self._original_series = response

        return self._model

Exemplo n.º 6

0

Exibir arquivo

Arquivo: student.py Projeto: steve-the-bayesian/BOOM

 def format_prediction_data(self, prediction_data, **kwargs):
     """
     Package prediction inputs in a dict.

     Args:
       prediction_data: Either an int giving the forecast horizon, or a data
         object that patsy.dmatrix can evaluate self._formula against.
       **kwargs: Accepted but not used by this method.

     Returns:
       A dict with keys "forecast_horizon" and "predictors" (a boom.Matrix).
       When prediction_data is not an int the dict also contains "xnames",
       the term names from the patsy design info.
     """
     if isinstance(prediction_data, int):
         # With only a horizon supplied, the predictor matrix is a single
         # column of ones.
         horizon = int(prediction_data)
         return {
             "forecast_horizon": horizon,
             "predictors": boom.Matrix(np.ones((horizon, 1))),
         }

     predictor_matrix = patsy.dmatrix(self._formula, data=prediction_data)
     xnames = predictor_matrix.design_info.term_names
     return {
         "forecast_horizon": prediction_data.shape[0],
         "predictors": boom.Matrix(predictor_matrix),
         "xnames": xnames,
     }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: logit.py Projeto: steve-the-bayesian/BOOM

    def create_model(self, prior, data, **kwargs):
        """
        Create the boom model object and attach its posterior samplers.

        Args:
          prior: The prior for the observation model, or None.  It is passed
            through self._verify_prior before use.
          data: The data object evaluated against self._formula by
            patsy.dmatrices, or None.
          **kwargs: May contain "trials" (a number or an array-like); the
            default is 1.  All keyword arguments are also forwarded to
            self._verify_prior.

        Returns:
          The created model, which is also stored in self._model.

        Raises:
          Exception: If both 'data' and 'prior' are None.

        Effects:
          Sets self._model, self._prior, and self._original_series, and (when
          data is supplied) self.predictor_names.
        """
        if data is None and prior is None:
            raise Exception("At least one of 'data' or 'prior' is needed.")

        if data is not None:
            response, predictors = patsy.dmatrices(self._formula, data)
            self.predictor_names = predictors.design_info.term_names
            trials = kwargs.get("trials", 1)
            if isinstance(trials, Number):
                # A scalar number of trials applies to every observation.
                trials = np.full(len(response), trials)
            observed = np.isfinite(response)
            # NOTE(review): this branch builds a StateSpacePoissonModel while
            # the data-free branch below builds a StateSpaceLogitModel --
            # confirm which class is intended.
            self._model = boom.StateSpacePoissonModel(
                boom.Vector(response),
                boom.Vector(trials),
                boom.Matrix(predictors),
                observed)
        else:
            # No data: size the model from the prior alone.
            xdim = len(prior._prior_inclusion_probabilities)
            self._model = boom.StateSpaceLogitModel(xdim)
            response = None
            predictors = None
            trials = None

        logit_reg = self._model.observation_model
        prior = self._verify_prior(
            prior, response, predictors, trials, **kwargs)
        self._prior = prior
        observation_model_sampler = prior.create_sampler(
            logit_reg, assign=True)

        # NOTE(review): a Poisson posterior sampler is used for both branches
        # above -- confirm this is intended for the logit model.
        self._model.set_method(
            boom.StateSpacePoissonPosteriorSampler(
                self._model, observation_model_sampler))
        self._original_series = response
        return self._model

Exemplo n.º 8

0

Exibir arquivo

def check_stochastic_process(draws: np.ndarray,
                             truth: np.ndarray,
                             confidence: float = .95,
                             sd_ratio_threshold: float = .1,
                             control_multiple_comparisons: bool = True):
    """
    Convert the arguments to boom types and forward them to
    boom.check_stochastic_process.

    Args:
      draws: A matrix of Monte Carlo draws to be checked.  Each row is a draw
        and each column is a variable.
      truth: A vector of true values against which draws will be compared.
        Its length must match the number of columns in draws.
      confidence: The confidence associated with the marginal posterior
        intervals used to determine coverage.
      sd_ratio_threshold: One of the testing diagnostics compares the standard
        deviation of the centered draws to the standard deviation of the true
        function.  If that ratio is less than this threshold the diagnostic is
        passed.
      control_multiple_comparisons: Converted to bool and forwarded to
        boom.check_stochastic_process.

    Returns:
      A string containing an error message describing the mode of the failure
      to cover.
    """
    import BayesBoom.boom as boom
    boom_draws = boom.Matrix(draws)
    boom_truth = boom.Vector(truth)
    return boom.check_stochastic_process(
        boom_draws,
        boom_truth,
        float(confidence),
        float(sd_ratio_threshold),
        bool(control_multiple_comparisons))

Exemplo n.º 9

0

Exibir arquivo

 def _set_default_regression_prior(self):
     """
     Install a default regression prior on the model: an all-zero
     (xdim, ydim) coefficient matrix with weight 1.0, and an identity matrix
     of dimension ydim with weight ydim + 1.
     """
     model = self._model
     xdim, ydim = model.xdim, model.ydim
     prior_mean = boom.Matrix(np.zeros((xdim, ydim)))
     variance_guess = boom.SpdMatrix(np.eye(ydim))
     model.set_regression_prior(prior_mean, 1.0, variance_guess, ydim + 1)

Exemplo n.º 10

0

Exibir arquivo

def to_boom_matrix(m):
    """
    Build a boom.Matrix from the matrix-like object 'm'.

    The boom.Matrix constructor only accepts floating point numpy arrays, so
    this function first converts 'm' to one.  'm' can be a numeric scalar, a
    numpy array of any numeric dtype, a pandas DataFrame containing numeric
    data, or any similar object that either acts like a pd.DataFrame or is
    convertible to a np.array.

    Args:
      m: The object to convert.

    Returns:
      A boom.Matrix.  A numeric scalar becomes a 1x1 matrix.
    """
    # Objects exposing both 'values' and 'dtypes' are treated as
    # pd.DataFrame-like, provided all their data are numeric.
    frame_like = hasattr(m, "values") and hasattr(m, "dtypes")
    if frame_like and is_all_numeric(m):
        as_float = m.values.astype("float")
    elif isinstance(m, Number):
        as_float = np.full((1, 1), m, dtype="float")
    else:
        as_float = np.array(m, dtype="float")
    return boom.Matrix(as_float)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: linalg_test.py Projeto: steve-the-bayesian/BOOM

 def test_implicit_conversion(self):
     """
     Adding a numpy array to a boom.Matrix should produce a boom.Matrix
     holding the same values numpy addition produces.
     """
     numpy_lhs = np.random.randn(3, 4)
     boom_lhs = boom.Matrix(numpy_lhs)
     rhs = np.random.randn(3, 4)

     numpy_sum = numpy_lhs + rhs
     boom_sum = boom_lhs + rhs
     self.assertTrue(isinstance(boom_sum, boom.Matrix))

     # Numpy addition and boom addition must agree element by element.
     # This also checks fortran vs C ordering.
     self.assertLess((boom_sum - numpy_sum).max_abs(), 1e-15)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: linalg_test.py Projeto: steve-the-bayesian/BOOM

    def test_matrix(self):
        """
        Check boom.Matrix multiplication against numpy, and check element
        assignment and lookup.
        """
        boom_matrix = boom.Matrix(np.array([[1.0, 2], [3, 4.0]]))
        numpy_matrix = boom_matrix.to_numpy()

        # boom's '*' is expected to agree with numpy's matrix product '@'.
        boom_product = boom_matrix * boom_matrix
        numpy_product = numpy_matrix @ numpy_matrix
        self.assertTrue(
            np.array_equal(boom_product.to_numpy(), numpy_product))

        # Assigning one element must change that element and leave the
        # others alone.
        boom_matrix[0, 1] = 8.3
        self.assertEqual(boom_matrix[0, 1], 8.3)
        self.assertEqual(boom_matrix[0, 0], 1.0)

Exemplo n.º 13

0

Exibir arquivo

    def format_prediction_data(self, prediction_data, **kwargs):
        """
        Package prediction inputs, including the exposure, in a dict.

        Args:
          prediction_data: Either an int giving the forecast horizon, or a
            data object that patsy.dmatrix can evaluate self._formula against.
          **kwargs: May contain "exposure", either a single number applied to
            every forecast period or an array-like.  The default is 1.

        Returns:
          A dict with keys "forecast_horizon", "predictors" (a boom.Matrix),
          and "exposure" (a boom.Vector).
        """
        if isinstance(prediction_data, int):
            # With only a horizon supplied, the predictor matrix is a single
            # column of ones.
            horizon = prediction_data
            predictors = boom.Matrix(np.ones((int(prediction_data), 1)))
        else:
            horizon = prediction_data.shape[0]
            predictors = boom.Matrix(
                patsy.dmatrix(self._formula, data=prediction_data))
        formatted = {"forecast_horizon": horizon, "predictors": predictors}

        exposure = kwargs.get("exposure", 1)
        exposure = (np.full(formatted["forecast_horizon"], exposure)
                    if isinstance(exposure, Number)
                    else np.array(exposure))
        formatted["exposure"] = boom.Vector(exposure)

        return formatted

Exemplo n.º 14

0

Exibir arquivo

    def _set_regression_prior(self):
        """
        Install a regression prior on the model: a zero (xdim, ydim)
        coefficient matrix with weight 1.0, and an identity matrix of
        dimension ydim with weight ydim + 1.
        """
        prior_mean = np.zeros((self.xdim, self.ydim))
        prior_mean[:, 0] = 0.0    # TODO replace 0.0 with ybar

        mean_weight = 1.0
        variance_guess = np.eye(self.ydim)
        variance_weight = float(self.ydim + 1)

        # Everything is cast to floating point before it is handed to boom.
        boom_mean = boom.Matrix(prior_mean.astype("float"))
        boom_variance = boom.SpdMatrix(variance_guess.astype("float"))
        self._model.set_regression_prior(
            boom_mean, float(mean_weight),
            boom_variance, float(variance_weight))

Exemplo n.º 15

0

Exibir arquivo

Arquivo: logit.py Projeto: steve-the-bayesian/BOOM

    def format_prediction_data(self, prediction_data, **kwargs):
        """
        Package prediction inputs, including the number of trials, in a dict.

        Args:
          prediction_data: Either an int giving the forecast horizon, or a
            data object that patsy.dmatrix can evaluate self._formula against.
          **kwargs: May contain "trials", either a single number applied to
            every forecast period or an array-like.  The default is 1.

        Returns:
          A dict with keys "forecast_horizon", "predictors" (a boom.Matrix),
          and "trials" (a boom.Vector).  When prediction_data is not an int
          the dict also contains "xnames", the term names from the patsy
          design info.
        """
        formatted = {}
        if isinstance(prediction_data, int):
            # With only a horizon supplied, the predictor matrix is a single
            # column of ones.
            formatted["forecast_horizon"] = prediction_data
            formatted["predictors"] = boom.Matrix(
                np.ones((int(prediction_data), 1)))
        else:
            predictor_matrix = patsy.dmatrix(
                self._formula, data=prediction_data)
            xnames = predictor_matrix.design_info.term_names
            formatted["forecast_horizon"] = prediction_data.shape[0]
            formatted["predictors"] = boom.Matrix(predictor_matrix)
            formatted["xnames"] = xnames

        trials = kwargs.get("trials", 1)
        trials = (np.full(formatted["forecast_horizon"], trials)
                  if isinstance(trials, Number)
                  else np.array(trials))
        formatted["trials"] = boom.Vector(trials)

        return formatted

Exemplo n.º 16

0

Exibir arquivo

    def _set_prior(self):
        """
        Install prior distributions on the BOOM model object.

        The default regression prior and the default prior for the mixing
        weights are always installed.  For each numeric and categorical
        variable, a prior already stored for that variable (e.g. through
        set_atom_prior, set_atom_error_prior, etc) is used; variables with no
        stored prior receive a default, which is also stored.
        """
        self._set_default_regression_prior()
        self._set_default_prior_for_mixing_weights()

        for i, vname in enumerate(self._numeric_colnames):
            # Fill in a default atom prior only if none was specified.
            if vname not in self._atom_prior:
                self._atom_prior[vname] = self._default_atom_prior(
                    self._atoms[vname])
            atom_prior = boom.Vector(self._atom_prior[vname])
            self._model.set_atom_prior(atom_prior, i)

            # Likewise for the atom error prior.
            if vname not in self._atom_error_prior:
                self._atom_error_prior[vname] = self._default_atom_error_prior(
                    len(self._atoms[vname]))
            atom_error_prior = boom.Matrix(self._atom_error_prior[vname])
            self._model.set_atom_error_prior(atom_error_prior, i)

        for i, vname in enumerate(self._categorical_colnames):
            levels = self._levels[vname]

            if vname not in self._level_prior:
                self._level_prior[vname] = self._default_level_prior(levels)
            level_prior = boom.Vector(np.array(self._level_prior[vname]))
            self._model.set_level_prior(level_prior, i)

            if vname not in self._level_observation_prior:
                self._level_observation_prior[vname] = (
                    self._default_level_observation_prior(levels))
            level_observation_prior = boom.Matrix(
                self._level_observation_prior[vname])
            self._model.set_level_observation_prior(
                level_observation_prior, i)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: test_dynreg.py Projeto: steve-the-bayesian/BOOM

    def test_mcmc(self):
        """
        Fit a SparseDynamicRegressionModel to simulated data, then check the
        coefficient draws against the simulated coefficients, check the
        posterior mean of the residual SD, and exercise the plotting methods.
        """
        xdim = 4
        true_residual_sd = .25
        data, coefficients, inclusion = self.simulate_data_from_model(
            time_dimension=200,
            typical_sample_size=5000,
            xdim=xdim,
            residual_sd=true_residual_sd,
            unscaled_innovation_sd=np.array([.01] * xdim),
            p00=np.array([.95] * xdim),
            p11=np.array([.99] * xdim),
        )

        formula = "y ~ " + ss.dot(data, ["y", "timestamp", "(Intercept)"])
        model = dynreg.SparseDynamicRegressionModel(
            formula,
            data=data,
            timestamps="timestamp",
            niter=100,
            residual_precision_prior=R.SdPrior(true_residual_sd, 1),
            seed=8675309)

        model.plot()

        # An empty message from check_stochastic_process means the draws for
        # that coefficient passed the check.
        for i in range(xdim):
            message = boom.check_stochastic_process(
                boom.Matrix(model._beta_draws[:, i, :]),
                boom.Vector(coefficients[i, :]),
                confidence=.95,
                sd_ratio_threshold=10000,  # Turn off the sd_ratio check.
            )
            self.assertEqual("", message)

        # Skip the first 10 draws when averaging the residual SD.
        posterior_mean_residual_sd = np.mean(model._residual_sd_draws[10:])
        lower_bound = true_residual_sd - .02
        upper_bound = true_residual_sd + .02
        self.assertGreater(posterior_mean_residual_sd, lower_bound)
        self.assertLess(posterior_mean_residual_sd, upper_bound)

        sd_fig, sd_ax = plt.subplots(1, 2)
        model.plot_residual_sd(ax=sd_ax[0])
        model.plot_residual_sd(ax=sd_ax[1], type="ts")

        size_fig, size_ax = plt.subplots(1, 1)
        model.plot_size(ax=size_ax)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: dynreg.py Projeto: steve-the-bayesian/BOOM

    def _set_data(self, formula, data, timestamps):
        """
        Partition the DataFrame 'data' into chunks defined by 'timestamps',
        pass each chunk through the 'formula', and convert the output to the
        expected BayesBoom objects.

        Args:
          formula: A string defining a formula as interpreted by the 'patsy'
            library.
          data: A data frame containing the variables in 'formula', and
            maybe other variables as well.  Extraneous variables will be
            ignored.
          timestamps: A vector-like object containing objects that can be
            ordered.  E.g. a vector of dates, or integers.  Each element of
            'timestamps' corresponds to a row of 'data', and determines
            the time point to which that row belongs.  It must support
            elementwise comparison with '==' so that it can be used to select
            rows of 'data'.

        Effects:
          * Data are added to the BOOM model.
          * self._response_suf is created and populated with response data.
          * self._predictor_suf is created and each element is populated with
              data from the corresponding predictor variable.
        """

        unique_time_points = sorted(set(timestamps))
        self._response_suf = R.GaussianSuf()
        xdim = self.xdim
        # Build a separate GaussianSuf for each predictor.  Writing
        # [R.GaussianSuf()] * xdim would put the SAME object in every slot, so
        # an in-place '+=' on one element would update all of them.
        self._predictor_suf = [R.GaussianSuf() for _ in range(xdim)]
        for time_stamp in unique_time_points:
            # Select the rows of 'data' belonging to this time point.
            subset = timestamps == time_stamp
            # eval_env=1 makes patsy resolve names in the formula in the
            # caller's frame, so this call must stay directly in this method.
            response, predictors = patsy.dmatrices(formula,
                                                   data.loc[subset, :],
                                                   eval_env=1)
            data_point = boom.RegressionDataTimePoint(boom.Matrix(predictors),
                                                      boom.Vector(response))
            self._response_suf += response
            for i in range(xdim):
                self._predictor_suf[i] += predictors[:, i]

            self._model.add_data(data_point)

Exemplo n.º 19

0

Exibir arquivo

    def __init__(self,
                 x,
                 y=None,
                 expected_r2=.5,
                 prior_df=.01,
                 expected_model_size=1,
                 prior_information_weight=.01,
                 diagonal_shrinkage=.5,
                 optional_coefficient_estimate=None,
                 max_flips=-1,
                 mean_y=None,
                 sdy=None,
                 prior_inclusion_probabilities=None,
                 sigma_upper_limit=np.inf):
        """
        Computes information that is shared by the different implementation of
        spike and slab priors.  Currently, the only difference between the
        different priors is the prior variance on the regression coefficients.
        When that changes, change this class accordingly, and change all the
        classes that inherit from it.

        Args:
          x: The design matrix for the regression being modeled, as either a
            2-dimensional np.ndarray or a boom.Matrix.  Its number of columns
            is the maximum size of the coefficient vector.

          y: The response variable, as a np.ndarray or a boom.Vector.  Needed
            only when 'mean_y' or 'sdy' is None.

          expected_r2: The R^2 statistic that the model is expected
            to achieve.  Used along with 'sdy' to derive a prior distribution
            for the residual variance.

          prior_df: The number of observations worth of weight to give to the
            guess at the residual variance.

          expected_model_size: The expected number of nonzero coefficients in
            the model.  Used to set prior_inclusion_probabilities to
            expected_model_size / (number of columns of x).  That ratio is
            truncated to lie between 0 and 1.  Ignored when
            prior_inclusion_probabilities is supplied.

          prior_information_weight: Multiplies x.inner() (after dividing by
            the number of rows of x) to form the unscaled prior precision.

          diagonal_shrinkage: A number d.  The unscaled prior precision is
            scaled by (1 - d), and its diagonal is then divided by (1 - d).

          optional_coefficient_estimate: A vector with one element per column
            of x to use as the prior mean of the regression coefficients.
            This can also be None, in which case the prior mean for the first
            coefficient will be set to mean_y, and the prior mean for all
            other coefficients will be 0.

          max_flips: Not referenced in this method.
            NOTE(review): presumably consumed elsewhere in the class
            hierarchy -- confirm.

          mean_y: The mean of the response variable.  Used to create a
            sensible default prior mean for the regression coefficients when
            optional_coefficient_estimate is None.  Computed from 'y' when
            None.

          sdy: Used along with expected_r2 to create a prior guess at the
            residual variance.  Computed from 'y' when None.

          prior_inclusion_probabilities: A vector with one element per column
            of x giving the prior inclusion probability of each coefficient.
            Each element must be between 0 and 1, inclusive.  If left as None
            then a default value will be created with all elements set to
            expected_model_size / (number of columns of x).

          sigma_upper_limit: The largest acceptable value for the residual
            standard deviation.  Not referenced in this method.
            NOTE(review): presumably consumed elsewhere in the class
            hierarchy -- confirm.

        Raises:
          Exception: If x is neither a np.ndarray nor a boom.Matrix, or if y
            is None when it is needed to compute mean_y or sdy.
        """
        if isinstance(x, np.ndarray):
            x = boom.Matrix(x)
        if not isinstance(x, boom.Matrix):
            raise Exception(
                "x should either be a 2-dimensional np.array or a boom.Matrix."
            )

        if mean_y is None:
            if y is None:
                raise Exception("Either 'y' or 'mean_y' must be specified.")
            if isinstance(y, np.ndarray):
                y = boom.Vector(y)
            mean_y = boom.mean(y)
        if optional_coefficient_estimate is None:
            optional_coefficient_estimate = np.zeros(x.ncol)
            optional_coefficient_estimate[0] = mean_y
        self._mean = boom.Vector(optional_coefficient_estimate)

        sample_size = x.nrow
        ods = 1. - diagonal_shrinkage
        scale_factor = prior_information_weight * ods / sample_size
        self._unscaled_prior_precision = x.inner() * scale_factor
        # The in-place division below only has an effect on
        # self._unscaled_prior_precision if diag() returns a view of the
        # matrix rather than a copy -- presumably it does; confirm.
        diag_view = self._unscaled_prior_precision.diag()
        diag_view /= ods

        if prior_inclusion_probabilities is None:
            potential_nvars = x.ncol
            # Truncate the implied inclusion probability to [0, 1].
            prob = min(max(expected_model_size / potential_nvars, 0), 1)
            self._prior_inclusion_probabilities = boom.Vector(
                potential_nvars, prob)
        else:
            self._prior_inclusion_probabilities = boom.Vector(
                prior_inclusion_probabilities)

        if sdy is None:
            # 'y' may still be None (or an unconverted np.ndarray) here if
            # mean_y was supplied by the caller.
            if y is None:
                raise Exception("Either 'y' or 'sdy' must be specified.")
            if isinstance(y, np.ndarray):
                y = boom.Vector(y)
            sdy = boom.sd(y)
        sample_variance = sdy**2
        expected_residual_variance = (1 - expected_r2) * sample_variance
        self._residual_precision_prior = boom.ChisqModel(
            prior_df, np.sqrt(expected_residual_variance))

Exemplo n.º 20

0

Exibir arquivo

    def __init__(self,
                 formula: str,
                 niter: int,
                 data: pd.DataFrame,
                 prior: RegressionSpikeSlabPrior = None,
                 ping: int = None,
                 seed: int = None,
                 **kwargs):
        """Create a model object and run a specified number of MCMC iterations.

        Args:
          formula: A model formula that can be interpreted by the 'patsy'
            module to produce a model matrix from 'data'.
          niter: The desired number of MCMC iterations.
          data: A pd.DataFrame containing the data with which to train the
            model.
          prior: A RegressionSpikeSlabPrior object providing the prior
            distribution over the inclusion indicators, the coefficients, and
            the residual variance parameter.  If None, one is built from the
            model matrix, the response, and **kwargs.
          ping: The frequency (in iterations) with which to print status
            updates.  If ping is None then niter/10 will be assumed.
          seed: The seed for the C++ random number generator, or None.
          **kwargs: Extra arguments are passed to RegressionSpikeSlabPrior
            when 'prior' is None.

        Raises:
          Exception: If niter is not positive.
        """

        # eval_env=1 makes patsy resolve names in the formula in the caller's
        # frame, so this call must stay directly in this method.
        response, predictors = patsy.dmatrices(formula, data, eval_env=1)
        self._x_design_info = predictors.design_info

        niter = int(niter)
        if niter <= 0:
            raise Exception("niter should be a positive integer.")

        # 'ping' is normalized here but not referenced again in this method.
        ping = int(niter / 10 if ping is None else ping)

        if seed is not None:
            boom.GlobalRng.rng.seed(int(seed))

        X = boom.Matrix(predictors)
        y = boom.Vector(response)
        nvars = X.ncol

        self._model = boom.RegressionModel(X, y, False)
        if prior is None:
            prior = RegressionSpikeSlabPrior(x=X, y=y, **kwargs)

        self._model.set_method(
            boom.BregVsSampler(self._model,
                               prior.slab(self._model.Sigsq_prm),
                               prior.residual_precision,
                               prior.spike))

        # Storage for the MCMC draws.  A lil ("linked list") matrix is an
        # efficient format for building a sparse matrix row by row; it is
        # converted to another sparse format once the draws are complete.
        self._coefficient_draws = scipy.sparse.lil_matrix((niter, nvars))
        self._residual_sd = np.zeros(niter)
        self._log_likelihood = np.zeros(niter)

        for draw in range(niter):
            self._model.sample_posterior()
            self._residual_sd[draw] = self._model.sigma
            coefficients = self._model.coef
            self._coefficient_draws[draw, :] = self.sparsify(coefficients)
            self._log_likelihood[draw] = self._model.log_likelihood()

        # Store the coefficient draws in sparse column format.  Predictions
        # against this format should take the form X @ beta, not beta @ X.
        self._coefficient_draws = self._coefficient_draws.tocsc()

        # Fitted values are the predictions averaged over axis 0.
        predictions = self.predict(predictors)
        self._fitted_values = predictions.mean(axis=0)
        self._residuals = y.to_numpy() - self._fitted_values

Exemplo n.º 21

0

Exibir arquivo

 def test_data(self):
     """
     Fit an MvnModel to self.data by maximum likelihood and check that the
     fitted mean is close to self.mu.
     """
     model = boom.MvnModel(self.mu, self.Sigma)
     data = boom.Matrix(self.data)
     model.set_data(data)
     model.mle()
     # Distance between the true and fitted means, measured by the fitted
     # model's siginv.
     distance = model.siginv.Mdist(self.mu, model.mu)
     self.assertLess(distance, .05)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: student.py Projeto: steve-the-bayesian/BOOM

    def create_model(self, prior, data, rng, **kwargs):
        """
        Build the boom model object and attach its posterior samplers.

        Args:
          prior: The prior for the observation model.  Either None or a
            spikeslab.StudentSpikeSlabPrior.
          data: If self._formula is None then data is the time series to be
            modeled.  Either a pd.Series or a np.ndarray.  Otherwise data
            should be a pd.DataFrame containing the variables referenced in
            'formula'.
          rng: The random number generator handed to the observation model
            sampler.
          **kwargs: Remaining arguments are forwarded to self._verify_prior.

        Returns:
          The created model.

        Raises:
          Exception: If both 'data' and 'prior' are None.

        Effects:
          self._model is populated with the created model.
          self._prior is populated with the prior distribution for the
            observation model.
          self._original_series is populated with the response (or None when
            no data were supplied).
        """

        if data is None and prior is None:
            raise Exception("At least one of 'data' or 'prior' is needed.")

        if data is not None:
            if self._formula is None:
                # Pure time series case: the only predictor is a column of
                # ones, and the expected model size is set to zero.
                response = data
                predictors = np.ones((len(response), 1))
                kwargs["expected_model_size"] = 0
            else:
                # Time series regression case.
                response, predictors = patsy.dmatrices(self._formula, data)
                self.predictor_names = predictors.design_info.term_names

            boom_response = boom.Vector(R.to_numpy(response))
            boom_predictors = boom.Matrix(R.to_numpy(predictors))
            # Non-finite responses are flagged as unobserved.
            response_is_observed = np.isfinite(response).ravel()

            self._model = boom.StateSpaceStudentRegressionModel(
                boom_response, boom_predictors, response_is_observed)
        else:
            # No data: size the model from the prior alone.
            xdim = len(prior._prior_inclusion_probabilities)
            self._model = boom.StateSpaceStudentRegressionModel(xdim)
            response = None
            predictors = None

        regression = self._model.observation_model

        prior = self._verify_prior(prior, response, predictors, **kwargs)
        self._prior = prior

        # Sampler for the observation (regression) model.
        observation_model_sampler = boom.TRegressionSpikeSlabSampler(
            regression,
            prior.slab(regression.Sigsq_prm),
            prior.spike,
            prior.residual_precision,
            prior.tail_thickness,
            rng,
        )
        observation_model_sampler.set_sigma_upper_limit(
            prior.sigma_upper_limit)
        # Model selection is only limited when max_flips is positive.
        if prior.max_flips > 0:
            observation_model_sampler.limit_model_selection(prior.max_flips)
        regression.set_method(observation_model_sampler)

        # Sampler for the state space model as a whole.
        self._model.set_method(
            boom.StateSpaceStudentPosteriorSampler(
                self._model, observation_model_sampler))

        self._original_series = response
        return self._model