Пример #1
0
    def boom(self):
        """
        Return the boom holiday object corresponding to this holiday.

        Returns:
          The object stored in self._boom_holiday, if that attribute exists.
          Otherwise a new boom.DateRangeHoliday built from the entries of
          self._start and self._end, each converted with R.to_boom_date.
        """
        if not hasattr(self, "_boom_holiday"):
            # Nothing is stored on the instance, so build the object from the
            # date ranges.  The newly built object is not stored here.
            first_days = list(map(R.to_boom_date, self._start))
            last_days = list(map(R.to_boom_date, self._end))
            return boom.DateRangeHoliday(first_days, last_days)
        return self._boom_holiday
Пример #2
0
    def plot_state(self,
                   burn=None,
                   time=None,
                   show_actuals=True,
                   style=None,
                   scale=None,
                   ylim=None,
                   ax=None,
                   **kwargs):
        """
        Plot the summed contribution of all state models over time.

        Args:
          burn: The number of MCMC iterations to discard as burn-in.  If None
            then self.suggest_burn() is called.
          time: The timestamps to plot against.  If None then the index of
            self.original_series is used.
          show_actuals: Accepted but not used by this method.
          style: Either "dynamic" or "boxplot" (matched with R.unique_match).
            None means "dynamic".
          scale: Either "linear" or "mean" (matched with R.unique_match).
            None means "linear".
          ylim: Passed to R.plot_dynamic_distribution.
          ax: Passed to R.plot_dynamic_distribution.
          kwargs: Extra arguments passed to R.plot_dynamic_distribution.

        NOTE(review): 'burn', 'style' and 'scale' are resolved and validated
        below but do not otherwise affect the plot -- confirm whether that is
        intentional.
        """
        style = R.unique_match("dynamic" if style is None else style,
                               ["dynamic", "boxplot"])
        scale = R.unique_match("linear" if scale is None else scale,
                               ["linear", "mean"])

        niter = self._niter
        if burn is None:
            burn = self.suggest_burn()

        timestamps = self.original_series.index if time is None else time

        # Accumulate the contribution of each state model, draw by draw.
        total = np.zeros((niter, len(timestamps)))
        for state_model in self._state_models:
            total += state_model.state_contribution

        R.plot_dynamic_distribution(curves=total,
                                    timestamps=timestamps,
                                    ax=ax,
                                    ylim=ylim,
                                    **kwargs)
Пример #3
0
    def test_numerics(self):
        """
        R.is_all_numeric should accept a frame of random normals and reject
        the same frame once a string column has been added.
        """
        all_numbers = pd.DataFrame(np.random.randn(10, 3))
        self.assertTrue(R.is_all_numeric(all_numbers))

        # assign() returns a new frame, leaving 'all_numbers' untouched.
        with_text = all_numbers.assign(text="foo")
        self.assertFalse(R.is_all_numeric(with_text))
Пример #4
0
    def _set_posterior_sampler(self, y, level_sigma_prior, slope_sigma_prior,
                               sdy):
        """
        Assign a posterior sampler to self._state_model.  A utility called by
        the constructor; see the __init__ method for argument documentation.

        A prior passed as None is replaced by a default R.SdPrior scaled by
        the value returned from self._compute_sdy.

        Raises:
          Exception: If either prior is not an R.SdPrior.
        """
        level_prior = level_sigma_prior
        if level_prior is None:
            sdy = self._compute_sdy(sdy, y, "level_sigma_prior")
            level_prior = R.SdPrior(sigma_guess=.01 * sdy, upper_limit=sdy)
        if not isinstance(level_prior, R.SdPrior):
            raise Exception("Unexpected type for level_sigma_prior.")

        slope_prior = slope_sigma_prior
        if slope_prior is None:
            sdy = self._compute_sdy(sdy, y, "slope_sigma_prior")
            slope_prior = R.SdPrior(sigma_guess=0.1 * sdy, upper_limit=sdy)
        if not isinstance(slope_prior, R.SdPrior):
            raise Exception("Unexpected type for slope_sigma_prior.")

        # Gather the sampler arguments in the order the sampler expects them.
        level_model = level_prior.create_chisq_model()
        level_limit = level_prior.upper_limit
        slope_model = slope_prior.create_chisq_model()
        slope_limit = slope_prior.upper_limit
        self._state_model.set_posterior_sampler(
            level_model, level_limit, slope_model, slope_limit,
            boom.GlobalRng.rng)
    def _validate_priors(self, level_sigma_prior, level_nu_prior,
                         slope_sigma_prior, slope_nu_prior,
                         y, sdy):
        """
        Fill in defaults for missing priors, type check all four priors, and
        store them on the object.

        Args:
          level_sigma_prior: An R.SdPrior, or None for a default based on the
            value returned by self._compute_sdy.
          level_nu_prior: An R.DoubleModel, or None for R.UniformPrior(0.1,
            100).
          slope_sigma_prior: An R.SdPrior, or None for a default based on the
            value returned by self._compute_sdy.
          slope_nu_prior: An R.DoubleModel, or None for R.UniformPrior(0.1,
            100).
          y: Passed to self._compute_sdy when a default is needed.
          sdy: Passed to self._compute_sdy when a default is needed.

        Effects:
          Sets self._level_sigma_prior, self._slope_sigma_prior,
          self._level_nu_prior, and self._slope_nu_prior.

        Raises:
          Exception: If any prior has an unexpected type.
        """
        level_sigma = level_sigma_prior
        if level_sigma is None:
            sdy = self._compute_sdy(sdy, y, "level_sigma_prior")
            level_sigma = R.SdPrior(sigma_guess=.01 * sdy, upper_limit=sdy)
        if not isinstance(level_sigma, R.SdPrior):
            raise Exception("Unexpected type for level_sigma_prior.")

        slope_sigma = slope_sigma_prior
        if slope_sigma is None:
            sdy = self._compute_sdy(sdy, y, "slope_sigma_prior")
            slope_sigma = R.SdPrior(sigma_guess=.01 * sdy, upper_limit=sdy)
        if not isinstance(slope_sigma, R.SdPrior):
            raise Exception("Unexpected type for slope_sigma_prior.")

        level_nu = (R.UniformPrior(0.1, 100)
                    if level_nu_prior is None else level_nu_prior)
        if not isinstance(level_nu, R.DoubleModel):
            raise Exception("Unexpected type for level_nu_prior.")

        slope_nu = (R.UniformPrior(0.1, 100)
                    if slope_nu_prior is None else slope_nu_prior)
        if not isinstance(slope_nu, R.DoubleModel):
            raise Exception("Unexpected type for slope_nu_prior.")

        # Nothing is stored until every prior has passed validation.
        self._level_sigma_prior = level_sigma
        self._slope_sigma_prior = slope_sigma
        self._level_nu_prior = level_nu
        self._slope_nu_prior = slope_nu
    def _verify_initial_state_prior(self, initial_state_prior, xtx, xty, sdy):
        """
        Validate the prior on the initial state, convert it to an R.MvnPrior,
        and store it in self._initial_state_prior.

        Args:
          initial_state_prior: One of the following.
            * None: A default prior is built.  Its mean is the solution of
              the linear system (xtx, xty) and its variance is sdy**2 times
              the inverse of xtx.  If that computation fails, the fallback is
              a zero mean with variance sdy**2 / diag(xtx).
            * An R.NormalPrior: The same mean and variance are used for each
              of the self.xdim elements, which are independent.
            * A list of R.NormalPrior objects: One independent prior per
              element.
            * An R.MvnPrior: Used as is.
          xtx: A square matrix.  Only used when initial_state_prior is None.
          xty: The right hand side of the linear system solved against xtx.
            Only used when initial_state_prior is None.
          sdy: A scalar whose square scales the default prior variance.

        Returns:
          The R.MvnPrior stored in self._initial_state_prior.

        Raises:
          Exception: If initial_state_prior is not one of the accepted types.
        """
        if initial_state_prior is None:
            try:
                beta_hat = np.linalg.solve(xtx, xty)
                if not np.all(np.isfinite(beta_hat)):
                    raise Exception("Least squares initializer failed.")

                self._initial_state_prior = R.MvnPrior(
                    beta_hat, sdy * sdy * np.linalg.inv(xtx))
            except Exception:
                # Deliberately broad: any failure of the least squares
                # initializer (singular xtx, non-finite solution, ...) falls
                # back to a zero-mean prior with a diagonal variance.
                self._initial_state_prior = R.MvnPrior(
                    np.zeros(self.xdim),
                    sdy * sdy * np.diag(1.0 / np.diagonal(xtx)))

        elif isinstance(initial_state_prior, R.NormalPrior):
            # np.full takes (shape, fill_value): replicate the scalar prior
            # across all xdim coordinates.
            mean = np.full(self.xdim, initial_state_prior.mean)
            var = np.full(self.xdim, initial_state_prior.sd**2)
            self._initial_state_prior = R.MvnPrior(mean, np.diag(var))

        elif isinstance(initial_state_prior, list) and all(
                isinstance(x, R.NormalPrior) for x in initial_state_prior):
            mean = np.array([x.mean for x in initial_state_prior])
            var = np.array([x.sd**2 for x in initial_state_prior])
            self._initial_state_prior = R.MvnPrior(mean, np.diag(var))

        else:
            if not isinstance(initial_state_prior, R.MvnPrior):
                raise Exception("Unrecognized type for initial_state_prior.")
            self._initial_state_prior = initial_state_prior

        return self._initial_state_prior
Пример #7
0
    def plot_single_coefficient(self,
                                beta,
                                ylim=None,
                                ax=None,
                                highlight_median="green"):
        """
        Plot the dynamic distribution of a single model coefficient.

        Args:
          beta: The coefficient to be plotted.  A matrix.  Rows are Monte Carlo
            draws, and columns are time points.
          ylim: A pair of numbers giving the lower and upper limits of the Y
            axis.  If 'None' then 'ylim' will be inferred from the range of the
            data.
          ax: A plt.Axes object on which to draw.  If None then a new
            plt.Figure and Axes will be created and drawn on function exit.
          highlight_median: The name of a color used to draw the median of the
            curves at each time point.  The empty string signals not to add the
            extra highlighting.

        Returns:
          The axes object containing the plot.
        """
        # 'own_figure' stays None when the caller supplied the axes, in which
        # case showing the figure is left to the caller.
        own_figure = None
        if ax is None:
            own_figure, ax = plt.subplots(1, 1)

        R.plot_dynamic_distribution(
            beta,
            timestamps=self._unique_timestamps,
            ax=ax,
            ylim=ylim,
            highlight_median=highlight_median,
        )

        if own_figure is not None:
            own_figure.show()
        return ax
Пример #8
0
    def create_base_dataset(self, sample_size, num_numeric, num_cat,
                            num_levels):
        """
        Simulate a data set with numeric and categorical columns and store it
        on the object.

        Args:
          sample_size: The number of rows to simulate.
          num_numeric: The number of numeric columns ("X1", "X2", ...).
          num_cat: The number of categorical columns ("cat1", "cat2", ...).
          num_levels: The number of levels for each categorical column.

        Effects:
          Sets self._beta (coefficients), self._Sigma (residual variance),
          self._data (the simulated DataFrame), self._ydim, self._xdim, and
          self._ncat.
        """
        xdim = 1 + num_cat * (num_levels - 1)
        ydim = num_numeric

        cats = {}
        levels = {}
        encoders = []
        for position in range(num_cat):
            # The levels this particular variable can assume.
            variable_levels = random_words(num_levels)
            vname = f"cat{position + 1}"
            cats[vname] = np.random.choice(variable_levels, sample_size)
            levels[vname] = variable_levels
            encoders.append(boom.EffectsEncoder(position, variable_levels))

        self._beta = np.random.randn(xdim, ydim)

        # Residual variance = diag(sd) * correlation * diag(sd).
        correlation = boom.random_correlation_matrix(ydim).to_numpy()
        sd_matrix = np.diag(R.rgamma(ydim, 1, 1))
        self._Sigma = sd_matrix @ correlation @ sd_matrix
        cholesky_root = np.linalg.cholesky(self._Sigma)

        noise = (cholesky_root @ np.random.randn(ydim, sample_size)).T
        dataset_encoder = boom.DatasetEncoder(encoders)
        categorical_table = R.to_data_table(pd.DataFrame(cats))
        design = dataset_encoder.encode_dataset(categorical_table)
        fitted = design.to_numpy() @ self._beta

        numeric_names = [f"X{j + 1}" for j in range(ydim)]
        self._data = pd.DataFrame(fitted + noise, columns=numeric_names)
        for vname in cats:
            self._data[vname] = cats[vname]

        self._ydim = ydim
        self._xdim = xdim
        self._ncat = num_cat
Пример #9
0
    def test_data_table(self):
        """
        Round trip self._data through R.to_data_table and R.to_data_frame,
        checking the dimensions and the contents of the first five columns.
        """
        converted = R.to_data_table(self._data)
        expected_rows, expected_cols = self._data.shape
        self.assertEqual(converted.nrow, expected_rows)
        self.assertEqual(converted.ncol, expected_cols)

        restored = R.to_data_frame(converted)
        for col in range(5):
            original_column = self._data.iloc[:, col]
            restored_column = restored.iloc[:, col]
            self.assertTrue(np.all(original_column == restored_column))
Пример #10
0
    def test_conversions(self):
        """
        R.to_boom_vector should produce a boom.Vector from both a plain list
        and an integer valued pandas Series.
        """
        values = [1, 2, 3]
        from_list = R.to_boom_vector(values)
        self.assertIsInstance(from_list, boom.Vector)

        series = pd.Series(values, dtype="int")
        from_series = R.to_boom_vector(series)
        self.assertIsInstance(from_series, boom.Vector)
Пример #11
0
    def plot_residual_sd(self,
                         burn: int = None,
                         type: str = "density",
                         ax=None,
                         **kwargs):
        """
        Plot the Monte Carlo draws of the residual standard deviation.

        Args:
          burn: The number of MCMC iterations to discard as burn-in.  "None"
            indicates that an estimated default number should be used.
            Negative values are treated as zero.
          type: The type of plot.  "density" shows a kernel density estimate of
            the residual SD draws.  "ts" shows a time series plot of the draws.
          ax: A plt.Axes object on which to draw the plot.  If None new Figure
            and Axes objects are created and drawn on function exit.
          kwargs:  Further keyword arguments are ignored.

        Effects:
          A plot is added to the relevant Axes object.

        Returns:
          The Axes object on which the plot is drawn.
        """
        type = R.unique_match(type, ["density", "ts"])

        if burn is None:
            burn = self.suggest_burn()
        if burn < 0:
            burn = 0
        retained_draws = self._residual_sd_draws[burn:]

        # 'fig' remains None when the caller owns the axes.
        fig = None
        if ax is None:
            fig, ax = plt.subplots(1, 1)

        if type == "ts":
            # Label each retained draw with its original iteration number.
            iteration = np.arange(len(self._residual_sd_draws))[burn:]
            ax.plot(iteration, retained_draws)
            ax.set_xlabel("Iteration")
            ax.set_ylabel("Residual SD")
        elif type == "density":
            R.Density(retained_draws).plot(
                ax=ax, xlab="Residual SD", ylab="Density")

        if fig is not None:
            fig.show()
        return ax
    def _verify_prior(self, sigma_prior, sdy, sdx):
        """
        Validate the prior on the innovation standard deviations and store it
        in self._sigma_prior as a collection with one R.SdPrior per element
        of sdx.

        Args:
          sigma_prior: One of the following.
            * None: A default R.SdPrior(.01 * sdy / sdx[i], 1) is created for
              each element of sdx.
            * An R.SdPrior: The same prior object is repeated len(sdx) times.
            * A list-like of R.SdPrior objects: Used as is.
          sdy: A scalar used to scale the default priors.
          sdx: An iterable of scalars, one per prior, used to scale the
            default priors.

        Returns:
          The validated collection stored in self._sigma_prior.

        Raises:
          Exception: If the resulting prior is not a list-like of R.SdPrior
            objects.
        """
        if sigma_prior is None:
            candidate = [R.SdPrior(.01 * sdy / sdxi, 1) for sdxi in sdx]
        elif isinstance(sigma_prior, R.SdPrior):
            candidate = [sigma_prior] * len(sdx)
        else:
            # A user supplied collection: validate it below rather than
            # relying on self._sigma_prior having been set elsewhere.
            candidate = sigma_prior

        # The prior is valid only if it is iterable AND every element is an
        # R.SdPrior.  The iterability check comes first so that all() is
        # never asked to iterate over a non-iterable object.
        is_valid = R.is_iterable(candidate) and all(
            isinstance(x, R.SdPrior) for x in candidate)
        if not is_valid:
            raise Exception(
                "sigma_prior must be a list-like of R.SdPrior objects.")

        self._sigma_prior = candidate
        return self._sigma_prior
Пример #13
0
    def test_paste(self):
        """
        Exercise R.paste with scalar/list arguments, a custom separator, a
        pandas Series argument, and the 'collapse' option.
        """
        default_sep = R.paste("X", [1, 2, 3])
        self.assertEqual(default_sep, ["X 1", "X 2", "X 3"])

        empty_sep = R.paste("X", [1, 2, 3], sep="")
        self.assertEqual(empty_sep, ["X1", "X2", "X3"])

        two_lists = R.paste([1, 2, "X"], [4, 5, 6])
        self.assertEqual(two_lists, ["1 4", "2 5", "X 6"])

        with_series = R.paste("X", pd.Series([1, 2, 3]), sep="")
        self.assertEqual(with_series, ["X1", "X2", "X3"])

        collapsed = R.paste("X", [1, 2, 3], sep="", collapse=" ")
        self.assertEqual(collapsed, "X1 X2 X3")
Пример #14
0
    def create_model(self, prior: R.SdPrior, data: pd.Series):
        """
        Build the boom state space model, and attach posterior samplers to
        both its observation model and the model itself.

        Args:
          prior: an R.SdPrior object describing the prior distribution on the
            residual variance parameter.  If None, a default prior is built
            from np.std(data).
          data:  The time series of observations as a Pandas Series.

        Effects:
          Sets self._model and self._original_series.

        Returns:
          A boom.StateSpaceModel object.
        """
        observations = boom.Vector(data.values)
        observed = ~data.isna()
        self._model = boom.StateSpaceModel(observations, observed)

        if prior is None:
            sdy = np.std(data)
            prior = R.SdPrior(sigma_guess=sdy, upper_limit=sdy * 1.2)

        # Sampler for the observation model, capped at the prior's upper
        # limit.
        chisq_prior = boom.ChisqModel(prior.sample_size, prior.sigma_guess)
        obs_sampler = boom.ZeroMeanGaussianConjSampler(
            self._model.observation_model, chisq_prior)
        obs_sampler.set_sigma_upper_limit(prior.upper_limit)
        self._model.observation_model.set_method(obs_sampler)

        # Sampler for the state space model as a whole.
        model_sampler = boom.StateSpacePosteriorSampler(
            self._model, boom.GlobalRng.rng)
        self._model.set_method(model_sampler)

        self._original_series = data

        return self._model
Пример #15
0
 def _validate_slope_mean_prior(slope_mean_prior, sdy):
     """
     Return a validated prior for the slope mean.

     Args:
       slope_mean_prior: An R.NormalPrior, or None to request the default
         R.NormalPrior(0, sdy).
       sdy: The second argument of the default R.NormalPrior.

     Returns:
       The supplied prior, or the default if None was supplied.

     Raises:
       Exception: If the prior is not an R.NormalPrior.
     """
     prior = slope_mean_prior
     if prior is None:
         prior = R.NormalPrior(0, sdy)
     if isinstance(prior, R.NormalPrior):
         return prior
     raise Exception("Wrong type passed for slope_mean_prior.  "
                     "Expected an R.NormalPrior")
Пример #16
0
 def _validate_slope_ar1_prior(slope_ar1_prior, sdy):
     """
     Return a validated prior for the slope AR1 coefficient.

     Args:
       slope_ar1_prior: An R.Ar1CoefficientPrior, or None to request the
         default R.Ar1CoefficientPrior().
       sdy: Not used by this function.

     Returns:
       The supplied prior, or the default if None was supplied.

     Raises:
       Exception: If the prior is not an R.Ar1CoefficientPrior.
     """
     prior = slope_ar1_prior
     if prior is None:
         prior = R.Ar1CoefficientPrior()
     if isinstance(prior, R.Ar1CoefficientPrior):
         return prior
     raise Exception("Wrong type passed for slope_ar1_prior.  "
                     "Expected an R.Ar1CoefficientPrior")
Пример #17
0
    def test_draw_inclusion_indicators(self):
        """
        Check that the model draws the inclusion indicators conditional on all
        other unknowns fixed at their true values.  The regression coefficients
        are integrated out and not conditioned on.

        The test passes if the posterior mean of the indicators (after
        dropping the first 100 draws) has correlation above .6 with the true
        indicators.
        """
        # Large innovation SD's make for big coefficients and obvious effects.
        innovation_sd = np.array([10, 20, 30])
        data, coefficients, inclusion = self.simulate_data_from_model(
            time_dimension=100,
            typical_sample_size=500,
            xdim=self._xdim,
            residual_sd=self._residual_sd,
            unscaled_innovation_sd=innovation_sd,
            p00=self._p00,
            p11=self._p11)

        model, sampler = self.setup_model(
            data, coefficients, inclusion, self._residual_sd, innovation_sd,
            self._p00, self._p11)

        niter = 1000
        # Integer storage pre-filled with -1.
        draws = np.full((niter, model.xdim, model.time_dimension), -1)
        for iteration in range(niter):
            sampler.draw_inclusion_indicators()
            draws[iteration, :, :] = model.inclusion_indicators.to_numpy()

        retained = draws[100:, :]
        estimated = np.mean(retained, axis=0).flatten()
        truth = inclusion.flatten()

        self.assertGreater(R.corr(truth, estimated), .6)
Пример #18
0
 def test_encode_dataset(self):
     """
     An R.EffectEncoder on a three level "Color" column should encode the
     first two levels as indicator rows and the last level as a row of -1's.
     """
     colors = ["Red", "Blue", "Green"]
     frame = pd.DataFrame(np.random.randn(3, 2), columns=["X1", "X2"])
     frame["Color"] = colors

     effect_encoder = R.EffectEncoder("Color", list(colors))
     encoded = effect_encoder.encode_dataset(frame)

     expected = np.array([
         [1.0, 0.0],
         [0.0, 1.0],
         [-1.0, -1.0],
     ])
     self.assertTrue(np.allclose(encoded, expected))
Пример #19
0
    def _validate_coefficient_innovation_priors(self):
        """
        Ensure that self._coefficient_innovation_priors are a list of SdPriors.

        A list of R.SdPrior objects is left alone.  A single R.SdPrior is
        repeated self.xdim times.  None is replaced by one default prior per
        predictor, scaled by the sample standard deviations of the response
        and of that predictor.

        Raises:
          Exception: If the priors are anything else.
        """
        priors = self._coefficient_innovation_priors

        already_valid = isinstance(priors, list) and all(
            isinstance(entry, R.SdPrior) for entry in priors)
        if already_valid:
            return

        if isinstance(priors, R.SdPrior):
            self._coefficient_innovation_priors = [priors] * self.xdim
            return

        if priors is not None:
            raise Exception("coefficient_innovation_priors must either be an "
                            "R.SdPrior or a list of such priors.")

        # No prior was supplied, so build a default for each predictor.
        response_sd = self._response_suf.sample_sd
        defaults = []
        for j in range(self.xdim):
            predictor_sd = self._predictor_suf[j].sample_sd
            defaults.append(R.SdPrior(.01 * response_sd / predictor_sd, 1))
        self._coefficient_innovation_priors = defaults
Пример #20
0
 def _validate_slope_sigma_prior(slope_sigma_prior, sdy):
     """
     Return a validated prior for the slope innovation standard deviation.

     Args:
       slope_sigma_prior: An R.SdPrior, or None to request the default
         R.SdPrior(.01 * sdy, upper_limit=sdy).
       sdy: The scale used to build the default prior.

     Returns:
       The supplied prior, or the default if None was supplied.

     Raises:
       Exception: If the prior is not an R.SdPrior.
     """
     prior = slope_sigma_prior
     if prior is None:
         prior = R.SdPrior(.01 * sdy, upper_limit=sdy)
     if isinstance(prior, R.SdPrior):
         return prior
     raise Exception("Wrong type passed for slope_sigma_prior.  "
                     "Expected an R.SdPrior")
Пример #21
0
    def test_draw_coefficients(self):
        """
        Check that coefficients drawn given the true inclusion indicators
        track the true coefficients.

        The test passes if the Monte Carlo mean of the coefficient draws has
        correlation above .9 with the simulated coefficients.
        """
        # Make the coefficients big, so that effects are obvious.
        unscaled_innovation_sd = np.array([10, 20, 30])
        data, coefficients, inclusion = self.simulate_data_from_model(
            time_dimension=100,
            typical_sample_size=500,
            xdim=self._xdim,
            residual_sd=self._residual_sd,
            unscaled_innovation_sd=unscaled_innovation_sd,
            p00=self._p00,
            p11=self._p11)

        model, _ = self.setup_model(data, coefficients, inclusion,
                                    self._residual_sd, unscaled_innovation_sd,
                                    self._p00, self._p11)

        niter = 1000
        # np.nan is the supported spelling; the np.NaN alias was removed in
        # NumPy 2.0.  Pre-filling with NaN means an unfilled slot would
        # propagate into the mean below.
        draws = np.full((niter, model.xdim, model.time_dimension), np.nan)

        for i in range(niter):
            model.draw_coefficients_given_inclusion(boom.GlobalRng.rng)
            draws[i, :, :] = model.all_coefficients.to_numpy()

        posterior_mean = np.mean(draws, axis=0)
        mean_vector = posterior_mean.flatten()
        beta_vector = coefficients.flatten()
        cor = R.corr(mean_vector, beta_vector)
        self.assertGreater(cor, .9)
Пример #22
0
 def _default_initial_state_prior(self, sdy):
     """
     The default prior to use for the initial state vector.

     Args:
       sdy: A scalar placed on the diagonal of the prior's second argument.

     Returns:
       An R.MvnPrior of dimension self.nseasons - 1, built from a vector of
       zeros and a diagonal matrix.
     """
     # NOTE(review): the diagonal entries are sdy, not sdy**2 -- confirm
     # which scale R.MvnPrior expects here.
     state_dim = self.nseasons - 1
     zero_mean = np.zeros(state_dim)
     diagonal = np.full(state_dim, float(sdy))
     return R.MvnPrior(zero_mean, np.diag(diagonal))
Пример #23
0
    def plot_inclusion(self,
                       burn=None,
                       inclusion_threshold=0,
                       unit_scale=True,
                       number_of_variables=None,
                       ax=None,
                       **kwargs):
        """A barplot showing the marginal inclusion probability of each variable.

        Args:
          burn: Passed to self.inclusion_probs and
            self.coefficient_positive_probability.
          inclusion_threshold: When 'number_of_variables' is None, the number
            of variables plotted is the number with inclusion probability at
            least this large.
          unit_scale: Accepted but not used by this method.
          number_of_variables: The number of variables to plot, taken in
            decreasing order of inclusion probability.
          ax: Passed to R.barplot.
          kwargs: Extra arguments passed to R.barplot.

        Returns:
          The value returned by R.barplot.
        """
        inc = self.inclusion_probs(burn=burn)
        pos = self.coefficient_positive_probability(burn=burn)
        # Each bar's color is the string form of the corresponding 'pos'
        # entry.
        colors = np.array(list(map(str, pos)))
        descending = np.argsort(inc.values)[::-1]

        if number_of_variables is None:
            number_of_variables = np.sum(inc >= inclusion_threshold)

        keep = descending[:number_of_variables]
        inc = inc[keep]
        pos = pos[keep]
        colors = colors[keep]

        return R.barplot(inc,
                         ax=ax,
                         color=colors[::-1],
                         linewidth=.25,
                         edgecolor="black",
                         xlab="Marginal Inclusion Probability",
                         ylab="Variable",
                         **kwargs)
Пример #24
0
    def plot(self, what=None, **kwargs):
        """Plot an aspect of the model.

        Args:
          what: The type of plot desired.  Acceptable choices are
            "inclusion", "coefficients", "residual", and "predicted".  None
            means "inclusion".  The value is matched against the choices with
            R.unique_match.

          kwargs: Extra arguments are passed to the specific plot function
            being called.

        Returns:
          The value returned by the specific plot function.

        Raises:
          Exception: If the matched plot type is not one of the choices.
        """
        # Maps each plot type to the name of the method which draws it.  The
        # method is looked up by name so only the requested one is touched.
        method_names = {
            "inclusion": "plot_inclusion",
            "coefficients": "plot_coefficients",
            "residual": "plot_residual",
            "predicted": "plot_predicted",
        }
        plot_types = ["inclusion", "coefficients", "residual", "predicted"]
        if what is None:
            what = plot_types[0]
        what = R.unique_match(what, plot_types)

        method_name = method_names.get(what)
        if method_name is None:
            raise Exception(f"Unknown plot type {what}.")
        return getattr(self, method_name)(**kwargs)
Пример #25
0
 def _validate_initial_level_prior(initial_level_prior, initial_y, sdy):
     """
     Return a validated prior for the initial level.

     Args:
       initial_level_prior: An R.NormalPrior, or None to request the default
         R.NormalPrior(initial_y, sdy).
       initial_y: The first argument of the default R.NormalPrior.
       sdy: The second argument of the default R.NormalPrior.

     Returns:
       The supplied prior, or the default if None was supplied.

     Raises:
       Exception: If the prior is not an R.NormalPrior.
     """
     prior = initial_level_prior
     if prior is None:
         prior = R.NormalPrior(initial_y, sdy)
     if isinstance(prior, R.NormalPrior):
         return prior
     raise Exception("Wrong type for initial_level_prior.  "
                     "Expected an R.NormalPrior.")
Пример #26
0
def plot_inclusion_probs(coefficients,
                         burn,
                         xnames,
                         inclusion_threshold=0,
                         unit_scale=True,
                         number_of_variables=None,
                         ax=None,
                         **kwargs):
    """
    A barplot of marginal inclusion probabilities.

    Args:
      coefficients: A two-dimensional array of coefficient draws.  Rows from
        'burn' onward are kept.
      burn: The index of the first row of 'coefficients' to keep.
      xnames: Accepted but not used by this function.
      inclusion_threshold: When 'number_of_variables' is None, the number of
        variables plotted is the number with inclusion probability at least
        this large.
      unit_scale: Accepted but not used by this function.
      number_of_variables: The number of variables to plot, taken in
        decreasing order of inclusion probability.
      ax: Passed to R.barplot.
      kwargs: Extra arguments passed to R.barplot.

    Returns:
      The value returned by R.barplot.
    """
    retained = coefficients[burn:, :]
    inc = compute_inclusion_probabilities(retained)
    pos = coefficient_positive_probability(retained)
    # Each bar's color is the string form of the corresponding 'pos' entry.
    colors = np.array(list(map(str, pos)))
    descending = np.argsort(inc.values)[::-1]

    if number_of_variables is None:
        number_of_variables = np.sum(inc >= inclusion_threshold)

    keep = descending[:number_of_variables]
    inc = inc[keep]
    pos = pos[keep]
    colors = colors[keep]

    return R.barplot(inc,
                     ax=ax,
                     color=colors[::-1],
                     linewidth=.25,
                     edgecolor="black",
                     xlab="Marginal Inclusion Probability",
                     ylab="Variable",
                     **kwargs)
Пример #27
0
 def test_lty(self):
     """
     Draw ten parallel lines, one for each of R.lty(0), ..., R.lty(9).  The
     figure is only shown if the module level flag _show_figs is set.
     """
     fig, ax = plt.subplots()
     x = np.linspace(0, 10)
     for offset in range(10):
         ax.plot(x, x + offset, ls=R.lty(offset))
     if _show_figs:
         fig.show()
    def _build_state_model(self):
        """
        Create the boom dynamic regression state model from self._predictors,
        assign its posterior sampler, and set the mean and variance of its
        initial state from self._initial_state_prior.

        Effects:
          Sets self._state_model.
        """
        predictors = R.to_boom_matrix(self._predictors)
        self._state_model = boom.DynamicRegressionStateModel(predictors)

        sampler = boom.DynamicRegressionIndependentPosteriorSampler(
            self._state_model,
            [prior.boom() for prior in self._sigma_prior])
        for position, prior in enumerate(self._sigma_prior):
            # Only a finite, positive upper limit is passed to the sampler.
            limit = prior.upper_limit
            if np.isfinite(limit) and limit > 0:
                sampler.set_sigma_max(position, limit)
        self._state_model.set_method(sampler)

        initial_mean = R.to_boom_vector(self._initial_state_prior.mean)
        self._state_model.set_initial_state_mean(initial_mean)
        initial_variance = R.to_boom_spd(self._initial_state_prior.Sigma)
        self._state_model.set_initial_state_variance(initial_variance)
Пример #29
0
    def test_encoding(self):
        """
        Encode a data set with an effect encoder, an identity encoder, and
        their interaction.  The result should have one row per observation
        and four columns, with column 2 matching the "Height" variable.
        """
        color_encoder = R.EffectEncoder("Color", ["Red", "Blue"])
        height_encoder = R.IdentityEncoder("Height")
        interaction = R.InteractionEncoder(color_encoder, height_encoder)
        dataset_encoder = R.DatasetEncoder(
            [color_encoder, height_encoder, interaction])

        sample_size = 1000
        heights = np.random.randn(sample_size)
        colors = np.random.choice(["Red", "Blue"], sample_size)
        frame = pd.DataFrame({"Height": heights, "Color": colors})

        encoded = dataset_encoder.encode_dataset(frame)
        n_rows, n_cols = encoded.shape[0], encoded.shape[1]
        self.assertEqual(sample_size, n_rows)
        self.assertEqual(4, n_cols)
        self.assertTrue(np.allclose(encoded[:, 2], frame.iloc[:, 0]))
Пример #30
0
    def plot_size(self, ax=None, burn: int = None, **kwargs):
        """
        Plot the distribution of the number of nonzero entries of
        self._beta_draws, counted along axis 1, against
        self._unique_timestamps.

        Args:
          ax: A plt.Axes object on which to draw.  If None then a new
            plt.Figure and Axes are created, and the figure is shown on
            function exit.
          burn: Accepted but not used by this method.
          kwargs: Accepted but not used by this method.

        Returns:
          The axes object containing the plot.
        """
        own_figure = None
        if ax is None:
            own_figure, ax = plt.subplots(1, 1)

        # Count the nonzero entries along axis 1.
        number_included = (self._beta_draws != 0).sum(axis=1)
        R.plot_dynamic_distribution(number_included,
                                    timestamps=self._unique_timestamps,
                                    ax=ax,
                                    xlab="Time",
                                    ylab="Number Included Predictors")

        if own_figure is not None:
            own_figure.show()
        return ax