def test_with_tuning(mock_data):
    """Check the thresholds requested and returned when tuning is True.

    The threshold helper named by `THRESH_STR` is patched to return
    (60, 20). Calling `calculate_split_thresholds` with proportions 0.603
    and 0.2015 is expected to reach that helper exactly once with 0.60 and
    0.20, and the test expects the patched pair to come back as the result.
    """
    with patch(THRESH_STR, return_value=(60, 20)) as thresh_mock:
        result = calculate_split_thresholds(mock_data, 0.603, 0.2015, True)
    train_cutoff, test_cutoff = result
    thresh_mock.assert_called_once_with(mock_data, 0.60, 0.20)
    assert train_cutoff == 60
    assert test_cutoff == 20
def test_not_tuning(mock_data):
    """Check the thresholds requested and returned when tuning is False.

    The threshold helper named by `THRESH_STR` is patched to return
    (80, 20). Calling `calculate_split_thresholds` with proportions 0.6045
    and 0.2027 is expected to reach that helper exactly once with 0.80 and
    0.20 (presumably the training and validation proportions combined --
    confirm against the implementation), and the test expects the patched
    pair to come back as the result.
    """
    with patch(THRESH_STR, return_value=(80, 20)) as thresh_mock:
        result = calculate_split_thresholds(mock_data, 0.6045, 0.2027, False)
    train_cutoff, test_cutoff = result
    thresh_mock.assert_called_once_with(mock_data, 0.80, 0.20)
    assert train_cutoff == 80
    assert test_cutoff == 20
    def split_time_series_data(self, ts_data, tuning=True):
        """Slices `ts_data` into two consecutive pieces for modeling.

        The cut points come from `utils.calculate_split_thresholds`, which
        receives the data, the `_train_prop` and `_val_prop` attributes and
        the `tuning` flag. If `tuning` is True the pieces are the training
        and validation sets. Otherwise they are the training + validation
        and testing sets.

        Parameters
        ----------
        ts_data: pd.Object
            A pandas Object holding the data to be split.
        tuning: bool
            A boolean indicating whether the split is being done for tuning.

        Returns
        -------
        pd.DataFrame, pd.DataFrame
            Two pandas DataFrames: the records before the first threshold,
            followed by the records from the first threshold up to the
            second threshold.

        """
        split_at, stop_at = utils.calculate_split_thresholds(
            ts_data, self._train_prop, self._val_prop, tuning)
        first_part = ts_data[:split_at]
        second_part = ts_data[split_at:stop_at]
        return first_part, second_part
示例#4
0
    def split_data(self, model_data, tuning=True):
        """Slices `model_data` into a training and a testing time series.

        The cut points come from `utils.calculate_split_thresholds`, which
        receives the data, the `_train_prop` and `_val_prop` attributes and
        the `tuning` flag. If `tuning` is True the pieces are the training
        and validation sets. Otherwise they are the training + validation
        and testing sets.

        Parameters
        ----------
        model_data: np.array
            A numpy array holding the data of the time series being
            forecast.
        tuning: bool
            A boolean value indicating whether the split is for tuning.

        Returns
        -------
        np.array, np.array
            Two numpy arrays: the records before the first threshold (train
            set), followed by the records from the first threshold up to
            the second threshold (test set).

        """
        split_at, stop_at = utils.calculate_split_thresholds(
            model_data, self._train_prop, self._val_prop, tuning)
        train_part = model_data[:split_at]
        test_part = model_data[split_at:stop_at]
        return train_part, test_part
    def split_data(self, model_data, tuning=True):
        """Slices `model_data` into the training and testing set.

        The cut points come from `utils.calculate_split_thresholds`, which
        receives the data, the `_train_prop` and `_val_prop` attributes and
        the `tuning` flag. If `tuning` is True the pieces are the training
        and validation sets. Otherwise they are the training + validation
        and testing sets.

        Parameters
        ----------
        model_data: pandas.Object
            A pandas Object (Series or DataFrame) holding the data used to
            build the trace model.
        tuning: bool
            A boolean value indicating whether the split is for tuning.

        Returns
        -------
        pandas.Object, pandas.Object
            The two pandas objects sliced out of `model_data`: the records
            before the first threshold, followed by the records from the
            first threshold up to the second threshold.

        """
        split_at, stop_at = utils.calculate_split_thresholds(
            model_data, self._train_prop, self._val_prop, tuning)
        train_part = model_data[:split_at]
        test_part = model_data[split_at:stop_at]
        return train_part, test_part
示例#6
0
    def split_data(self, model_data, tuning=True):
        """Slices `model_data` into training and testing sets.

        The cut points are computed by `utils.calculate_split_thresholds`
        from the data as passed in, together with the `_train_prop` and
        `_val_prop` attributes and the `tuning` flag. The index is then
        reset (the old index is dropped) before the slices are taken.

        Parameters
        ----------
        model_data: pd.DataFrame
            A pandas DataFrame holding the time series that are being
            modeled, with a separate column for each time series that is
            being modeled by the multivariate model.
        tuning: bool
            A boolean value indicating whether the split is for tuning.

        Returns
        -------
        pd.DataFrame, pd.DataFrame
            Two pandas DataFrames: the records before the first threshold
            (train set), followed by the records from the first threshold
            up to the second threshold (test set).

        """
        split_at, stop_at = utils.calculate_split_thresholds(
            model_data, self._train_prop, self._val_prop, tuning)
        # Thresholds were taken from the original frame; slicing happens on
        # the re-indexed copy.
        reindexed = model_data.reset_index(drop=True)
        train_part = reindexed[:split_at]
        test_part = reindexed[split_at:stop_at]
        return train_part, test_part
示例#7
0
    def get_total_spare(self, trace, tuning=True):
        """The spare amount of the target for `trace` over the test window.

        The window bounds are the two thresholds returned by
        `utils.calculate_split_thresholds` for the target time series of
        `trace`. The target variable is the first entry of `_model_vars`.

        Parameters
        ----------
        trace: Trace
            The `Trace` for which the spare is calculated.
        tuning: bool
            A boolean value indicating whether the spare is being calculated
            to tune the model or evaluate the model on the test set.

        Returns
        -------
        float
            A float representing the total spare amount of the target
            variable for `trace` over the test window.

        """
        series = trace.get_target_time_series(self._model_vars[0])
        window_start, window_end = utils.calculate_split_thresholds(
            series, self._train_prop, self._val_prop, tuning)
        total_spare = trace.get_spare_resource_in_window(
            self._model_vars[0], window_start, window_end)
        return total_spare