def test_with_tuning(mock_data):
    """Checks `calculate_split_thresholds` when the `tuning` flag is True.

    The function at `THRESH_STR` is patched to return `(60, 20)`. The test
    expects it to be called exactly once with `mock_data` and the
    proportions 0.60 and 0.20 (presumably the two-decimal roundings of the
    0.603 and 0.2015 inputs; confirm against the implementation), and
    expects the patched return value to be handed back unchanged.
    """
    with patch(THRESH_STR, return_value=(60, 20)) as patched_thresh:
        train_cutoff, test_cutoff = calculate_split_thresholds(
            mock_data, 0.603, 0.2015, True)

    # The mock keeps its call record after the patch context exits.
    patched_thresh.assert_called_once_with(mock_data, 0.60, 0.20)
    assert (train_cutoff, test_cutoff) == (60, 20)
def test_not_tuning(mock_data):
    """Checks `calculate_split_thresholds` when the `tuning` flag is False.

    The function at `THRESH_STR` is patched to return `(80, 20)`. The test
    expects it to be called exactly once with `mock_data` and the
    proportions 0.80 and 0.20 (presumably the rounded train and validation
    proportions combined, followed by the rounded validation proportion;
    confirm against the implementation), and expects the patched return
    value to be handed back unchanged.
    """
    with patch(THRESH_STR, return_value=(80, 20)) as patched_thresh:
        train_cutoff, test_cutoff = calculate_split_thresholds(
            mock_data, 0.6045, 0.2027, False)

    # The mock keeps its call record after the patch context exits.
    patched_thresh.assert_called_once_with(mock_data, 0.80, 0.20)
    assert (train_cutoff, test_cutoff) == (80, 20)
def split_time_series_data(self, ts_data, tuning=True):
    """Cuts `ts_data` into two consecutive pieces for the modeling process.

    With `tuning` set to True the pieces are the training and validation
    sets. Otherwise they are the combined training + validation set and
    the testing set.

    Parameters
    ----------
    ts_data: pandas object
        The pandas object holding the data to be split.
    tuning: bool
        Whether the split is being made for tuning.

    Returns
    -------
    pandas object, pandas object
        The slice of `ts_data` before the first threshold, followed by the
        slice between the first and second thresholds.

    """
    thresholds = utils.calculate_split_thresholds(
        ts_data, self._train_prop, self._val_prop, tuning)
    first_end, second_end = thresholds

    leading_part = ts_data[:first_end]
    trailing_part = ts_data[first_end:second_end]
    return leading_part, trailing_part
def split_data(self, model_data, tuning=True):
    """Divides `model_data` into a training and a testing time series.

    With `tuning` set to True the result is the training and validation
    sets. Otherwise it is the combined training + validation set and the
    testing set.

    Parameters
    ----------
    model_data: np.array
        The numpy array holding the time series being forecast.
    tuning: bool
        Whether the split is being made for tuning.

    Returns
    -------
    np.array, np.array
        The records before the first threshold, followed by the records
        between the first and second thresholds.

    """
    cutoffs = utils.calculate_split_thresholds(
        model_data, self._train_prop, self._val_prop, tuning)
    train_end, test_end = cutoffs

    train_records = model_data[:train_end]
    test_records = model_data[train_end:test_end]
    return train_records, test_records
def split_data(self, model_data, tuning=True):
    """Applies the train-test split to `model_data`.

    With `tuning` set to True the result is the training and validation
    sets. Otherwise it is the combined training + validation set and the
    testing set.

    Parameters
    ----------
    model_data: pandas.Series or pandas.DataFrame
        The data used to build the trace model.
    tuning: bool
        Whether the split is being made for tuning.

    Returns
    -------
    pandas object, pandas object
        The portion of `model_data` before the first threshold, followed
        by the portion between the first and second thresholds.

    """
    lower, upper = utils.calculate_split_thresholds(
        model_data, self._train_prop, self._val_prop, tuning)

    # Explicit slice objects; identical to `[:lower]` and `[lower:upper]`.
    head_window = slice(None, lower)
    tail_window = slice(lower, upper)
    return model_data[head_window], model_data[tail_window]
def split_data(self, model_data, tuning=True):
    """Divides `model_data` into a training and a testing DataFrame.

    The returned frames carry a fresh default index instead of the index
    of `model_data`.

    Parameters
    ----------
    model_data: pd.DataFrame
        The time series being modeled, with one column per series handled
        by the multivariate model.
    tuning: bool
        Whether the split is being made for tuning.

    Returns
    -------
    pd.DataFrame, pd.DataFrame
        The records before the first threshold, followed by the records
        between the first and second thresholds.

    """
    # Thresholds are computed from the frame as passed in, before the
    # index is discarded.
    cutoffs = utils.calculate_split_thresholds(
        model_data, self._train_prop, self._val_prop, tuning)
    train_end, test_end = cutoffs

    reindexed = model_data.reset_index(drop=True)
    train_frame = reindexed[:train_end]
    test_frame = reindexed[train_end:test_end]
    return train_frame, test_frame
def get_total_spare(self, trace, tuning=True):
    """Returns the spare amount of the target for `trace` over the test window.

    The target is the first entry of `self._model_vars`. The window runs
    between the two thresholds returned by
    `utils.calculate_split_thresholds` for the target time series.

    Parameters
    ----------
    trace: Trace
        The `Trace` whose spare amount is calculated.
    tuning: bool
        Whether the spare is being calculated to tune the model (True) or
        to evaluate the model on the test set (False).

    Returns
    -------
    float
        The total spare amount of the target variable for `trace` over
        the test window.

    """
    target_var = self._model_vars[0]
    series = trace.get_target_time_series(target_var)

    window = utils.calculate_split_thresholds(
        series, self._train_prop, self._val_prop, tuning)
    window_start, window_end = window

    return trace.get_spare_resource_in_window(
        target_var, window_start, window_end)