def predict(self, input_data, is_fit_pipeline_stage: bool):
    """ Method for time series prediction on forecast length

    :param input_data: data with features, target and ids to process
    :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
    :return output_data: output data with smoothed time series
    """
    # Work on a shallow copy so the caller's data container is not mutated
    input_data = copy(input_data)
    parameters = input_data.task.task_params
    forecast_length = parameters.forecast_length
    old_idx = input_data.idx
    target = input_data.target

    if is_fit_pipeline_stage:
        # In-sample prediction over the whole training index range
        fitted = self.autoreg.predict(start=old_idx[0], end=old_idx[-1])
        # First n elements in time series are skipped by the AR model
        diff = self.actual_ts_len - len(fitted)
        if diff != 0:
            # Fill skipped leading positions with the first fitted value
            # (guard keeps this consistent with the ARIMA implementation and
            # avoids rebuilding the array when nothing was skipped)
            first_element = fitted[0]
            first_elements = [first_element] * diff
            first_elements.extend(list(fitted))
            fitted = np.array(first_elements)

        _, predict = _ts_to_table(idx=old_idx,
                                  time_series=fitted,
                                  window_size=forecast_length)
        new_idx, target_columns = _ts_to_table(idx=old_idx,
                                               time_series=target,
                                               window_size=forecast_length)
        # Update idx and target to the lagged-table representation
        input_data.idx = new_idx
        input_data.target = target_columns
    # For predict stage we can make prediction
    else:
        start_id = old_idx[-1] - forecast_length + 1
        end_id = old_idx[-1]
        predicted = self.autoreg.predict(start=start_id, end=end_id)
        # Convert one-dim array as a single row (column-wise forecast)
        predict = np.array(predicted).reshape(1, -1)
        new_idx = np.arange(start_id, end_id + 1)
        # Update idx
        input_data.idx = new_idx

    # Update idx and features
    output_data = self._convert_to_output(input_data,
                                          predict=predict,
                                          data_type=DataTypesEnum.table)
    return output_data
def test_sparse_matrix():
    """ Sparsification must keep every row while shrinking the feature width """
    # Build the lagged matrix that serves as input for sparsification
    train_input, _, _ = synthetic_univariate_ts()
    _, lagged_table = _ts_to_table(idx=train_input.idx,
                                   time_series=train_input.features,
                                   window_size=window_size)
    features_columns = _sparse_matrix(log, lagged_table)

    n_rows, n_cols = features_columns.shape
    # assert if sparse matrix features less than half or less than another dimension
    assert n_rows == lagged_table.shape[0]
    assert n_cols <= lagged_table.shape[1] / 2 or n_cols < lagged_table.shape[0]
def test_ts_to_lagged_table():
    """ Lagged transformation and target preparation yield the expected tables """

    def rows_as_tuples(matrix):
        # Helper: 2d array -> tuple of row tuples for literal comparison
        return tuple(map(tuple, matrix))

    train_input, _, _ = synthetic_univariate_ts()

    # First step - lagged transformation of features
    new_idx, lagged_table = _ts_to_table(idx=train_input.idx,
                                         time_series=train_input.features,
                                         window_size=window_size)

    correct_lagged_table = ((0., 10., 20., 30.),
                            (10., 20., 30., 40.),
                            (20., 30., 40., 50.),
                            (30., 40., 50., 60.),
                            (40., 50., 60., 70.),
                            (50., 60., 70., 80.),
                            (60., 70., 80., 90.),
                            (70., 80., 90., 100.),
                            (80., 90., 100., 110.),
                            (90., 100., 110., 120.))
    correct_new_idx = (4, 5, 6, 7, 8, 9, 10, 11, 12, 13)

    assert rows_as_tuples(lagged_table) == correct_lagged_table
    assert tuple(new_idx) == correct_new_idx

    # Second step - processing for correct the target
    final_idx, features_columns, final_target = _prepare_target(
        idx=new_idx,
        features_columns=lagged_table,
        target=train_input.target,
        forecast_length=forecast_length)

    correct_final_idx = (4, 5, 6, 7, 8, 9, 10)
    correct_features_columns = ((0., 10., 20., 30.),
                                (10., 20., 30., 40.),
                                (20., 30., 40., 50.),
                                (30., 40., 50., 60.),
                                (40., 50., 60., 70.),
                                (50., 60., 70., 80.),
                                (60., 70., 80., 90.))
    correct_final_target = ((40., 50., 60., 70.),
                            (50., 60., 70., 80.),
                            (60., 70., 80., 90.),
                            (70., 80., 90., 100.),
                            (80., 90., 100., 110.),
                            (90., 100., 110., 120.),
                            (100., 110., 120., 130.))

    assert tuple(final_idx) == correct_final_idx
    assert rows_as_tuples(features_columns) == correct_features_columns
    assert rows_as_tuples(final_target) == correct_final_target
def predict(self, input_data, is_fit_chain_stage: bool):
    """ Method for smoothing time series

    :param input_data: data with features, target and ids to process
    :param is_fit_chain_stage: is this fit or predict stage for chain
    :return output_data: output data with smoothed time series
    """
    task_params = input_data.task.task_params
    horizon = task_params.forecast_length
    source_idx = input_data.idx
    source_target = input_data.target

    if is_fit_chain_stage:
        # For training chain get fitted data
        fitted_values = self.arima.fittedvalues
        fitted_values = self._inverse_boxcox(predicted=fitted_values,
                                             lmbda=self.lmbda)
        # Undo shift operation
        fitted_values = self._inverse_shift(fitted_values)

        missing = int(self.actual_ts_len - len(fitted_values))
        # If first elements skipped - pad them with the first fitted value
        if missing != 0:
            padding = [fitted_values[0]] * missing
            padding.extend(list(fitted_values))
            fitted_values = np.array(padding)

        _, predict = _ts_to_table(idx=source_idx,
                                  time_series=fitted_values,
                                  window_size=horizon)
        new_idx, target_columns = _ts_to_table(idx=source_idx,
                                               time_series=source_target,
                                               window_size=horizon)
        # Update idx and target
        input_data.idx = new_idx
        input_data.target = target_columns
    # For predict stage we can make prediction
    else:
        start_id = source_idx[-1] - horizon + 1
        end_id = source_idx[-1]
        predicted = self.arima.predict(start=start_id, end=end_id)
        predicted = self._inverse_boxcox(predicted=predicted,
                                         lmbda=self.lmbda)
        # Undo shift operation
        predict = self._inverse_shift(predicted)
        # Convert one-dim array as column
        predict = np.array(predict).reshape(1, -1)
        # Update idx
        input_data.idx = np.arange(start_id, end_id + 1)

    # Update idx and features
    output_data = self._convert_to_output(input_data,
                                          predict=predict,
                                          data_type=DataTypesEnum.table)
    return output_data