def impute(self, mode="last", const_num=0):
    '''
    Fill missing values in the tsdataset, one id at a time.

    The rows of every id listed in `self._id_list` are selected from
    `self.df`, passed to `impute_timeseries_dataframe` on their own, and the
    imputed pieces are concatenated back into `self.df` in `self._id_list`
    order.

    :param mode: imputation mode, select from "last", "const" or "linear".
        "last": impute by propagating the last non N/A number to its
        following N/A. If there is no non N/A number ahead, 0 is filled
        instead.
        "const": impute by a const value input by user.
        "linear": impute by linear interpolation.
    :param const_num: the const number to fill, which is only effective when
        mode is set to "const".

    :return: the tsdataset instance.

    Note: It is preferred that `impute` is called after `resample` and
    before `roll` if needed.
    '''
    imputed_parts = []
    for id_name in self._id_list:
        # Isolate one id so values are never propagated across series.
        single_series = self.df[self.df[self.id_col] == id_name]
        imputed_parts.append(
            impute_timeseries_dataframe(df=single_series,
                                        dt_col=self.dt_col,
                                        mode=mode,
                                        const_num=const_num)
        )
    self.df = pd.concat(imputed_parts)
    return self
def impute(self, mode="last", const_num=0):
    '''
    Fill missing values in the tsdataset, one id group at a time.

    `self.df` is grouped by `self.id_col`, each group is passed to
    `impute_timeseries_dataframe` on its own, and the combined result
    replaces `self.df` with its index reset (the previous index is dropped).

    :param mode: imputation mode, select from "last", "const" or "linear".
        "last": impute by propagating the last non N/A number to its
        following N/A. If there is no non N/A number ahead, 0 is filled
        instead.
        "const": impute by a const value input by user.
        "linear": impute by linear interpolation.
    :param const_num: the const number to fill, which is only effective when
        mode is set to "const".

    :return: the tsdataset instance.
    '''
    def _impute_one_group(group_df):
        # Each call receives the rows of a single id group.
        return impute_timeseries_dataframe(df=group_df,
                                           dt_col=self.dt_col,
                                           mode=mode,
                                           const_num=const_num)

    per_id_groups = self.df.groupby([self.id_col])
    self.df = per_id_groups.apply(_impute_one_group)
    # Discard the index produced by groupby/apply in favour of a fresh one.
    self.df.reset_index(drop=True, inplace=True)
    return self
def test_impute_timeseries_dataframe(self):
    '''
    Check argument validation and every imputation mode of
    `impute_timeseries_dataframe` against the fixture `self.df`.
    '''
    # Argument combinations that must be rejected with an AssertionError.
    rejected_kwargs = (
        {"dt_col": "z"},
        {"dt_col": "datetime", "mode": "dummy"},
        {"dt_col": "a"},
    )
    for bad_kwargs in rejected_kwargs:
        with pytest.raises(AssertionError):
            impute_timeseries_dataframe(self.df, **bad_kwargs)

    for mode in ("last", "const", "linear"):
        imputed_df = impute_timeseries_dataframe(
            self.df, dt_col="datetime", mode=mode)
        # The fixture must still hold N/A values (input left untouched) ...
        assert self.df.isna().sum().sum() != 0
        # ... while the returned frame must have none left.
        assert imputed_df.isna().sum().sum() == 0
def impute(self, mode="last", const_num=0):
    '''
    Fill missing values in the tsdataset, one id at a time.

    For every id in `self._id_list`, the matching rows of `self.df` are
    imputed by `impute_timeseries_dataframe`; the results are concatenated
    in `self._id_list` order and stored back into `self.df`.

    :param mode: imputation mode, select from "last", "const" or "linear".
        "last": impute by propagating the last non N/A number to its
        following N/A. If there is no non N/A number ahead, 0 is filled
        instead.
        "const": impute by a const value input by user.
        "linear": impute by linear interpolation.
    :param const_num: only effective when mode is set to "const".

    :return: the tsdataset instance.
    '''
    def _impute_for(id_name):
        # Restrict to a single id so each series is imputed independently.
        rows_of_id = self.df[self.df[self.id_col] == id_name]
        return impute_timeseries_dataframe(df=rows_of_id,
                                           dt_col=self.dt_col,
                                           mode=mode,
                                           const_num=const_num)

    self.df = pd.concat([_impute_for(id_name) for id_name in self._id_list])
    return self