示例#1
0
    def impute(self, mode="last", const_num=0):
        '''
        Fill the missing values of the tsdataset, one id at a time.

        For every id in `self._id_list`, the rows of `self.df` whose `id_col`
        equals that id are passed to `impute_timeseries_dataframe`. The imputed
        pieces are concatenated, in `self._id_list` order, and stored back
        into `self.df`.

        :param mode: imputation mode, one of "last", "const" or "linear".
               "last": propagate the last non N/A number to the N/A values that
                       follow it. If there is no non N/A number ahead, 0 is
                       filled instead.
               "const": fill with a constant value supplied by the user.
               "linear": fill by linear interpolation.
        :param const_num: the constant used for filling; only effective when
               mode is set to "const".

        :return: the tsdataset instance.

        Note: It is preferred that `impute` is called after `resample` while before
              `roll` if needed.
        '''
        imputed_parts = []
        for id_name in self._id_list:
            # Rows that belong to the current id only.
            rows_of_id = self.df[self.df[self.id_col] == id_name]
            imputed_parts.append(
                impute_timeseries_dataframe(df=rows_of_id,
                                            dt_col=self.dt_col,
                                            mode=mode,
                                            const_num=const_num))
        self.df = pd.concat(imputed_parts)
        return self
示例#2
0
    def impute(self, mode="last", const_num=0):
        '''
        Fill the missing values of the tsdataset group by group.

        `self.df` is grouped by `id_col`, every group is passed to
        `impute_timeseries_dataframe`, and the combined result replaces
        `self.df` with its index reset.

        :param mode: imputation mode, one of "last", "const" or "linear".

            "last": propagate the last non N/A number to the N/A values that
            follow it. If there is no non N/A number ahead, 0 is filled instead.

            "const": fill with a constant value supplied by the user.

            "linear": fill by linear interpolation.
        :param const_num: the constant used for filling; only effective when
            mode is set to "const".

        :return: the tsdataset instance.
        '''
        def _impute_group(group_df):
            # Impute a single id's rows with the settings given to `impute`.
            return impute_timeseries_dataframe(df=group_df,
                                               dt_col=self.dt_col,
                                               mode=mode,
                                               const_num=const_num)

        grouped_by_id = self.df.groupby([self.id_col])
        self.df = grouped_by_id.apply(_impute_group)
        # Discard the index left by the grouped apply and renumber the rows.
        self.df.reset_index(drop=True, inplace=True)
        return self
示例#3
0
 def test_impute_timeseries_dataframe(self):
     # Exercises impute_timeseries_dataframe: first the argument combinations
     # that must be rejected, then each supported imputation mode.

     # Every keyword set below is expected to raise an AssertionError.
     rejected_kwargs = (
         {"dt_col": "z"},
         {"dt_col": "datetime", "mode": "dummy"},
         {"dt_col": "a"},
     )
     for bad_kwargs in rejected_kwargs:
         with pytest.raises(AssertionError):
             impute_timeseries_dataframe(self.df, **bad_kwargs)

     for mode in ("last", "const", "linear"):
         imputed_df = impute_timeseries_dataframe(
             self.df, dt_col="datetime", mode=mode)
         # The input frame must still hold N/A values after the call ...
         assert self.df.isna().sum().sum() != 0
         # ... while the returned frame must hold none.
         assert imputed_df.isna().sum().sum() == 0
示例#4
0
 def impute(self, mode="last", const_num=0):
     '''
     Fill the missing values of the tsdataset.

     The rows of each id in `self._id_list` are imputed separately with
     `impute_timeseries_dataframe`, and the concatenated result replaces
     `self.df`.

     :param mode: imputation mode, one of "last", "const" or "linear".
        "last": propagate the last non N/A number to the N/A values that
                follow it. If there is no non N/A number ahead, 0 is
                filled instead.
        "const": fill with a constant value supplied by the user.
        "linear": fill by linear interpolation.
     :param const_num: only effective when mode is set to "const".
     :return: this instance (`self`).
     '''
     def _impute_one(id_name):
         # Select the rows of one id and impute them with the caller's settings.
         rows_of_id = self.df[self.df[self.id_col] == id_name]
         return impute_timeseries_dataframe(df=rows_of_id,
                                            dt_col=self.dt_col,
                                            mode=mode,
                                            const_num=const_num)

     imputed_parts = list(map(_impute_one, self._id_list))
     self.df = pd.concat(imputed_parts)
     return self