def test_deduplicate_timeseries_dataframe(self):
    '''
    Exercise ``deduplicate_timeseries_dataframe`` on the ``self.df`` fixture.

    Using "z" or "a" as ``dt_col`` is expected to raise an AssertionError,
    while using "datetime" is expected to produce a result with 50 rows.
    '''
    # Each of these column names must be rejected by the helper.
    for rejected_col in ("z", "a"):
        with pytest.raises(AssertionError):
            deduplicate_timeseries_dataframe(self.df, dt_col=rejected_col)

    deduped = deduplicate_timeseries_dataframe(self.df, dt_col="datetime")
    assert len(deduped) == 50
def deduplicate(self):
    '''
    Drop duplicated records from the dataset.

    The whole ``self.df`` is handed to ``deduplicate_timeseries_dataframe``
    together with ``self.dt_col`` (the frame is not split by id_col here),
    and the frame returned by that helper replaces ``self.df``.

    :return: the tsdataset instance.
    '''
    deduped = deduplicate_timeseries_dataframe(df=self.df,
                                               dt_col=self.dt_col)
    self.df = deduped
    return self
def deduplicate(self):
    '''
    Drop duplicated rows, handling each timeseries (one per id) separately.

    For every id in ``self._id_list`` the rows whose id_col equals that id
    are passed to ``deduplicate_timeseries_dataframe`` along with
    ``self.dt_col``. The per-id results are concatenated, in
    ``self._id_list`` order, into the new ``self.df``.

    :return: the tsdataset instance.

    Note: It is preferred that `deduplicate` is called before all other
    operations if needed.
    '''
    per_id_frames = []
    for id_name in self._id_list:
        # Boolean mask selecting only the rows of the current timeseries.
        rows_of_id = self.df[self.df[self.id_col] == id_name]
        cleaned = deduplicate_timeseries_dataframe(df=rows_of_id,
                                                   dt_col=self.dt_col)
        per_id_frames.append(cleaned)

    # Only slices for ids listed in _id_list end up in the new frame;
    # the index is not reset by this call.
    self.df = pd.concat(per_id_frames)
    return self
def deduplicate(self):
    '''
    Drop duplicated records, one timeseries (as identified by id_col) at a
    time.

    Each id in ``self._id_list`` selects its own rows from ``self.df``;
    those rows are passed through ``deduplicate_timeseries_dataframe`` with
    ``self.dt_col``, and the resulting frames are concatenated to form the
    new ``self.df``.

    :return: the tsdataset instance.
    '''
    def _dedup_one(id_name):
        # Restrict the frame to a single timeseries before deduplicating.
        belongs_to_id = self.df[self.id_col] == id_name
        return deduplicate_timeseries_dataframe(df=self.df[belongs_to_id],
                                                dt_col=self.dt_col)

    # Materialise every per-id result before self.df is rebound.
    self.df = pd.concat(list(map(_dedup_one, self._id_list)))
    return self