def get_data(self) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
    """
    Load the feature and target tags and return them as ``(X, y)``.

    The series for every tag in ``self.tag_list`` and
    ``self.target_tag_list`` are requested from ``self.data_provider``
    for the period ``self.from_ts`` to ``self.to_ts``. When
    ``self.resolution`` is set they are combined by
    ``self.join_timeseries``; otherwise they are concatenated column-wise
    with an inner join. If ``self.row_filter`` is set, the combined frame
    is passed through ``pandas_filter_rows``.

    Returns
    -------
    Tuple[pd.DataFrame, Optional[pd.DataFrame]]
        ``X`` holds the columns named after ``self.tag_list``. ``y`` holds
        the columns named after ``self.target_tag_list``, or is ``None``
        when ``self.target_tag_list`` is empty.
    """
    # De-duplicate while keeping first-seen order. A plain ``set`` would hand
    # the provider the tags in a hash-dependent order that can differ from
    # one run to the next.
    requested_tags = list(dict.fromkeys(self.tag_list + self.target_tag_list))

    series_iter: Iterable[pd.Series] = self.data_provider.load_series(
        from_ts=self.from_ts,
        to_ts=self.to_ts,
        tag_list=requested_tags,
    )

    # Resample if we have a resolution set, otherwise simply join the series.
    if self.resolution:
        data = self.join_timeseries(
            series_iter,
            self.from_ts,
            self.to_ts,
            self.resolution,
            aggregation_methods=self.aggregation_methods,
        )
    else:
        data = pd.concat(series_iter, axis=1, join="inner")

    if self.row_filter:
        data = pandas_filter_rows(
            data, self.row_filter, buffer_size=self.row_filter_buffer_size
        )

    x_tag_names = [tag.name for tag in self.tag_list]
    y_tag_names = [tag.name for tag in self.target_tag_list]

    X = data[x_tag_names]
    y = data[y_tag_names] if self.target_tag_list else None
    return X, y
def get_data(self) -> Tuple[pd.DataFrame, None]:
    """
    Build the feature frame for ``self.tag_list`` and return ``(X, None)``.

    The series are loaded from ``self.data_provider`` for the period
    ``self.from_ts`` to ``self.to_ts`` and combined by
    ``self.join_timeseries``. When ``self.row_filter`` is set, the
    combined frame is passed through ``pandas_filter_rows`` before being
    returned. The second element of the result is always ``None``.
    """
    loaded_series = self.data_provider.load_series(
        from_ts=self.from_ts, to_ts=self.to_ts, tag_list=self.tag_list
    )
    joined = self.join_timeseries(loaded_series, self.from_ts, self.resolution)

    # Nothing to filter: hand back the joined frame untouched.
    if not self.row_filter:
        return joined, None

    return pandas_filter_rows(joined, self.row_filter), None
def get_data(self) -> Tuple[pd.DataFrame, None]:
    """
    Build the feature frame for ``self.tag_list`` and return ``(X, None)``.

    The series are loaded from ``self.data_provider`` for the period
    ``self.from_ts`` to ``self.to_ts`` and combined by
    ``self.join_timeseries``. When ``self.row_filter`` is set, the
    combined frame is passed through ``pandas_filter_rows`` and the head
    of the filtered frame is logged. The second element of the result is
    always ``None``.
    """
    series_stream = self.data_provider.load_series(
        from_ts=self.from_ts, to_ts=self.to_ts, tag_list=self.tag_list
    )
    X = self.join_timeseries(
        series_stream, self.from_ts, self.to_ts, self.resolution
    )

    # Nothing to filter: hand back the joined frame untouched.
    if not self.row_filter:
        return X, None

    X = pandas_filter_rows(X, self.row_filter)
    logger.info(f"First five rows of the filtered dataset are {X.head()}")
    return X, None
def test_filter_rows_catches_illegal():
    """
    Check that ``pandas_filter_rows`` raises ``ValueError`` for each of
    three filter strings that are not row-filter expressions: a
    ``sys.exit`` call, a lambda, and an ``__import__`` call.
    """
    # 20 rows: every (i, j) index pair with i in 0..9 and j in 0..1.
    df = pd.DataFrame(list(np.ndindex((10, 2))), columns=["Tag 1", "Tag 2"])

    with pytest.raises(ValueError):
        pandas_filter_rows(df, "sys.exit(0)")
    with pytest.raises(ValueError):
        pandas_filter_rows(df, "lambda x:x")
    with pytest.raises(ValueError):
        # The original statement ended in a stray ", ValueError", which only
        # built a throwaway tuple; it has been removed.
        pandas_filter_rows(df, "__import__('os').system('clear')")
def test_filter_rows_catches_illegal(self):
    """
    Check that ``pandas_filter_rows`` raises ``ValueError`` for each of
    three filter strings that are not row-filter expressions: a
    ``sys.exit`` call, a lambda, and an ``__import__`` call. The frame
    under test is ``self.df``.
    """
    with self.assertRaises(ValueError):
        pandas_filter_rows(self.df, "sys.exit(0)")
    with self.assertRaises(ValueError):
        pandas_filter_rows(self.df, "lambda x:x")
    with self.assertRaises(ValueError):
        # The original statement ended in a stray ", ValueError", which only
        # built a throwaway tuple; it has been removed.
        pandas_filter_rows(self.df, "__import__('os').system('clear')")
def test_filter_rows_basic():
    """
    Check the number of rows ``pandas_filter_rows`` keeps for a handful of
    backtick-quoted filter expressions, and that a filter and its negated
    complement select the same rows.
    """
    # 20 rows: every (i, j) index pair with i in 0..9 and j in 0..1.
    frame = pd.DataFrame(
        list(np.ndindex((10, 2))), columns=["Tag 1", "Tag 2"]
    )

    # (filter expression, expected number of surviving rows)
    expected_row_counts = (
        ("`Tag 1` <= `Tag 2`", 3),
        ("`Tag 1` == `Tag 2`", 2),
        ("(`Tag 1` <= `Tag 2`) | `Tag 2` < 2", 20),
        ("(`Tag 1` <= `Tag 2`) | `Tag 2` < 0.9", 9),
    )
    for row_filter, expected in expected_row_counts:
        assert len(pandas_filter_rows(frame, row_filter)) == expected

    # "<=" and "not >" must pick exactly the same rows.
    at_most = pandas_filter_rows(frame, "(`Tag 1` <= `Tag 2`)")
    not_greater = pandas_filter_rows(frame, "~(`Tag 1` > `Tag 2`)")
    assert_frame_equal(at_most, not_greater)
def get_data(self) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
    """
    Load the feature and target tags and return them as ``(X, y)``.

    The series for every tag in ``self.tag_list`` and
    ``self.target_tag_list`` are requested from ``self.data_provider``
    for the period ``self.from_ts`` to ``self.to_ts`` and combined by
    ``self.join_timeseries``. If ``self.row_filter`` is set, the combined
    frame is passed through ``pandas_filter_rows``.

    Returns
    -------
    Tuple[pd.DataFrame, Optional[pd.DataFrame]]
        ``X`` holds the columns named after ``self.tag_list``. ``y`` holds
        the columns named after ``self.target_tag_list``, or is ``None``
        when ``self.target_tag_list`` is empty.
    """
    # De-duplicate while keeping first-seen order. A plain ``set`` would hand
    # the provider the tags in a hash-dependent order that can differ from
    # one run to the next.
    requested_tags = list(dict.fromkeys(self.tag_list + self.target_tag_list))

    series_iter: Iterable[pd.Series] = self.data_provider.load_series(
        from_ts=self.from_ts,
        to_ts=self.to_ts,
        tag_list=requested_tags,
    )
    data: pd.DataFrame = self.join_timeseries(
        series_iter, self.from_ts, self.to_ts, self.resolution
    )

    if self.row_filter:
        data = pandas_filter_rows(data, self.row_filter)

    x_tag_names = [tag.name for tag in self.tag_list]
    y_tag_names = [tag.name for tag in self.target_tag_list]

    X = data[x_tag_names]
    y = data[y_tag_names] if self.target_tag_list else None
    return X, y
def test_filter_rows_basic(self):
    """
    Check the number of rows ``pandas_filter_rows`` keeps from ``self.df``
    for a handful of backtick-quoted filter expressions, and that a filter
    and its negated complement select the same rows.
    """
    # NOTE(review): the expected counts depend on how self.df is built,
    # which is not visible from this method - confirm against the fixture.
    frame = self.df

    # (filter expression, expected number of surviving rows)
    expected_row_counts = (
        ("`Tag 1` <= `Tag 2`", 3),
        ("`Tag 1` == `Tag 2`", 2),
        ("(`Tag 1` <= `Tag 2`) | `Tag 2` < 2", 20),
        ("(`Tag 1` <= `Tag 2`) | `Tag 2` < 0.9", 9),
    )
    for row_filter, expected in expected_row_counts:
        kept = pandas_filter_rows(frame, row_filter)
        self.assertEqual(len(kept), expected)

    # "<=" and "not >" must pick exactly the same rows.
    at_most = pandas_filter_rows(frame, "(`Tag 1` <= `Tag 2`)")
    not_greater = pandas_filter_rows(frame, "~(`Tag 1` > `Tag 2`)")
    assert_frame_equal(at_most, not_greater)
def test_filter_rows_basic(self):
    """
    Check the number of rows ``pandas_filter_rows`` keeps from ``self.df``
    for a handful of filter expressions that name tags in single quotes,
    and that a filter and its negated complement select the same rows.
    """
    # NOTE(review): the expected counts depend on how self.df is built,
    # which is not visible from this method - confirm against the fixture.
    frame = self.df

    # (filter expression, expected number of surviving rows)
    # The trailing spaces inside two of the expressions are intentional
    # carry-overs and are kept verbatim.
    expected_row_counts = (
        ("'Tag 1' <= 'Tag 2'", 3),
        ("'Tag 1' == 'Tag 2'", 2),
        ("('Tag 1' <= 'Tag 2') | 'Tag 2' < 2 ", 20),
        ("('Tag 1' <= 'Tag 2') | 'Tag 2' < 0.9 ", 9),
    )
    for row_filter, expected in expected_row_counts:
        kept = pandas_filter_rows(frame, row_filter)
        self.assertEqual(len(kept), expected)

    # "<=" and "not >" must pick exactly the same rows.
    at_most = pandas_filter_rows(frame, "('Tag 1' <= 'Tag 2')")
    not_greater = pandas_filter_rows(frame, "~('Tag 1' > 'Tag 2')")
    assert_frame_equal(at_most, not_greater)