def test_faked_DataLakeBackedDataset(MockDataset):
    """Fetch data through a DataLakeProvider-backed dataset inside the test suite.

    The call to ``get_data`` is expected to complete without prompting for
    authentication, even though the provider is created with
    ``interactive=True``.
    """
    lake_provider = DataLakeProvider(
        storename="dataplatformdlsprod",
        interactive=True,
    )
    lake_dataset = TimeSeriesDataset(data_provider=lake_provider, **CONFIG)

    # Unpacking doubles as a check that get_data yields exactly two items.
    X, y = lake_dataset.get_data()
def test_timeseries_target_tags(tag_list, target_tag_list):
    """Check that X and y columns follow ``tag_list`` / ``target_tag_list``.

    X must have one column per entry of ``tag_list`` and y one column per
    entry of ``target_tag_list``; an empty ``target_tag_list`` is expected to
    fall back to ``tag_list``. Column order must match the order of the tags.
    """
    window_start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    window_end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

    X, y = TimeSeriesDataset(
        window_start,
        window_end,
        tag_list=tag_list,
        target_tag_list=target_tag_list,
        data_provider=MockDataProvider(),
    ).get_data()

    assert len(X) == len(y)

    # An empty target_tag_list defaults to tag_list.
    expected_targets = target_tag_list or tag_list
    assert y.shape[1] == len(expected_targets)

    # Target column order is maintained.
    assert y.columns.tolist() == [tag.name for tag in expected_targets]

    # Features match tag_list ...
    assert X.shape[1] == len(tag_list)

    # ... and feature column order is maintained as well.
    assert X.columns.tolist() == [tag.name for tag in tag_list]
def test_trigger_tags(mock_tag_normalizer):
    """Check that tags named only in ``row_filter`` are fetched but not returned.

    The provider should be asked for the feature tags, the target tag and the
    two tags referenced by the row filter, while X and y expose only the
    feature and target columns respectively.
    """
    provider = MockDataProvider()
    feature_tags = [
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
    ]
    target_tags = [
        SensorTag("Tag 5", None),
    ]

    X, y = TimeSeriesDataset(
        data_provider=provider,
        tag_list=feature_tags,
        target_tag_list=target_tags,
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        row_filter="`Tag 3` > 0 & `Tag 4` > 1",
        tag_normalizer=mock_tag_normalizer,
    ).get_data()

    assert X is not None
    assert y is not None

    # Everything the provider was asked for: features, filter tags and target.
    requested_tags = set(provider.last_tag_list)
    expected_tags = {
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
        SensorTag("Tag 3", None),
        SensorTag("Tag 4", None),
        SensorTag("Tag 5", None),
    }
    assert requested_tags == expected_tags

    # The filter-only tags ("Tag 3", "Tag 4") must not show up in the output.
    assert set(X.columns.values) == {"Tag 1", "Tag 2"}
    assert set(y.columns.values) == {"Tag 5"}
def test_metadata_statistics():
    """Check the shape of X and the ``x_hist`` entry of the dataset metadata.

    The dataset is built from three tags with no aggregation options, and the
    metadata is expected to carry an ``x_hist`` dict with three entries.
    """
    dataset_kwargs = {
        "data_provider": MockDataProvider(),
        "tag_list": [
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        "train_start_date": dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        "train_end_date": dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    }

    # With default aggregation there are no extra columns: one per tag.
    dataset = TimeSeriesDataset(**dataset_kwargs)
    X, _ = dataset.get_data()
    assert X.shape == (83, 3)

    x_hist = dataset.get_metadata()["x_hist"]
    assert isinstance(x_hist, dict)
    assert len(x_hist) == 3