Exemplo n.º 1
0
def test_faked_DataLakeBackedDataset(MockDataset):
    """Load data through a dataset backed by a ``DataLakeProvider``.

    The ``MockDataset`` fixture is requested but never referenced in the
    body; presumably it patches the real data lake access -- confirm against
    the fixture definition.
    """
    lake_provider = DataLakeProvider(
        storename="dataplatformdlsprod",
        interactive=True,
    )
    faked_dataset = TimeSeriesDataset(data_provider=lake_provider, **CONFIG)

    # Within the test suite, fetching data must not trigger an
    # authentication prompt. Unpacking also checks that a pair is returned.
    features, targets = faked_dataset.get_data()
Exemplo n.º 2
0
def test_timeseries_target_tags(tag_list, target_tag_list):
    """Verify that X and y columns follow ``tag_list`` / ``target_tag_list``.

    When ``target_tag_list`` is empty, the targets are expected to fall back
    to ``tag_list``. Column order must match the order of the tags.
    """
    period_start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    period_end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")
    dataset = TimeSeriesDataset(
        period_start,
        period_end,
        tag_list=tag_list,
        target_tag_list=target_tag_list,
        data_provider=MockDataProvider(),
    )
    features, targets = dataset.get_data()

    assert len(features) == len(targets)

    # An empty target_tag_list means the targets default to tag_list.
    expected_target_tags = target_tag_list or tag_list
    assert targets.shape[1] == len(expected_target_tags)

    # Ensure the order is maintained for the targets.
    expected_target_names = [tag.name for tag in expected_target_tags]
    assert expected_target_names == targets.columns.tolist()

    # Features should match the tag_list.
    assert features.shape[1] == len(tag_list)

    # Ensure the order is maintained for the features.
    expected_feature_names = [tag.name for tag in tag_list]
    assert expected_feature_names == features.columns.tolist()
Exemplo n.º 3
0
def test_trigger_tags(mock_tag_normalizer):
    """Check that tags used only in ``row_filter`` are requested but not returned.

    The filter references "Tag 3" and "Tag 4", which appear in neither the
    feature nor the target tags. The provider is expected to be asked for all
    five tags, while X holds only the feature tags and y only the target tag.
    """
    feature_tags = [
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
    ]
    target_tags = [
        SensorTag("Tag 5", None),
    ]
    train_start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    train_end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

    data_provider = MockDataProvider()
    dataset = TimeSeriesDataset(
        data_provider=data_provider,
        tag_list=feature_tags,
        target_tag_list=target_tags,
        train_start_date=train_start,
        train_end_date=train_end,
        row_filter="`Tag 3` > 0 & `Tag 4` > 1",
        tag_normalizer=mock_tag_normalizer,
    )

    features, targets = dataset.get_data()
    assert features is not None
    assert targets is not None

    # Feature, target and filter-only tags must all have been requested.
    requested_tags = set(data_provider.last_tag_list)
    assert requested_tags == {
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
        SensorTag("Tag 3", None),
        SensorTag("Tag 4", None),
        SensorTag("Tag 5", None),
    }

    # Filter-only tags must not leak into the returned frames.
    assert set(features.columns.values) == {"Tag 1", "Tag 2"}
    assert set(targets.columns.values) == {"Tag 5"}
Exemplo n.º 4
0
def test_metadata_statistics():
    """Check the shape of X and the ``x_hist`` metadata for a default dataset.

    A dataset built from three tags with default settings should yield an
    83 x 3 feature frame, and its metadata should carry an ``x_hist`` dict
    with three entries.
    """
    train_start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    train_end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")
    kwargs = {
        "data_provider": MockDataProvider(),
        "tag_list": [
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        "train_start_date": train_start,
        "train_end_date": train_end,
    }

    # Default aggregation gives no extra columns: one column per tag.
    dataset = TimeSeriesDataset(**kwargs)
    X, _ = dataset.get_data()
    assert X.shape == (83, 3)

    metadata = dataset.get_metadata()
    x_hist = metadata["x_hist"]
    assert isinstance(x_hist, dict)
    assert len(x_hist) == 3