示例#1
0
def test_InstanceSplitter(start, target, lead_time: int, is_train: bool,
                          pick_incomplete: bool):
    """Check instance counts and field lengths produced by ``InstanceSplitter``.

    The splitter is built with ``past_length=100``, ``future_length=13``, the
    given ``lead_time`` / ``pick_incomplete`` and a
    ``UniformSplitSampler(p=1.0)``, then applied to a single data entry.

    When not training, with ``pick_incomplete`` disabled and a target shorter
    than the past window, the transform is expected to raise
    ``AssertionError`` and nothing else is checked.
    """
    past_len = 100
    future_len = 13

    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=past_len,
        future_length=future_len,
        lead_time=lead_time,
        time_series_fields=["some_time_feature"],
        pick_incomplete=pick_incomplete,
    )
    assert_serializable(splitter)

    # The time feature is deliberately 100 steps longer than the target.
    entry = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    # Guard clause: inference on a too-short series without incomplete
    # windows must fail, so only the raised error is verified.
    if not (is_train or pick_incomplete) and len(target) < past_len:
        with pytest.raises(AssertionError):
            list(splitter.flatmap_transform(entry, is_train=is_train))
        return

    instances = list(splitter.flatmap_transform(entry, is_train=is_train))

    if is_train:
        # Without incomplete windows a full past window is required as well.
        required_history = 0 if pick_incomplete else past_len
        expected_count = max(
            0,
            len(target) - future_len - lead_time + 1 - required_history,
        )
    else:
        expected_count = 1
    assert len(instances) == expected_count

    # The future target is only populated in training mode.
    expected_future_target_len = future_len if is_train else 0

    for instance in instances:
        # Split fields are replaced by past_/future_ versions; unrelated
        # columns are carried through.
        assert "target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance

        assert len(instance["past_some_time_feature"]) == past_len
        assert len(instance["past_target"]) == past_len

        assert len(instance["future_target"]) == expected_future_target_len
        assert len(instance["future_some_time_feature"]) == future_len
示例#2
0
def test_CanonicalInstanceSplitter(
    start,
    target,
    is_train: bool,
    use_prediction_features: bool,
    allow_target_padding: bool,
):
    """Check instance counts and field lengths of ``CanonicalInstanceSplitter``.

    The sampler depends on the mode: ``UniformSplitSampler(p=1.0,
    min_past=100)`` for training, otherwise ``ValidationSplitSampler`` when
    target padding is allowed and ``TestSplitSampler`` when it is not. The
    splitter uses ``instance_length=100`` and ``prediction_length=13``.
    """
    window_len = 100
    horizon = 13

    if is_train:
        sampler = transform.UniformSplitSampler(p=1.0, min_past=window_len)
    elif allow_target_padding:
        sampler = transform.ValidationSplitSampler()
    else:
        sampler = transform.TestSplitSampler()

    splitter = transform.CanonicalInstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=sampler,
        instance_length=window_len,
        prediction_length=horizon,
        time_series_fields=["some_time_feature"],
        allow_target_padding=allow_target_padding,
        use_prediction_features=use_prediction_features,
    )
    assert_serializable(splitter)

    # The time feature is deliberately 100 steps longer than the target.
    entry = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    instances = list(splitter.flatmap_transform(entry, is_train=is_train))

    # NOTE: this floor is only consulted on the training path below, where
    # the condition makes it evaluate to 0.
    floor_count = 0 if (is_train or not allow_target_padding) else 1
    if is_train:
        expected_count = max(floor_count, len(target) - window_len + 1)
    else:
        expected_count = 1
    assert len(instances) == expected_count

    # Future time features are only checked for inference with prediction
    # features enabled.
    check_future_features = use_prediction_features and not is_train

    for instance in instances:
        assert "target" not in instance
        assert "future_target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance

        assert len(instance["past_some_time_feature"]) == window_len
        assert len(instance["past_target"]) == window_len

        if check_future_features:
            assert len(instance["future_some_time_feature"]) == horizon
示例#3
0
def test_CanonicalInstanceSplitter(
    start, target, is_train, use_prediction_features, allow_target_padding
):
    """Check instance counts and field lengths of ``CanonicalInstanceSplitter``.

    The splitter always uses ``UniformSplitSampler(p=1.0)`` with
    ``instance_length=100`` and ``prediction_length=13``, and is applied to a
    single data entry in either training or inference mode.
    """
    window_len = 100
    horizon = 13

    field_names = transform.FieldName
    splitter = transform.CanonicalInstanceSplitter(
        target_field=field_names.TARGET,
        is_pad_field=field_names.IS_PAD,
        start_field=field_names.START,
        forecast_start_field=field_names.FORECAST_START,
        instance_sampler=transform.UniformSplitSampler(p=1.0),
        instance_length=window_len,
        prediction_length=horizon,
        time_series_fields=['some_time_feature'],
        allow_target_padding=allow_target_padding,
        use_prediction_features=use_prediction_features,
    )
    assert_serializable(splitter)

    # The time feature is deliberately 100 steps longer than the target.
    entry = {
        'start': start,
        'target': target,
        'some_time_feature': np.arange(len(target) + 100),
        'some_other_col': 'ABC',
    }

    instances = list(splitter.flatmap_transform(entry, is_train=is_train))

    if is_train:
        # With padding allowed, at least one instance is expected even when
        # the target is shorter than the window.
        floor_count = 1 if allow_target_padding else 0
        expected_count = max(floor_count, len(target) - window_len + 1)
    else:
        expected_count = 1
    assert len(instances) == expected_count

    # Future time features are only checked for inference with prediction
    # features enabled.
    check_future_features = use_prediction_features and not is_train

    for instance in instances:
        assert 'target' not in instance
        assert 'future_target' not in instance
        assert 'some_time_feature' not in instance
        assert 'some_other_col' in instance

        assert len(instance['past_some_time_feature']) == window_len
        assert len(instance['past_target']) == window_len

        if check_future_features:
            assert len(instance['future_some_time_feature']) == horizon
示例#4
0
def test_InstanceSplitter(start, target, is_train):
    """Check instance counts and field lengths produced by ``InstanceSplitter``.

    The splitter is built with ``past_length=100``, ``future_length=13``,
    ``pick_incomplete=True`` and a ``UniformSplitSampler(p=1.0)``, then
    applied to a single data entry in training or inference mode.
    """
    past_len = 100
    future_len = 13

    field_names = transform.FieldName
    splitter = transform.InstanceSplitter(
        target_field=field_names.TARGET,
        is_pad_field=field_names.IS_PAD,
        start_field=field_names.START,
        forecast_start_field=field_names.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=past_len,
        future_length=future_len,
        time_series_fields=["some_time_feature"],
        pick_incomplete=True,
    )
    assert_serializable(splitter)

    # The time feature is deliberately 100 steps longer than the target.
    entry = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    instances = list(splitter.flatmap_transform(entry, is_train=is_train))

    expected_count = (
        max(0, len(target) - future_len + 1) if is_train else 1
    )
    assert len(instances) == expected_count

    # The future target is only populated in training mode.
    expected_future_target_len = future_len if is_train else 0

    for instance in instances:
        # Split fields are replaced by past_/future_ versions; unrelated
        # columns are carried through.
        assert "target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance

        assert len(instance["past_some_time_feature"]) == past_len
        assert len(instance["past_target"]) == past_len

        assert len(instance["future_target"]) == expected_future_target_len
        assert len(instance["future_some_time_feature"]) == future_len