Example #1
def test_translate_alone():
    feature_set = sorted(["A", "B"])

    for l in [32, 64, 128]:
        feature_df_list = [
            pd.DataFrame({
                "time": pd.to_datetime(list(range(l)), unit="s"),
                "A": np.array(list(range(l))),
                "B": np.array(list(range(l))),
                "y": np.ones(l)
            }) for _ in range(1)
        ]

        for (look_back, look_forward) in [(3, 2), (1, 0)]:
            custom_transforms = list()
            custom_transforms.append(remove_false_anchors_factory("y"))
            custom_transforms.append(
                split_flat_df_by_time_factory(look_back, look_forward, 1))

            translate = Translate(features=feature_set,
                                  look_back=look_back,
                                  look_forward=look_forward,
                                  n_seconds=1,
                                  custom_transforms=custom_transforms,
                                  normalize=False)

            X, y = translate.scale_and_transform_session(feature_df_list[0])
            tools.eq_(X.shape, (l - (look_back + look_forward),
                                (look_back + look_forward + 1), 2))
            tools.eq_(len(y), l - (look_back + look_forward))

            # first elements should slide forward in time one element at a time
            assert np.array_equal(X[:, 0, 0],
                                  np.arange(l - (look_back + look_forward)))
            # second elements should slide forward one at a time starting at 1
            assert np.array_equal(X[:, 1, 0],
                                  np.arange(l - (look_back + look_forward)) + 1)
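For l = 32 with look_back = 3 and look_forward = 2, for instance, the asserted shape of X is (27, 6, 2): 27 sliding windows of 6 consecutive rows over the 2 features, with 27 corresponding entries in y.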
Example #2
def test_normalize_off():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    for batch in storage._data.values():
        # every batch should hold strictly increasing values (range used to create the data)
        assert (np.diff(batch["features"][:, 0, 0]) > 0).all()  # feature A
        assert (np.diff(batch["features"][:, 0, 1]) > 0).all()  # feature B
Example #3
    def memory_builder_factory(feature_set,
                               look_back,
                               look_forward,
                               batch_size,
                               batch_seconds=1,
                               validation_split=0,
                               pseudo_stratify=False,
                               stratify_nbatch_groupings=20,
                               n_workers=None,
                               seed=None,
                               normalize=True,
                               custom_transforms=None,
                               verbose=False):
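        """Construct a Builder backed by in-memory batch storage (BatchStorageMemory)."""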

        storage_meta = StorageMeta(validation_split=validation_split)
        storage = BatchStorageMemory(storage_meta)
        translate = Translate(feature_set, look_back, look_forward,
                              batch_seconds, normalize, verbose,
                              custom_transforms)
        return Builder(storage=storage,
                       translate=translate,
                       batch_size=batch_size,
                       pseudo_stratify=pseudo_stratify,
                       stratify_nbatch_groupings=stratify_nbatch_groupings,
                       verbose=verbose,
                       seed=seed,
                       n_workers=n_workers)
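
A minimal usage sketch, assuming this factory is exposed as a static method on a helper class (named BatchBuilderFactory here purely for illustration) and that feature_df_list is a list of session DataFrames like the ones in the tests above:

# hypothetical call site; only parameters from the signature above are used
builder = BatchBuilderFactory.memory_builder_factory(feature_set=["A", "B"],
                                                     look_back=3,
                                                     look_forward=2,
                                                     batch_size=16,
                                                     validation_split=0.2,
                                                     normalize=True)
builder.generate_and_save_batches(feature_df_list)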
Example #4
    def s3_builder_factory(s3_bucket_resource,
                           feature_set,
                           look_back,
                           look_forward,
                           batch_size,
                           s3_prefix="",
                           batch_seconds=1,
                           stride=1,
                           validation_split=0,
                           pseudo_stratify=False,
                           stratify_nbatch_groupings=20,
                           n_workers=None,
                           seed=None,
                           normalize=True,
                           custom_transforms=None,
                           session_norm_filter=None,
                           verbose=False):
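        """Construct a Builder backed by S3 batch storage (BatchStorageS3)."""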

        storage_meta = StorageMeta(validation_split=validation_split)
        storage = BatchStorageS3(storage_meta,
                                 s3_bucket_resource=s3_bucket_resource,
                                 s3_prefix=s3_prefix)
        translate = Translate(feature_set, look_back, look_forward,
                              batch_seconds, stride, normalize, verbose,
                              custom_transforms, session_norm_filter)
        return Builder(storage=storage,
                       translate=translate,
                       batch_size=batch_size,
                       pseudo_stratify=pseudo_stratify,
                       stratify_nbatch_groupings=stratify_nbatch_groupings,
                       verbose=verbose,
                       seed=seed,
                       n_workers=n_workers)
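
A similar sketch for the S3-backed variant, again assuming the illustrative BatchBuilderFactory class; the bucket name and prefix are placeholders, and the bucket resource comes from boto3:

import boto3

# hypothetical call site; bucket name and prefix are placeholders
bucket = boto3.resource("s3").Bucket("my-batches-bucket")
builder = BatchBuilderFactory.s3_builder_factory(s3_bucket_resource=bucket,
                                                 feature_set=["A", "B"],
                                                 look_back=3,
                                                 look_forward=2,
                                                 batch_size=16,
                                                 s3_prefix="experiments/run-01/",
                                                 stride=2,
                                                 validation_split=0.2)
builder.generate_and_save_batches(feature_df_list)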
Example #5
def test_normalize_on():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(50)), unit="s"),
            "A": range(a_start, a_start + 50),
            "B": range(b_start, b_start + 50),
            "y": np.ones(50)
        })
        for _ in range(5)
        for a_start, b_start in ((1, 101), (51, 151))
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"], look_back=0, look_forward=0, n_seconds=1, normalize=True, verbose=True)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=10,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    tools.assert_almost_equal(translate.scaler.mean_[0], 50, delta=1)
    tools.assert_almost_equal(translate.scaler.mean_[1], 150, delta=1)

    for batch in storage._data.values():
        # every batch should hold strictly increasing values (range used to create the data)
        assert (np.diff(batch["features"][:, 0, 0]) > 0).all()  # feature A
        assert (np.diff(batch["features"][:, 0, 1]) > 0).all()  # feature B
Example #6
def test_builder_stratify():
    feature_set = sorted(["A", "B"])

    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": np.ones(160),
            "B": np.ones(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=0,
                          look_forward=0,
                          n_seconds=1)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              stratify_nbatch_groupings=3,
                              pseudo_stratify=True)

    batch_generator.generate_and_save_batches(feature_df_list)

    assert batch_generator._stratify
    tools.eq_(len(meta.train.ids), 5)
    tools.eq_(len(meta.validation.ids), 5)
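The asserted counts follow from the setup: 160 rows with look_back = look_forward = 0 give 160 samples, batch_size = 16 gives 10 batches, and validation_split = 0.5 assigns 5 batch ids to train and 5 to validation.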
Example #7
def test_feature_sequencing():
    n = 50
    features = ["A", "B"]
    df = pd.DataFrame({
        "time": range(n),
        "A": np.random.randn(n),
        "B": np.random.randn(n),
        "y": np.random.randint(2, size=n)
    })

    for i in range(10):
        lookback = i
        lookforward = i

        translate = Translate(features, lookback, lookforward)
        X_res, y_res = translate._feature_df_to_nn_input(df)
        tools.eq_(X_res.shape,
                  (n - (lookback + lookforward), lookback + lookforward + 1,
                   len(features)))
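For example, with n = 50 and lookback = lookforward = 3 the asserted shape is (44, 7, 2): 44 windows of 7 consecutive rows across the 2 features.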
Example #8
def test_save_and_load_meta():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)
    batch_generator.save_meta()

    translate_reload = Translate(features=["A", "B"],
                                 look_back=99,
                                 look_forward=99,
                                 n_seconds=99,
                                 normalize=True)
    batch_generator_reload = Builder(storage,
                                     translate_reload,
                                     batch_size=99,
                                     pseudo_stratify=False)
    batch_generator_reload.load_meta()

    # after load_meta() the reloaded builder and translate should match the originals
    tools.eq_(batch_generator.batch_size, batch_generator_reload.batch_size)
    tools.eq_(translate._features, translate_reload._features)
    tools.eq_(translate._look_forward, translate_reload._look_forward)
    tools.eq_(translate._look_back, translate_reload._look_back)
    tools.eq_(translate._n_seconds, translate_reload._n_seconds)
    tools.eq_(translate._normalize, translate_reload._normalize)
Example #9
def test_builder_storage_meta_validation():
    feature_set = sorted(["A", "B"])

    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(35)), unit="s"),
            "A": np.ones(35),
            "B": np.ones(35),
            "y": np.ones(35)
        }) for _ in range(1)
    ]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=2,
                          look_forward=1,
                          n_seconds=1)
    batch_generator = Builder(storage, translate, batch_size=16)

    batch_generator.generate_and_save_batches(feature_df_list)

    tools.eq_(len(meta.train.ids), 1)
    tools.eq_(len(meta.validation.ids), 1)
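Here 35 rows with look_back = 2 and look_forward = 1 give 32 windows, batch_size = 16 gives 2 batches, and validation_split = 0.5 puts 1 batch id in train and 1 in validation.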