示例#1
0
def test_mem_storage_save():
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])

    filename = storage.save(X, y)
    assert filename in list(storage._data.keys())
示例#2
0
def test_mem_storage_load():
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])

    storage.save(X, y)
    X_data, y_data = storage.load(0)
    assert np.array_equal(X_data, X)
    assert np.array_equal(y_data, y)
示例#3
0
def test_normalize_off():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    for batch in storage._data.values():
        # all batches have monotonically increasing numbers (range used to create data)
        assert np.diff(batch["features"][:, 0, 0]).all()  # feature A
        assert np.diff(batch["features"][:, 0, 1]).all()  # feature B
示例#4
0
    def memory_builder_factory(feature_set,
                               look_back,
                               look_forward,
                               batch_size,
                               batch_seconds=1,
                               stride=1,
                               validation_split=0,
                               pseudo_stratify=False,
                               stratify_nbatch_groupings=20,
                               n_workers=None,
                               seed=None,
                               normalize=True,
                               custom_transforms=None,
                               session_norm_filter=None,
                               verbose=False):

        storage_meta = StorageMeta(validation_split=validation_split)
        storage = BatchStorageMemory(storage_meta)
        translate = Translate(feature_set, look_back, look_forward,
                              batch_seconds, stride, normalize, verbose,
                              custom_transforms, session_norm_filter)
        return Builder(storage=storage,
                       translate=translate,
                       batch_size=batch_size,
                       pseudo_stratify=pseudo_stratify,
                       stratify_nbatch_groupings=stratify_nbatch_groupings,
                       verbose=verbose,
                       seed=seed,
                       n_workers=n_workers)
示例#5
0
def test_normalize_on():
    feature_df_list = reduce(add, [[pd.DataFrame({"time": pd.to_datetime(list(range(50)), unit="s"),
                                                  "A": range(1, 51),
                                                  "B": range(101, 151),
                                                  "y": np.ones(50)}),
                                    pd.DataFrame({"time": pd.to_datetime(list(range(50)), unit="s"),
                                                  "A": range(51, 101),
                                                  "B": range(151, 201),
                                                  "y": np.ones(50)})]
                                   for _ in range(5)], [])

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"], look_back=0, look_forward=0, n_seconds=1, normalize=True, verbose=True)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=10,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    tools.assert_almost_equal(translate.scaler.mean_[0], 50, delta=1)
    tools.assert_almost_equal(translate.scaler.mean_[1], 150, delta=1)

    for batch in storage._data.values():
        # all batches have monotonically increasing numbers (range used to create data)
        assert np.diff(batch["features"][:, 0, 0]).all()  # feature A
        assert np.diff(batch["features"][:, 0, 1]).all()  # feature B
示例#6
0
def test_builder_stratify():
    feature_set = sorted(["A", "B"])

    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": np.ones(160),
            "B": np.ones(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=0,
                          look_forward=0,
                          n_seconds=1)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              stratify_nbatch_groupings=3,
                              pseudo_stratify=True)

    batch_generator.generate_and_save_batches(feature_df_list)

    assert batch_generator._stratify
    tools.eq_(len(meta.train.ids), 5)
    tools.eq_(len(meta.validation.ids), 5)
示例#7
0
def test_mem_storage_metadata_val():
    meta = StorageMeta(validation_split=1.0)
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])

    storage.save(X, y)
    storage.save_meta({})
    params = storage.load_meta()
    assert len(params["val_ids"]) == 1
    assert params["val_map"][params["val_ids"][0]] == "IDv_0"
    assert len(params["train_ids"]) == 0
示例#8
0
def test_save_and_load_meta():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160)
        }) for _ in range(1)
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)
    batch_generator.save_meta()

    translate = Translate(features=["A", "B"],
                          look_back=99,
                          look_forward=99,
                          n_seconds=99,
                          normalize=True)
    batch_generator_reload = Builder(storage,
                                     translate,
                                     batch_size=99,
                                     pseudo_stratify=False)
    batch_generator_reload.load_meta()

    tools.eq_(batch_generator.batch_size, batch_generator_reload.batch_size)
    tools.eq_(translate._features, translate._features)
    tools.eq_(translate._look_forward, translate._look_forward)
    tools.eq_(translate._look_back, translate._look_back)
    tools.eq_(translate._n_seconds, translate._n_seconds)
    tools.eq_(translate._normalize, translate._normalize)
示例#9
0
def test_builder_storage_meta_validation():
    feature_set = sorted(["A", "B"])

    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(35)), unit="s"),
            "A": np.ones(35),
            "B": np.ones(35),
            "y": np.ones(35)
        }) for _ in range(1)
    ]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=2,
                          look_forward=1,
                          n_seconds=1)
    batch_generator = Builder(storage, translate, batch_size=16)

    batch_generator.generate_and_save_batches(feature_df_list)

    tools.eq_(len(meta.train.ids), 1)
    tools.eq_(len(meta.validation.ids), 1)
示例#10
0
def test_load_empty_meta():
    BatchStorageMemory(StorageMeta()).load_meta()