Пример #1
0
    def memory_builder_factory(feature_set,
                               look_back,
                               look_forward,
                               batch_size,
                               batch_seconds=1,
                               validation_split=0,
                               pseudo_stratify=False,
                               stratify_nbatch_groupings=20,
                               n_workers=None,
                               seed=None,
                               normalize=True,
                               custom_transforms=None,
                               verbose=False):
        """Assemble a ``Builder`` whose batches are kept in memory.

        A ``StorageMeta`` (carrying ``validation_split``) is wrapped in a
        ``BatchStorageMemory``; a ``Translate`` is created from the
        feature/window arguments; both are handed to ``Builder`` together
        with the batching options.

        Args:
            feature_set, look_back, look_forward, batch_seconds, normalize,
                verbose, custom_transforms: forwarded positionally, in that
                order, to ``Translate``.
            validation_split: forwarded to ``StorageMeta``.
            batch_size, pseudo_stratify, stratify_nbatch_groupings, seed,
                n_workers: forwarded by keyword to ``Builder`` (``verbose``
                is passed to ``Builder`` as well).

        Returns:
            The configured ``Builder`` instance.
        """
        # Storage first, then the translator - same construction order as
        # before so any constructor side effects happen in the same sequence.
        storage = BatchStorageMemory(
            StorageMeta(validation_split=validation_split))
        translate = Translate(feature_set, look_back, look_forward,
                              batch_seconds, normalize, verbose,
                              custom_transforms)

        builder_options = dict(
            batch_size=batch_size,
            pseudo_stratify=pseudo_stratify,
            stratify_nbatch_groupings=stratify_nbatch_groupings,
            verbose=verbose,
            seed=seed,
            n_workers=n_workers,
        )
        return Builder(storage=storage, translate=translate, **builder_options)
Пример #2
0
    def s3_builder_factory(s3_bucket_resource,
                           feature_set,
                           look_back,
                           look_forward,
                           batch_size,
                           s3_prefix="",
                           batch_seconds=1,
                           stride=1,
                           validation_split=0,
                           pseudo_stratify=False,
                           stratify_nbatch_groupings=20,
                           n_workers=None,
                           seed=None,
                           normalize=True,
                           custom_transforms=None,
                           session_norm_filter=None,
                           verbose=False):
        """Assemble a ``Builder`` whose batches are stored through S3.

        A ``StorageMeta`` (carrying ``validation_split``) is wrapped in a
        ``BatchStorageS3`` bound to ``s3_bucket_resource`` / ``s3_prefix``;
        a ``Translate`` is created from the feature/window arguments; both
        are handed to ``Builder`` together with the batching options.

        Args:
            s3_bucket_resource, s3_prefix: forwarded by keyword to
                ``BatchStorageS3``.
            feature_set, look_back, look_forward, batch_seconds, stride,
                normalize, verbose, custom_transforms, session_norm_filter:
                forwarded positionally, in that order, to ``Translate``.
            validation_split: forwarded to ``StorageMeta``.
            batch_size, pseudo_stratify, stratify_nbatch_groupings, seed,
                n_workers: forwarded by keyword to ``Builder`` (``verbose``
                is passed to ``Builder`` as well).

        Returns:
            The configured ``Builder`` instance.
        """
        # Storage first, then the translator - same construction order as
        # before so any constructor side effects happen in the same sequence.
        storage = BatchStorageS3(
            StorageMeta(validation_split=validation_split),
            s3_bucket_resource=s3_bucket_resource,
            s3_prefix=s3_prefix)
        translate = Translate(feature_set, look_back, look_forward,
                              batch_seconds, stride, normalize, verbose,
                              custom_transforms, session_norm_filter)

        builder_options = dict(
            batch_size=batch_size,
            pseudo_stratify=pseudo_stratify,
            stratify_nbatch_groupings=stratify_nbatch_groupings,
            verbose=verbose,
            seed=seed,
            n_workers=n_workers,
        )
        return Builder(storage=storage, translate=translate, **builder_options)
Пример #3
0
def test_normalize_on():
    """With ``normalize=True`` the scaler means land near 50 (A) and 150 (B).

    Ten frames are fed in: five repetitions of a pair whose ``A`` columns
    cover 1..50 and 51..100 and whose ``B`` columns cover 101..150 and
    151..200.
    """
    def _frame(a_start, b_start):
        # 50 rows at one-second spacing; A and B are consecutive integers.
        return pd.DataFrame({"time": pd.to_datetime(list(range(50)), unit="s"),
                             "A": range(a_start, a_start + 50),
                             "B": range(b_start, b_start + 50),
                             "y": np.ones(50)})

    feature_df_list = []
    for _ in range(5):
        feature_df_list.append(_frame(1, 101))
        feature_df_list.append(_frame(51, 151))

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=True,
                          verbose=True)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=10,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    # index 0 -> feature A, index 1 -> feature B
    for column, expected_mean in enumerate((50, 150)):
        tools.assert_almost_equal(translate.scaler.mean_[column],
                                  expected_mean,
                                  delta=1)

    for batch in storage._data.values():
        # Input columns are integer ramps, so neighbouring rows of a batch
        # must differ; ``diff(...).all()`` checks every step is non-zero.
        for column in (0, 1):  # feature A, feature B
            assert np.diff(batch["features"][:, 0, column]).all()
Пример #4
0
def test_builder_stratify():
    """Pseudo-stratified build of 160 rows with a 0.5 validation split.

    Expects the builder's ``_stratify`` flag to be truthy and the batch ids
    to be divided 5 / 5 between train and validation.
    """
    feature_set = sorted(["A", "B"])

    n_rows = 160
    constant_frame = pd.DataFrame({
        "time": pd.to_datetime(list(range(n_rows)), unit="s"),
        "A": np.ones(n_rows),
        "B": np.ones(n_rows),
        "y": np.ones(n_rows),
    })
    feature_df_list = [constant_frame]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=0,
                          look_forward=0,
                          n_seconds=1)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              stratify_nbatch_groupings=3,
                              pseudo_stratify=True)

    batch_generator.generate_and_save_batches(feature_df_list)

    assert batch_generator._stratify
    train_count = len(meta.train.ids)
    tools.eq_(train_count, 5)
    validation_count = len(meta.validation.ids)
    tools.eq_(validation_count, 5)
Пример #5
0
def test_normalize_off():
    """With ``normalize=False`` every stored batch still has distinct rows.

    A single 160-row frame whose ``A`` and ``B`` columns are ``range(160)``
    is batched in groups of 16.
    """
    n_rows = 160
    ramp_frame = pd.DataFrame({
        "time": pd.to_datetime(list(range(n_rows)), unit="s"),
        "A": range(n_rows),
        "B": range(n_rows),
        "y": np.ones(n_rows),
    })
    feature_df_list = [ramp_frame]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)

    for batch in storage._data.values():
        # Input columns are integer ramps, so neighbouring rows of a batch
        # must differ; ``diff(...).all()`` checks every step is non-zero.
        for column in (0, 1):  # feature A, feature B
            assert np.diff(batch["features"][:, 0, column]).all()
Пример #6
0
def test_mem_storage_save():
    """``save`` on memory storage returns a key that appears in ``_data``."""
    storage = BatchStorageMemory(StorageMeta())
    features = np.array([1, 2, 3])
    labels = np.array([0, 0, 0])

    saved_key = storage.save(features, labels)

    stored_keys = list(storage._data.keys())
    assert saved_key in stored_keys
Пример #7
0
def test_file_storage_save():
    """``save`` on file storage returns a path that exists as a file."""
    storage = BatchStorageFile(StorageMeta(), directory="test")
    features = np.array([1, 2, 3])
    labels = np.array([0, 0, 0])

    saved_path = storage.save(features, labels)

    assert os.path.isfile(saved_path)
Пример #8
0
def test_file_storage_load():
    """A batch saved to file storage is returned unchanged by ``load(0)``."""
    storage = BatchStorageFile(StorageMeta(), directory="test")
    expected_x = np.array([1, 2, 3])
    expected_y = np.array([0, 0, 0])
    storage.save(expected_x, expected_y)

    loaded_x, loaded_y = storage.load(0)

    assert np.array_equal(loaded_x, expected_x)
    assert np.array_equal(loaded_y, expected_y)
Пример #9
0
def test_mem_storage_load():
    """A batch saved to memory storage is returned unchanged by ``load(0)``."""
    storage = BatchStorageMemory(StorageMeta())
    expected_x = np.array([1, 2, 3])
    expected_y = np.array([0, 0, 0])
    storage.save(expected_x, expected_y)

    loaded_x, loaded_y = storage.load(0)

    assert np.array_equal(loaded_x, expected_x)
    assert np.array_equal(loaded_y, expected_y)
Пример #10
0
def test_storage_s3():
    """A batch saved through S3 storage is returned unchanged by ``load(0)``.

    NOTE(review): presumably executed against a mocked S3 set up outside
    this function - confirm before running with real credentials.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="test_bucket")

    meta = StorageMeta()
    bucket = s3.Bucket("test_bucket")
    storage = BatchStorageS3(meta, bucket, "test")

    expected_x = np.array([1, 2, 3])
    expected_y = np.array([0, 0, 0])
    storage.save(expected_x, expected_y)

    loaded_x, loaded_y = storage.load(0)

    assert np.array_equal(loaded_x, expected_x)
    assert np.array_equal(loaded_y, expected_y)
Пример #11
0
def test_file_storage_metadata():
    """File-storage metadata lists one train batch (``ID_0``) and no validation."""
    storage = BatchStorageFile(StorageMeta(), directory="test")
    features = np.array([1, 2, 3])
    labels = np.array([0, 0, 0])

    storage.save(features, labels)
    storage.save_meta({})
    params = storage.load_meta()

    train_ids = params["train_ids"]
    assert len(train_ids) == 1
    first_train_id = train_ids[0]
    assert params["train_map"][first_train_id] == "ID_0"
    assert len(params["val_ids"]) == 0
Пример #12
0
def test_mem_storage_metadata_val():
    """With ``validation_split=1.0`` the only batch is recorded as ``IDv_0``."""
    storage = BatchStorageMemory(StorageMeta(validation_split=1.0))
    features = np.array([1, 2, 3])
    labels = np.array([0, 0, 0])

    storage.save(features, labels)
    storage.save_meta({})
    params = storage.load_meta()

    val_ids = params["val_ids"]
    assert len(val_ids) == 1
    first_val_id = val_ids[0]
    assert params["val_map"][first_val_id] == "IDv_0"
    assert len(params["train_ids"]) == 0
Пример #13
0
def test_s3_storage_metadata():
    """S3-storage metadata lists one train batch (``ID_0``) and no validation.

    The storage is created through ``BatchStorageS3.from_config`` using the
    bucket name rather than a bucket resource.

    NOTE(review): presumably executed against a mocked S3 set up outside
    this function - confirm before running with real credentials.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="test_bucket")

    storage = BatchStorageS3.from_config(StorageMeta(),
                                         "test_bucket",
                                         s3_prefix="test")
    features = np.array([1, 2, 3])
    labels = np.array([0, 0, 0])

    storage.save(features, labels)
    storage.save_meta({})
    params = storage.load_meta()

    train_ids = params["train_ids"]
    assert len(train_ids) == 1
    first_train_id = train_ids[0]
    assert params["train_map"][first_train_id] == "ID_0"
    assert len(params["val_ids"]) == 0
Пример #14
0
def test_save_and_load_meta():
    """Metadata saved by one Builder is restored by ``load_meta`` on another.

    A first Builder/Translate pair generates batches and saves its metadata.
    A second pair, sharing the same storage but constructed with deliberately
    different settings (99s, ``normalize=True``), then calls ``load_meta``.
    The settings of the second pair must end up equal to those of the first.
    """
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160)
        })
    ]

    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"],
                          look_back=0,
                          look_forward=0,
                          n_seconds=1,
                          normalize=False)
    batch_generator = Builder(storage,
                              translate,
                              batch_size=16,
                              pseudo_stratify=False)

    batch_generator.generate_and_save_batches(feature_df_list)
    batch_generator.save_meta()

    # Use a separate name for the second Translate: rebinding ``translate``
    # would make every comparison below compare an object with itself.
    translate_reload = Translate(features=["A", "B"],
                                 look_back=99,
                                 look_forward=99,
                                 n_seconds=99,
                                 normalize=True)
    batch_generator_reload = Builder(storage,
                                     translate_reload,
                                     batch_size=99,
                                     pseudo_stratify=False)
    batch_generator_reload.load_meta()

    tools.eq_(batch_generator.batch_size, batch_generator_reload.batch_size)
    # NOTE(review): assumes ``Builder.load_meta`` restores the settings on the
    # Translate instance the Builder was constructed with - confirm.
    tools.eq_(translate._features, translate_reload._features)
    tools.eq_(translate._look_forward, translate_reload._look_forward)
    tools.eq_(translate._look_back, translate_reload._look_back)
    tools.eq_(translate._n_seconds, translate_reload._n_seconds)
    tools.eq_(translate._normalize, translate_reload._normalize)
Пример #15
0
def test_builder_storage_meta_validation():
    """A 35-row frame with a 0.5 validation split gives 1 train and 1 validation id.

    The translator uses ``look_back=2`` and ``look_forward=1`` and the
    builder a ``batch_size`` of 16.
    """
    feature_set = sorted(["A", "B"])

    n_rows = 35
    constant_frame = pd.DataFrame({
        "time": pd.to_datetime(list(range(n_rows)), unit="s"),
        "A": np.ones(n_rows),
        "B": np.ones(n_rows),
        "y": np.ones(n_rows),
    })
    feature_df_list = [constant_frame]

    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set,
                          look_back=2,
                          look_forward=1,
                          n_seconds=1)
    batch_generator = Builder(storage, translate, batch_size=16)

    batch_generator.generate_and_save_batches(feature_df_list)

    train_count = len(meta.train.ids)
    tools.eq_(train_count, 1)
    validation_count = len(meta.validation.ids)
    tools.eq_(validation_count, 1)
Пример #16
0
def test_file_storage_directory():
    """File storage exposes its ``directory`` and that path exists on disk."""
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    tools.eq_(storage.directory, "test")
    # The former ``, True`` was only the assertion *message*, not an expected
    # value; the condition alone expresses the check.
    assert os.path.exists("test")
Пример #17
0
def test_load_empty_s3_meta():
    """``load_meta`` on S3 storage with nothing saved must not raise.

    NOTE(review): presumably executed against a mocked S3 set up outside
    this function - confirm before running with real credentials.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="test_bucket")

    meta = StorageMeta()
    bucket = s3.Bucket("test_bucket")
    storage = BatchStorageS3(meta, bucket)
    storage.load_meta()
Пример #18
0
def test_load_empty_file_meta():
    """``load_meta`` on file storage must not raise when called right after construction."""
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    storage.load_meta()
Пример #19
0
def test_load_empty_meta():
    """``load_meta`` on memory storage must not raise when called right after construction."""
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    storage.load_meta()