Example #1
@pytest.mark.skipif(not s3_creds_exist() or not dask_loaded(),
                    reason="requires s3 credentials")
def test_s3_dataset():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1, )
    ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1],
                                                dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4],
                                               dtype="int32")).all()


@pytest.mark.skipif(not gcp_creds_exist() or not dask_loaded(),
                    reason="requires gcs credentials")
def test_gcs_dataset():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1, )
    ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1],
                                                dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4],
                                               dtype="int32")).all()


@pytest.mark.skipif(not pytorch_loaded() or not dask_loaded(),
                    reason="requires pytorch to be loaded")
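
These skipif guards rely on small helper predicates that the snippet does not define. A minimal sketch of how such checks are commonly implemented; the function names come from the snippet, but the bodies are assumptions rather than the project's actual code:

import importlib.util
import os


def s3_creds_exist():
    # Assumption: standard AWS environment variables indicate usable credentials.
    return bool(os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY"))


def dask_loaded():
    # Assumption: "loaded" means dask is importable in the current environment.
    return importlib.util.find_spec("dask") is not None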
Example #2
        schema=schema,
    )

    ds["first"][0] = np.ones((10, 10))

    pickled_ds = cloudpickle.dumps(ds)
    new_ds = pickle.loads(pickled_ds)
    assert np.all(new_ds["first"][0].compute() == ds["first"][0].compute())


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_pickleability_s3():
    test_pickleability("s3://snark-test/test_dataset_pickle_s3")


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_pickleability_gcs():
    test_pickleability("gcs://snark-test/test_dataset_gcs")


def test_dataset_dynamic_shaped():
    schema = {
        "first":
        Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
            chunks=(100, ),
        )
    }
    ds = Dataset(
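
The example stops at the opening of the Dataset call. A sketch of how the dynamic-shape test plausibly continues, assuming Hub 1.x semantics for None-shaped tensors; the path, sample shape, and final assertion are assumptions:

    ds = Dataset(
        "./data/test_dataset_dynamic",  # assumed local path
        shape=(10,),
        mode="w",
        schema=schema,
    )
    # With shape=(None, None), each sample can take its own shape,
    # bounded by max_shape.
    ds["first"][0] = np.ones((5, 7), dtype="int32")
    assert ds["first"][0].compute().shape == (5, 7)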
Example #3
@pytest.mark.skipif(
    not s3_creds_exist() or not dask_loaded(), reason="requires s3 credentials"
)
def test_s3_dataset():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)
    ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all()


@pytest.mark.skipif(
    not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials"
)
def test_gcs_dataset():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)
    ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all()


@pytest.mark.skipif(
    not pytorch_loaded() or not dask_loaded(), reason="requires pytorch to be loaded"
)
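
Taken together, the assertions pin down what UnknownCountGenerator must emit: input i yields i samples, each "arr" row looks like np.arange(5), and "rra" records a sample's index within its input. A plain-NumPy restatement of the expected stored values, inferred from the assertions rather than from the generator's code:

import numpy as np

# range(1, 3): input 1 emits 1 sample, input 2 emits 2 samples -> 3 total.
expected_rra = np.array([0, 0, 1], dtype="int32")
expected_arr = np.tile(np.arange(5, dtype="int32"), (3, 1))  # shape (3, 5)

assert expected_rra[2] == 1
assert (expected_arr[1] == np.arange(5, dtype="int32")).all()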