示例#1
0
def test_futures_to_delayed_bag(c):
    """Scattered list futures can be assembled into a dask bag.

    Skipped when ``dask.bag`` cannot be imported.
    """
    bag_module = pytest.importorskip("dask.bag")
    values = [1, 2, 3]

    scattered = c.scatter([values, values])
    bag = bag_module.from_delayed(scattered)
    # Iterating the bag should yield both partitions back to back.
    assert list(bag) == values + values
def test_unpublish_sync(client):
    """Fetching a dataset after it has been unpublished raises ``KeyError``.

    The error message is expected to contain both "not found" and the
    dataset name.
    """
    futures = client.scatter([0, 1, 2])
    client.publish_dataset(data=futures)
    client.unpublish_dataset(name="data")

    with pytest.raises(KeyError) as raised:
        client.get_dataset(name="data")

    message = str(raised.value)
    assert "not found" in message
    assert "data" in message
示例#3
0
def test_unpublish_sync(client):
    """Check that a dataset cannot be fetched once it has been unpublished.

    Scatters a small list, publishes it with the keyword ``data``,
    unpublishes the name ``"data"``, and then expects ``get_dataset`` to
    raise ``KeyError`` whose message contains both "not found" and the
    dataset name.
    """
    data = client.scatter([0, 1, 2])
    client.publish_dataset(data=data)
    client.unpublish_dataset(name="data")

    with pytest.raises(KeyError) as exc_info:
        # The return value is irrelevant here: the call itself must raise.
        client.get_dataset(name="data")

    assert "not found" in str(exc_info.value)
    assert "data" in str(exc_info.value)
示例#4
0
def test_futures_to_delayed_dataframe(c):
    """Build a dask dataframe from scattered pandas futures.

    Skipped when ``pandas`` or ``dask.dataframe`` cannot be imported.
    The computed result must equal the two source frames concatenated
    along axis 0, and passing plain integers to ``from_delayed`` must
    raise ``TypeError``.
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")
    df = pd.DataFrame({"x": [1, 2, 3]})

    futures = c.scatter([df, df])
    ddf = dd.from_delayed(futures)
    dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0))

    with pytest.raises(TypeError):
        # Only the raised exception matters, so the result is not bound.
        dd.from_delayed([1, 2])
示例#5
0
def test_futures_to_delayed_array(c):
    """Scattered numpy futures can be concatenated into a dask array.

    Skipped when ``dask.array`` or ``numpy`` cannot be imported.
    """
    da = pytest.importorskip("dask.array")
    from dask.array.utils import assert_eq

    np = pytest.importorskip("numpy")
    chunk = np.arange(5)

    # Wrap each scattered future as a dask array with the known shape/dtype.
    pieces = []
    for future in c.scatter([chunk, chunk]):
        pieces.append(da.from_delayed(future, shape=chunk.shape, dtype=chunk.dtype))

    combined = da.concatenate(pieces, axis=0)
    computed = combined.compute()
    expected = np.concatenate([chunk, chunk], axis=0)
    assert_eq(computed, expected)
示例#6
0
def test_futures_to_delayed_dataframe(c):
    """Build a dask dataframe from scattered pandas futures.

    Skipped when ``pandas`` or ``dask.dataframe`` cannot be imported, and
    also when the ``_numpy_120`` compatibility flag is set (see the linked
    issue in the skip message). The computed result must equal the two
    source frames concatenated along axis 0, and passing plain integers to
    ``from_delayed`` must raise ``TypeError``.
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    from dask.array.numpy_compat import _numpy_120

    if _numpy_120:
        pytest.skip("https://github.com/dask/dask/issues/7170")

    df = pd.DataFrame({"x": [1, 2, 3]})

    futures = c.scatter([df, df])
    ddf = dd.from_delayed(futures)
    dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0))

    with pytest.raises(TypeError):
        # Only the raised exception matters, so the result is not bound.
        dd.from_delayed([1, 2])
示例#7
0
def test_blockwise_dataframe_io(c, tmpdir, io, fuse, from_futures):
    """Round-trip a dataframe through an IO backend and check optimization.

    A 15-row frame is turned into a three-partition dask dataframe (either
    from scattered futures or via ``from_pandas``), written to ``tmpdir``
    and read back with the backend selected by ``io``. Column ``x`` plus 10
    is then computed, the optimized graph type is checked against ``fuse``,
    and the result is compared with the equivalent pandas computation.

    Parameters
    ----------
    c : client object providing ``scatter`` (presumably a fixture; not
        visible in this chunk)
    tmpdir : temporary directory object supporting ``str()`` and ``join``
    io : one of ``"parquet-pyarrow"``, another ``"parquet..."`` value
        (handled with fastparquet), ``"csv"`` or ``"hdf"``
    fuse : value for the ``optimization.fuse.active`` config option
    from_futures : if truthy, build the input from scattered partitions

    Raises
    ------
    ValueError
        If ``io`` is not one of the handled backends.
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    # TODO: this configuration is flaky on osx in CI
    # See https://github.com/dask/dask/issues/8816
    if from_futures and sys.platform == "darwin":
        pytest.xfail("This test sometimes fails on osx in CI")

    df = pd.DataFrame({"x": [1, 2, 3] * 5, "y": range(15)})

    if from_futures:
        parts = [df.iloc[:5], df.iloc[5:10], df.iloc[10:15]]
        futs = c.scatter(parts)
        ddf0 = dd.from_delayed(futs, meta=parts[0])
    else:
        ddf0 = dd.from_pandas(df, npartitions=3)

    if io.startswith("parquet"):
        if io == "parquet-pyarrow":
            pytest.importorskip("pyarrow.parquet")
            engine = "pyarrow"
        else:
            pytest.importorskip("fastparquet")
            engine = "fastparquet"
        ddf0.to_parquet(str(tmpdir), engine=engine)
        ddf = dd.read_parquet(str(tmpdir), engine=engine)
    elif io == "csv":
        ddf0.to_csv(str(tmpdir), index=False)
        ddf = dd.read_csv(os.path.join(str(tmpdir), "*"))
    elif io == "hdf":
        pytest.importorskip("tables")
        fn = str(tmpdir.join("h5"))
        ddf0.to_hdf(fn, "/data*")
        ddf = dd.read_hdf(fn, "/data*")
    else:
        # Without this branch ``ddf`` would be unbound below and the test
        # would die with an opaque UnboundLocalError.
        raise ValueError(f"Unsupported io backend: {io!r}")

    df = df[["x"]] + 10
    ddf = ddf[["x"]] + 10
    with dask.config.set({"optimization.fuse.active": fuse}):
        # Result is discarded; this only checks that compute() succeeds.
        ddf.compute()
        dsk = dask.dataframe.optimize(ddf.dask, ddf.__dask_keys__())
        # dsk should not be a dict unless fuse is explicitly True
        assert isinstance(dsk, dict) == bool(fuse)

        dd.assert_eq(ddf, df, check_index=False)
示例#8
0
def test_futures_to_delayed_bag(c):
    """Scattered list futures can be assembled into a dask bag.

    Relies on ``db`` being available from the enclosing scope.
    """
    values = [1, 2, 3]

    scattered = c.scatter([values, values])
    bag = db.from_delayed(scattered)
    # Iterating the bag should yield both partitions back to back.
    assert list(bag) == values + values