def test_dataframes(loop):
    """Compare a futures-backed dask DataFrame against a locally built one.

    Ten pandas frames of 100 random rows each (columns ``x`` and ``y``,
    integer indexes covering 0..999 in consecutive runs of 100) are sent
    through ``e.map`` with an identity function and wrapped with
    ``futures_to_dask_dataframe(..., divisions=True)``.  The same frames are
    also assembled directly into a ``dd.DataFrame``.  Both collections must
    report the same divisions, compute to equal frames, and give equal
    results for a handful of derived expressions.

    ``loop`` is forwarded to ``Executor``; presumably a pytest fixture --
    confirm against the test module's fixtures.
    """
    dfs = []
    for start in range(0, 100*10, 100):
        frame = pd.DataFrame({'x': np.random.random(100),
                              'y': np.random.random(100)},
                             index=list(range(start, start + 100)))
        dfs.append(frame)

    with cluster() as (c, [a, b]), \
            Executor(('127.0.0.1', c['port']), loop=loop) as e:
        # Remote side: identity-map the frames into futures, then wrap them.
        remote_dfs = e.map(lambda x: x, dfs)
        rdf = futures_to_dask_dataframe(remote_dfs, divisions=True)

        # Local side: the same partitions keyed as (name, partition_number).
        name = 'foo'
        graph = {(name, i): df for i, df in enumerate(dfs)}
        divisions = list(range(0, 1000, 100)) + [999]
        ldf = dd.DataFrame(graph, name, dfs[0].columns, divisions)

        assert rdf.divisions == ldf.divisions
        tm.assert_frame_equal(rdf.compute(get=e.get),
                              ldf.compute(get=dask.get))

        # Each expression is evaluated locally first, then via the executor.
        exprs = (
            lambda df: df.x.mean(),
            lambda df: df.y.std(),
            lambda df: df.assign(z=df.x + df.y).drop_duplicates(),
            lambda df: df.index,
            lambda df: df.x,
            lambda df: df.x.cumsum(),
            lambda df: df.loc[50:75],
        )
        for expr in exprs:
            local = expr(ldf).compute(get=dask.get)
            remote = expr(rdf).compute(get=e.get)
            assert_equal(local, remote)
def test_futures_to_dask_dataframe(loop):
    """Wrap identity-mapped futures as a dask DataFrame and check a sum.

    The result of ``futures_to_dask_dataframe(..., divisions=True)`` must be
    a ``dd.DataFrame`` whose ``x`` column sum, computed through the executor,
    equals the sum of the per-frame ``x`` sums.

    NOTE(review): ``dfs`` is not defined in this function; it is expected to
    be provided by the enclosing module -- verify.
    """
    with cluster() as (c, [a, b]), \
            Executor(('127.0.0.1', c['port']), loop=loop) as e:
        remote_dfs = e.map(lambda x: x, dfs)
        ddf = futures_to_dask_dataframe(remote_dfs, divisions=True)

        assert isinstance(ddf, dd.DataFrame)

        remote_total = ddf.x.sum().compute(get=e.get)
        local_total = sum(df.x.sum() for df in dfs)
        assert remote_total == local_total
# Example #3
0
def test_futures_to_dask_dataframe(loop):
    """Check that futures of pandas frames convert into a dask DataFrame.

    Frames are identity-mapped through the executor, converted with
    ``futures_to_dask_dataframe(..., divisions=True)``, and the ``x`` column
    sum computed via ``e.get`` is compared with the sum of each frame's own
    ``x`` sum.

    NOTE(review): ``dfs`` comes from outside this function (not visible
    here) -- confirm it is defined at module level.
    """
    with cluster() as (c, [a, b]):
        scheduler_address = ('127.0.0.1', c['port'])
        with Executor(scheduler_address, loop=loop) as e:
            futures = e.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(futures, divisions=True)

            assert isinstance(ddf, dd.DataFrame)

            computed = ddf.x.sum().compute(get=e.get)
            expected = sum(df.x.sum() for df in dfs)
            assert computed == expected
# Example #4
0
def test_futures_to_dask_dataframe():
    """Convert three small remote frames into a dask DataFrame.

    Three single-column (``x``) integer frames with increasing indexes are
    identity-mapped through the executor.  This variant passes the executor
    itself as the first argument to ``futures_to_dask_dataframe``.  The
    resulting object must be a ``dd.DataFrame`` whose ``x`` sum equals the
    sum over the original frames.
    """
    # (values, index) for each frame, in order.
    frame_specs = [
        ([1, 2, 3], [0, 10, 20]),
        ([4, 5, 6], [30, 40, 50]),
        ([7, 8, 9], [60, 70, 80]),
    ]
    dfs = [pd.DataFrame({'x': values}, index=index)
           for values, index in frame_specs]

    with cluster() as (c, [a, b]), \
            Executor(('127.0.0.1', c['port'])) as e:
        remote_dfs = e.map(lambda x: x, dfs)
        ddf = futures_to_dask_dataframe(e, remote_dfs, divisions=True)

        assert isinstance(ddf, dd.DataFrame)

        remote_total = ddf.x.sum().compute(get=e.get)
        local_total = sum(df.x.sum() for df in dfs)
        assert remote_total == local_total
def test_futures_to_dask_dataframe(loop):
    """Check ``futures_to_dask_dataframe`` and ``futures_to_collection`` agree.

    Frames are identity-mapped through a ``Client`` connected to
    ``s['address']`` and converted with
    ``futures_to_dask_dataframe(..., divisions=True)``.  The result must be a
    ``dd.DataFrame`` whose ``x`` sum (computed via ``c.get``) equals the sum
    of the per-frame ``x`` sums.  ``futures_to_collection`` called on the same
    futures must return an object of the same type with an equal ``.dask``
    attribute.

    NOTE(review): ``dfs`` is not defined in this function; it is expected to
    be provided by the enclosing module -- verify.
    """
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            remote_dfs = c.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(remote_dfs, divisions=True)

            assert isinstance(ddf, dd.DataFrame)
            assert ddf.x.sum().compute(get=c.get) == sum(
                df.x.sum() for df in dfs)

            ddf2 = futures_to_collection(remote_dfs, divisions=True)
            # Type objects are compared by identity, not equality.
            assert type(ddf) is type(ddf2)
            assert ddf.dask == ddf2.dask
# Example #6
0
def test_futures_to_dask_dataframe(loop):
    """Check ``futures_to_dask_dataframe`` and ``futures_to_collection`` agree.

    Frames are identity-mapped through a ``Client`` connected to
    ``('127.0.0.1', s['port'])`` and converted with
    ``futures_to_dask_dataframe(..., divisions=True)``.  The result must be a
    ``dd.DataFrame`` whose ``x`` sum (computed via ``c.get``) equals the sum
    of the per-frame ``x`` sums.  ``futures_to_collection`` called on the same
    futures must return an object of the same type with an equal ``.dask``
    attribute.

    NOTE(review): ``dfs`` is not defined in this function; it is expected to
    be provided by the enclosing module -- verify.
    """
    # The cluster info is bound to ``s`` so that the client name ``c`` below
    # does not rebind (shadow) it.
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            remote_dfs = c.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(remote_dfs, divisions=True)

            assert isinstance(ddf, dd.DataFrame)
            assert ddf.x.sum().compute(get=c.get) == sum(
                df.x.sum() for df in dfs)

            ddf2 = futures_to_collection(remote_dfs, divisions=True)
            # Type objects are compared by identity, not equality.
            assert type(ddf) is type(ddf2)
            assert ddf.dask == ddf2.dask