def test_dataframes(loop):
    """Compare a futures-backed dask DataFrame against a locally built one.

    Ten 100-row pandas frames with random ``x``/``y`` columns and consecutive
    integer indices (0-99, 100-199, ...) are sent through ``e.map`` and
    wrapped with ``futures_to_dask_dataframe``.  The same frames are also
    assembled directly into a ``dd.DataFrame``.  The divisions, the fully
    computed frames and several derived expressions must agree.
    """
    def make_frame(start):
        # 'x' is drawn before 'y', matching the dict-literal evaluation order.
        columns = {'x': np.random.random(100), 'y': np.random.random(100)}
        return pd.DataFrame(columns, index=list(range(start, start + 100)))

    frames = [make_frame(start) for start in range(0, 100 * 10, 100)]

    with cluster() as (info, [a, b]):
        with Executor(('127.0.0.1', info['port']), loop=loop) as e:
            # The identity map just moves each frame onto the cluster.
            futures = e.map(lambda x: x, frames)
            remote_ddf = futures_to_dask_dataframe(futures, divisions=True)

            # Local reference collection: one graph key per partition.
            label = 'foo'
            graph = {(label, pos): frame for pos, frame in enumerate(frames)}
            boundaries = list(range(0, 1000, 100)) + [999]
            local_ddf = dd.DataFrame(graph, label, frames[0].columns,
                                     boundaries)

            assert remote_ddf.divisions == local_ddf.divisions

            remote_result = remote_ddf.compute(get=e.get)
            local_result = local_ddf.compute(get=dask.get)
            tm.assert_frame_equal(remote_result, local_result)

            # Each check is applied to both collections and compared.
            checks = (
                lambda frame: frame.x.mean(),
                lambda frame: frame.y.std(),
                lambda frame: frame.assign(z=frame.x + frame.y).drop_duplicates(),
                lambda frame: frame.index,
                lambda frame: frame.x,
                lambda frame: frame.x.cumsum(),
                lambda frame: frame.loc[50:75],
            )
            for check in checks:
                expected = check(local_ddf).compute(get=dask.get)
                actual = check(remote_ddf).compute(get=e.get)
                assert_equal(expected, actual)
def test_futures_to_dask_dataframe(loop):
    """Check that futures of frames can be wrapped as a dask DataFrame.

    NOTE(review): ``dfs`` is not defined in this function; presumably it is a
    module-level list of pandas DataFrames with an ``x`` column -- confirm.
    """
    with cluster() as (info, [a, b]):
        with Executor(('127.0.0.1', info['port']), loop=loop) as e:
            # The identity map just moves each frame onto the cluster.
            futures = e.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(futures, divisions=True)

            assert isinstance(ddf, dd.DataFrame)

            # The remote sum of ``x`` must match the per-frame sums added up.
            remote_total = ddf.x.sum().compute(get=e.get)
            local_total = sum(frame.x.sum() for frame in dfs)
            assert remote_total == local_total
def test_futures_to_dask_dataframe(loop):
    """Wrap mapped futures in a dask DataFrame and verify the ``x`` total.

    NOTE(review): ``dfs`` comes from outside this function; presumably a
    module-level list of pandas DataFrames with an ``x`` column -- confirm.
    """
    with cluster() as (info, [a, b]):
        address = ('127.0.0.1', info['port'])
        with Executor(address, loop=loop) as e:
            # Identity map: each future holds one of the input frames.
            remote_frames = e.map(lambda x: x, dfs)
            collection = futures_to_dask_dataframe(remote_frames,
                                                   divisions=True)
            assert isinstance(collection, dd.DataFrame)

            computed = collection.x.sum().compute(get=e.get)
            partial_sums = [frame.x.sum() for frame in dfs]
            assert computed == sum(partial_sums)
def test_futures_to_dask_dataframe():
    """Check ``futures_to_dask_dataframe`` on three small indexed frames.

    Three single-column frames (``x`` holding 1..9, indices 0..80 in steps
    of 10) are mapped onto the cluster, wrapped as a dask DataFrame, and the
    remote sum of ``x`` is compared with the sum of the per-frame sums.
    """
    # (values for column 'x', index labels) for each partition.
    partitions = (
        ([1, 2, 3], [0, 10, 20]),
        ([4, 5, 6], [30, 40, 50]),
        ([7, 8, 9], [60, 70, 80]),
    )
    dfs = [pd.DataFrame({'x': values}, index=labels)
           for values, labels in partitions]

    with cluster() as (info, [a, b]):
        with Executor(('127.0.0.1', info['port'])) as e:
            # The identity map just moves each frame onto the cluster.
            futures = e.map(lambda x: x, dfs)
            # This variant takes the executor as the first argument.
            ddf = futures_to_dask_dataframe(e, futures, divisions=True)

            assert isinstance(ddf, dd.DataFrame)

            remote_total = ddf.x.sum().compute(get=e.get)
            local_total = sum(frame.x.sum() for frame in dfs)
            assert remote_total == local_total
def test_futures_to_dask_dataframe(loop):
    """Check ``futures_to_dask_dataframe`` and ``futures_to_collection`` agree.

    The frames in ``dfs`` are mapped onto the cluster, wrapped as a dask
    DataFrame, and the remote sum of ``x`` is compared with the sum of the
    per-frame sums.  The same futures are then passed to
    ``futures_to_collection``, which must yield a collection of the same
    type with an equal task graph.

    NOTE(review): ``dfs`` is not defined in this function; presumably it is a
    module-level list of pandas DataFrames with an ``x`` column -- confirm.
    """
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            # The identity map just moves each frame onto the cluster.
            remote_dfs = c.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(remote_dfs, divisions=True)

            assert isinstance(ddf, dd.DataFrame)
            assert ddf.x.sum().compute(get=c.get) == sum(
                [df.x.sum() for df in dfs])

            ddf2 = futures_to_collection(remote_dfs, divisions=True)
            # Exact-type check: identity is the idiomatic comparison for
            # type objects (pycodestyle E721).
            assert type(ddf) is type(ddf2)
            assert ddf.dask == ddf2.dask
def test_futures_to_dask_dataframe(loop):
    """Check ``futures_to_dask_dataframe`` and ``futures_to_collection`` agree.

    The frames in ``dfs`` are mapped onto the cluster, wrapped as a dask
    DataFrame, and the remote sum of ``x`` is compared with the sum of the
    per-frame sums.  The same futures are then passed to
    ``futures_to_collection``, which must yield a collection of the same
    type with an equal task graph.

    NOTE(review): ``dfs`` is not defined in this function; presumably it is a
    module-level list of pandas DataFrames with an ``x`` column -- confirm.
    """
    # The cluster info is bound to ``s`` so that the client bound to ``c``
    # below does not shadow it.
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            # The identity map just moves each frame onto the cluster.
            remote_dfs = c.map(lambda x: x, dfs)
            ddf = futures_to_dask_dataframe(remote_dfs, divisions=True)

            assert isinstance(ddf, dd.DataFrame)
            assert ddf.x.sum().compute(get=c.get) == sum(
                [df.x.sum() for df in dfs])

            ddf2 = futures_to_collection(remote_dfs, divisions=True)
            # Exact-type check: identity is the idiomatic comparison for
            # type objects (pycodestyle E721).
            assert type(ddf) is type(ddf2)
            assert ddf.dask == ddf2.dask