def test_indexed_concat():
    """Check concat_indexed_dataframes against pd.concat for both join modes.

    Two pandas frames with partially overlapping integer indexes are
    repartitioned through ``dd.repartition`` (using different boundary lists)
    and concatenated with ``join='inner'`` and ``join='outer'``.  For each
    mode the computed result must have the same columns and the same
    (row values, index label) pairs as ``pd.concat`` on the pandas inputs,
    ignoring row order.

    The test also checks that the sorted contents of ``.dask`` are identical
    for two identical calls and differ between 'inner' and 'outer'.
    """
    A = pd.DataFrame(
        {'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
        index=[1, 2, 3, 4, 6, 7],
    )
    a = dd.repartition(A, [1, 4, 7])

    B = pd.DataFrame(
        {'x': [10, 20, 40, 50, 60, 80]},
        index=[1, 2, 4, 5, 6, 8],
    )
    b = dd.repartition(B, [1, 2, 5, 8])

    for how in ['inner', 'outer']:
        c = concat_indexed_dataframes([a, b], join=how)

        result = c.compute()
        # ``axis`` and ``join`` must be passed by keyword: positional use was
        # deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
        expected = pd.concat([A, B], axis=0, join=how)

        assert list(result.columns) == list(expected.columns)
        # Compare as sorted (row, index) pairs so row order is irrelevant.
        assert sorted(zip(result.values.tolist(),
                          result.index.values.tolist())) == \
            sorted(zip(expected.values.tolist(),
                       expected.index.values.tolist()))

    # Identical calls must yield identical ``.dask`` contents ...
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) == \
        sorted(concat_indexed_dataframes([a, b], join='inner').dask)
    # ... while a different join mode must yield different ones.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) != \
        sorted(concat_indexed_dataframes([a, b], join='outer').dask)
def test_indexed_concat(join):
    """Compare concat_indexed_dataframes with pd.concat for one join mode.

    ``join`` is forwarded unchanged to both ``concat_indexed_dataframes`` and
    ``pd.concat``.  NOTE(review): nothing visible here supplies ``join``;
    presumably a parametrize decorator or fixture provides 'inner'/'outer' --
    confirm against the surrounding file.
    """
    left_pdf = pd.DataFrame(
        {'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
        index=[1, 2, 3, 4, 6, 7],
    )
    left_ddf = dd.repartition(left_pdf, [1, 4, 7])

    right_pdf = pd.DataFrame(
        {'x': [10, 20, 40, 50, 60, 80]},
        index=[1, 2, 4, 5, 6, 8],
    )
    right_ddf = dd.repartition(right_pdf, [1, 2, 5, 8])

    def sorted_graph(how):
        # Build a fresh concatenation each time and sort what ``.dask`` holds.
        combined = concat_indexed_dataframes([left_ddf, right_ddf], join=how)
        return sorted(combined.dask)

    actual = concat_indexed_dataframes([left_ddf, right_ddf], join=join)
    wanted = pd.concat([left_pdf, right_pdf], axis=0, join=join)
    assert eq(actual, wanted)

    # Two identical calls must produce the same sorted ``.dask`` contents.
    first_graph = sorted_graph(join)
    second_graph = sorted_graph(join)
    assert first_graph == second_graph

    # Different join modes must not produce the same sorted ``.dask`` contents.
    inner_graph = sorted_graph('inner')
    outer_graph = sorted_graph('outer')
    assert inner_graph != outer_graph
def test_indexed_concat():
    """Verify concat_indexed_dataframes matches pd.concat for inner and outer.

    Builds two pandas frames whose integer indexes only partly overlap,
    passes each through ``dd.repartition`` with its own boundary list, and
    concatenates them with ``join='inner'`` and ``join='outer'``.  The
    computed result must agree with ``pd.concat`` on column order and on the
    set of (row values, index label) pairs; row order is not compared.

    Finally, the sorted contents of ``.dask`` must be equal across repeated
    identical calls and unequal between the two join modes.
    """
    A = pd.DataFrame(
        {'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
        index=[1, 2, 3, 4, 6, 7],
    )
    a = dd.repartition(A, [1, 4, 7])

    B = pd.DataFrame(
        {'x': [10, 20, 40, 50, 60, 80]},
        index=[1, 2, 4, 5, 6, 8],
    )
    b = dd.repartition(B, [1, 2, 5, 8])

    for how in ['inner', 'outer']:
        c = concat_indexed_dataframes([a, b], join=how)

        result = c.compute()
        # Keyword arguments are required here: pandas 2.0 made every
        # pd.concat parameter after ``objs`` keyword-only (deprecated in 1.3).
        expected = pd.concat([A, B], axis=0, join=how)

        assert list(result.columns) == list(expected.columns)
        # Sort the (row, index) pairs so the comparison ignores row order.
        assert sorted(zip(result.values.tolist(),
                          result.index.values.tolist())) == \
            sorted(zip(expected.values.tolist(),
                       expected.index.values.tolist()))

    # Same inputs and join mode -> same sorted ``.dask`` contents.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) == \
        sorted(concat_indexed_dataframes([a, b], join='inner').dask)
    # Different join mode -> different sorted ``.dask`` contents.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) != \
        sorted(concat_indexed_dataframes([a, b], join='outer').dask)