def test_compute_divisions():
    """Exercise ``compute_divisions`` on a collection with unknown divisions.

    The computed divisions are assigned to a copy of the collection, which
    must then report known divisions while still comparing equal to the
    original. A second collection, indexed by ``y``, is expected to emit a
    ``UserWarning`` when its divisions are computed.
    """
    from dask.dataframe.shuffle import compute_divisions

    columns = {
        "x": [1, 2, 3, 4],
        "y": [10, 20, 20, 40],
        "z": [4, 3, 2, 1],
    }
    frame = pd.DataFrame(columns, index=[1, 3, 10, 20])

    # Loading with sort=False is expected to leave the divisions unknown.
    unsorted = dd.from_pandas(frame, npartitions=2, sort=False)
    assert not unsorted.known_divisions

    computed = compute_divisions(unsorted)
    with_divisions = copy(unsorted)
    with_divisions.divisions = computed

    # Divisions are deliberately excluded from the comparison: only the
    # copy has them set.
    assert_eq(unsorted, with_divisions, check_divisions=False)
    assert with_divisions.known_divisions

    # Partitions overlap warning
    # NOTE(review): presumably the repeated 20 in "y" ends up on both sides
    # of the partition boundary -- confirm how from_pandas splits the rows.
    overlapping = dd.from_pandas(frame.set_index("y"), npartitions=2, sort=False)
    with pytest.warns(UserWarning):
        compute_divisions(overlapping)
def test_compute_divisions():
    """Exercise ``compute_divisions`` on a collection with unknown divisions.

    The computed divisions are assigned to a copy of the collection, which
    must then report known divisions while still comparing equal to the
    original.
    """
    from dask.dataframe.shuffle import compute_divisions

    columns = {
        'x': [1, 2, 3, 4],
        'y': [10, 20, 30, 40],
        'z': [4, 3, 2, 1],
    }
    frame = pd.DataFrame(columns, index=[1, 3, 10, 20])

    # Loading with sort=False is expected to leave the divisions unknown.
    unsorted = dd.from_pandas(frame, npartitions=2, sort=False)
    assert not unsorted.known_divisions

    computed = compute_divisions(unsorted)
    with_divisions = copy(unsorted)
    with_divisions.divisions = computed

    # Divisions are deliberately excluded from the comparison: only the
    # copy has them set.
    assert_eq(unsorted, with_divisions, check_divisions=False)
    assert with_divisions.known_divisions
def test_compute_divisions():
    """Exercise ``compute_divisions`` on a collection with unknown divisions.

    The value returned by ``compute_divisions`` for a copy of the collection
    must report known divisions while still comparing equal to the original.
    """
    from dask.dataframe.shuffle import compute_divisions

    columns = {
        "x": [1, 2, 3, 4],
        "y": [10, 20, 20, 40],
        "z": [4, 3, 2, 1],
    }
    frame = pd.DataFrame(columns, index=[1, 3, 10, 20])

    # Loading with sort=False is expected to leave the divisions unknown.
    unsorted = dd.from_pandas(frame, npartitions=2, sort=False)
    assert not unsorted.known_divisions

    # NOTE(review): this relies on compute_divisions returning a collection
    # (not a bare divisions tuple) -- confirm against the dask version in use.
    duplicate = copy(unsorted)
    with_divisions = compute_divisions(duplicate)

    # Divisions are deliberately excluded from the comparison: only the
    # returned collection has them set.
    assert_eq(unsorted, with_divisions, check_divisions=False)
    assert with_divisions.known_divisions