def test_from_map_meta():
    # `from_map` should accept a user-supplied `meta`, and
    # `enforce_metadata` should control whether a mismatch between
    # `meta` and the computed data raises

    def make_frame(x, s=0):
        # `s` is supplied by keyword through `from_map`
        return pd.DataFrame({"x": [x] * s})

    inputs = ["A", "B"]
    expected = pd.DataFrame({"x": ["A", "A", "B", "B"]}, index=[0, 1, 0, 1])

    # Case 1: metadata agrees with what the function produces
    good_meta = pd.DataFrame({"x": ["A"]}).iloc[:0]
    ddf = dd.from_map(make_frame, inputs, meta=good_meta, s=2)
    assert_eq(ddf._meta, good_meta)
    assert_eq(ddf, expected)

    # Case 2: metadata names a column ("a") the function never produces;
    # the collection keeps that metadata, but computing must fail
    bad_meta = pd.DataFrame({"a": ["A"]}).iloc[:0]
    ddf = dd.from_map(make_frame, inputs, meta=bad_meta, s=2)
    assert_eq(ddf._meta, bad_meta)
    with pytest.raises(ValueError, match="The columns in the computed data"):
        assert_eq(ddf.compute(), expected)

    # Case 3: same mismatched metadata with `enforce_metadata=False`;
    # the computed result is expected to come back without an error
    ddf = dd.from_map(
        make_frame,
        inputs,
        meta=bad_meta,
        enforce_metadata=False,
        s=2,
    )
    assert_eq(ddf._meta, bad_meta)
    assert_eq(ddf.compute(), expected)
def test_from_map_simple(vals):
    # Smoke test for `from_map`: only the required inputs (a function
    # and an iterable) plus one keyword argument forwarded to the function

    def func(pair, size=0):
        # Build a Series that repeats `value` `size` times, with
        # every row carrying the same index label
        value, label = pair
        return pd.Series([value] * size, index=[label] * size)

    first, second = vals[0], vals[1]
    iterable = [(first, 1), (second, 2)]
    expect = pd.Series(
        [first, first, second, second],
        index=[1, 1, 2, 2],
    )

    ser = dd.from_map(func, iterable, size=2)

    # The graph must hold exactly one layer, and it must be `Blockwise`
    graph_layers = ser.dask.layers
    assert len(graph_layers) == 1
    assert isinstance(graph_layers[ser._name], Blockwise)

    # One partition per input element, and the data must match
    assert ser.npartitions == len(iterable)
    assert_eq(ser, expect)
def test_from_map_args():
    # Values given through the optional `args` argument should be
    # forwarded to the function together with each iterable element

    def add_three(x, y, z):
        return pd.DataFrame({"add": x + y + z})

    chunks = [np.arange(2, dtype="int64"), np.arange(2, dtype="int64")]
    expect = pd.DataFrame(
        {"add": np.array([5, 6, 5, 6], dtype="int64")},
        index=np.array([0, 1, 0, 1], dtype="int64"),
    )

    ddf = dd.from_map(add_three, chunks, args=[2, 3])
    assert_eq(ddf, expect)
def test_from_map_custom_name():
    # The `label` and `token` arguments to `from_map` should
    # together determine the name of the resulting collection

    def repeat_twice(x):
        return pd.DataFrame({"x": [x] * 2})

    label, token = "my-label", "8675309"
    expect = pd.DataFrame({"x": ["A", "A", "B", "B"]}, index=[0, 1, 0, 1])

    ddf = dd.from_map(repeat_twice, ["A", "B"], label=label, token=token)

    # Expected name is "<label>-<token>"
    assert ddf._name == "-".join([label, token])
    assert_eq(ddf, expect)
def test_from_map_multi():
    # More than one iterable may be handed to `from_map`; here the
    # function takes one element from each of the two iterables

    def add_pair(x, y):
        return pd.DataFrame({"add": x + y})

    left = [np.arange(2, dtype="int64"), np.arange(2, dtype="int64")]
    right = [
        np.array([2, 2], dtype="int64"),
        np.array([2, 2], dtype="int64"),
    ]
    expect = pd.DataFrame(
        {"add": np.array([2, 3, 2, 3], dtype="int64")},
        index=np.array([0, 1, 0, 1], dtype="int64"),
    )

    ddf = dd.from_map(add_pair, left, right)
    assert_eq(ddf, expect)
def test_from_map_divisions():
    # The `divisions` argument to `from_map` should be
    # carried over to the resulting collection

    def make_series(x):
        # `x` is a (value, first index label) pair; each
        # partition holds two rows with consecutive labels
        value, start = x
        return pd.Series([value] * 2, index=range(start, start + 2))

    divisions = (0, 2, 4)
    expect = pd.Series(["B", "B", "C", "C"], index=list(range(4)))

    ser = dd.from_map(make_series, [("B", 0), ("C", 2)], divisions=divisions)

    assert ser.divisions == divisions
    assert_eq(ser, expect)
def test_from_map_other_iterables(iterable):
    # `from_map` should accept iterable arguments such as
    # `enumerate` objects and generators
    # See: https://github.com/dask/dask/issues/9064

    def func(t):
        # `t` is a (position, value) pair; the value is
        # repeated `position + 1` times
        position, value = t[0], t[1]
        return pd.Series([value] * (position + 1))

    expect = pd.Series(list("ABBCCC"), index=[0, 0, 1, 0, 1, 2])

    ddf = dd.from_map(func, iterable)
    assert_eq(ddf.compute(), expect)
def test_from_map_column_projection():
    # Column projection should work as expected with
    # `from_map` when `enforce_metadata=True`

    # Filled in by the callable below each time it runs with a
    # column selection, so the test can see which columns were requested
    projected = []

    class ColumnAwareFunc:
        def __init__(self, columns=None):
            self.columns = columns

        def project_columns(self, columns):
            # Hand back a new callable restricted to `columns`
            return ColumnAwareFunc(columns)

        def __call__(self, t):
            n_rows = t[0] + 1
            full = pd.DataFrame({"A": [t[1]] * n_rows, "B": [10] * n_rows})
            if self.columns is not None:
                projected.extend(self.columns)
                return full[self.columns]
            return full

    ddf = dd.from_map(
        ColumnAwareFunc(),
        enumerate([0, 1, 2]),
        label="myfunc",
        enforce_metadata=True,
    )
    expect = pd.DataFrame(
        {"A": [0, 1, 1, 2, 2, 2], "B": [10] * 6},
        index=[0, 0, 1, 0, 1, 2],
    )

    # Selecting "A" must only ever have requested column "A"
    assert_eq(ddf["A"], expect["A"])
    assert set(projected) == {"A"}

    # The unprojected collection must still match the full frame
    assert_eq(ddf, expect)