示例#1
0
def test_from_map_meta():
    # Test that `meta` can be specified to `from_map`,
    # and that `enforce_metadata` works as expected

    func = lambda x, s=0: pd.DataFrame({"x": [x] * s})
    iterable = ["A", "B"]

    expect = pd.DataFrame({"x": ["A", "A", "B", "B"]}, index=[0, 1, 0, 1])

    # First Check - Pass in valid metadata
    meta = pd.DataFrame({"x": ["A"]}).iloc[:0]
    ddf = dd.from_map(func, iterable, meta=meta, s=2)
    assert_eq(ddf._meta, meta)
    assert_eq(ddf, expect)

    # Second Check - Pass in invalid metadata
    meta = pd.DataFrame({"a": ["A"]}).iloc[:0]
    ddf = dd.from_map(func, iterable, meta=meta, s=2)
    assert_eq(ddf._meta, meta)
    with pytest.raises(ValueError, match="The columns in the computed data"):
        assert_eq(ddf.compute(), expect)

    # Third Check - Pass in invalid metadata,
    # but use `enforce_metadata=False`
    ddf = dd.from_map(func, iterable, meta=meta, enforce_metadata=False, s=2)
    assert_eq(ddf._meta, meta)
    assert_eq(ddf.compute(), expect)
示例#2
0
def test_from_map_simple(vals):
    # Simple test to ensure required inputs (func & iterable)
    # and basic kwargs work as expected for `from_map`

    def func(input, size=0):
        # Simple function to create Series with a
        # repeated value and index
        value, index = input
        return pd.Series([value] * size, index=[index] * size)

    iterable = [(vals[0], 1), (vals[1], 2)]
    ser = dd.from_map(func, iterable, size=2)
    expect = pd.Series(
        [vals[0], vals[0], vals[1], vals[1]],
        index=[1, 1, 2, 2],
    )

    # Make sure `from_map` produces single `Blockwise` layer
    layers = ser.dask.layers
    assert len(layers) == 1
    assert isinstance(layers[ser._name], Blockwise)

    # Check that result and partition count make sense
    assert ser.npartitions == len(iterable)
    assert_eq(ser, expect)
示例#3
0
def test_from_map_args():
    # Test that the optional `args` argument works as expected

    func = lambda x, y, z: pd.DataFrame({"add": x + y + z})
    iterable = [np.arange(2, dtype="int64"), np.arange(2, dtype="int64")]
    index = np.array([0, 1, 0, 1], dtype="int64")
    expect = pd.DataFrame({"add": np.array([5, 6, 5, 6], dtype="int64")}, index=index)

    ddf = dd.from_map(func, iterable, args=[2, 3])
    assert_eq(ddf, expect)
示例#4
0
def test_from_map_custom_name():
    # Test that `label` and `token` arguments to
    # `from_map` works as expected

    func = lambda x: pd.DataFrame({"x": [x] * 2})
    iterable = ["A", "B"]
    label = "my-label"
    token = "8675309"
    expect = pd.DataFrame({"x": ["A", "A", "B", "B"]}, index=[0, 1, 0, 1])

    ddf = dd.from_map(func, iterable, label=label, token=token)
    assert ddf._name == label + "-" + token
    assert_eq(ddf, expect)
示例#5
0
def test_from_map_multi():
    # Test that `iterables` can contain multiple Iterables

    func = lambda x, y: pd.DataFrame({"add": x + y})
    iterables = (
        [np.arange(2, dtype="int64"), np.arange(2, dtype="int64")],
        [np.array([2, 2], dtype="int64"), np.array([2, 2], dtype="int64")],
    )
    index = np.array([0, 1, 0, 1], dtype="int64")
    expect = pd.DataFrame({"add": np.array([2, 3, 2, 3], dtype="int64")}, index=index)

    ddf = dd.from_map(func, *iterables)
    assert_eq(ddf, expect)
示例#6
0
def test_from_map_divisions():
    # Test that `divisions` argument works as expected for `from_map`

    func = lambda x: pd.Series([x[0]] * 2, index=range(x[1], x[1] + 2))
    iterable = [("B", 0), ("C", 2)]
    divisions = (0, 2, 4)
    ser = dd.from_map(func, iterable, divisions=divisions)
    expect = pd.Series(
        ["B", "B", "C", "C"],
        index=[0, 1, 2, 3],
    )

    assert ser.divisions == divisions
    assert_eq(ser, expect)
示例#7
0
def test_from_map_other_iterables(iterable):
    # Test that iterable arguments to `from_map`
    # can be enumerate and generator
    # See: https://github.com/dask/dask/issues/9064

    def func(t):
        size = t[0] + 1
        x = t[1]
        return pd.Series([x] * size)

    ddf = dd.from_map(func, iterable)
    expect = pd.Series(
        ["A", "B", "B", "C", "C", "C"],
        index=[0, 0, 1, 0, 1, 2],
    )
    assert_eq(ddf.compute(), expect)
示例#8
0
def test_from_map_column_projection():
    # Test that column projection works
    # as expected with from_map when
    # enforce_metadata=True

    projected = []

    class MyFunc:
        def __init__(self, columns=None):
            self.columns = columns

        def project_columns(self, columns):
            return MyFunc(columns)

        def __call__(self, t):
            size = t[0] + 1
            x = t[1]
            df = pd.DataFrame({"A": [x] * size, "B": [10] * size})
            if self.columns is None:
                return df
            projected.extend(self.columns)
            return df[self.columns]

    ddf = dd.from_map(
        MyFunc(),
        enumerate([0, 1, 2]),
        label="myfunc",
        enforce_metadata=True,
    )
    expect = pd.DataFrame(
        {
            "A": [0, 1, 1, 2, 2, 2],
            "B": [10] * 6,
        },
        index=[0, 0, 1, 0, 1, 2],
    )
    assert_eq(ddf["A"], expect["A"])
    assert set(projected) == {"A"}
    assert_eq(ddf, expect)