示例#1
0
def test_cupy_matmul():
    """Check that `@` gives the same result when cupy operands are proxied."""
    cupy = pytest.importorskip("cupy")
    lhs = cupy.arange(10)
    rhs = cupy.arange(10)
    expected = lhs @ rhs
    # Proxy on the left-hand side only
    assert expected == proxy_object.asproxy(lhs) @ rhs
    # Proxy on the right-hand side only
    assert expected == lhs @ proxy_object.asproxy(rhs)
    # Proxies on both sides
    assert expected == proxy_object.asproxy(lhs) @ proxy_object.asproxy(rhs)
示例#2
0
def test_pandas():
    """Check that dask's dataframe `concat` accepts proxied pandas objects.

    Both DataFrame and Series inputs are concatenated with the proxy placed
    in either position, and the result is compared with the un-proxied result.
    """
    pandas = pytest.importorskip("pandas")
    concat = dask.dataframe.methods.concat

    # --- DataFrame inputs ---
    left = pandas.DataFrame({"a": range(10)})
    right = pandas.DataFrame({"a": range(10)})

    expected = concat([left, right])
    assert_frame_equal(expected, concat([left, right]))
    assert_frame_equal(expected, concat([proxy_object.asproxy(left), right]))
    assert_frame_equal(expected, concat([left, proxy_object.asproxy(right)]))

    # --- Series inputs ---
    left = pandas.Series(range(10))
    right = pandas.Series(range(10))

    expected = concat([left, right])
    assert all(expected == concat([left, right]))
    assert all(expected == concat([proxy_object.asproxy(left), right]))
    assert all(expected == concat([left, proxy_object.asproxy(right)]))
示例#3
0
def test_proxy_object_serializer():
    """Check the `serializers` argument of `asproxy()`.

    A `DummyObj` proxied with ("dask", "pickle") is expected to report
    "pickle" as its serializer, and proxying a list with explicit serializers
    is expected to raise `ValueError`.
    """
    pxy = proxy_object.asproxy(DummyObj(), serializers=("dask", "pickle"))
    assert pxy._pxy_get().serializer == "pickle"
    assert "DummyObj (serialized='pickle')" in repr(pxy)

    # Use `match=` so the message is actually verified: an assert placed
    # inside the `with` body after the raising call would never execute.
    with pytest.raises(ValueError, match="Cannot wrap a collection"):
        proxy_object.asproxy([42], serializers=("dask", "pickle"))
示例#4
0
def test_concatenate3_of_proxied_cupy_arrays():
    """Check that `concatenate3` handles proxied cupy arrays like plain ones."""
    from dask.array.core import concatenate3

    cupy = pytest.importorskip("cupy")
    base = cupy.arange(10)
    first = proxy_object.asproxy(base.copy())
    second = proxy_object.asproxy(base.copy())
    # Compare element-wise against the concatenation of un-proxied copies.
    assert all(
        concatenate3([first, second]) == concatenate3([base.copy(), base.copy()])
    )
示例#5
0
def test_tensordot_of_proxied_cupy_arrays():
    """Check that `dask.array.tensordot` accepts proxied cupy arrays."""
    cupy = pytest.importorskip("cupy")

    base = cupy.arange(9).reshape((3, 3))
    lhs = proxy_object.asproxy(base.copy())
    rhs = proxy_object.asproxy(base.copy())
    # Flatten both results so they can be compared element by element.
    got = dask.array.tensordot(lhs, rhs).flatten()
    expected = dask.array.tensordot(base.copy(), base.copy()).flatten()
    assert all(got == expected)
示例#6
0
def test_double_proxy_object(serializers_first, serializers_second):
    """Check `asproxy()` when it is applied to an existing proxy object."""
    first = proxy_object.asproxy(list(range(10)), serializers=serializers_first)
    assert first._obj_pxy["serializers"] == serializers_first

    second = proxy_object.asproxy(first, serializers=serializers_second)
    # `serializers=None` must leave the initial serializers untouched;
    # any other value is expected to replace them.
    expected = (
        serializers_first if serializers_second is None else serializers_second
    )
    assert second._obj_pxy["serializers"] == expected
    # Re-proxying must hand back the very same proxy instance.
    assert first is second
示例#7
0
def test_cupy_imatmul():
    """Check in-place `@=` with a proxied cupy array on either side."""
    cupy = pytest.importorskip("cupy")
    base = cupy.arange(9).reshape(3, 3)
    expected = base.copy()
    expected @= base

    # Plain array on the left, proxy on the right
    plain_target = base.copy()
    plain_target @= proxy_object.asproxy(base)
    assert (plain_target == expected).all()

    # Proxy on the left, plain array on the right
    proxied_target = proxy_object.asproxy(base.copy())
    proxied_target @= base
    assert (proxied_target == expected).all()
示例#8
0
def test_double_proxy_object(serializers_first, serializers_second):
    """Check `asproxy()` when it is applied to an existing proxy object."""
    # Only the first entry of each serializers sequence is compared below.
    first_name = serializers_first[0] if serializers_first else None
    second_name = serializers_second[0] if serializers_second else None

    first = proxy_object.asproxy(
        bytearray(range(10)), serializers=serializers_first
    )
    assert first._pxy_get().serializer == first_name

    second = proxy_object.asproxy(first, serializers=serializers_second)
    # `serializers=None` must leave the initial serializer untouched.
    expected = first_name if serializers_second is None else second_name
    assert second._pxy_get().serializer == expected
    # Re-proxying must hand back the very same proxy instance.
    assert first is second
示例#9
0
def test_communicating_disk_objects(protocol, shared_fs):
    """Check a disk-serialized cuDF dataframe when sent from client to worker.

    The worker-side task asserts that the serializer is "disk" when
    `shared_fs` is truthy and "dask" otherwise.
    """
    cudf = pytest.importorskip("cudf")
    ProxifyHostFile._spill_shared_filesystem = shared_fs

    def task(x):
        # The proxy subclass must survive the trip from client to worker
        assert isinstance(x, _PxyObjTest)
        expected_serializer = "disk" if shared_fs else "dask"
        assert x._pxy_get().serializer == expected_serializer

    use_ucx = protocol == "ucx"
    with dask_cuda.LocalCUDACluster(
        n_workers=1, protocol=protocol, enable_tcp_over_ucx=use_ucx
    ) as cluster:
        with Client(cluster) as client:
            payload = cudf.DataFrame({"a": range(10)})
            payload = proxy_object.asproxy(
                payload, serializers=("disk",), subclass=_PxyObjTest
            )
            payload._pxy_get().assert_on_deserializing = False
            payload = client.scatter(payload)
            client.submit(task, payload).result()
            client.shutdown()  # Avoids a UCX shutdown error
def test_one_item_host_limit(capsys):
    """Walk proxies through the device, host and disk tiers of ProxifyHostFile.

    The host limit is set to the size of one proxied `one_item_array()` and
    the device limit to `one_item_nbytes`; each step below asserts which
    proxies the manager reports in its `_dev`, `_host` and `_disk` stores.

    NOTE(review): the `capsys` fixture is requested but never used in this
    body.
    """
    # Host limit: size of one proxied item created with ("dask", "pickle").
    memory_limit = sizeof(asproxy(one_item_array(), serializers=("dask", "pickle")))
    dhf = ProxifyHostFile(
        device_memory_limit=one_item_nbytes, memory_limit=memory_limit
    )

    # Insert two items; their first elements are later checked to be 1 and 2.
    a1 = one_item_array() + 1
    a2 = one_item_array() + 2
    dhf["k1"] = a1
    dhf["k2"] = a2
    dhf.manager.validate()

    # Check k1 is spilled because of the newer k2
    k1 = dhf["k1"]
    k2 = dhf["k2"]
    assert k1._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Check k1 is spilled to disk and k2 is spilled to host
    dhf["k3"] = one_item_array() + 3
    k3 = dhf["k3"]
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k3])
    dhf.manager.validate()

    # Accessing k2 spills k3 to host and unspills k2 to device
    k2_val = k2[0]
    assert k2_val == 2
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k3])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Adding a new array spills k3 to disk and k2 to host
    dhf["k4"] = one_item_array() + 4
    k4 = dhf["k4"]
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1, k3])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k4])

    # Accessing k1 unspills k1 directly to device and spills k4 to host;
    # the asserts below also expect k2 to have moved from host to disk.
    k1_val = k1[0]
    assert k1_val == 1
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k2, k3])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])

    # Clean up: drop the local proxy references before clearing the host file,
    # then check that the manager reports zero length.
    del k1, k2, k3, k4
    dhf.clear()
    assert len(dhf.manager) == 0
示例#11
0
def test_cupy_broadcast_to():
    """Check `np.broadcast_to` on a proxied cupy array."""
    cupy = pytest.importorskip("cupy")
    base = cupy.arange(10)
    target_shape = (10, 10)
    expected = np.broadcast_to(base, target_shape)
    got = np.broadcast_to(proxy_object.asproxy(base), target_shape)

    # Shape and contents must match the un-proxied broadcast.
    assert expected.shape == got.shape
    assert (expected == got).all()
示例#12
0
def test_serializing_to_disk(obj):
    """Check serializing to disk.

    `obj` is either the object to proxy or the name of an array backend
    module; in the latter case a 100-element `arange` of that backend is used.
    """

    if isinstance(obj, str):
        backend = pytest.importorskip(obj)
        obj = backend.arange(100)

    # Serialize from device to disk
    pxy = proxy_object.asproxy(obj)
    ProxifyHostFile.serialize_proxy_to_disk_inplace(pxy)
    assert pxy._pxy_get().serializer == "disk"
    # Compare as lists: `==` on two multi-element arrays yields an array whose
    # truth value is ambiguous, so a bare `assert obj == ...` would raise.
    assert list(obj) == list(proxy_object.unproxy(pxy))

    # Serialize from host to disk
    pxy = proxy_object.asproxy(obj, serializers=("pickle",))
    ProxifyHostFile.serialize_proxy_to_disk_inplace(pxy)
    assert pxy._pxy_get().serializer == "disk"
    assert list(obj) == list(proxy_object.unproxy(pxy))
示例#13
0
def test_einsum_of_proxied_cupy_arrays():
    """Check einsum of cupy arrays"""
    cupy = pytest.importorskip("cupy")

    base = cupy.arange(25).reshape(5, 5)
    # Reference result from the plain array, computed before proxying a copy.
    expected = dask.array.einsum("ii", base)
    proxied = proxy_object.asproxy(base.copy())
    got = dask.array.einsum("ii", proxied)
    assert all(expected.flatten() == got.flatten())
示例#14
0
def test_pickle_proxy_object(array_module, serializers):
    """Check that a proxy object survives a pickle round trip."""
    xp = pytest.importorskip(array_module)
    original = xp.arange(10)
    proxied = proxy_object.asproxy(original, serializers=serializers)
    payload = pickle.dumps(proxied)
    roundtripped = pickle.loads(payload)
    # `repr()` of the restored proxy must not raise.
    repr(roundtripped)
    assert all(original == roundtripped)
示例#15
0
def test_proxy_object_parquet(tmp_path):
    """Check writing a proxied cudf dataframe to parquet and reading it back."""
    cudf = pytest.importorskip("cudf")
    parquet_path = tmp_path / "proxy_test.parquet"

    frame = cudf.DataFrame({"a": range(10)})
    proxied = proxy_object.asproxy(frame)
    # Write through the proxy, then read the file back with dask.
    proxied.to_parquet(str(parquet_path), engine="pyarrow")
    loaded = dask.dataframe.read_parquet(parquet_path)
    assert_frame_equal(frame.to_pandas(), loaded.compute())
示例#16
0
def test_fixed_attribute_name():
    """Test fixed attribute `x.name` access

    Notice, accessing fixed attributes shouldn't de-serialize the proxied object
    """
    obj_without_name = SimpleNamespace()
    obj_with_name = SimpleNamespace(name="I have a name")

    # Access `name` of an object that does not define it
    pxy = proxy_object.asproxy(obj_without_name, serializers=("pickle",))
    # `match=` verifies the message; asserts placed inside the `with` body
    # after the raising attribute access would never execute.
    with pytest.raises(AttributeError, match="has no attribute 'name'"):
        pxy.name
    # The failed lookup must not have de-serialized the proxied object
    assert pxy._obj_pxy_is_serialized()

    # Access `name` of an object that defines it
    pxy = proxy_object.asproxy(obj_with_name, serializers=("pickle",))
    assert pxy.name == "I have a name"
    assert pxy._obj_pxy_is_serialized()
示例#17
0
def test_fixed_attribute_length(backend):
    """Test fixed attribute `x.__len__` access

    Notice, accessing fixed attributes shouldn't de-serialize the proxied object
    """
    xp = pytest.importorskip(backend)

    # `len()` of a ten-element array
    sized = proxy_object.asproxy(xp.arange(10), serializers=("dask",))
    assert len(sized) == 10
    # Reading the length must leave the proxied object serialized
    assert sized._obj_pxy_is_serialized()

    # `len()` of a zero-dimensional array is expected to raise TypeError
    unsized = proxy_object.asproxy(xp.array(10), serializers=("dask",))
    with pytest.raises(TypeError) as excinfo:
        len(unsized)
        # NOTE(review): the two asserts below are unreachable whenever
        # `len(unsized)` raises, so they are never evaluated -- confirm the
        # intended message and serialization state before moving them out.
        assert "len() of unsized object" in str(excinfo.value)
        assert unsized._obj_pxy_is_serialized()
示例#18
0
def test_serializing_array_to_disk(backend, serializers, size):
    """Check that an array proxy can be serialized to disk and restored."""

    xp = pytest.importorskip(backend)
    original = xp.arange(size)

    # Proxy with the requested serializers, then move the proxy to disk
    proxied = proxy_object.asproxy(original, serializers=serializers)
    ProxifyHostFile.serialize_proxy_to_disk_inplace(proxied)
    assert proxied._pxy_get().serializer == "disk"
    # Un-proxying must give back the same elements
    assert list(original) == list(proxy_object.unproxy(proxied))
示例#19
0
def test_from_cudf_of_proxy_object():
    """Check `dask_cudf.from_cudf()` when given a proxied cudf dataframe."""
    cudf = pytest.importorskip("cudf")

    frame = cudf.DataFrame({"a": range(10)})
    proxied = proxy_object.asproxy(frame)
    assert has_parallel_type(proxied)

    collection = dask_cudf.from_cudf(proxied, npartitions=1)
    assert has_parallel_type(collection)

    # The result is a dask-cudf dataframe, not a proxy object
    assert type(collection) is dask_cudf.core.DataFrame
示例#20
0
def test_serialize_of_proxied_cudf(proxy_serializers, dask_serializers):
    """Check a serialize/deserialize round trip of a proxied cudf dataframe.

    The proxy may already be serialized, depending on `proxy_serializers`.
    """
    cudf = pytest.importorskip("cudf")
    frame = cudf.DataFrame({"a": range(10)})
    proxied = proxy_object.asproxy(frame, serializers=proxy_serializers)
    # `on_error="raise"` is passed so serialization failures raise.
    header, frames = serialize(
        proxied, serializers=dask_serializers, on_error="raise"
    )
    proxied = deserialize(header, frames)
    assert_frame_equal(frame.to_pandas(), proxied.to_pandas())
示例#21
0
def test_sizeof_cupy():
    """Check `sizeof()` of a proxied cupy array in its different states."""
    cupy = pytest.importorskip("cupy")
    cupy.cuda.set_allocator(None)
    arr = cupy.arange(1e7)
    expected_size = sizeof(arr)
    proxied = proxy_object.asproxy(arr)

    # Freshly proxied
    assert expected_size == pytest.approx(sizeof(proxied))

    # After serializing with the "dask" serializer
    proxied._pxy_serialize(serializers=("dask",))
    assert expected_size == pytest.approx(sizeof(proxied))
    assert proxied._pxy_get().is_serialized()

    # After resetting the proxy's cache dict; it must still be serialized
    proxied._pxy_cache = {}
    assert expected_size == pytest.approx(sizeof(proxied))
    assert proxied._pxy_get().is_serialized()
示例#22
0
def test_sizeof_cudf():
    """Check `sizeof()` of a proxied cudf dataframe in its different states."""
    cudf = pytest.importorskip("cudf")
    frame = cudf.datasets.timeseries().reset_index()
    expected_size = sizeof(frame)
    proxied = proxy_object.asproxy(frame)

    # Freshly proxied
    assert expected_size == pytest.approx(sizeof(proxied))

    # After serializing with the "dask" serializer
    proxied._pxy_serialize(serializers=("dask",))
    assert expected_size == pytest.approx(sizeof(proxied))
    assert proxied._pxy_get().is_serialized()

    # With the cache cleared, `sizeof(proxied)` measures the serialized data,
    # hence the looser relative tolerance.
    proxied._pxy_cache = {}
    assert expected_size == pytest.approx(sizeof(proxied), rel=1e-2)
    assert proxied._pxy_get().is_serialized()
示例#23
0
def test_proxy_object(serializers):
    """Check that a proxied list is "transparent" to common operations."""

    original = list(range(10))
    proxied = proxy_object.asproxy(original, serializers=serializers)

    # Sequence protocol and string conversion
    assert len(original) == len(proxied)
    assert original[0] == proxied[0]
    assert 1 in proxied
    assert -1 not in proxied
    assert str(original) == str(proxied)

    # repr of the proxy before explicit serialization
    assert "dask_cuda.proxy_object.ProxyObject at " in repr(proxied)
    assert "list at " in repr(proxied)

    # repr of the proxy after explicit serialization
    proxied._obj_pxy_serialize(serializers=["dask", "pickle"])
    assert "dask_cuda.proxy_object.ProxyObject at " in repr(proxied)
    assert "list (serialized=['dask', 'pickle'])" in repr(proxied)

    # `unproxy()` works on both a proxy and a plain object
    assert original == proxy_object.unproxy(proxied)
    assert original == proxy_object.unproxy(original)
示例#24
0
def test_proxy_object(serializers):
    """Check that a proxied bytearray is "transparent" to common operations."""

    original = bytearray(range(10))
    proxied = proxy_object.asproxy(original, serializers=serializers)

    # Sequence protocol and string conversion
    assert len(original) == len(proxied)
    assert original[0] == proxied[0]
    assert 1 in proxied
    assert 10 not in proxied
    assert str(original) == str(proxied)

    # repr of the proxy before explicit serialization
    assert "dask_cuda.proxy_object.ProxyObject at " in repr(proxied)
    assert "bytearray at " in repr(proxied)

    # repr of the proxy after explicit serialization
    proxied._pxy_serialize(serializers=("dask", "pickle"))
    assert "dask_cuda.proxy_object.ProxyObject at " in repr(proxied)
    assert "bytearray (serialized='dask')" in repr(proxied)

    # `unproxy()` works on both a proxy and a plain object
    assert original == proxy_object.unproxy(proxied)
    assert original == proxy_object.unproxy(original)
示例#25
0
def test_communicating_proxy_objects(protocol, send_serializers):
    """Check serialization of a proxied cuDF dataframe sent to a worker."""
    cudf = pytest.importorskip("cudf")

    def task(x):
        # The proxy subclass must survive the trip from client to worker
        assert isinstance(x, _PxyObjTest)
        serializers_used = list(x._obj_pxy["serializers"])

        # Work out which serializers `x` is expected to carry on arrival
        if protocol != "ucx":
            expected = ["dask", "pickle"]
        elif send_serializers is None:
            expected = ["cuda", "dask", "pickle"]
        else:
            expected = send_serializers
        assert serializers_used == expected

    use_ucx = protocol == "ucx"
    with dask_cuda.LocalCUDACluster(
        n_workers=1, protocol=protocol, enable_tcp_over_ucx=use_ucx
    ) as cluster:
        with Client(cluster) as client:
            payload = cudf.DataFrame({"a": range(10)})
            payload = proxy_object.asproxy(
                payload, serializers=send_serializers, subclass=_PxyObjTest
            )

            # Notice, in one case we expect deserialization when communicating.
            # Since "tcp" cannot send device memory directly, it will be
            # re-serialized using the default dask serializers that spill the
            # data to main memory.
            expect_deserializing = (
                protocol == "tcp" and send_serializers == ["cuda"]
            )
            payload.assert_on_deserializing = not expect_deserializing
            payload = client.scatter(payload)
            client.submit(task, payload).result()
            client.shutdown()  # Avoids a UCX shutdown error
示例#26
0
def test_proxy_object_of_numpy(serializers):
    """Check that a proxied numpy array behaves as a regular numpy array

    Each group of operators below is applied to both the original array and
    a proxy of a copy, and the type and value of the results are compared.
    """

    np = pytest.importorskip("numpy")

    # Make sure that equality works, which we use to test the other operators
    org = np.arange(10) + 1
    pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
    assert all(org == pxy)
    assert all(org + 1 != pxy)

    # Check unary scalar operators
    for op in [int, float, complex, operator.index, oct, hex]:
        org = np.int64(42)
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        expect = op(org)
        got = op(pxy)
        assert type(expect) == type(got)
        assert expect == got

    # Check unary operators
    for op_str in ["neg", "pos", "abs", "inv"]:
        op = getattr(operator, op_str)
        org = np.arange(10) + 1
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        expect = op(org)
        got = op(pxy)
        assert type(expect) == type(got)
        assert all(expect == got)

    # Check binary operators that takes a scalar as second argument
    for op_str in ["rshift", "lshift", "pow"]:
        op = getattr(operator, op_str)
        org = np.arange(10) + 1
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        expect = op(org, 2)
        got = op(pxy, 2)
        assert type(expect) == type(got)
        assert all(expect == got)

    # Check binary operators (names starting with "i" are the in-place forms)
    for op_str in [
            "add",
            "eq",
            "floordiv",
            "ge",
            "gt",
            "le",
            "lshift",
            "lt",
            "mod",
            "mul",
            "ne",
            "or_",
            "sub",
            "truediv",
            "xor",
            "iadd",
            "ior",
            "iand",
            "ifloordiv",
            "ilshift",
            "irshift",
            "ipow",
            "imod",
            "imul",
            "isub",
            "ixor",
    ]:
        op = getattr(operator, op_str)
        org = np.arange(10) + 1
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        # Proxy as the second operand; the first operand is a fresh copy so
        # that `org` itself is not passed as the target of an in-place operator.
        expect = op(org.copy(), org)
        got = op(org.copy(), pxy)
        assert isinstance(got, type(expect))
        assert all(expect == got)

        # Proxy as the first operand
        expect = op(org.copy(), org)
        got = op(pxy, org)
        assert isinstance(got, type(expect))
        assert all(expect == got)

        # Check proxy-proxy operations
        if "i" != op_str[0]:  # Skip in-place operators
            expect = op(org.copy(), org)
            got = op(pxy, proxy_object.asproxy(org.copy()))
            assert all(expect == got)

    # Check unary truth operators (on a single-element array)
    for op_str in ["not_", "truth"]:
        op = getattr(operator, op_str)
        org = np.arange(1) + 1
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        expect = op(org)
        got = op(pxy)
        assert type(expect) == type(got)
        assert expect == got

    # Check reflected methods, called on the plain array with the proxy as
    # the argument
    for op_str in [
            "__radd__",
            "__rsub__",
            "__rmul__",
            "__rtruediv__",
            "__rfloordiv__",
            "__rmod__",
            "__rpow__",
            "__rlshift__",
            "__rrshift__",
            "__rxor__",
            "__ror__",
    ]:
        org = np.arange(10) + 1
        pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
        expect = getattr(org, op_str)(org)
        got = getattr(org, op_str)(pxy)
        assert isinstance(got, type(expect))
        assert all(expect == got)
示例#27
0
def test_proxy_object_of_cudf(serializers):
    """Check that a proxied cudf dataframe converts to pandas like the original."""
    cudf = pytest.importorskip("cudf")
    frame = cudf.DataFrame({"a": range(10)})
    proxied = proxy_object.asproxy(frame, serializers=serializers)
    expected = frame.to_pandas()
    assert_frame_equal(expected, proxied.to_pandas())
示例#28
0
def test_assignments():
    """Check assignment to a proxied dataframe"""
    cudf = pytest.importorskip("cudf")

    # Wrap a ten-row, single-column cudf dataframe in a proxy object.
    df = proxy_object.asproxy(cudf.DataFrame({"a": range(10)}))
    # Assign a shallow copy of column "a" as the index through the proxy.
    # No assertion follows in the visible lines, so the check here is that
    # the attribute assignment completes without raising.
    df.index = df["a"].copy(deep=False)