def test_dispatch_variadic_on_first_argument():
    """Handlers taking two arguments are selected by the type of the first one."""
    dispatcher = Dispatch()

    # An int first argument adds, a float first argument subtracts.
    registrations = (
        (int, lambda a, b: a + b),
        (float, lambda a, b: a - b),
    )
    for first_arg_type, handler in registrations:
        dispatcher.register(first_arg_type, handler)

    assert dispatcher(1, 2) == 3
    assert dispatcher(1., 2.) == -1
def test_dispatch():
    """Exercise int/float/tuple handlers and the ``object`` fallback of a Dispatch."""

    class Bar(object):
        pass

    dispatcher = Dispatch()

    # Registered in the same order as before: int, float, tuple, object.
    handlers = (
        (int, lambda a: a + 1),
        (float, lambda a: a - 1),
        # The tuple handler re-enters the dispatcher for every element.
        (tuple, lambda a: tuple(dispatcher(item) for item in a)),
        (object, lambda a: a),
    )
    for handled_type, handler in handlers:
        dispatcher.register(handled_type, handler)

    instance = Bar()

    assert dispatcher(1) == 2
    assert dispatcher.dispatch(int)(1) == 2
    assert dispatcher(1.0) == 0.0
    assert dispatcher(instance) == instance
    assert dispatcher((1, 2.0, instance)) == (2, 1.0, instance)
def test_dispatch_lazy():
    """Check that a handler registered through ``register_lazy`` is found on lookup."""
    # this tests the recursive component of dispatch
    lazy_dispatch = Dispatch()
    lazy_dispatch.register(int, lambda a: a)

    import decimal

    # keep it outside lazy dec for test
    def add_one(a):
        return a + 1

    @lazy_dispatch.register_lazy("decimal")
    def register_decimal_handler():
        import decimal

        lazy_dispatch.register(decimal.Decimal, add_one)

    # This test needs to be *before* any other calls
    assert lazy_dispatch.dispatch(decimal.Decimal) == add_one

    one = decimal.Decimal(1)
    two = decimal.Decimal(2)
    assert lazy_dispatch(one) == two
    assert lazy_dispatch(1) == 1
def test_dispatch_variadic_on_first_argument():
    """Two-argument handlers are chosen from the type of the first argument."""

    def add(a, b):
        return a + b

    def subtract(a, b):
        return a - b

    dispatcher = Dispatch()
    dispatcher.register(int, add)
    dispatcher.register(float, subtract)

    int_result = dispatcher(1, 2)
    float_result = dispatcher(1.0, 2.0)

    assert int_result == 3
    assert float_result == -1
def test_dispatch():
    """Exercise typed handlers, the ``object`` fallback and the dispatcher's ``__doc__``."""

    class Bar(object):
        pass

    def increment(a):
        return a + 1

    def decrement(a):
        return a - 1

    def passthrough(a):
        """ My Docstring """
        return a

    dispatcher = Dispatch()

    def map_over_tuple(a):
        # Re-enter the dispatcher for every element of the tuple.
        return tuple(dispatcher(item) for item in a)

    # Same registration order as before: int, float, tuple, object.
    dispatcher.register(int, increment)
    dispatcher.register(float, decrement)
    dispatcher.register(tuple, map_over_tuple)
    dispatcher.register(object, passthrough)

    instance = Bar()

    assert dispatcher(1) == 2
    assert dispatcher.dispatch(int)(1) == 2
    assert dispatcher(1.0) == 0.0
    assert dispatcher(instance) == instance
    assert dispatcher((1, 2.0, instance)) == (2, 1.0, instance)
    # The dispatcher reports the docstring of the handler registered for object.
    assert dispatcher.__doc__ == passthrough.__doc__
"""
Dispatch in dask.array.

Also see backends.py
"""

from dask.utils import Dispatch

# One Dispatch registry per array operation, each labelled with the name of
# the operation it looks up. No implementations are registered in this
# snippet. NOTE(review): presumably they are registered per array type in
# backends.py, as the module docstring suggests -- confirm.
concatenate_lookup = Dispatch("concatenate")
tensordot_lookup = Dispatch("tensordot")
einsum_lookup = Dispatch("einsum")
empty_lookup = Dispatch("empty")
divide_lookup = Dispatch("divide")
percentile_lookup = Dispatch("percentile")
def test_dispatch_kwargs():
    """Keyword arguments passed to the dispatcher reach the selected handler."""

    def add(a, b=10):
        return a + b

    dispatcher = Dispatch()
    dispatcher.register(int, add)

    # b=20 overrides the handler's default of 10.
    result = dispatcher(1, b=20)
    assert result == 21
"""
Support for pandas ExtensionArray in dask.dataframe.

See :ref:`extensionarrays` for more.
"""
from dask.dataframe.accessor import (
    register_dataframe_accessor,
    register_index_accessor,
    register_series_accessor,
)
from dask.utils import Dispatch

# Dispatch registries created here without any registered implementations.
# NOTE(review): presumably third-party extension types register their own
# handlers on these -- confirm against the referenced documentation.
make_array_nonempty = Dispatch("make_array_nonempty")
make_scalar = Dispatch("make_scalar")

# Public names: the two registries defined above plus the accessor
# registration helpers re-exported from dask.dataframe.accessor.
__all__ = [
    "make_array_nonempty",
    "make_scalar",
    "register_dataframe_accessor",
    "register_index_accessor",
    "register_series_accessor",
]
from __future__ import absolute_import, division, print_function

from dask.utils import Dispatch

# Type-dispatched predicate; the handlers below decide, per type, whether an
# object counts as a device object.
is_device_object = Dispatch(name="is_device_object")


@is_device_object.register(object)
def is_device_object_default(o):
    """Fallback handler: true when ``o`` has a ``__cuda_array_interface__`` attribute."""
    return hasattr(o, "__cuda_array_interface__")


@is_device_object.register(list)
@is_device_object.register(tuple)
@is_device_object.register(set)
@is_device_object.register(frozenset)
def is_device_object_python_collection(seq):
    """Return True if any element of the collection is a device object.

    Each element is passed back through ``is_device_object``, so nested
    collections are searched recursively.
    """
    # A generator lets ``any`` stop at the first device object instead of
    # evaluating every element into a temporary list first.
    return any(is_device_object(s) for s in seq)


@is_device_object.register_lazy("cudf")
def register_cudf():
    """Register the cudf handlers; ``cudf`` is only imported when this runs.

    NOTE(review): presumably Dispatch calls this the first time it meets an
    object from the ``cudf`` package -- confirm in ``dask.utils.Dispatch``.
    """
    import cudf

    @is_device_object.register(cudf.DataFrame)
    def is_device_object_cudf_dataframe(df):
        return True

    @is_device_object.register(cudf.Series)
    def is_device_object_cudf_series(s):
        return True
from dask.sizeof import sizeof from dask.utils import Dispatch dispatch = Dispatch(name="get_device_memory_objects") def get_device_memory_objects(obj) -> set: """ Find all CUDA device objects in `obj` Search through `obj` and find all CUDA device objects, which are objects that either are known to `dispatch` or implement `__cuda_array_interface__`. Notice, the CUDA device objects must be hashable. Parameters ---------- obj: Any Object to search through Returns ------- ret: set Set of CUDA device memory objects """ return set(dispatch(obj)) @dispatch.register(object) def get_device_memory_objects_default(obj): if hasattr(obj, "_obj_pxy"): if obj._obj_pxy["serializers"] is None:
from typing import Any, Dict, List from dask.utils import Dispatch from .proxy_object import ProxyObject, asproxy dispatch = Dispatch(name="proxify_device_objects") def proxify_device_objects( obj: Any, proxied_id_to_proxy: Dict[int, ProxyObject], found_proxies: List[ProxyObject], ): """ Wrap device objects in ProxyObject Search through `obj` and wraps all CUDA device objects in ProxyObject. It uses `proxied_id_to_proxy` to make sure that identical CUDA device objects found in `obj` are wrapped by the same ProxyObject. Parameters ---------- obj: Any Object to search through or wrap in a ProxyObject. proxied_id_to_proxy: Dict[int, ProxyObject] Dict mapping the id() of proxied objects (CUDA device objects) to their proxy and is updated with all new proxied objects found in `obj`. found_proxies: List[ProxyObject] List of found proxies in `obj`. Notice, this includes all proxies found, including those already in `proxied_id_to_proxy`.
import itertools
import random
import sys
from array import array

from dask.utils import Dispatch

# Type-dispatched size estimator; handlers are registered per type below.
sizeof = Dispatch(name="sizeof")


@sizeof.register(object)
def sizeof_default(o):
    """Fallback handler: report whatever ``sys.getsizeof`` returns for ``o``."""
    size = sys.getsizeof(o)
    return size


@sizeof.register(bytes)
@sizeof.register(bytearray)
def sizeof_bytes(o):
    """Size of a bytes-like object, taken as its length."""
    n_items = len(o)
    return n_items


@sizeof.register(memoryview)
def sizeof_memoryview(o):
    """Size of a memoryview, taken from its ``nbytes`` attribute."""
    n_bytes = o.nbytes
    return n_bytes


@sizeof.register(array)
def sizeof_array(o):
    """Size of an ``array.array``: item size multiplied by the number of items."""
    bytes_per_item = o.itemsize
    return bytes_per_item * len(o)
from __future__ import print_function, division, absolute_import import sys from dask.utils import Dispatch is_device_object = Dispatch(name='is_device_object') @is_device_object.register(object) def is_device_object_default(o): return hasattr(o, "__cuda_array_interface__") @is_device_object.register(list) @is_device_object.register(tuple) @is_device_object.register(set) @is_device_object.register(frozenset) def is_device_object_python_collection(seq): return any([is_device_object(s) for s in seq]) @is_device_object.register_lazy("cudf") def register_cudf(): import cudf @is_device_object.register(cudf.DataFrame) def is_device_object_cudf_dataframe(df): return True @is_device_object.register(cudf.Series)
def tokenize(*args, **kwargs):
    """Deterministic token

    The positional arguments are normalized one by one, the resulting tuple
    is stringified and hashed; keyword arguments, when present, are
    normalized as a single dict and folded into the same hash.

    >>> tokenize([1, 2, '3'])
    '7d6a880cd9ec03506eee6973ff551339'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    normalized_args = tuple(normalize_token(arg) for arg in args)
    digest = _md5(str(normalized_args).encode())
    if kwargs:
        normalized_kwargs = normalize_token(kwargs)
        digest.update(str(normalized_kwargs).encode())
    return digest.hexdigest()


normalize_token = Dispatch()

# These types are registered with ``identity`` as their normalizer.
normalize_token.register(
    (
        int,
        float,
        str,
        bytes,
        type(None),
        type,
        slice,
        complex,
        type(Ellipsis),
        datetime.date,
    ),
    identity,
)
""" Dispatch in dask.dataframe. Also see extension.py """ import pandas as pd import dask.array as da import dask.dataframe as dd from dask.utils import Dispatch make_meta_dispatch = Dispatch("make_meta_dispatch") make_meta_obj = Dispatch("make_meta_obj") meta_nonempty = Dispatch("meta_nonempty") hash_object_dispatch = Dispatch("hash_object_dispatch") group_split_dispatch = Dispatch("group_split_dispatch") get_parallel_type = Dispatch("get_parallel_type") categorical_dtype_dispatch = Dispatch("CategoricalDtype") concat_dispatch = Dispatch("concat") tolist_dispatch = Dispatch("tolist") is_categorical_dtype_dispatch = Dispatch("is_categorical_dtype") union_categoricals_dispatch = Dispatch("union_categoricals") grouper_dispatch = Dispatch("grouper") def concat( dfs, axis=0, join="outer", uniform=False,
import logging
import sys

from dask.utils import Dispatch

try:
    # PyPy does not support sys.getsizeof
    sys.getsizeof(1)
except (AttributeError, TypeError):
    # Monkey patch
    getsizeof = lambda x: 100
else:
    getsizeof = sys.getsizeof

logger = logging.getLogger(__name__)

# Type-dispatched size estimator; handlers are registered per type below.
sizeof = Dispatch()


@sizeof.register(object)
def sizeof_default(o):
    """Fallback handler: delegate to ``getsizeof`` chosen above."""
    return getsizeof(o)


@sizeof.register(list)
@sizeof.register(tuple)
@sizeof.register(set)
@sizeof.register(frozenset)
def sizeof_python_collection(seq):
    """Size of a builtin collection: the container itself plus each element.

    Every element is sized through ``sizeof`` again, so nested collections
    are counted recursively.
    """
    container_size = getsizeof(seq)
    contents_size = sum(sizeof(item) for item in seq)
    return container_size + contents_size
def test_dispatch():
    """Exercise typed handlers, the ``object`` fallback and the dispatcher's ``__doc__``."""

    class Bar:
        pass

    def fallback(a):
        """My Docstring"""
        return a

    dispatcher = Dispatch()

    # Same registration order as before: int, float, tuple, object.
    typed_handlers = (
        (int, lambda a: a + 1),
        (float, lambda a: a - 1),
        # The tuple handler re-enters the dispatcher for every element.
        (tuple, lambda a: tuple(dispatcher(item) for item in a)),
    )
    for handled_type, handler in typed_handlers:
        dispatcher.register(handled_type, handler)
    dispatcher.register(object, fallback)

    instance = Bar()

    assert dispatcher(1) == 2
    assert dispatcher.dispatch(int)(1) == 2
    assert dispatcher(1.0) == 0.0
    assert dispatcher(instance) == instance
    assert dispatcher((1, 2.0, instance)) == (2, 1.0, instance)
    # The dispatcher reports the docstring of the handler registered for object.
    assert dispatcher.__doc__ == fallback.__doc__
class DaskBaseEstimator(Base):
    """Base class for dask-backed estimators"""

    _default_get = staticmethod(threaded_get)

    @staticmethod
    def _optimize(dsk, keys, **kwargs):
        """Run ``fuse`` on the graph and return only the fused graph."""
        # fuse returns a pair; the second element is not needed here.
        fused_graph, _dependencies = fuse(dsk, keys)
        return fused_graph

    def _keys(self):
        """Return a one-element list holding this estimator's ``_name``."""
        return [self._name]


@partial(normalize_token.register, BaseEstimator)
def normalize_BaseEstimator(est):
    """Token for ``BaseEstimator`` instances: class name plus normalized ``vars``."""
    class_name = type(est).__name__
    state_token = normalize_token(vars(est))
    return class_name, state_token


@partial(normalize_token.register, DaskBaseEstimator)
def normalize_dask_estimators(est):
    """Token for dask estimators: class name plus the estimator's ``_name``."""
    class_name = type(est).__name__
    return class_name, est._name


def from_sklearn(est):
    """Wrap a scikit-learn estimator in a dask object."""
    wrap = from_sklearn.dispatch
    return wrap(est)


# The dispatcher lives as an attribute on the function itself; estimators
# that are already dask-backed are registered with ``identity``.
from_sklearn.dispatch = Dispatch()
from_sklearn.dispatch.register(DaskBaseEstimator, identity)