def __init__(
    self,
    device_memory_limit=None,
    memory_limit=None,
    local_directory="dask-worker-space",
):
    """Assemble the chained device -> host -> disk stores.

    ``device_memory_limit`` and ``memory_limit`` are handed to the device
    and host ``Buffer`` objects respectively. The file-backed store that
    sits behind the host buffer is rooted at ``<local_directory>/storage``.
    """
    # Host level: a plain dict backed by a serializing, file-based store.
    self.host_func = {}
    self.disk_func = Func(
        serialize_bytelist,
        deserialize_bytes,
        File(os.path.join(local_directory, "storage")),
    )
    self.host_buffer = Buffer(
        self.host_func, self.disk_func, memory_limit, weight=weight
    )

    # Device level: a plain dict backed by the host buffer built above.
    self.device_keys = set()
    self.device_func = {}
    self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
    self.device_buffer = Buffer(
        self.device_func,
        self.device_host_func,
        device_memory_limit,
        weight=weight,
    )

    # Direct handles on the mapping underneath each level.
    host_fast = self.host_buffer.fast
    self.device = self.device_buffer.fast.d
    self.host = host_fast.d
    self.disk = self.host_buffer.slow.d

    # For Worker compatibility only, where `fast` is host memory buffer
    self.fast = host_fast
def __init__(self, device_memory_limit=None, memory_limit=None,
             local_dir="dask-worker-space"):
    """Create the device, host and disk mappings and chain them together.

    The disk store lives in ``<local_dir>/storage`` and serializes with
    ``serialize_bytes(..., on_error="raise")``. ``memory_limit`` and
    ``device_memory_limit`` are forwarded to the host and device ``Buffer``.
    """
    storage_dir = os.path.join(local_dir, "storage")
    to_disk = partial(serialize_bytes, on_error="raise")

    # Host dict, backed by the on-disk store.
    self.host_func = {}
    self.disk_func = Func(to_disk, deserialize_bytes, File(storage_dir))
    self.host_buffer = Buffer(
        self.host_func,
        self.disk_func,
        memory_limit,
        weight=weight,
    )

    # Device dict, backed by the host buffer.
    self.device_func = {}
    self.device_host_func = Func(
        _serialize_if_device,
        _deserialize_if_device,
        self.host_buffer,
    )
    self.device_buffer = Buffer(
        self.device_func,
        self.device_host_func,
        device_memory_limit,
        weight=weight,
    )

    self.device = self.device_buffer.fast.d
    host_fast = self.host_buffer.fast
    self.host = host_fast.d
    self.disk = self.host_buffer.slow.d

    # For Worker compatibility only, where `fast` is host memory buffer
    self.fast = host_fast
def __init__(
    self,
    device_memory_limit=None,
    memory_limit=None,
    local_directory=None,
    log_spilling=False,
):
    """Build the chained device -> host -> disk stores.

    Parameters
    ----------
    device_memory_limit
        Forwarded as the size argument of the device-level buffer.
    memory_limit
        Forwarded as the size argument of the host-level buffer. A value of
        ``0`` skips the host buffer entirely: the host store is then the
        plain ``dict`` in ``self.host_func`` and ``self.disk`` is ``None``.
    local_directory
        Base directory for the on-disk store. Falls back to dask's
        ``temporary-directory`` config value and then to ``os.getcwd()``.
        The store itself is placed in ``dask-worker-space/storage`` below
        that directory, which is created if missing.
    log_spilling
        When exactly ``True``, both buffers are built with ``LoggedBuffer``
        and labelled via ``fast_name`` / ``slow_name``; otherwise plain
        ``Buffer`` objects are used and no labels are passed.
    """
    base_directory = (
        local_directory or dask.config.get("temporary-directory") or os.getcwd()
    )
    self.disk_func_path = os.path.join(
        base_directory, "dask-worker-space", "storage"
    )
    os.makedirs(self.disk_func_path, exist_ok=True)

    self.host_func = dict()
    self.disk_func = Func(
        functools.partial(serialize_bytelist, on_error="raise"),
        deserialize_bytes,
        File(self.disk_func_path),
    )

    # The name kwargs are only populated together with LoggedBuffer, so the
    # class and the kwargs must always be used as a pair.
    host_buffer_kwargs = {}
    device_buffer_kwargs = {}
    buffer_class = Buffer
    if log_spilling is True:
        buffer_class = LoggedBuffer
        host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
        device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}

    if memory_limit == 0:
        # No host -> disk layer: keep host data in the plain dict.
        self.host_buffer = self.host_func
    else:
        self.host_buffer = buffer_class(
            self.host_func,
            self.disk_func,
            memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **host_buffer_kwargs,
        )

    self.device_keys = set()
    self.device_func = dict()
    self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
    # Fix: use ``buffer_class`` here as well. The original always built a
    # plain ``Buffer`` while still forwarding ``fast_name``/``slow_name``
    # when log_spilling was enabled, so the device layer was never logged.
    self.device_buffer = buffer_class(
        self.device_func,
        self.device_host_func,
        device_memory_limit,
        weight=lambda k, v: safe_sizeof(v),
        **device_buffer_kwargs,
    )

    self.device = self.device_buffer.fast.d
    self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
    self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

    # For Worker compatibility only, where `fast` is host memory buffer
    self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
# Build the chained device -> host -> disk stores.
#
# Arguments, as used by the body:
#   device_memory_limit -- forwarded as the third argument of the device Buffer.
#   memory_limit        -- forwarded as the third argument of the host Buffer.
#                          When it equals 0 no host Buffer is created: the
#                          plain dict `self.host_func` serves as host store,
#                          `self.disk` is None and `self.fast` is that dict.
#   local_directory     -- base directory; when None it is read from dask's
#                          "temporary-directory" config value, falling back
#                          to os.getcwd(). The File store is placed at
#                          `self.disk_func_path` (a "storage" sub-directory).
#   jit_unspill         -- when truthy, the device<->host Func uses
#                          pxy_obj_device_to_host / pxy_obj_host_to_device
#                          instead of device_to_host / host_to_device.
#
# NOTE(review): this definition appears whitespace-collapsed onto one line,
# so the nesting of `if not os.path.exists(...)`, `os.makedirs(...)` and the
# "dask-worker-space" join relative to `if local_directory is None:` cannot
# be read from here -- confirm against the formatted file before refactoring.
# NOTE(review): the os.path.exists() check looks redundant next to
# makedirs(..., exist_ok=True) -- confirm before simplifying.
def __init__( self, device_memory_limit=None, memory_limit=None, local_directory=None, jit_unspill=False, ): if local_directory is None: local_directory = dask.config.get( "temporary-directory") or os.getcwd() if not os.path.exists(local_directory): os.makedirs(local_directory, exist_ok=True) local_directory = os.path.join(local_directory, "dask-worker-space") self.disk_func_path = os.path.join(local_directory, "storage") self.host_func = dict() self.disk_func = Func( functools.partial(serialize_bytelist, on_error="raise"), deserialize_bytes, File(self.disk_func_path), ) if memory_limit == 0: self.host_buffer = self.host_func else: self.host_buffer = Buffer(self.host_func, self.disk_func, memory_limit, weight=weight) self.device_keys = set() self.device_func = dict() if jit_unspill: self.device_host_func = Func(pxy_obj_device_to_host, pxy_obj_host_to_device, self.host_buffer) else: self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer) self.device_buffer = Buffer(self.device_func, self.device_host_func, device_memory_limit, weight=weight) self.device = self.device_buffer.fast.d self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d self.disk = None if memory_limit == 0 else self.host_buffer.slow.d # For Worker compatibility only, where `fast` is host memory buffer self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
def test_simple():
    """Check item access, iteration, string forms and deletion on a Func."""
    backing = dict()
    func = Func(inc, dec, backing)

    func['x'] = 10
    # Reading gives back the value that was written, while the backing
    # dict holds the transformed one.
    assert func['x'] == 10
    assert backing['x'] == 11

    assert 'x' in func
    assert list(func) == ['x']
    assert list(func.values()) == [10]
    assert list(func.items()) == [('x', 10)]

    # Both textual forms mention the two functions, the key and the class.
    for render in (str, repr):
        assert all(s in render(func) for s in ['inc', 'dec', 'x', 'Func'])

    del func['x']
    assert 'x' not in backing
def test_mapping():
    """
    Run the shared mapping and closing checks against a Func() instance.
    """
    func = Func(rotl, rotr, {})
    utils_test.check_mapping(func)
    utils_test.check_closing(func)
def test_simple():
    """Exercise the basic mapping behaviour of Func over a plain dict."""
    store = dict()
    wrapped = Func(inc, dec, store)
    wrapped["x"] = 10

    # The wrapper returns 10 while the underlying dict contains 11.
    assert wrapped["x"] == 10
    assert store["x"] == 11

    # Membership and iteration go through the wrapper.
    assert "x" in wrapped
    assert list(wrapped) == ["x"]
    assert list(wrapped.values()) == [10]
    assert list(wrapped.items()) == [("x", 10)]

    # str() and repr() both name the functions, the key and the class.
    fragments = ["inc", "dec", "x", "Func"]
    assert all(s in str(wrapped) for s in fragments)
    assert all(s in repr(wrapped) for s in fragments)

    # Deleting through the wrapper removes the key from the backing dict.
    del wrapped["x"]
    assert "x" not in store
# Build the chained device -> host -> disk stores.
#
# device_memory_limit and memory_limit are forwarded as the third argument
# of the device and host Buffer respectively. When local_directory is None
# it is read from dask's "temporary-directory" config value, falling back to
# os.getcwd(). `self.disk_func` wraps a File store located at
# `self.disk_func_path` (a "storage" sub-directory).
#
# NOTE(review): this definition appears whitespace-collapsed onto one line,
# so it is not visible here whether os.makedirs(...) and the
# "dask-worker-space" join run only when local_directory is None or
# unconditionally -- confirm against the formatted file before refactoring.
def __init__( self, device_memory_limit=None, memory_limit=None, local_directory=None, ): if local_directory is None: local_directory = dask.config.get( "temporary-directory") or os.getcwd() os.makedirs(local_directory, exist_ok=True) local_directory = os.path.join(local_directory, "dask-worker-space") self.disk_func_path = os.path.join(local_directory, "storage") self.host_func = dict() self.disk_func = Func(serialize_bytelist, deserialize_bytes, File(self.disk_func_path)) self.host_buffer = Buffer(self.host_func, self.disk_func, memory_limit, weight=weight) self.device_keys = set() self.device_func = dict() self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer) self.device_buffer = Buffer(self.device_func, self.device_host_func, device_memory_limit, weight=weight) self.device = self.device_buffer.fast.d self.host = self.host_buffer.fast.d self.disk = self.host_buffer.slow.d # For Worker compatibility only, where `fast` is host memory buffer self.fast = self.host_buffer.fast
def __init__(self, spill_directory: str, target: int):
    """Initialise spill bookkeeping and the in-memory / on-disk buffer.

    ``spill_directory`` is the location handed to the ``File`` store and
    ``target`` is forwarded as the size argument of the parent buffer.
    """
    # Bookkeeping must exist before the parent constructor runs, because
    # the eviction / retrieval callbacks are registered there.
    self.spilled_by_key = {}
    self.spilled_total = 0

    dump = partial(serialize_bytelist, on_error="raise")
    on_disk = Func(dump, deserialize_bytes, File(spill_directory))

    callbacks = {
        "fast_to_slow_callbacks": [self._on_evict],
        "slow_to_fast_callbacks": [self._on_retrieve],
    }
    super().__init__({}, on_disk, target, weight=self._weight, **callbacks)
def setup(self, worker):
    """Create the layered cache plus its lock and hit/miss counters.

    Layers, outermost first: a dict limited by ``self._maxmem`` (weighted by
    ``nbytes``); a Func applying ``blosc.pack_array`` / ``blosc.unpack_array``
    in front of a dict limited by ``self._maxcompressed``; and an LRU limited
    by ``self._maxdisk`` over a File store in ``<worker.local_directory>/cache``.
    """
    def packed_size(k, v):
        # Packed values are weighted by their length.
        return len(v)

    disk_layer = LRU(
        n=self._maxdisk,
        d=File(os.path.join(worker.local_directory, 'cache')),
        weight=packed_size,
    )
    packed_layer = Buffer(
        fast={},
        slow=disk_layer,
        n=self._maxcompressed,
        weight=packed_size,
    )
    codec = Func(
        dump=blosc.pack_array,
        load=blosc.unpack_array,
        d=packed_layer,
    )
    self.cache = Buffer(
        fast={},
        slow=codec,
        n=self._maxmem,
        weight=lambda k, v: v.nbytes,
    )

    self.lock = Lock()
    self.hits = 0
    self.misses = 0
# Return a Func built around `r` (presumably a Redis client, since the body
# uses `r.hmset` -- TODO confirm).
#
# NOTE(review): this function looks unfinished:
#   * `b` is assigned but never used afterwards;
#   * `Func(r.hmset)` is called with a single argument, whereas the Func
#     call in the return statement is given three;
#   * `red` is not defined inside this function -- confirm that it exists
#     at module level, otherwise `partial(red)` raises NameError;
#   * the returned Func receives `r` itself as its first argument.
# Clarify the intended dump/load/store arguments before relying on it.
def redis_dict(r): b = Func(r.hmset) return Func(r, partial(read_from_redis, r), partial(red))
def numpy_redis_mapping():
    """Return a Func combining arr_to_dict / dict_to_arr with a new Redis()."""
    mapping = Func(arr_to_dict, dict_to_arr, Redis())
    return mapping
def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
             loop=None, local_dir=None, services=None, service_ports=None,
             name=None, heartbeat_interval=5000, memory_limit=TOTAL_MEMORY,
             **kwargs):
    """Set up worker state, storage, services, handlers and the heartbeat.

    ``scheduler_ip`` / ``scheduler_port`` address the scheduler ``rpc``.
    ``ip``, ``ncores``, ``loop`` and ``local_dir`` fall back to ``get_ip()``,
    ``_ncores``, ``IOLoop.current()`` and a fresh ``worker-`` temp directory.
    ``services`` maps a key (optionally a ``(key, port)`` tuple) to a factory
    called as ``factory(self, io_loop=...)``; the resulting ports are stored
    in ``service_ports``. A truthy ``memory_limit`` makes ``self.data`` a zict
    ``Buffer`` that uses ``<local_dir>/storage``; otherwise it is a plain
    ``dict``. Remaining ``kwargs`` go to the parent constructor.

    Raises ``ImportError`` if ``memory_limit`` is truthy and zict is missing.
    """
    self.ip = ip or get_ip()
    self._port = 0
    self.ncores = ncores or _ncores

    self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
    if not os.path.exists(self.local_dir):
        os.mkdir(self.local_dir)

    self.memory_limit = memory_limit
    if memory_limit:
        # zict is only required when spilling to disk is enabled.
        try:
            from zict import Buffer, File, Func
        except ImportError:
            raise ImportError("Please `pip install zict` for spill-to-disk workers")
        spill_path = os.path.join(self.local_dir, 'storage')
        disk_store = Func(dumps_to_disk, loads_from_disk, File(spill_path))
        self.data = Buffer({}, disk_store, int(float(memory_limit)), weight)
    else:
        self.data = dict()

    self.loop = loop or IOLoop.current()
    self.status = None
    self.executor = ThreadPoolExecutor(self.ncores)
    self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
    self.active = set()
    self.name = name
    self.heartbeat_interval = heartbeat_interval
    self.heartbeat_active = False
    self.execution_state = {
        'scheduler': self.scheduler.address,
        'ioloop': self.loop,
        'worker': self,
    }
    self._last_disk_io = None
    self._last_net_io = None
    self._ipython_kernel = None

    # Put the worker's directory at the front of the import path once.
    if self.local_dir not in sys.path:
        sys.path.insert(0, self.local_dir)

    self.services = {}
    self.service_ports = service_ports or {}
    for service_key, factory in (services or {}).items():
        # A tuple key carries an explicit port; otherwise port 0 is used.
        if isinstance(service_key, tuple):
            service_key, port = service_key
        else:
            port = 0
        service = factory(self, io_loop=self.loop)
        self.services[service_key] = service
        service.listen(port)
        self.service_ports[service_key] = service.port

    handlers = {
        'compute': self.compute,
        'gather': self.gather,
        'compute-stream': self.compute_stream,
        'run': self.run,
        'get_data': self.get_data,
        'update_data': self.update_data,
        'delete_data': self.delete_data,
        'terminate': self.terminate,
        'ping': pingpong,
        'health': self.host_health,
        'upload_file': self.upload_file,
        'start_ipython': self.start_ipython,
        'keys': self.keys,
    }

    super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

    self.heartbeat_callback = PeriodicCallback(
        self.heartbeat,
        self.heartbeat_interval,
        io_loop=self.loop,
    )
    self.loop.add_callback(self.heartbeat_callback.start)
def get_mapper(cls, fs_mapper):
    """Wrap ``fs_mapper`` in a Func using ``cls.serialize`` / ``cls.deserialize``.

    A ``str`` or ``pathlib.Path`` is first turned into ``File(fs_mapper,
    mode='a')``; any other object is used as the underlying mapping as-is.
    """
    is_path = isinstance(fs_mapper, (str, pathlib.Path))
    store = File(fs_mapper, mode='a') if is_path else fs_mapper
    return Func(cls.serialize, cls.deserialize, store)