示例#1
0
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory="dask-worker-space",
    ):
        """Wire up the device -> host -> disk chain of mappings.

        Parameters
        ----------
        device_memory_limit
            Limit handed to the device-level ``Buffer``.
        memory_limit
            Limit handed to the host-level ``Buffer``.
        local_directory
            Directory that contains the ``storage`` folder used by the
            on-disk ``File`` mapping.
        """
        storage_path = os.path.join(local_directory, "storage")

        # Host tier: a plain dict in front of a serialising file store.
        self.host_func = host_store = {}
        self.disk_func = to_disk = Func(
            serialize_bytelist, deserialize_bytes, File(storage_path)
        )
        self.host_buffer = host_buffer = Buffer(
            host_store, to_disk, memory_limit, weight=weight
        )

        # Device tier: a plain dict in front of the host buffer built above.
        self.device_keys = set()
        self.device_func = device_store = {}
        self.device_host_func = to_host = Func(
            device_to_host, host_to_device, host_buffer
        )
        self.device_buffer = device_buffer = Buffer(
            device_store, to_host, device_memory_limit, weight=weight
        )

        # Shortcuts to the underlying mapping of each tier.
        self.device = device_buffer.fast.d
        self.host = host_buffer.fast.d
        self.disk = host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = host_buffer.fast
示例#2
0
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_dir="dask-worker-space",
    ):
        """Create the device, host and disk mappings and chain them together.

        Parameters
        ----------
        device_memory_limit
            Limit handed to the device-level ``Buffer``.
        memory_limit
            Limit handed to the host-level ``Buffer``.
        local_dir
            Directory that contains the ``storage`` folder used by the
            on-disk ``File`` mapping.
        """
        spill_path = os.path.join(local_dir, "storage")

        # Host tier: in-memory dict that overflows into a file-backed store.
        self.host_func = host_store = {}
        self.disk_func = to_disk = Func(
            partial(serialize_bytes, on_error="raise"),
            deserialize_bytes,
            File(spill_path),
        )
        self.host_buffer = host_buffer = Buffer(
            host_store, to_disk, memory_limit, weight=weight
        )

        # Device tier: in-memory dict that overflows into the host buffer.
        self.device_func = device_store = {}
        self.device_host_func = to_host = Func(
            _serialize_if_device, _deserialize_if_device, host_buffer
        )
        self.device_buffer = device_buffer = Buffer(
            device_store, to_host, device_memory_limit, weight=weight
        )

        # Shortcuts to the underlying mapping of each tier.
        self.device = device_buffer.fast.d
        self.host = host_buffer.fast.d
        self.disk = host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = host_buffer.fast
示例#3
0
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        log_spilling=False,
    ):
        """Set up the device, host and disk storage tiers.

        Parameters
        ----------
        device_memory_limit
            Limit handed to the device-level buffer.
        memory_limit
            Limit handed to the host-level buffer. A value of ``0`` skips the
            host buffer entirely: ``host_buffer`` is then the plain host dict
            and ``disk`` is ``None``.
        local_directory
            Base directory for spilled data. Falls back to the dask
            ``temporary-directory`` config value and then to the current
            working directory. Data is stored under
            ``<base>/dask-worker-space/storage``, which is created if missing.
        log_spilling
            When exactly ``True``, the buffers are built with ``LoggedBuffer``
            and labelled with the names of the tiers they connect.
        """
        self.disk_func_path = os.path.join(
            local_directory or dask.config.get("temporary-directory") or os.getcwd(),
            "dask-worker-space",
            "storage",
        )
        os.makedirs(self.disk_func_path, exist_ok=True)

        self.host_func = dict()
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )

        # The naming kwargs are only populated together with LoggedBuffer, so
        # they must never be passed to a plain Buffer.
        host_buffer_kwargs = {}
        device_buffer_kwargs = {}
        buffer_class = Buffer
        if log_spilling is True:
            buffer_class = LoggedBuffer
            host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
            device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}

        if memory_limit == 0:
            # No host-side buffer: the host tier is just the dict.
            self.host_buffer = self.host_func
        else:
            self.host_buffer = buffer_class(
                self.host_func,
                self.disk_func,
                memory_limit,
                weight=lambda k, v: safe_sizeof(v),
                **host_buffer_kwargs,
            )

        self.device_keys = set()
        self.device_func = dict()
        self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
        # Use the selected buffer class here as well; previously this was
        # hard-coded to Buffer, which received the LoggedBuffer-only naming
        # kwargs whenever log_spilling was enabled.
        self.device_buffer = buffer_class(
            self.device_func,
            self.device_host_func,
            device_memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **device_buffer_kwargs,
        )

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
        self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
示例#4
0
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        jit_unspill=False,
    ):
        """Set up the device, host and disk storage tiers.

        Parameters
        ----------
        device_memory_limit
            Limit handed to the device-level ``Buffer``.
        memory_limit
            Limit handed to the host-level ``Buffer``. A value of ``0`` skips
            the host buffer: ``host_buffer`` is then the plain host dict and
            ``disk`` is ``None``.
        local_directory
            Base directory for spilled data. Falls back to the dask
            ``temporary-directory`` config value and then to the current
            working directory; it is created if it does not exist.
        jit_unspill
            Selects the ``pxy_obj_*`` conversion functions instead of
            ``device_to_host`` / ``host_to_device`` for the device tier.
        """
        base_dir = local_directory
        if base_dir is None:
            base_dir = dask.config.get("temporary-directory") or os.getcwd()

        if not os.path.exists(base_dir):
            os.makedirs(base_dir, exist_ok=True)

        self.disk_func_path = os.path.join(base_dir, "dask-worker-space", "storage")

        host_only = memory_limit == 0

        # Host tier: either the bare dict or a dict spilling into a file store.
        self.host_func = host_store = {}
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )
        if host_only:
            self.host_buffer = host_store
        else:
            self.host_buffer = Buffer(
                host_store, self.disk_func, memory_limit, weight=weight
            )

        # Device tier: pick the conversion pair first, then build the chain.
        self.device_keys = set()
        self.device_func = {}
        if jit_unspill:
            to_host, to_device = pxy_obj_device_to_host, pxy_obj_host_to_device
        else:
            to_host, to_device = device_to_host, host_to_device
        self.device_host_func = Func(to_host, to_device, self.host_buffer)
        self.device_buffer = Buffer(
            self.device_func,
            self.device_host_func,
            device_memory_limit,
            weight=weight,
        )

        self.device = self.device_buffer.fast.d
        if host_only:
            self.host = self.host_buffer
            self.disk = None
            # For Worker compatibility only, where `fast` is host memory buffer
            self.fast = self.host_buffer
        else:
            self.host = self.host_buffer.fast.d
            self.disk = self.host_buffer.slow.d
            # For Worker compatibility only, where `fast` is host memory buffer
            self.fast = self.host_buffer.fast
示例#5
0
def test_simple():
    """Exercise item access, iteration, printing and deletion on a ``Func``."""
    backing = {}
    wrapped = Func(inc, dec, backing)

    wrapped['x'] = 10
    # Reading through the wrapper gives the original value back, while the
    # backing dict is expected to hold 11.
    assert wrapped['x'] == 10
    assert backing['x'] == 11

    # Mapping protocol as seen through the wrapper.
    assert 'x' in wrapped
    assert list(wrapped) == ['x']
    assert list(wrapped.values()) == [10]
    assert list(wrapped.items()) == [('x', 10)]

    # Both text forms must mention the two functions, the key and the class.
    expected_tokens = ['inc', 'dec', 'x', 'Func']
    assert all(token in str(wrapped) for token in expected_tokens)
    assert all(token in repr(wrapped) for token in expected_tokens)

    # Deleting through the wrapper must also drop the key from the backing dict.
    del wrapped['x']
    assert 'x' not in backing
示例#6
0
def test_mapping():
    """Run the shared mapping and closing checks against a ``Func``."""
    wrapped = Func(rotl, rotr, {})
    utils_test.check_mapping(wrapped)
    utils_test.check_closing(wrapped)
示例#7
0
def test_simple():
    """Check set/get/iterate/print/delete behaviour of ``Func(inc, dec, ...)``."""
    store = {}
    func = Func(inc, dec, store)

    # Store one value; the wrapper must read it back unchanged while the
    # underlying dict is expected to contain 11.
    func['x'] = 10
    assert func['x'] == 10
    assert store['x'] == 11

    # Membership and the three iteration views.
    assert 'x' in func
    assert list(func) == ['x']
    assert list(func.values()) == [10]
    assert list(func.items()) == [('x', 10)]

    # str() and repr() must each mention the functions, the key and the class.
    needles = ['inc', 'dec', 'x', 'Func']
    assert all(needle in str(func) for needle in needles)
    assert all(needle in repr(func) for needle in needles)

    # Removal propagates to the underlying dict.
    del func['x']
    assert 'x' not in store
示例#8
0
def test_simple():
    """Smoke-test the mapping behaviour of a ``Func`` wrapping a dict."""
    underlying = {}
    mapping = Func(inc, dec, underlying)

    mapping["x"] = 10
    # The wrapper returns what was stored; the dict underneath is expected
    # to hold 11 for the same key.
    assert mapping["x"] == 10
    assert underlying["x"] == 11

    # Membership and iteration go through the wrapper.
    assert "x" in mapping
    assert list(mapping) == ["x"]
    assert list(mapping.values()) == [10]
    assert list(mapping.items()) == [("x", 10)]

    # Both string forms name the functions, the key and the class.
    fragments = ["inc", "dec", "x", "Func"]
    assert all(fragment in str(mapping) for fragment in fragments)
    assert all(fragment in repr(mapping) for fragment in fragments)

    # Deleting through the wrapper removes the key underneath as well.
    del mapping["x"]
    assert "x" not in underlying
示例#9
0
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
    ):
        """Wire up the device -> host -> disk chain of mappings.

        Parameters
        ----------
        device_memory_limit
            Limit handed to the device-level ``Buffer``.
        memory_limit
            Limit handed to the host-level ``Buffer``.
        local_directory
            Directory that contains the ``storage`` folder. When ``None``,
            the dask ``temporary-directory`` config value (or the current
            working directory) is created if needed and its
            ``dask-worker-space`` subfolder is used instead.
        """
        if local_directory is None:
            tmp_root = dask.config.get("temporary-directory") or os.getcwd()
            os.makedirs(tmp_root, exist_ok=True)
            local_directory = os.path.join(tmp_root, "dask-worker-space")

        self.disk_func_path = os.path.join(local_directory, "storage")

        # Host tier: a plain dict in front of a serialising file store.
        self.host_func = host_store = {}
        self.disk_func = to_disk = Func(
            serialize_bytelist, deserialize_bytes, File(self.disk_func_path)
        )
        self.host_buffer = host_buffer = Buffer(
            host_store, to_disk, memory_limit, weight=weight
        )

        # Device tier: a plain dict in front of the host buffer built above.
        self.device_keys = set()
        self.device_func = device_store = {}
        self.device_host_func = to_host = Func(
            device_to_host, host_to_device, host_buffer
        )
        self.device_buffer = device_buffer = Buffer(
            device_store, to_host, device_memory_limit, weight=weight
        )

        # Shortcuts to the underlying mapping of each tier.
        self.device = device_buffer.fast.d
        self.host = host_buffer.fast.d
        self.disk = host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = host_buffer.fast
示例#10
0
 def __init__(self, spill_directory: str, target: int):
     """Initialise spill bookkeeping and hand the storage chain to the parent.

     Parameters
     ----------
     spill_directory
         Directory given to the ``File`` mapping that backs the slow store.
     target
         Passed to the parent initialiser as its third positional argument.
     """
     self.spilled_by_key = {}
     self.spilled_total = 0

     # Slow store: serialise on the way in (raising on failure) and
     # deserialise on the way out of the file-backed mapping.
     dump = partial(serialize_bytelist, on_error="raise")
     slow_store = Func(dump, deserialize_bytes, File(spill_directory))

     super().__init__(
         {},
         slow_store,
         target,
         weight=self._weight,
         fast_to_slow_callbacks=[self._on_evict],
         slow_to_fast_callbacks=[self._on_retrieve],
     )
示例#11
0
 def setup(self, worker):
     """Build the layered cache and reset the lock and hit/miss counters.

     The cache is a ``Buffer`` over a plain dict (limit ``self._maxmem``,
     weighed by ``nbytes``) whose slow side packs values with blosc into a
     second ``Buffer`` (limit ``self._maxcompressed``, weighed by ``len``).
     That second buffer spills into an ``LRU`` (limit ``self._maxdisk``)
     over a ``File`` in the ``cache`` folder of ``worker.local_directory``.
     """
     def _length(k, v):
         return len(v)

     # Resolve the codec first so the lookup order matches construction order.
     pack = blosc.pack_array
     unpack = blosc.unpack_array

     # Innermost level: size-bounded files on disk.
     on_disk = LRU(
         n=self._maxdisk,
         d=File(os.path.join(worker.local_directory, 'cache')),
         weight=_length,
     )
     # Middle level: packed values kept in a dict, overflowing to disk.
     compressed = Buffer(
         fast={},
         slow=on_disk,
         n=self._maxcompressed,
         weight=_length,
     )
     # Outermost level: raw values in a dict, packed when they overflow.
     self.cache = Buffer(
         fast={},
         slow=Func(dump=pack, load=unpack, d=compressed),
         n=self._maxmem,
         weight=lambda k, v: v.nbytes,
     )

     self.lock = Lock()
     self.hits = 0
     self.misses = 0
示例#12
0
def redis_dict(r):
    """Return a ``Func`` built from ``r`` and two ``partial`` objects.

    The returned ``Func`` receives ``r`` itself, ``read_from_redis`` bound to
    ``r``, and ``partial(red)``.
    """
    # NOTE(review): `b` is assigned but never used afterwards, and `Func` is
    # called with a single argument here — confirm this line is intentional.
    b = Func(r.hmset)
    # NOTE(review): `red` is not defined inside this function — verify that a
    # module-level name `red` exists, otherwise this raises NameError.
    return Func(r, partial(read_from_redis, r), partial(red))
示例#13
0
def numpy_redis_mapping():
    """Return a ``Func`` over a new ``Redis()`` using ``arr_to_dict`` / ``dict_to_arr``."""
    mapping = Func(arr_to_dict, dict_to_arr, Redis())
    return mapping
示例#14
0
    def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, heartbeat_interval=5000, memory_limit=TOTAL_MEMORY,
                 **kwargs):
        """Initialise worker state, optional services, handlers and heartbeat.

        Parameters
        ----------
        scheduler_ip, scheduler_port
            Address used to build the ``rpc`` object stored in
            ``self.scheduler``.
        ip
            Address of this worker; defaults to ``get_ip()``.
        ncores
            Size of the thread pool executor; defaults to ``_ncores``.
        loop
            IO loop to use; defaults to ``IOLoop.current()``.
        local_dir
            Working directory; defaults to a new
            ``tempfile.mkdtemp(prefix='worker-')`` directory. It is created
            if missing and inserted at the front of ``sys.path``.
        services
            Optional mapping whose keys are either a name or a
            ``(name, port)`` tuple and whose values are called as
            ``v(self, io_loop=self.loop)`` and then told to ``listen(port)``.
        service_ports
            Optional dict that is updated in place with the port of every
            started service.
        name
            Stored on ``self.name``.
        heartbeat_interval
            Passed to ``PeriodicCallback`` as the callback interval.
        memory_limit
            When truthy, ``self.data`` is a zict ``Buffer`` that spills into
            the ``storage`` folder of ``local_dir``; otherwise a plain dict.
        **kwargs
            Forwarded to the parent initialiser.

        Raises
        ------
        ImportError
            If ``memory_limit`` is truthy and ``zict`` cannot be imported.
        """
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)
        self.memory_limit = memory_limit
        if memory_limit:
            # zict is only needed (and only imported) for spill-to-disk.
            try:
                from zict import Buffer, File, Func
            except ImportError:
                raise ImportError("Please `pip install zict` for spill-to-disk workers")
            path = os.path.join(self.local_dir, 'storage')
            storage = Func(dumps_to_disk, loads_from_disk, File(path))
            # int(float(...)) accepts the limit as a number or numeric string.
            self.data = Buffer({}, storage, int(float(memory_limit)), weight)
        else:
            self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.executor = ThreadPoolExecutor(self.ncores)
        self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
        self.active = set()
        self.name = name
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_active = False
        self.execution_state = {'scheduler': self.scheduler.address,
                                'ioloop': self.loop,
                                'worker': self}
        self._last_disk_io = None
        self._last_net_io = None
        self._ipython_kernel = None

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        # Start every requested service; a key may carry an explicit port,
        # otherwise port 0 is requested.
        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self, io_loop=self.loop)
            self.services[k].listen(port)
            # Record the port reported by the service after listen().
            self.service_ports[k] = self.services[k].port

        # Operation name -> callable, handed to the parent initialiser below.
        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.host_health,
                    'upload_file': self.upload_file,
                    'start_ipython': self.start_ipython,
                    'keys': self.keys,
                }

        super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

        # The periodic heartbeat is started from within the loop via add_callback.
        self.heartbeat_callback = PeriodicCallback(self.heartbeat,
                                                   self.heartbeat_interval,
                                                   io_loop=self.loop)
        self.loop.add_callback(self.heartbeat_callback.start)
示例#15
0
 def get_mapper(cls, fs_mapper):
     """Wrap ``fs_mapper`` in a ``Func`` using this class's (de)serialisers.

     A ``str`` or ``pathlib.Path`` argument is first turned into a ``File``
     opened with ``mode='a'``; any other object is used as given.
     """
     backing = fs_mapper
     if isinstance(backing, (str, pathlib.Path)):
         backing = File(backing, mode='a')
     return Func(cls.serialize, cls.deserialize, backing)