Пример #1
0
def test_service_hosts_match_worker(s):
    """Check which host the dashboard service socket is bound to.

    Three worker configurations are exercised in turn.  Each one starts a
    worker on a given listen address with a ``BokehWorker`` dashboard service
    registered under a given port spec, inspects the first HTTP socket of the
    dashboard server and compares its bound host against the accepted values.

    ``s`` is the scheduler supplied by the test harness.
    """
    pytest.importorskip("bokeh")
    from distributed.dashboard import BokehWorker

    # (worker listen address, dashboard port spec, accepted bound hosts)
    cases = [
        ("tcp://0.0.0.0", ":0", ("::", "0.0.0.0")),
        ("tcp://127.0.0.1", ":0", ("::", "0.0.0.0")),
        ("tcp://127.0.0.1", 0, ("127.0.0.1",)),
    ]

    for listen_address, port_spec, accepted_hosts in cases:
        w = Worker(s.address, services={("dashboard", port_spec): BokehWorker})
        yield w._start(listen_address)
        sock = first(w.services["dashboard"].server._http._sockets.values())
        assert sock.getsockname()[0] in accepted_hosts
        yield w.close()
Пример #2
0
def test_health():
    """``host_health`` returns a dict; with psutil present it has I/O counters.

    ``host_health`` is called twice and the I/O keys are checked on the
    second result only.
    """
    worker = Worker('127.0.0.1', 8019)
    first_report = worker.host_health()
    assert isinstance(first_report, dict)

    report = worker.host_health()
    try:
        import psutil  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return

    for key in ('disk-read', 'disk-write', 'network-recv', 'network-send'):
        assert key in report
Пример #3
0
def test_avoid_memory_monitor_if_zero_limit(c, s):
    """With ``memory_limit=0`` the worker has plain dict data and no monitor.

    The test also submits work before and after sleeping for one
    ``memory_monitor_interval`` to confirm tasks still complete.
    """
    w = yield Worker(
        s.address,
        loop=s.loop,
        memory_limit=0,
        memory_monitor_interval=10,
    )
    # Exact type check: the data store must be a plain dict, not a subclass.
    assert type(w.data) is dict
    assert "memory" not in w.periodic_callbacks

    first_result = yield c.submit(inc, 1)
    assert first_result == 2

    # memory_monitor_interval is divided by 1000 to get the sleep duration.
    yield gen.sleep(w.memory_monitor_interval / 1000)

    yield c.submit(inc, 2)  # worker doesn't pause

    yield w.close()
Пример #4
0
async def test_bad_local_directory(cleanup):
    """Starting a worker in an invalid directory must not log scheduler errors.

    A ``PermissionError`` from the worker is tolerated.  If no error is
    raised at all, that is only accepted when ``WINDOWS`` is truthy.
    """
    async with await Scheduler() as scheduler:
        try:
            async with Worker(
                scheduler.address, local_directory="/not/a/valid-directory"
            ):
                pass
        except PermissionError:
            pass
        else:
            # Reaching here without an exception is only acceptable on Windows.
            assert WINDOWS

        logs = scheduler.get_logs()
        assert not any("error" in entry for entry in logs)
Пример #5
0
def test_worker_fds(s):
    """A worker opens file descriptors on start and releases them on close.

    The file-descriptor baseline and the timing deadline are kept in separate
    variables.  Previously the baseline was overwritten by a timestamp, so the
    final polling loop compared an fd count against ``time()`` and never ran.
    """
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()
    fds_before = proc.num_fds()

    worker = Worker(s.address, loop=s.loop)
    yield worker._start()
    assert proc.num_fds() > fds_before

    yield worker._close()

    # Poll until the descriptor count is back at (or below) the baseline,
    # failing if that takes longer than half a second.
    deadline = time() + 0.5
    while proc.num_fds() > fds_before:
        yield gen.sleep(0.01)
        assert time() < deadline
Пример #6
0
def test_worker_fds(s):
    """A worker opens file descriptors on start and releases them on close.

    The file-descriptor baseline and the timing deadlines are kept in
    separate variables.  Previously the baseline was overwritten by a
    timestamp, so both polling loops compared an fd count against ``time()``
    and never ran.
    """
    psutil = pytest.importorskip("psutil")
    proc = psutil.Process()
    yield gen.sleep(0.05)
    fds_before = proc.num_fds()

    worker = yield Worker(s.address, loop=s.loop)
    yield gen.sleep(0.1)

    # NOTE(review): the original first loop was a no-op; it is assumed here
    # that the intent was to wait (up to 1s) for the started worker to hold
    # more descriptors than the baseline -- confirm.
    deadline = time() + 1
    while proc.num_fds() <= fds_before:
        yield gen.sleep(0.01)
        assert time() < deadline

    yield worker.close()

    # Poll until the descriptor count is back at (or below) the baseline,
    # failing if that takes longer than half a second.
    deadline = time() + 0.5
    while proc.num_fds() > fds_before:
        yield gen.sleep(0.01)
        assert time() < deadline
Пример #7
0
def test_spill_to_disk(e, s):
    """Check which keys sit in ``data.fast`` and ``data.slow`` as data arrives.

    Three 500-element ``u1`` arrays are submitted one at a time to a worker
    created with ``memory_limit=1000``.  After the third arrives, ``x`` is
    expected in ``slow``; after ``x`` is fetched again, ``y`` is expected in
    ``slow`` instead.

    The worker started here is closed at the end so it does not outlive the
    test.
    """
    np = pytest.importorskip('numpy')
    w = Worker(s.ip, s.port, loop=s.loop, memory_limit=1000)
    yield w._start()

    x = e.submit(np.random.randint, 0, 255, size=500, dtype='u1', key='x')
    yield _wait(x)
    y = e.submit(np.random.randint, 0, 255, size=500, dtype='u1', key='y')
    yield _wait(y)

    assert set(w.data) == {x.key, y.key}
    assert set(w.data.fast) == {x.key, y.key}

    z = e.submit(np.random.randint, 0, 255, size=500, dtype='u1', key='z')
    yield _wait(z)
    assert set(w.data) == {x.key, y.key, z.key}
    assert set(w.data.fast) == {y.key, z.key}
    assert set(w.data.slow) == {x.key}

    # Fetching x again is expected to move it back to fast and push y out.
    yield x._result()
    assert set(w.data.fast) == {x.key, z.key}
    assert set(w.data.slow) == {y.key}

    yield w._close()
Пример #8
0
def test_spill_to_disk(c, s):
    """Check which keys sit in ``data.fast`` and ``data.slow`` as data arrives.

    Three 500-element ``u1`` arrays are submitted one at a time to a worker
    created with ``memory_limit=1200 / 0.6`` and both memory fractions set to
    ``None``.  The ``slow`` assertions accept two possible outcomes each.
    """
    np = pytest.importorskip('numpy')
    worker = Worker(
        s.address,
        loop=s.loop,
        memory_limit=1200 / 0.6,
        memory_pause_fraction=None,
        memory_spill_fraction=None,
    )
    yield worker._start()

    def submit_random(key):
        # Every task produces the same kind of array; only the key differs.
        return c.submit(np.random.randint, 0, 255, size=500, dtype='u1', key=key)

    x = submit_random('x')
    yield wait(x)
    y = submit_random('y')
    yield wait(y)

    assert set(worker.data) == {x.key, y.key}
    assert set(worker.data.fast) == {x.key, y.key}

    z = submit_random('z')
    yield wait(z)
    assert set(worker.data) == {x.key, y.key, z.key}
    assert set(worker.data.fast) == {y.key, z.key}
    assert set(worker.data.slow) in ({x.key}, {x.key, y.key})

    yield x
    assert set(worker.data.fast) == {x.key, z.key}
    assert set(worker.data.slow) in ({y.key}, {x.key, y.key})
    yield worker._close()
Пример #9
0
async def test_io_loop(cleanup):
    """A worker given the scheduler's loop exposes that loop as ``io_loop``."""
    async with Scheduler(port=0) as scheduler, Worker(
        scheduler.address, loop=scheduler.loop
    ) as worker:
        assert worker.io_loop is scheduler.loop
Пример #10
0
async def test_worker_listens_on_same_interface_by_default(Worker):
    """A worker started with defaults reports the same ``ip`` as its scheduler.

    ``Worker`` is passed in as an argument, so the class under test is
    whatever the caller supplies.
    """
    async with Scheduler(host="localhost") as scheduler:
        assert scheduler.ip in ("127.0.0.1", "localhost")
        async with Worker(scheduler.address) as worker:
            assert worker.ip == scheduler.ip
Пример #11
0
def test_local_directory(s):
    """The worker's local directory lives under ``temporary_directory``.

    The path must start with the configured temporary directory and contain
    ``"dask-worker-space"``.  The worker is closed before the temporary path
    and config override go out of scope so it does not outlive the test.
    """
    with tmpfile() as fn, dask.config.set(temporary_directory=fn):
        w = yield Worker(s.address)
        assert w.local_directory.startswith(fn)
        assert "dask-worker-space" in w.local_directory
        yield w.close()
Пример #12
0
def test_register_worker_callbacks(c, s, a, b):
    """Setup callbacks run on existing workers and on workers that join later.

    Two callbacks are registered one after the other.  After each
    registration the test checks the two existing workers and then a freshly
    started worker.
    """

    # Setup callback that marks the worker object itself.
    def mystartup(dask_worker):
        dask_worker.init_variable = 1

    # Setup callback that takes no arguments and marks the environment.
    def mystartup2():
        import os

        os.environ["MY_ENV_VALUE"] = "WORKER_ENV_VALUE"
        return "Env set."

    # Probe: has ``mystartup`` run on this worker?
    def test_import(dask_worker):
        return hasattr(dask_worker, "init_variable")

    # Probe: has ``mystartup2`` run in this worker's process?
    def test_startup2():
        import os

        return os.environ.get("MY_ENV_VALUE") == "WORKER_ENV_VALUE"

    # Before any registration neither probe succeeds on the two workers.
    outcome = yield c.run(test_import)
    assert list(outcome.values()) == [False, False]
    outcome = yield c.run(test_startup2)
    assert list(outcome.values()) == [False, False]

    # A worker started now has no setup callback applied.
    extra = yield Worker(s.address, loop=s.loop)
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [False]
    yield extra.close()

    # Register the first callback.
    reply = yield c.register_worker_callbacks(setup=mystartup)
    assert len(reply) == 2

    # It has run on the existing workers ...
    outcome = yield c.run(test_import)
    assert list(outcome.values()) == [True, True]

    # ... and runs on a worker that joins afterwards.
    extra = yield Worker(s.address, loop=s.loop)
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [True]
    yield extra.close()

    # Register the second callback.
    reply = yield c.register_worker_callbacks(setup=mystartup2)
    assert len(reply) == 2

    # It has run on the existing workers.
    outcome = yield c.run(test_startup2)
    assert list(outcome.values()) == [True, True]

    # A new worker gets both callbacks.
    extra = yield Worker(s.address, loop=s.loop)
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [True]
    outcome = yield c.run(test_startup2, workers=[extra.address])
    assert list(outcome.values()) == [True]
    yield extra.close()
Пример #13
0
def test_scheduler_address_config(c, s):
    """A worker built without an address uses ``scheduler-address`` from config."""
    override = {"scheduler-address": s.address}
    with dask.config.set(override):
        w = yield Worker(loop=s.loop)
        assert w.scheduler.address == s.address
    # The worker is closed after the config override has been lifted.
    yield w.close()
Пример #14
0
def test_worker_nthreads():
    """The executor of a default worker is sized to ``CPU_COUNT``."""
    worker = Worker("127.0.0.1", 8019)
    try:
        pool_size = worker.executor._max_workers
        assert pool_size == CPU_COUNT
    finally:
        # Remove the worker's local directory whether or not the check passed.
        shutil.rmtree(worker.local_directory)
Пример #15
0
def test_io_loop(loop):
    """Scheduler and worker both expose the loop they were given as ``io_loop``."""
    scheduler = Scheduler(loop=loop)
    scheduler.listen(0)
    assert scheduler.io_loop is loop

    worker = Worker(scheduler.ip, scheduler.port, loop=loop)
    assert worker.io_loop is loop
Пример #16
0
def test_worker_death_timeout():
    """A worker with ``death_timeout=1`` reports ``'closed'`` three seconds later."""
    worker = Worker('127.0.0.1', 38848, death_timeout=1)
    yield worker._start()

    # Sleep for longer than the one-second death timeout before checking.
    yield gen.sleep(3)
    assert worker.status == 'closed'
Пример #17
0
 def f():
     """Create a worker for 127.0.0.1:8007 and yield on its start."""
     worker = Worker('127.0.0.1', 8007)
     yield worker._start()
Пример #18
0
 def f():
     """Yield on a worker created for 127.0.0.1:8007 and bind the result."""
     worker = yield Worker("127.0.0.1", 8007)
Пример #19
0
def test_scheduler_address_config(c, s):
    """A worker built without an address uses ``scheduler-address`` from config."""
    override = {'scheduler-address': s.address}
    with dask.config.set(override):
        w = Worker(loop=s.loop)
        yield w._start()
        assert w.scheduler.address == s.address
    # The worker is closed after the config override has been lifted.
    yield w._close()
Пример #20
0
def test_register_worker_callbacks(c, s, a, b):
    """Setup callbacks run on existing workers and on workers that join later.

    Two callbacks are registered one after the other.  After each
    registration the test checks ``s.worker_setups``, the two existing
    workers and a freshly started worker.  Finally, registering a callback
    that divides by zero must raise ``ZeroDivisionError``.
    """

    # Setup callback that marks the worker object itself.
    def mystartup(dask_worker):
        dask_worker.init_variable = 1

    # Setup callback that takes no arguments and marks the environment.
    def mystartup2():
        import os
        os.environ['MY_ENV_VALUE'] = 'WORKER_ENV_VALUE'
        return "Env set."

    # Probe: has ``mystartup`` run on this worker?
    def test_import(dask_worker):
        return hasattr(dask_worker, 'init_variable')

    # Probe: has ``mystartup2`` run in this worker's process?
    def test_startup2():
        import os
        return os.environ.get('MY_ENV_VALUE') == 'WORKER_ENV_VALUE'

    # Before any registration nothing is recorded and both probes fail.
    assert len(s.worker_setups) == 0
    outcome = yield c.run(test_import)
    assert list(outcome.values()) == [False, False]
    outcome = yield c.run(test_startup2)
    assert list(outcome.values()) == [False, False]

    # A worker started now has no setup callback applied.
    extra = Worker(s.address, loop=s.loop)
    yield extra._start()
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [False]
    yield extra._close()

    # Register the first callback.
    reply = yield c.register_worker_callbacks(setup=mystartup)
    assert len(reply) == 2
    assert len(s.worker_setups) == 1

    # It has run on the existing workers ...
    outcome = yield c.run(test_import)
    assert list(outcome.values()) == [True, True]

    # ... and runs on a worker that joins afterwards.
    extra = Worker(s.address, loop=s.loop)
    yield extra._start()
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [True]
    yield extra._close()

    # Register the second callback.
    reply = yield c.register_worker_callbacks(setup=mystartup2)
    assert len(reply) == 2
    assert len(s.worker_setups) == 2

    # It has run on the existing workers.
    outcome = yield c.run(test_startup2)
    assert list(outcome.values()) == [True, True]

    # A new worker gets both callbacks.
    extra = Worker(s.address, loop=s.loop)
    yield extra._start()
    outcome = yield c.run(test_import, workers=[extra.address])
    assert list(outcome.values()) == [True]
    outcome = yield c.run(test_startup2, workers=[extra.address])
    assert list(outcome.values()) == [True]
    yield extra._close()

    # A failing setup callback surfaces its exception to the caller.
    with pytest.raises(ZeroDivisionError):
        yield c.register_worker_callbacks(setup=lambda: 1 / 0)
Пример #21
0
def test_worker_nthreads():
    """The executor of a default worker is sized to the machine's CPU count."""
    worker = Worker("127.0.0.1", 8019)
    try:
        pool_size = worker.executor._max_workers
        assert pool_size == multiprocessing.cpu_count()
    finally:
        # Remove the worker's local directory whether or not the check passed.
        shutil.rmtree(worker.local_directory)
Пример #22
0
async def test_false_worker_dir(s):
    """An empty ``local_directory`` resolves under ``<cwd>/dask-worker-space``."""
    async with Worker(s.address, local_directory="") as worker:
        worker_dir = worker.local_directory

    # Compare the parent of the worker's directory with the expected location.
    expected_parent = os.path.join(os.getcwd(), "dask-worker-space")
    assert os.path.dirname(worker_dir) == expected_parent
Пример #23
0
    def _run(
        cls,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        init_result_q,
        child_stop_q,
        uid,
        env,
        config,
        Worker,
    ):  # pragma: no cover
        """Build a worker, start it on a fresh event loop and report the outcome.

        Parameters
        ----------
        worker_kwargs
            Keyword arguments passed to ``Worker(**worker_kwargs)``.
        worker_start_args
            Not used anywhere in this function body.
        silence_logs
            If truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue on which the start-up outcome is reported: a dict holding
            ``"uid"`` plus either ``"exception"`` or ``"address"`` and
            ``"dir"``.
        child_stop_q
            Queue from which a single ``{"op": "stop", ...}`` message is
            read; the remaining keys are forwarded to ``do_stop``.
        uid
            Identifier echoed back in every message put on ``init_result_q``.
        env
            Mapping merged into ``os.environ``.
        config
            Passed to ``dask.config.set``.
        Worker
            Callable used to construct the worker.
        """
        try:
            os.environ.update(env)
            dask.config.set(config)
            try:
                from dask.multiprocessing import initialize_worker_process
            except ImportError:  # old Dask version
                pass
            else:
                initialize_worker_process()

            if silence_logs:
                logger.setLevel(silence_logs)

            # Drop any existing IOLoop instance and install a new current one
            # before the worker is constructed.
            IOLoop.clear_instance()
            loop = IOLoop()
            loop.make_current()
            worker = Worker(**worker_kwargs)

            async def do_stop(timeout=5, executor_wait=True):
                # Close the worker, then stop the loop even if closing raised.
                try:
                    await worker.close(
                        report=True,
                        nanny=False,
                        safe=True,  # TODO: Graceful or not?
                        executor_wait=executor_wait,
                        timeout=timeout,
                    )
                finally:
                    loop.stop()

            def watch_stop_q():
                """
                Wait for an incoming stop message and then stop the
                worker cleanly.
                """
                msg = child_stop_q.get()
                child_stop_q.close()
                assert msg.pop("op") == "stop"
                # This runs in a separate thread, so the stop coroutine is
                # scheduled through loop.add_callback; whatever is left in
                # msg becomes do_stop's keyword arguments.
                loop.add_callback(do_stop, **msg)

            t = threading.Thread(target=watch_stop_q,
                                 name="Nanny stop queue watch")
            # Daemon thread: it will not keep the process alive on its own.
            t.daemon = True
            t.start()

            async def run():
                """
                Try to start worker and inform parent of outcome.
                """
                try:
                    await worker
                except Exception as e:
                    logger.exception("Failed to start worker")
                    init_result_q.put({"uid": uid, "exception": e})
                    init_result_q.close()
                    # If we hit an exception here we need to wait for at least
                    # one interval for the outside to pick up this message.
                    # Otherwise we arrive in a race condition where the process
                    # cleanup wipes the queue before the exception can be
                    # properly handled. See also
                    # WorkerProcess._wait_until_connected (the 2 is for good
                    # measure)
                    sync_sleep(cls._init_msg_interval * 2)
                else:
                    # A ValueError while reading worker.address is swallowed
                    # and nothing is reported on the queue in that case.
                    # NOTE(review): presumably this covers a worker that has
                    # already shut down -- confirm.
                    try:
                        assert worker.address
                    except ValueError:
                        pass
                    else:
                        init_result_q.put({
                            "address": worker.address,
                            "dir": worker.local_directory,
                            "uid": uid,
                        })
                        init_result_q.close()
                        await worker.finished()
                        logger.info("Worker closed")

        except Exception as e:
            logger.exception("Failed to initialize Worker")
            init_result_q.put({"uid": uid, "exception": e})
            init_result_q.close()
            # If we hit an exception here we need to wait for at least one
            # interval for the outside to pick up this message. Otherwise we
            # arrive in a race condition where the process cleanup wipes the
            # queue before the exception can be properly handled. See also
            # WorkerProcess._wait_until_connected (the 2 is for good measure)
            sync_sleep(cls._init_msg_interval * 2)
        else:
            # Set-up succeeded: drive the run() coroutine on the new loop.
            try:
                loop.run_sync(run)
            except (TimeoutError, gen.TimeoutError):
                # Loop was stopped before wait_until_closed() returned, ignore
                pass
            except KeyboardInterrupt:
                # At this point the loop is not running thus we have to run
                # do_stop() explicitly.
                loop.run_sync(do_stop)
Пример #24
0
def test_identity():
    """``identity`` reports the worker type and the scheduler it was given."""
    worker = Worker('127.0.0.1', 8019)
    info = worker.identity(None)
    assert info['type'] == 'Worker'
    assert info['scheduler'] == ('127.0.0.1', 8019)
Пример #25
0
def test_io_loop(loop):
    """Scheduler and worker both expose the loop they were given as ``io_loop``."""
    scheduler = Scheduler(loop=loop)
    scheduler.listen(0)
    assert scheduler.io_loop is loop

    worker = Worker(scheduler.address, loop=loop)
    assert worker.io_loop is loop
Пример #26
0
async def test_start_services(s):
    """The worker's HTTP server uses the port given as ``dashboard_address``."""
    async with Worker(s.address, dashboard_address=1234) as worker:
        http_port = worker.http_server.port
        assert http_port == 1234
Пример #27
0
async def test_worker_nthreads(cleanup):
    """The executor of a default worker is sized to ``CPU_COUNT``."""
    async with Scheduler() as scheduler, Worker(scheduler.address) as worker:
        pool_size = worker.executor._max_workers
        assert pool_size == CPU_COUNT
Пример #28
0
def test_pool():
    """Exercise a ``Pool`` against one center and two single-core workers.

    The scenario covers apply/map calls, core-availability bookkeeping,
    chaining a remote result into another call, error propagation from a
    failing task, and shutdown of every component.
    """
    center = Center('127.0.0.1', 8017, loop=loop)

    worker_a = Worker('127.0.0.1', 8018, center.ip, center.port,
                      loop=loop, ncores=1)
    worker_b = Worker('127.0.0.1', 8019, center.ip, center.port,
                      loop=loop, ncores=1)

    pool = Pool(center.ip, center.port, loop=loop, start=False)

    @asyncio.coroutine
    def scenario():
        yield from pool._sync_center()

        # A single call occupies one core until its result is collected.
        computation = yield from pool._apply_async(add, [1, 2])
        assert computation.status == b'running'
        assert set(pool.available_cores.values()) == {0, 1}
        x = yield from computation._get()
        assert computation.status == x.status == b'success'
        assert list(pool.available_cores.values()) == [1, 1]
        result = yield from x._get()
        assert result == 3

        # A remote result can be passed as an argument to another call.
        computation = yield from pool._apply_async(add, [x, 10])
        y = yield from computation._get()
        result = yield from y._get()
        assert result == 13

        assert {len(worker_a.data), len(worker_b.data)} == {0, 2}

        # Two calls in flight occupy both cores.
        x = yield from pool._apply_async(add, [1, 2])
        y = yield from pool._apply_async(add, [1, 2])
        assert list(pool.available_cores.values()) == [0, 0]
        xx = yield from x._get()
        yield from xx._get()
        assert set(pool.available_cores.values()) == {0, 1}
        yy = yield from y._get()
        yield from yy._get()
        assert list(pool.available_cores.values()) == [1, 1]

        seq = yield from pool._map(lambda x: x * 100, [1, 2, 3])
        result = yield from seq[0]._get(False)
        assert result == 100
        result = yield from seq[1]._get(False)
        assert result == 200
        result = yield from seq[2]._get(True)
        assert result == 300

        # Handle errors gracefully
        results = yield from pool._map(lambda x: 3 / x, [0, 1, 2, 3])
        assert all(isinstance(item, RemoteData) for item in results)
        try:
            yield from results[0]._get()
        except ZeroDivisionError:
            pass
        else:
            assert False

        yield from pool._close_connections()

        yield from worker_a._close()
        yield from worker_b._close()
        yield from center._close()

    loop.run_until_complete(
        asyncio.gather(center.go(), worker_a.go(), worker_b.go(), scenario())
    )