Exemplo n.º 1
0
def test_environment_variable(c, s):
    a = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "123"})
    b = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "456"})
    yield [a._start(), b._start()]
    results = yield c.run(lambda: os.environ['FOO'])
    assert results == {a.worker_address: "123", b.worker_address: "456"}
    yield [a._close(), b._close()]
Exemplo n.º 2
0
def test_environment_variable(c, s):
    a = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "123"})
    b = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "456"})
    yield [a._start(), b._start()]
    results = yield c.run(lambda: os.environ['FOO'])
    assert results == {a.worker_address: "123", b.worker_address: "456"}
    yield [a._close(), b._close()]
Exemplo n.º 3
0
def test_wait_for_scheduler():
    with captured_logger('distributed') as log:
        w = Nanny('127.0.0.1:44737')
        w._start()
        yield gen.sleep(6)

    log = log.getvalue()
    assert 'error' not in log.lower(), log
    assert 'restart' not in log.lower(), log
Exemplo n.º 4
0
def test_wait_for_scheduler():
    with captured_logger('distributed') as log:
        w = Nanny('127.0.0.1:44737')
        w._start()
        yield gen.sleep(6)

    log = log.getvalue()
    assert 'error' not in log.lower(), log
    assert 'restart' not in log.lower(), log
def test_wait_for_scheduler():
    with captured_logger("distributed") as log:
        w = Nanny("127.0.0.1:44737")
        w._start()
        yield gen.sleep(6)

    log = log.getvalue()
    assert "error" not in log.lower(), log
    assert "restart" not in log.lower(), log
Exemplo n.º 6
0
def test_nanny(s):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)

    yield n._start(0)
    with rpc(n.address) as nn:
        assert n.is_alive()
        assert s.ncores[n.worker_address] == 2
        assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

        yield nn.kill()
        assert not n.is_alive()
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.worker_info

        yield nn.kill()
        assert not n.is_alive()
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.worker_info

        yield nn.instantiate()
        assert n.is_alive()
        assert s.ncores[n.worker_address] == 2
        assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

        yield nn.terminate()
        assert not n.is_alive()

    yield n._close()
Exemplo n.º 7
0
def test_many_kills(s):
    n = Nanny(s.address, ncores=2, loop=s.loop)
    yield n._start(0)
    assert n.is_alive()
    yield [n.kill() for i in range(5)]
    yield [n.kill() for i in range(5)]
    yield n._close()
Exemplo n.º 8
0
def test_nanny(s):
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)

    yield n._start(0)
    nn = rpc(ip=n.ip, port=n.port)
    assert n.process.is_alive()
    assert s.ncores[n.worker_address] == 2
    assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

    yield nn.kill()
    assert n.worker_address not in s.ncores
    assert n.worker_address not in s.worker_info
    assert not n.process

    yield nn.kill()
    assert n.worker_address not in s.ncores
    assert n.worker_address not in s.worker_info
    assert not n.process

    yield nn.instantiate()
    assert n.process.is_alive()
    assert s.ncores[n.worker_address] == 2
    assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

    yield nn.terminate()
    assert not n.process

    yield n._close()
Exemplo n.º 9
0
def test_nanny(s):
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)

    yield n._start(0)
    with rpc(ip=n.ip, port=n.port) as nn:
        assert isalive(n.process)  # alive
        assert s.ncores[n.worker_address] == 2

        assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

        yield nn.kill()
        assert not n.process
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.worker_info

        yield nn.kill()
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.worker_info
        assert not n.process

        yield nn.instantiate()
        assert isalive(n.process)
        assert s.ncores[n.worker_address] == 2
        assert s.worker_info[n.worker_address]['services']['nanny'] > 1024

        yield nn.terminate()
        assert not n.process

    yield n._close()
Exemplo n.º 10
0
def test_nanny_death_timeout(s):
    yield s.close()
    w = Nanny(s.address, death_timeout=1)
    yield w._start()

    yield gen.sleep(3)
    assert w.status == 'closed'
Exemplo n.º 11
0
def test_nanny():
    c = Center('127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield n._start(0)
    nn = rpc(ip=n.ip, port=n.port)
    assert n.process.is_alive()
    assert c.ncores[n.worker_address] == 2
    assert c.worker_services[n.worker_address]['nanny'] > 1024

    yield nn.kill()
    assert n.worker_address not in c.ncores
    assert n.worker_address not in c.worker_services
    assert not n.process

    yield nn.kill()
    assert n.worker_address not in c.ncores
    assert n.worker_address not in c.worker_services
    assert not n.process

    yield nn.instantiate()
    assert n.process.is_alive()
    assert c.ncores[n.worker_address] == 2
    assert c.worker_services[n.worker_address]['nanny'] > 1024

    yield nn.terminate()
    assert not n.process

    if n.process:
        n.process.terminate()

    yield n._close()
    c.stop()
Exemplo n.º 12
0
def test_many_kills(s):
    n = Nanny(s.address, ncores=2, loop=s.loop)
    yield n._start(0)
    assert n.is_alive()
    yield [n.kill() for i in range(5)]
    yield [n.kill() for i in range(5)]
    yield n._close()
Exemplo n.º 13
0
def test_nanny(s):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)

    yield n._start(0)
    with rpc(n.address) as nn:
        assert n.is_alive()
        assert s.ncores[n.worker_address] == 2
        assert s.workers[n.worker_address].services['nanny'] > 1024

        yield nn.kill()
        assert not n.is_alive()
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.workers

        yield nn.kill()
        assert not n.is_alive()
        assert n.worker_address not in s.ncores
        assert n.worker_address not in s.workers

        yield nn.instantiate()
        assert n.is_alive()
        assert s.ncores[n.worker_address] == 2
        assert s.workers[n.worker_address].services['nanny'] > 1024

        yield nn.terminate()
        assert not n.is_alive()

    yield n._close()
Exemplo n.º 14
0
def test_monitor_resources():
    pytest.importorskip('psutil')
    c = Center(ip='127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield n._start()
    nn = rpc(ip=n.ip, port=n.port)
    assert n.process.is_alive()
    d = n.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(d)

    assert isinstance(d['timestamp'], datetime)

    stream = yield connect(ip=n.ip, port=n.port)
    yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

    for i in range(3):
        msg = yield read(stream)
        assert isinstance(msg, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(msg)

    stream.close()
    yield n._close()
    c.stop()
Exemplo n.º 15
0
def test_nanny_death_timeout(s):
    yield s.close()
    w = Nanny(s.address, death_timeout=1)
    yield w._start()

    yield gen.sleep(3)
    assert w.status == 'closed'
Exemplo n.º 16
0
def test_scheduler_file():
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8008)
        w = Nanny(scheduler_file=fn)
        yield w._start()
        assert s.workers == {w.worker_address}
        yield w._close()
        s.stop()
Exemplo n.º 17
0
def test_scheduler_file():
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8008)
        w = Nanny(scheduler_file=fn)
        yield w._start()
        assert set(s.workers) == {w.worker_address}
        yield w._close()
        s.stop()
Exemplo n.º 18
0
def test_run(s):
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield n._start()

    nn = rpc(n.address)

    response = yield nn.run(function=dumps(lambda: 1))
    assert response['status'] == 'OK'
    assert loads(response['result']) == 1
Exemplo n.º 19
0
def test_restart():
    from distributed import Nanny, rpc
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield [a._start(), b._start()]

    e = Executor((c.ip, c.port), start=False, loop=IOLoop.current())
    yield e._start()

    assert e.scheduler.ncores == {a.worker_address: 2, b.worker_address: 2}

    x = e.submit(inc, 1)
    y = e.submit(inc, x)
    yield y._result()

    cc = rpc(ip=c.ip, port=c.port)
    who_has = yield cc.who_has()
    try:
        assert e.scheduler.who_has == who_has
        assert set(e.scheduler.who_has) == {x.key, y.key}

        f = yield e._restart()
        assert f is e

        assert len(e.scheduler.stacks) == 2
        assert len(e.scheduler.processing) == 2

        who_has = yield cc.who_has()
        assert not who_has
        assert not e.scheduler.who_has

        assert x.cancelled()
        assert y.cancelled()

    finally:
        yield a._close()
        yield b._close()
        yield e._shutdown(fast=True)
        c.stop()
Exemplo n.º 20
0
def test_restart():
    from distributed import Nanny, rpc
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield [a._start(), b._start()]

    e = Executor((c.ip, c.port), start=False, loop=IOLoop.current())
    yield e._start()

    assert e.scheduler.ncores == {a.worker_address: 2, b.worker_address: 2}

    x = e.submit(inc, 1)
    y = e.submit(inc, x)
    yield y._result()

    cc = rpc(ip=c.ip, port=c.port)
    who_has = yield cc.who_has()
    try:
        assert e.scheduler.who_has == who_has
        assert set(e.scheduler.who_has) == {x.key, y.key}

        f = yield e._restart()
        assert f is e

        assert len(e.scheduler.stacks) == 2
        assert len(e.scheduler.processing) == 2

        who_has = yield cc.who_has()
        assert not who_has
        assert not e.scheduler.who_has

        assert x.cancelled()
        assert y.cancelled()

    finally:
        yield a._close()
        yield b._close()
        yield e._shutdown(fast=True)
        c.stop()
Exemplo n.º 21
0
def test_worker_uses_same_host_as_nanny(c, s):
    for host in ['tcp://0.0.0.0', 'tcp://127.0.0.2']:
        n = Nanny(s.address)
        yield n._start(host)

        def func(dask_worker):
            return dask_worker.listener.listen_address

        result = yield c.run(func)
        assert host in first(result.values())
        yield n._close()
Exemplo n.º 22
0
def test_run(s):
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield n._start()

    with rpc(n.address) as nn:
        response = yield nn.run(function=dumps(lambda: 1))
        assert response['status'] == 'OK'
        assert response['result'] == 1

    yield n._close()
Exemplo n.º 23
0
def test_worker_uses_same_host_as_nanny(c, s):
    for host in ['tcp://0.0.0.0', 'tcp://127.0.0.2']:
        n = Nanny(s.address)
        yield n._start(host)

        def func(dask_worker):
            return dask_worker.listener.listen_address

        result = yield c.run(func)
        assert host in first(result.values())
        yield n._close()
Exemplo n.º 24
0
def test_run(s):
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield n._start()

    with rpc(n.address) as nn:
        response = yield nn.run(function=dumps(lambda: 1))
        assert response['status'] == 'OK'
        assert response['result'] == 1

    yield n._close()
Exemplo n.º 25
0
def test_monitor_resources():
    pytest.importorskip('psutil')
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {
            ('127.0.0.1', a.worker_port): 2,
            ('127.0.0.1', b.worker_port): 2
        }
        assert s.nannies == {(n.ip, n.worker_port): n.port for n in [a, b]}

        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
Exemplo n.º 26
0
def test_scheduler_address_config(c, s):
    with dask.config.set({'scheduler-address': s.address}):
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        start = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < start + 10

    yield nanny._close()
Exemplo n.º 27
0
def test_scheduler_address_config(c, s):
    with dask.config.set({'scheduler-address': s.address}):
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        start = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < start + 10

    yield nanny._close()
Exemplo n.º 28
0
def test_monitor_resources():
    pytest.importorskip('psutil')
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {('127.0.0.1', a.worker_port): 2,
                            ('127.0.0.1', b.worker_port): 2}
        assert s.nannies == {(n.ip, n.worker_port): n.port
                             for n in [a, b]}

        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
Exemplo n.º 29
0
def run_nanny(q, center_port, **kwargs):
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback
    import logging
    with log_errors():
        IOLoop.clear_instance()
        loop = IOLoop(); loop.make_current()
        PeriodicCallback(lambda: None, 500).start()
        logging.getLogger("tornado").setLevel(logging.CRITICAL)
        worker = Nanny('127.0.0.1', center_port, ip='127.0.0.1', **kwargs)
        loop.run_sync(lambda: worker._start(0))
        q.put(worker.port)
        loop.start()
Exemplo n.º 30
0
def run_nanny(q, scheduler_q, **kwargs):
    from distributed import Nanny

    with log_errors():
        with pristine_loop() as loop:
            scheduler_addr = scheduler_q.get()
            worker = Nanny(scheduler_addr, validate=True, **kwargs)
            loop.run_sync(lambda: worker._start(0))
            q.put(worker.address)
            try:
                loop.start()
            finally:
                loop.run_sync(worker._close)
                loop.close(all_fds=True)
Exemplo n.º 31
0
def run_nanny(q, scheduler_q, **kwargs):
    from distributed import Nanny

    with log_errors():
        with pristine_loop() as loop:
            scheduler_addr = scheduler_q.get()
            worker = Nanny(scheduler_addr, validate=True, **kwargs)
            loop.run_sync(lambda: worker._start(0))
            q.put(worker.address)
            try:
                loop.start()
            finally:
                loop.run_sync(worker._close)
                loop.close(all_fds=True)
Exemplo n.º 32
0
def test_num_fds(s):
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()

    # Warm up
    w = Nanny(s.address)
    yield w._start()
    yield w._close()
    del w
    gc.collect()

    before = proc.num_fds()

    for i in range(3):
        w = Nanny(s.address)
        yield w._start()
        yield gen.sleep(0.1)
        yield w._close()

    start = time()
    while proc.num_fds() > before:
        print("fds:", before, proc.num_fds())
        yield gen.sleep(0.1)
        assert time() < start + 10
Exemplo n.º 33
0
def test_scheduler_address_config(c, s):
    config['scheduler-address'] = s.address
    try:
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        start = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < start + 10

    finally:
        del config['scheduler-address']
    yield nanny._close()
Exemplo n.º 34
0
def test_num_fds(s):
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()

    # Warm up
    w = Nanny(s.address)
    yield w._start()
    yield w._close()
    del w
    gc.collect()

    before = proc.num_fds()

    for i in range(3):
        w = Nanny(s.address)
        yield w._start()
        yield gen.sleep(0.1)
        yield w._close()

    start = time()
    while proc.num_fds() > before:
        print("fds:", before, proc.num_fds())
        yield gen.sleep(0.1)
        assert time() < start + 10
Exemplo n.º 35
0
def test_avoid_memory_monitor_if_zero_limit(c, s):
    nanny = Nanny(s.address, loop=s.loop, memory_limit=0)
    yield nanny._start()
    typ = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert typ == {nanny.worker_address: dict}
    pcs = yield c.run(lambda dask_worker: list(dask_worker.periodic_callbacks))
    assert 'memory' not in pcs
    assert 'memory' not in nanny.periodic_callbacks

    future = c.submit(inc, 1)
    assert (yield future) == 2
    yield gen.sleep(0.02)

    yield c.submit(inc, 2)  # worker doesn't pause

    yield nanny._close()
Exemplo n.º 36
0
def run_nanny(q, scheduler_port, **kwargs):
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback
    with log_errors():
        IOLoop.clear_instance()
        loop = IOLoop(); loop.make_current()
        PeriodicCallback(lambda: None, 500).start()
        worker = Nanny('127.0.0.1', scheduler_port, ip='127.0.0.1',
                       loop=loop, validate=True, **kwargs)
        loop.run_sync(lambda: worker._start(0))
        q.put(worker.port)
        try:
            loop.start()
        finally:
            loop.run_sync(worker._close)
            loop.close(all_fds=True)
Exemplo n.º 37
0
def run_nanny(q, scheduler_port, **kwargs):
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback
    with log_errors():
        IOLoop.clear_instance()
        loop = IOLoop(); loop.make_current()
        PeriodicCallback(lambda: None, 500).start()
        worker = Nanny('127.0.0.1', scheduler_port, ip='127.0.0.1',
                       loop=loop, **kwargs)
        loop.run_sync(lambda: worker._start(0))
        q.put(worker.port)
        try:
            loop.start()
        finally:
            loop.run_sync(worker._close)
            loop.close(all_fds=True)
Exemplo n.º 38
0
def test_avoid_memory_monitor_if_zero_limit(c, s):
    nanny = Nanny(s.address, loop=s.loop, memory_limit=0)
    yield nanny._start()
    typ = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert typ == {nanny.worker_address: dict}
    pcs = yield c.run(lambda dask_worker: list(dask_worker.periodic_callbacks))
    assert 'memory' not in pcs
    assert 'memory' not in nanny.periodic_callbacks

    future = c.submit(inc, 1)
    assert (yield future) == 2
    yield gen.sleep(0.02)

    yield c.submit(inc, 2)  # worker doesn't pause

    yield nanny._close()
Exemplo n.º 39
0
def run_nanny(q, scheduler_q, **kwargs):
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback

    with log_errors():
        with pristine_loop() as loop:
            PeriodicCallback(lambda: None, 500).start()

            scheduler_addr = scheduler_q.get()
            worker = Nanny(scheduler_addr, validate=True, **kwargs)
            loop.run_sync(lambda: worker._start(0))
            q.put(worker.address)
            try:
                loop.start()
            finally:
                loop.run_sync(worker._close)
                loop.close(all_fds=True)
Exemplo n.º 40
0
def test_nanny_process_failure(c, s):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield n._start()
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    original_address = n.worker_address
    ww = rpc(n.worker_address)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    pid = n.pid
    assert pid is not None
    with ignoring(CommClosedError):
        yield c._run(os._exit, 0, workers=[n.worker_address])

    start = time()
    while n.pid == pid:  # wait while process dies and comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while not n.is_alive():  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    # assert n.worker_address != original_address  # most likely

    start = time()
    while n.worker_address not in s.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 5

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    ww.close_rpc()
    s.stop()
Exemplo n.º 41
0
def test_nanny_process_failure():
    c = Center('127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    yield n._start()
    nn = rpc(ip=n.ip, port=n.port)
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    ww = rpc(ip=n.ip, port=n.worker_port)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    with ignoring(StreamClosedError):
        yield ww.compute(function=dumps(sys.exit),
                         args=dumps((0,)),
                         key='z')

    start = time()
    while n.process.is_alive():  # wait while process dies
        yield gen.sleep(0.01)
        assert time() - start < 2

    start = time()
    while not n.process.is_alive():  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 2

    start = time()
    while n.worker_address not in c.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 2

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    nn.close_streams()
    c.stop()
Exemplo n.º 42
0
def test_nanny_process_failure(c, s):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield n._start()
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    original_address = n.worker_address
    ww = rpc(n.worker_address)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    pid = n.pid
    assert pid is not None
    with ignoring(CommClosedError):
        yield c._run(os._exit, 0, workers=[n.worker_address])

    start = time()
    while n.pid == pid:  # wait while process dies and comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while not n.is_alive():  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    # assert n.worker_address != original_address  # most likely

    start = time()
    while n.worker_address not in s.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 5

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    ww.close_rpc()
    s.stop()
Exemplo n.º 43
0
def test_nanny_process_failure(s):
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield n._start()
    nn = rpc(ip=n.ip, port=n.port)
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    original_process = n.process
    ww = rpc(ip=n.ip, port=n.worker_port)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    with ignoring(StreamClosedError):
        yield ww.compute(function=dumps(sys.exit),
                         args=dumps((0,)),
                         key='z')

    start = time()
    while n.process is original_process:  # wait while process dies
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while not n.process.poll() is None:  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while n.worker_address not in s.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 5

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    nn.close_streams()
    s.stop()
Exemplo n.º 44
0
def test_monitor_resources(s):
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)

    yield n._start()
    assert isalive(n.process)
    d = n.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(d)

    assert 'timestamp' in d

    stream = yield connect(ip=n.ip, port=n.port)
    yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

    for i in range(3):
        msg = yield read(stream)
        assert isinstance(msg, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(msg)

    close(stream)
    yield n._close()
    s.stop()
Exemplo n.º 45
0
def test_monitor_resources(s):
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)

    yield n._start()
    assert isalive(n.process)
    d = n.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(d)

    assert 'timestamp' in d

    comm = yield connect(n.address)
    yield comm.write({'op': 'monitor_resources', 'interval': 0.01})

    for i in range(3):
        msg = yield comm.read()
        assert isinstance(msg, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(msg)

    yield comm.close()
    yield n._close()
    s.stop()
Exemplo n.º 46
0
def test_nanny_process_failure(c, s):
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield n._start()
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    original_process = n.process
    ww = rpc(ip=n.ip, port=n.worker_port)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    with ignoring(StreamClosedError):
        yield c._run(sys.exit, 0, workers=[n.worker_address])

    start = time()
    while n.process is original_process:  # wait while process dies
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while not isalive(n.process):  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while n.worker_address not in s.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 5

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    ww.close_rpc()
    s.stop()
Exemplo n.º 47
0
def test_nanny_process_failure(c, s):
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield n._start()
    first_dir = n.worker_dir

    assert os.path.exists(first_dir)

    original_process = n.process
    ww = rpc(ip=n.ip, port=n.worker_port)
    yield ww.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    with ignoring(StreamClosedError):
        yield c._run(sys.exit, 0, workers=[n.worker_address])

    start = time()
    while n.process is original_process:  # wait while process dies
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while not isalive(n.process):  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - start < 5

    start = time()
    while n.worker_address not in s.ncores or n.worker_dir is None:
        yield gen.sleep(0.01)
        assert time() - start < 5

    second_dir = n.worker_dir

    yield n._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != n.worker_dir
    ww.close_rpc()
    s.stop()
Exemplo n.º 48
0
def test_nanny_death_timeout():
    w = Nanny('127.0.0.1', 38848, death_timeout=1)
    yield w._start()

    yield gen.sleep(3)
    assert w.status == 'closed'