Пример #1
0
def test_environment_variable(c, s):
    """Check that each nanny's ``env`` mapping reaches its own worker.

    Two nannies are started against ``s.address`` with different ``FOO``
    values; ``c.run`` then reads ``os.environ['FOO']`` and the answer is
    compared per worker address.
    """
    first, second = [
        Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": value})
        for value in ("123", "456")
    ]
    yield [first._start(), second._start()]

    seen = yield c.run(lambda: os.environ['FOO'])
    expected = {first.worker_address: "123", second.worker_address: "456"}
    assert seen == expected

    yield [first._close(), second._close()]
Пример #2
0
def test_environment_variable(c, s):
    """Check that each nanny's ``env`` mapping reaches its own worker.

    Both nannies are started by yielding the ``Nanny`` objects themselves;
    ``c.run`` then reads ``os.environ['FOO']`` and the answer is compared
    per worker address.
    """
    first, second = [
        Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": value})
        for value in ("123", "456")
    ]
    yield [first, second]

    seen = yield c.run(lambda: os.environ['FOO'])
    expected = {first.worker_address: "123", second.worker_address: "456"}
    assert seen == expected

    yield [first._close(), second._close()]
Пример #3
0
def test_nanny(s):
    """Drive a nanny through kill, repeated kill, instantiate and terminate.

    Every step is issued through an ``rpc`` connection to the nanny; after
    each one the nanny's process and the scheduler's ``ncores`` and
    ``worker_info`` tables are checked.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start(0)

    with rpc(ip=nanny.ip, port=nanny.port) as remote:
        # Right after start-up: process alive, two cores registered.
        assert isalive(nanny.process)
        assert s.ncores[nanny.worker_address] == 2
        info = s.worker_info[nanny.worker_address]
        assert info['services']['nanny'] > 1024

        # First kill removes the process and the scheduler entries.
        yield remote.kill()
        assert not nanny.process
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info

        # Killing again leaves everything in the same state.
        yield remote.kill()
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info
        assert not nanny.process

        # Instantiate brings the worker back with the same settings.
        yield remote.instantiate()
        assert isalive(nanny.process)
        assert s.ncores[nanny.worker_address] == 2
        info = s.worker_info[nanny.worker_address]
        assert info['services']['nanny'] > 1024

        yield remote.terminate()
        assert not nanny.process

    yield nanny._close()
Пример #4
0
def test_nanny(s):
    """Drive a nanny through kill, repeated kill, instantiate and terminate.

    Every step is issued through ``rpc(n.address)``; after each one
    ``is_alive()`` and the scheduler's ``ncores`` / ``workers`` tables are
    checked.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start(0)

    with rpc(nanny.address) as remote:
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        assert s.workers[nanny.worker_address].services['nanny'] > 1024

        # The same checks are made after the first and the repeated kill.
        for _ in range(2):
            yield remote.kill()
            assert not nanny.is_alive()
            assert nanny.worker_address not in s.ncores
            assert nanny.worker_address not in s.workers

        yield remote.instantiate()
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        assert s.workers[nanny.worker_address].services['nanny'] > 1024

        yield remote.terminate()
        assert not nanny.is_alive()

    yield nanny._close()
Пример #5
0
def test_broken_worker_during_computation(c, s, a, b):
    """Check a chained computation finishes although a worker exits twice.

    A nanny-managed worker is added, 256 ``inc`` tasks followed by eight
    rounds of pairwise ``add`` are submitted, and the nanny's worker is
    told to ``os._exit(1)`` at two random moments.  The first gathered
    value must be an ``int``.
    """
    s.allowed_failures = 100
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(inc, range(256))
    for _ in range(8):
        futures = c.map(add, *zip(*partition_all(2, futures)))

    from random import random
    yield gen.sleep(random() / 2)
    # The comm is expected to be closed abruptly by the exiting worker.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])
    yield gen.sleep(random() / 2)
    # The replacement worker may not be reachable yet.
    with ignoring(CommClosedError, EnvironmentError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    gathered = yield c._gather(futures)
    assert isinstance(gathered[0], int)

    yield nanny._close()
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """Check worker ``a`` forgets a peer whose process has exited.

    A nanny-managed worker takes part in a computation and is then told to
    ``os._exit(1)``.  After a follow-up task pinned to ``a``, the dead
    worker's address must be absent from ``a.has_what`` and from every
    value of ``a.who_has``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    count = 20
    futures = c.map(slowinc, range(count), delay=0.01,
                    key=['f%d' % i for i in range(count)])
    yield wait(futures)

    first_total = yield c.submit(sum, futures, workers=a.address)
    extra_deps = set(a.dep_state) - set(a.task_state)
    for dep in extra_deps:
        a.release_dep(dep, report=True)

    # Capture the address before the worker process is made to exit.
    dead_address = nanny.worker_address
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[dead_address])

    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    total = c.submit(sum, futures, workers=a.address)
    yield total

    assert not a.has_what.get(dead_address)
    assert not any(dead_address in holders
                   for holders in a.who_has.values())

    yield nanny._close()
Пример #7
0
def test_broken_worker_during_computation(c, s, a, b):
    """Check a chained computation survives terminating a worker process.

    A nanny-managed worker is added, 256 ``inc`` tasks followed by eight
    rounds of pairwise ``add`` are submitted, and the nanny's process is
    terminated at two random moments.  The first gathered value must be an
    ``int``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(inc, range(256))
    for _ in range(8):
        futures = c.map(add, *zip(*partition_all(2, futures)))

    from random import random
    # Two terminate attempts, each after a random pause of up to 0.5 s.
    for _ in range(2):
        yield gen.sleep(random() / 2)
        with ignoring(OSError):
            nanny.process.terminate()

    gathered = yield c._gather(futures)
    assert isinstance(gathered[0], int)

    yield nanny._close()
Пример #8
0
def test_broken_worker_during_computation(c, s, a, b):
    """Check a chained computation survives terminating a worker process.

    A nanny-managed worker is added, 256 ``inc`` tasks followed by eight
    rounds of pairwise ``add`` are submitted, and the nanny's process is
    terminated at two random moments.  The first gathered value must be an
    ``int``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(inc, range(256))
    for _ in range(8):
        futures = c.map(add, *zip(*partition_all(2, futures)))

    from random import random
    # Two terminate attempts, each after a random pause of up to 0.5 s.
    for _ in range(2):
        yield gen.sleep(random() / 2)
        with ignoring(OSError):
            nanny.process.terminate()

    gathered = yield c._gather(futures)
    assert isinstance(gathered[0], int)

    yield nanny._close()
Пример #9
0
def test_many_kills(s):
    """Issue two batches of five concurrent ``kill`` calls on one nanny."""
    nanny = Nanny(s.address, ncores=2, loop=s.loop)
    yield nanny._start(0)
    assert nanny.is_alive()

    # Each batch is yielded as a list, so its five kills run together.
    for _ in range(2):
        yield [nanny.kill() for _ in range(5)]

    yield nanny._close()
Пример #10
0
def test_monitor_resources():
    """Check ``resource_collect`` and the ``monitor_resources`` stream op.

    Skipped when ``psutil`` is not importable.  A nanny is started against
    a local ``Center``; its resource snapshot and three streamed messages
    must each contain ``cpu_percent`` and ``memory_percent``.
    """
    pytest.importorskip('psutil')
    center = Center(ip='127.0.0.1')
    center.listen(0)
    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')

    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)
    assert nanny.process.is_alive()

    snapshot = nanny.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(snapshot)
    assert isinstance(snapshot['timestamp'], datetime)

    stream = yield connect(ip=nanny.ip, port=nanny.port)
    yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

    for _ in range(3):
        message = yield read(stream)
        assert isinstance(message, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(message)

    stream.close()
    yield nanny._close()
    center.stop()
Пример #11
0
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """Check worker ``a`` forgets a peer whose process has exited.

    A nanny-managed worker takes part in a computation and is then told to
    ``os._exit(1)``.  After a follow-up task pinned to ``a``, the dead
    worker's address must be absent from ``a.has_what`` and from every
    value of ``a.who_has``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    count = 20
    futures = c.map(slowinc, range(count), delay=0.01,
                    key=['f%d' % i for i in range(count)])
    yield wait(futures)

    first_total = yield c.submit(sum, futures, workers=a.address)
    extra_deps = set(a.dep_state) - set(a.task_state)
    for dep in extra_deps:
        a.release_dep(dep, report=True)

    # Capture the address before the worker process is made to exit.
    dead_address = nanny.worker_address
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[dead_address])

    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    total = c.submit(sum, futures, workers=a.address)
    yield total

    assert not a.has_what.get(dead_address)
    assert not any(dead_address in holders
                   for holders in a.who_has.values())

    yield nanny._close()
Пример #12
0
def test_nanny(s):
    """Drive a nanny through kill, repeated kill, instantiate and terminate.

    Every step is issued through an ``rpc`` object pointed at the nanny;
    after each one the nanny's process and the scheduler's ``ncores`` and
    ``worker_info`` tables are checked.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start(0)

    remote = rpc(ip=nanny.ip, port=nanny.port)
    assert nanny.process.is_alive()
    assert s.ncores[nanny.worker_address] == 2
    info = s.worker_info[nanny.worker_address]
    assert info['services']['nanny'] > 1024

    # The same checks are made after the first and the repeated kill.
    for _ in range(2):
        yield remote.kill()
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info
        assert not nanny.process

    yield remote.instantiate()
    assert nanny.process.is_alive()
    assert s.ncores[nanny.worker_address] == 2
    info = s.worker_info[nanny.worker_address]
    assert info['services']['nanny'] > 1024

    yield remote.terminate()
    assert not nanny.process

    yield nanny._close()
Пример #13
0
def test_nanny(s):
    """Drive a nanny through kill, repeated kill, instantiate and terminate.

    Every step is issued through ``rpc(n.address)``; after each one
    ``is_alive()`` and the scheduler's ``ncores`` / ``worker_info`` tables
    are checked.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start(0)

    with rpc(nanny.address) as remote:
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        info = s.worker_info[nanny.worker_address]
        assert info['services']['nanny'] > 1024

        # The same checks are made after the first and the repeated kill.
        for _ in range(2):
            yield remote.kill()
            assert not nanny.is_alive()
            assert nanny.worker_address not in s.ncores
            assert nanny.worker_address not in s.worker_info

        yield remote.instantiate()
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        info = s.worker_info[nanny.worker_address]
        assert info['services']['nanny'] > 1024

        yield remote.terminate()
        assert not nanny.is_alive()

    yield nanny._close()
Пример #14
0
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """Check worker ``a`` forgets a peer whose process was terminated.

    A nanny-managed worker takes part in a computation and its process is
    then terminated.  After a follow-up task pinned to ``a``, the dead
    worker's address must be absent from ``a.has_what`` and from every
    value of ``a.who_has``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(slowinc, range(20), delay=0.01)
    yield _wait(futures)

    first_total = yield c.submit(sum, futures, workers=a.address)
    extra_deps = set(a.dep_state) - set(a.task_state)
    for dep in extra_deps:
        a.release_dep(dep, report=True)

    # Capture the address before the worker process is terminated.
    dead_address = nanny.worker_address
    nanny.process.terminate()

    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    total = c.submit(sum, futures, workers=a.address)
    yield total

    assert not a.has_what.get(dead_address)
    assert not any(dead_address in holders
                   for holders in a.who_has.values())

    yield nanny._close()
Пример #15
0
def test_nanny():
    """Drive a nanny through kill, repeated kill, instantiate and terminate.

    The nanny registers with a local ``Center``; after each remote call the
    nanny's process and the center's ``ncores`` / ``worker_services``
    tables are checked.
    """
    center = Center('127.0.0.1')
    center.listen(0)
    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')

    yield nanny._start(0)
    remote = rpc(ip=nanny.ip, port=nanny.port)
    assert nanny.process.is_alive()
    assert center.ncores[nanny.worker_address] == 2
    assert center.worker_services[nanny.worker_address]['nanny'] > 1024

    # The same checks are made after the first and the repeated kill.
    for _ in range(2):
        yield remote.kill()
        assert nanny.worker_address not in center.ncores
        assert nanny.worker_address not in center.worker_services
        assert not nanny.process

    yield remote.instantiate()
    assert nanny.process.is_alive()
    assert center.ncores[nanny.worker_address] == 2
    assert center.worker_services[nanny.worker_address]['nanny'] > 1024

    yield remote.terminate()
    assert not nanny.process

    # Only reachable with a live process when asserts are disabled.
    if nanny.process:
        nanny.process.terminate()

    yield nanny._close()
    center.stop()
Пример #16
0
def test_many_kills(s):
    """Issue two batches of five concurrent ``kill`` calls on one nanny."""
    nanny = Nanny(s.address, ncores=2, loop=s.loop)
    yield nanny._start(0)
    assert nanny.is_alive()

    # Each batch is yielded as a list, so its five kills run together.
    for _ in range(2):
        yield [nanny.kill() for _ in range(5)]

    yield nanny._close()
Пример #17
0
def test_scheduler_file():
    """Check a nanny can find the scheduler through a scheduler file.

    A ``Scheduler`` and a ``Nanny`` are both given the same temporary
    ``scheduler_file`` path; once the nanny has started, the scheduler's
    workers must consist of exactly the nanny's worker address.
    """
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8008)
        w = Nanny(scheduler_file=fn)
        yield w._start()
        # Compare as sets: this holds whether ``s.workers`` is a set-like
        # container or a mapping keyed by worker address, whereas comparing
        # a mapping directly to a set literal can never be equal.
        assert set(s.workers) == {w.worker_address}
        yield w._close()
        s.stop()
Пример #18
0
def test_scheduler_file():
    """Check a nanny can find the scheduler through a scheduler file.

    A ``Scheduler`` and a ``Nanny`` are both given the same temporary
    ``scheduler_file`` path; once the nanny has started, the set of the
    scheduler's workers must equal ``{nanny.worker_address}``.
    """
    with tmpfile() as path:
        s = Scheduler(scheduler_file=path)
        s.start(8008)

        nanny = Nanny(scheduler_file=path)
        yield nanny._start()

        registered = set(s.workers)
        assert registered == {nanny.worker_address}

        yield nanny._close()
        s.stop()
    def create_and_destroy_worker(delay):
        """Repeatedly start a nanny, sleep ``delay`` seconds, and close it.

        New cycles begin for as long as fewer than five seconds have passed
        since this generator started running.  ``s`` is taken from the
        enclosing scope.
        """
        deadline = time() + 5
        while time() < deadline:
            nanny = Nanny(s.address, ncores=2, loop=s.loop)
            nanny.start(0)

            yield gen.sleep(delay)

            yield nanny._close()
            print("Killed nanny")
Пример #20
0
    def create_and_destroy_worker(delay):
        """Repeatedly start a nanny, sleep ``delay`` seconds, and close it.

        New cycles begin for as long as fewer than five seconds have passed
        since this generator started running.  ``s`` is taken from the
        enclosing scope.
        """
        deadline = time() + 5
        while time() < deadline:
            nanny = Nanny(s.address, ncores=2, loop=s.loop)
            nanny.start(0)

            yield gen.sleep(delay)

            yield nanny._close()
            print("Killed nanny")
Пример #21
0
def test_restart():
    """Check ``Executor._restart`` clears data and cancels old futures.

    Two nannies register with a local ``Center``; two chained ``inc`` tasks
    are computed, then the executor is restarted.  Afterwards ``who_has``
    must be empty on both the center and the executor's scheduler, and
    both futures must report ``cancelled()``.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')

    yield [nanny_a._start(), nanny_b._start()]

    executor = Executor((center.ip, center.port), start=False,
                        loop=IOLoop.current())
    yield executor._start()

    expected_cores = {nanny_a.worker_address: 2, nanny_b.worker_address: 2}
    assert executor.scheduler.ncores == expected_cores

    x = executor.submit(inc, 1)
    y = executor.submit(inc, x)
    yield y._result()

    center_rpc = rpc(ip=center.ip, port=center.port)
    who_has = yield center_rpc.who_has()
    try:
        assert executor.scheduler.who_has == who_has
        assert set(executor.scheduler.who_has) == {x.key, y.key}

        restarted = yield executor._restart()
        assert restarted is executor

        assert len(executor.scheduler.stacks) == 2
        assert len(executor.scheduler.processing) == 2

        who_has = yield center_rpc.who_has()
        assert not who_has
        assert not executor.scheduler.who_has

        assert x.cancelled()
        assert y.cancelled()

    finally:
        # Tear down in the same order: nannies, executor, then center.
        for nanny in (nanny_a, nanny_b):
            yield nanny._close()
        yield executor._shutdown(fast=True)
        center.stop()
Пример #22
0
def test_restart():
    """Check ``Executor._restart`` clears data and cancels old futures.

    Two nannies register with a local ``Center``; two chained ``inc`` tasks
    are computed, then the executor is restarted.  Afterwards ``who_has``
    must be empty on both the center and the executor's scheduler, and
    both futures must report ``cancelled()``.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')

    yield [nanny_a._start(), nanny_b._start()]

    executor = Executor((center.ip, center.port), start=False,
                        loop=IOLoop.current())
    yield executor._start()

    expected_cores = {nanny_a.worker_address: 2, nanny_b.worker_address: 2}
    assert executor.scheduler.ncores == expected_cores

    x = executor.submit(inc, 1)
    y = executor.submit(inc, x)
    yield y._result()

    center_rpc = rpc(ip=center.ip, port=center.port)
    who_has = yield center_rpc.who_has()
    try:
        assert executor.scheduler.who_has == who_has
        assert set(executor.scheduler.who_has) == {x.key, y.key}

        restarted = yield executor._restart()
        assert restarted is executor

        assert len(executor.scheduler.stacks) == 2
        assert len(executor.scheduler.processing) == 2

        who_has = yield center_rpc.who_has()
        assert not who_has
        assert not executor.scheduler.who_has

        assert x.cancelled()
        assert y.cancelled()

    finally:
        # Tear down in the same order: nannies, executor, then center.
        for nanny in (nanny_a, nanny_b):
            yield nanny._close()
        yield executor._shutdown(fast=True)
        center.stop()
Пример #23
0
def test_worker_uses_same_host_as_nanny(c, s):
    """Check the worker's listen address contains the nanny's start host.

    For each host, a nanny is started with that host and ``c.run`` asks the
    worker for ``listener.listen_address``; the host string must appear in
    the first returned address.
    """
    def func(dask_worker):
        return dask_worker.listener.listen_address

    for host in ('tcp://0.0.0.0', 'tcp://127.0.0.2'):
        nanny = Nanny(s.address)
        yield nanny._start(host)

        addresses = yield c.run(func)
        assert host in first(addresses.values())

        yield nanny._close()
Пример #24
0
def test_worker_uses_same_host_as_nanny(c, s):
    """Check the worker's listen address contains the nanny's start host.

    For each host, a nanny is started with that host and ``c.run`` asks the
    worker for ``listener.listen_address``; the host string must appear in
    the first returned address.
    """
    def func(dask_worker):
        return dask_worker.listener.listen_address

    for host in ('tcp://0.0.0.0', 'tcp://127.0.0.2'):
        nanny = Nanny(s.address)
        yield nanny._start(host)

        addresses = yield c.run(func)
        assert host in first(addresses.values())

        yield nanny._close()
Пример #25
0
def test_run(s):
    """Check the nanny's ``run`` handler executes a serialized function.

    Skipped when ``psutil`` is not importable.  The reply must carry
    status ``'OK'`` and the lambda's return value ``1``.
    """
    pytest.importorskip('psutil')
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()

    with rpc(nanny.address) as remote:
        payload = dumps(lambda: 1)
        reply = yield remote.run(function=payload)
        assert reply['status'] == 'OK'
        assert reply['result'] == 1

    yield nanny._close()
Пример #26
0
def test_run(s):
    """Check the nanny's ``run`` handler executes a serialized function.

    Skipped when ``psutil`` is not importable.  The reply must carry
    status ``'OK'`` and the lambda's return value ``1``.
    """
    pytest.importorskip('psutil')
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()

    with rpc(nanny.address) as remote:
        payload = dumps(lambda: 1)
        reply = yield remote.run(function=payload)
        assert reply['status'] == 'OK'
        assert reply['result'] == 1

    yield nanny._close()
Пример #27
0
def test_monitor_resources():
    """Check the scheduler collects bounded resource logs from nannies.

    Skipped when ``psutil`` is not importable.  Two nannies register with a
    local ``Center`` and a ``Scheduler`` is created with
    ``resource_interval=0.01`` and ``resource_log_size=3``.  Each log must
    hold exactly three entries, and ``diagnostic_resources(n=2)`` must
    return two values per field for each worker.
    """
    pytest.importorskip('psutil')
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    scheduler = Scheduler((center.ip, center.port),
                          resource_interval=0.01, resource_log_size=3)

    yield nanny_a._start()
    yield nanny_b._start()
    yield scheduler.sync_center()
    done = scheduler.start()

    try:
        assert scheduler.ncores == {('127.0.0.1', nanny_a.worker_port): 2,
                                    ('127.0.0.1', nanny_b.worker_port): 2}
        assert scheduler.nannies == {
            (nanny.ip, nanny.worker_port): nanny.port
            for nanny in [nanny_a, nanny_b]
        }

        # Wait until every log has at least three entries.
        while any(len(log) < 3
                  for log in scheduler.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        # After the extra pause each log must still hold exactly three.
        assert set(scheduler.resource_logs) == {nanny_a.address,
                                                nanny_b.address}
        assert all(len(log) == 3
                   for log in scheduler.resource_logs.values())

        diagnostics = scheduler.diagnostic_resources(n=2)
        assert set(diagnostics) == {nanny_a.worker_address,
                                    nanny_b.worker_address}
        fields = diagnostics[nanny_a.worker_address]
        assert set(fields).issubset({'cpu', 'memory', 'time'})
        assert all(len(values) == 2 for values in fields.values())

        scheduler.put({'op': 'close'})
        yield done
    finally:
        for nanny in (nanny_a, nanny_b):
            with ignoring(TimeoutError, StreamClosedError, OSError):
                yield nanny._close(timeout=0.5)
        center.stop()
Пример #28
0
def test_scheduler_address_config(c, s):
    """Check a nanny built without an address uses the configured one.

    With ``scheduler-address`` set to ``s.address`` through
    ``dask.config.set``, a ``Nanny`` created with only ``loop`` must report
    that scheduler address, and the scheduler must list a worker within
    ten seconds.
    """
    overrides = {'scheduler-address': s.address}
    with dask.config.set(overrides):
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        deadline = time() + 10
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < deadline

    yield nanny._close()
Пример #29
0
def test_scheduler_address_config(c, s):
    """Check a nanny built without an address uses the configured one.

    With ``scheduler-address`` set to ``s.address`` through
    ``dask.config.set``, a ``Nanny`` created with only ``loop`` must report
    that scheduler address, and the scheduler must list a worker within
    ten seconds.
    """
    overrides = {'scheduler-address': s.address}
    with dask.config.set(overrides):
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        deadline = time() + 10
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < deadline

    yield nanny._close()
Пример #30
0
def test_monitor_resources():
    """Check the scheduler collects bounded resource logs from nannies.

    Skipped when ``psutil`` is not importable.  Two nannies register with a
    local ``Center`` and a ``Scheduler`` is created with
    ``resource_interval=0.01`` and ``resource_log_size=3``.  Each log must
    hold exactly three entries, and ``diagnostic_resources(n=2)`` must
    return two values per field for each worker.
    """
    pytest.importorskip('psutil')
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    scheduler = Scheduler((center.ip, center.port),
                          resource_interval=0.01, resource_log_size=3)

    yield nanny_a._start()
    yield nanny_b._start()
    yield scheduler.sync_center()
    done = scheduler.start()

    try:
        assert scheduler.ncores == {('127.0.0.1', nanny_a.worker_port): 2,
                                    ('127.0.0.1', nanny_b.worker_port): 2}
        assert scheduler.nannies == {
            (nanny.ip, nanny.worker_port): nanny.port
            for nanny in [nanny_a, nanny_b]
        }

        # Wait until every log has at least three entries.
        while any(len(log) < 3
                  for log in scheduler.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        # After the extra pause each log must still hold exactly three.
        assert set(scheduler.resource_logs) == {nanny_a.address,
                                                nanny_b.address}
        assert all(len(log) == 3
                   for log in scheduler.resource_logs.values())

        diagnostics = scheduler.diagnostic_resources(n=2)
        assert set(diagnostics) == {nanny_a.worker_address,
                                    nanny_b.worker_address}
        fields = diagnostics[nanny_a.worker_address]
        assert set(fields).issubset({'cpu', 'memory', 'time'})
        assert all(len(values) == 2 for values in fields.values())

        scheduler.put({'op': 'close'})
        yield done
    finally:
        for nanny in (nanny_a, nanny_b):
            with ignoring(TimeoutError, StreamClosedError, OSError):
                yield nanny._close(timeout=0.5)
        center.stop()
Пример #31
0
def test_scheduler_address_config(c, s):
    """Check a nanny built without an address uses the configured one.

    ``config['scheduler-address']`` is pointed at ``s.address`` for the
    duration of the test.  A ``Nanny`` created with only ``loop`` must
    report that scheduler address, and the scheduler must list a worker
    within ten seconds.

    Whatever value the config key held beforehand is restored afterwards
    (the key is removed only if it was absent), and the nanny is closed
    even when one of the assertions fails.
    """
    unset = object()  # sentinel: distinguishes "key absent" from any value
    previous = config.get('scheduler-address', unset)
    config['scheduler-address'] = s.address
    nanny = None
    try:
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        start = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < start + 10

    finally:
        # Put the shared config back exactly as it was found so later
        # tests are not affected by this one.
        if previous is unset:
            config.pop('scheduler-address', None)
        else:
            config['scheduler-address'] = previous
        # Close on failure paths as well, so the nanny is not left running.
        if nanny is not None:
            yield nanny._close()
Пример #32
0
def test_num_fds(s):
    """Check that starting and closing nannies does not leak descriptors.

    Skipped when ``psutil`` is not importable.  After one warm-up cycle the
    process's ``num_fds()`` is recorded; three more start/close cycles
    follow, and the count must drop back to the recorded value within ten
    seconds.
    """
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()

    # Warm-up cycle, run before the baseline is taken.
    nanny = Nanny(s.address)
    yield nanny._start()
    yield nanny._close()
    del nanny
    gc.collect()

    baseline = proc.num_fds()

    for _ in range(3):
        nanny = Nanny(s.address)
        yield nanny._start()
        yield gen.sleep(0.1)
        yield nanny._close()

    deadline = time() + 10
    while proc.num_fds() > baseline:
        print("fds:", baseline, proc.num_fds())
        yield gen.sleep(0.1)
        assert time() < deadline
Пример #33
0
def test_submit_after_failed_worker_async(c, s, a, b):
    """Check a dependent task completes when a nanny is killed meanwhile.

    Ten ``inc`` results are computed with an extra nanny-managed worker in
    the cluster; ``nanny.kill`` is then scheduled on the loop and a ``sum``
    over those results is submitted.  The sum must match the locally
    computed value.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)
    while len(s.workers) < 3:
        yield gen.sleep(0.1)

    futures = c.map(inc, range(10))
    yield wait(futures)

    # The kill is only queued here; it runs once the loop gets control.
    s.loop.add_callback(nanny.kill)
    total = c.submit(sum, futures)
    value = yield total
    expected = sum(map(inc, range(10)))
    assert value == expected

    yield nanny._close()
def test_submit_after_failed_worker_async(c, s, a, b):
    """Check a dependent task completes when a nanny is killed meanwhile.

    Ten ``inc`` results are computed with an extra nanny-managed worker in
    the cluster; ``nanny.kill`` is then scheduled on the loop and a ``sum``
    over those results is submitted.  The sum must match the locally
    computed value.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)
    while len(s.workers) < 3:
        yield gen.sleep(0.1)

    futures = c.map(inc, range(10))
    yield wait(futures)

    # The kill is only queued here; it runs once the loop gets control.
    s.loop.add_callback(nanny.kill)
    total = c.submit(sum, futures)
    value = yield total
    expected = sum(map(inc, range(10)))
    assert value == expected

    yield nanny._close()
Пример #35
0
def test_num_fds(s):
    """Check that starting and closing nannies does not leak descriptors.

    Skipped when ``psutil`` is not importable.  After one warm-up cycle the
    process's ``num_fds()`` is recorded; three more start/close cycles
    follow, and the count must drop back to the recorded value within ten
    seconds.
    """
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()

    # Warm-up cycle, run before the baseline is taken.
    nanny = Nanny(s.address)
    yield nanny._start()
    yield nanny._close()
    del nanny
    gc.collect()

    baseline = proc.num_fds()

    for _ in range(3):
        nanny = Nanny(s.address)
        yield nanny._start()
        yield gen.sleep(0.1)
        yield nanny._close()

    deadline = time() + 10
    while proc.num_fds() > baseline:
        print("fds:", baseline, proc.num_fds())
        yield gen.sleep(0.1)
        assert time() < deadline
Пример #36
0
def test_avoid_memory_monitor_if_zero_limit(c, s):
    """Check that ``memory_limit=0`` disables the memory monitor.

    A nanny is started with ``memory_limit=0``.  The worker's ``data`` must
    be a plain ``dict``, and neither the worker nor the nanny may have a
    periodic callback named ``'memory'``.  Two small tasks are then run,
    separated by a short sleep.
    """
    nanny = Nanny(s.address, loop=s.loop, memory_limit=0)
    yield nanny._start()
    typ = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert typ == {nanny.worker_address: dict}

    # ``c.run`` returns a dict keyed by worker address (see the check
    # above), so the callback names have to be looked up in its values;
    # testing membership on the dict itself would only inspect addresses.
    pcs = yield c.run(lambda dask_worker: list(dask_worker.periodic_callbacks))
    assert all('memory' not in names for names in pcs.values())
    assert 'memory' not in nanny.periodic_callbacks

    future = c.submit(inc, 1)
    assert (yield future) == 2
    yield gen.sleep(0.02)

    yield c.submit(inc, 2)  # worker doesn't pause

    yield nanny._close()
Пример #37
0
def test_avoid_memory_monitor_if_zero_limit(c, s):
    """Check that ``memory_limit=0`` disables the memory monitor.

    A nanny is started with ``memory_limit=0``.  The worker's ``data`` must
    be a plain ``dict``, and neither the worker nor the nanny may have a
    periodic callback named ``'memory'``.  Two small tasks are then run,
    separated by a short sleep.
    """
    nanny = Nanny(s.address, loop=s.loop, memory_limit=0)
    yield nanny._start()
    typ = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert typ == {nanny.worker_address: dict}

    # ``c.run`` returns a dict keyed by worker address (see the check
    # above), so the callback names have to be looked up in its values;
    # testing membership on the dict itself would only inspect addresses.
    pcs = yield c.run(lambda dask_worker: list(dask_worker.periodic_callbacks))
    assert all('memory' not in names for names in pcs.values())
    assert 'memory' not in nanny.periodic_callbacks

    future = c.submit(inc, 1)
    assert (yield future) == 2
    yield gen.sleep(0.02)

    yield c.submit(inc, 2)  # worker doesn't pause

    yield nanny._close()
Пример #38
0
def test_nanny_process_failure():
    """Check the nanny brings its worker back after the process exits.

    The worker is asked to compute ``sys.exit(0)``.  The test then waits
    for the process to stop, to be alive again, and to be listed in the
    center's ``ncores`` with a worker directory set.  After closing the
    nanny, neither the first nor the second worker directory may exist.
    """
    center = Center('127.0.0.1')
    center.listen(0)
    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    with ignoring(StreamClosedError):
        yield worker_rpc.compute(function=dumps(sys.exit),
                                 args=dumps((0,)),
                                 key='z')

    # Phase 1: wait while the process is still alive.
    began = time()
    while nanny.process.is_alive():
        yield gen.sleep(0.01)
        assert time() - began < 2

    # Phase 2: wait until a process is alive again.
    began = time()
    while not nanny.process.is_alive():
        yield gen.sleep(0.01)
        assert time() - began < 2

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in center.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 2

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    nanny_rpc.close_streams()
    center.stop()
Пример #39
0
def test_nanny_process_failure(c, s):
    """Check the nanny brings its worker back after the process exits.

    The worker is told to ``os._exit(0)`` through ``c._run``.  The test
    waits for the nanny's pid to change, for the nanny to report alive,
    and for the worker to be listed in ``s.ncores`` with a worker
    directory set.  After closing the nanny, neither the first nor the
    second worker directory may exist.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    original_address = nanny.worker_address
    worker_rpc = rpc(nanny.worker_address)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    old_pid = nanny.pid
    assert old_pid is not None
    with ignoring(CommClosedError):
        yield c._run(os._exit, 0, workers=[nanny.worker_address])

    # Phase 1: wait while the nanny still reports the old pid.
    began = time()
    while nanny.pid == old_pid:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 2: wait until the nanny reports alive again.
    began = time()
    while not nanny.is_alive():
        yield gen.sleep(0.01)
        assert time() - began < 5

    # assert nanny.worker_address != original_address  # most likely

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in s.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    worker_rpc.close_rpc()
    s.stop()
Пример #40
0
def test_nanny_process_failure(c, s):
    """Check the nanny brings its worker back after the process exits.

    The worker is told to ``os._exit(0)`` through ``c._run``.  The test
    waits for the nanny's pid to change, for the nanny to report alive,
    and for the worker to be listed in ``s.ncores`` with a worker
    directory set.  After closing the nanny, neither the first nor the
    second worker directory may exist.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    original_address = nanny.worker_address
    worker_rpc = rpc(nanny.worker_address)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    old_pid = nanny.pid
    assert old_pid is not None
    with ignoring(CommClosedError):
        yield c._run(os._exit, 0, workers=[nanny.worker_address])

    # Phase 1: wait while the nanny still reports the old pid.
    began = time()
    while nanny.pid == old_pid:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 2: wait until the nanny reports alive again.
    began = time()
    while not nanny.is_alive():
        yield gen.sleep(0.01)
        assert time() - began < 5

    # assert nanny.worker_address != original_address  # most likely

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in s.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    worker_rpc.close_rpc()
    s.stop()
Пример #41
0
def test_nanny_process_failure(s):
    """Check the nanny replaces its worker process after it exits.

    The worker is asked to compute ``sys.exit(0)``.  The test waits for
    ``n.process`` to become a different object, for that process's
    ``poll()`` to return ``None``, and for the worker to be listed in
    ``s.ncores`` with a worker directory set.  After closing the nanny,
    neither the first nor the second worker directory may exist.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    old_process = nanny.process
    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    with ignoring(StreamClosedError):
        yield worker_rpc.compute(function=dumps(sys.exit),
                                 args=dumps((0,)),
                                 key='z')

    # Phase 1: wait while the nanny still holds the original process.
    began = time()
    while nanny.process is old_process:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 2: wait while the current process reports an exit status.
    began = time()
    while nanny.process.poll() is not None:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in s.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    nanny_rpc.close_streams()
    s.stop()
Пример #42
0
def test_monitor_resources(s):
    """Check ``resource_collect`` and the ``monitor_resources`` stream op.

    Skipped when ``psutil`` is not importable.  The nanny's resource
    snapshot and three streamed messages must each contain
    ``cpu_percent`` and ``memory_percent``; the snapshot must also have a
    ``timestamp`` key.
    """
    pytest.importorskip('psutil')
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)

    yield nanny._start()
    assert isalive(nanny.process)

    snapshot = nanny.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(snapshot)
    assert 'timestamp' in snapshot

    stream = yield connect(ip=nanny.ip, port=nanny.port)
    yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

    for _ in range(3):
        message = yield read(stream)
        assert isinstance(message, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(message)

    close(stream)
    yield nanny._close()
    s.stop()
Пример #43
0
def test_monitor_resources(s):
    """Check ``resource_collect`` and the ``monitor_resources`` comm op.

    Skipped when ``psutil`` is not importable.  The nanny's resource
    snapshot and three messages read from the comm must each contain
    ``cpu_percent`` and ``memory_percent``; the snapshot must also have a
    ``timestamp`` key.
    """
    pytest.importorskip('psutil')
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)

    yield nanny._start()
    assert isalive(nanny.process)

    snapshot = nanny.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(snapshot)
    assert 'timestamp' in snapshot

    comm = yield connect(nanny.address)
    yield comm.write({'op': 'monitor_resources', 'interval': 0.01})

    for _ in range(3):
        message = yield comm.read()
        assert isinstance(message, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(message)

    yield comm.close()
    yield nanny._close()
    s.stop()
def test_broken_worker_during_computation(c, s, a, b):
    """Check a pairwise-sum graph gives the right total despite crashes.

    256 ``inc`` tasks are combined pairwise with ``slowadd`` until one
    future remains, while the nanny-managed worker is told to
    ``os._exit(1)`` twice.  The single gathered value must equal
    ``N * (N + 1) // 2``.
    """
    s.allowed_failures = 100
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    size = 256
    expected_result = size * (size + 1) // 2
    level = 0
    futures = c.map(inc, range(size),
                    key=['inc-%d-%d' % (level, idx) for idx in range(size)])
    while len(futures) > 1:
        level += 1
        pair_count = len(futures) // 2
        futures = c.map(slowadd, *zip(*partition_all(2, futures)),
                        key=['add-%d-%d' % (level, idx)
                             for idx in range(pair_count)])

    yield gen.sleep(random.random() / 20)
    # The comm is expected to be closed abruptly by the exiting worker.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    yield gen.sleep(random.random() / 20)
    while len(s.workers) < 3:
        yield gen.sleep(0.01)

    # The replacement worker may not be reachable yet.
    with ignoring(CommClosedError, EnvironmentError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    [total] = yield c.gather(futures)
    assert isinstance(total, int)
    assert total == expected_result

    yield nanny._close()
Пример #45
0
def test_nanny_process_failure(c, s):
    """Check the nanny replaces its worker process after it exits.

    The worker is told to ``sys.exit(0)`` through ``c._run``.  The test
    waits for ``n.process`` to become a different object, for
    ``isalive`` to accept it, and for the worker to be listed in
    ``s.ncores`` with a worker directory set.  After closing the nanny,
    neither the first nor the second worker directory may exist.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    old_process = nanny.process
    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    with ignoring(StreamClosedError):
        yield c._run(sys.exit, 0, workers=[nanny.worker_address])

    # Phase 1: wait while the nanny still holds the original process.
    began = time()
    while nanny.process is old_process:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 2: wait until the current process counts as alive.
    began = time()
    while not isalive(nanny.process):
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in s.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    worker_rpc.close_rpc()
    s.stop()
Пример #46
0
def test_broken_worker_during_computation(c, s, a, b):
    """Check a pairwise-sum graph gives the right total despite crashes.

    256 ``inc`` tasks are combined pairwise with ``slowadd`` until one
    future remains, while the nanny-managed worker is told to
    ``os._exit(1)`` twice.  The single gathered value must equal
    ``N * (N + 1) // 2``.
    """
    s.allowed_failures = 100
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    # Give the scheduler at most five seconds to list three workers.
    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    size = 256
    expected_result = size * (size + 1) // 2
    level = 0
    futures = c.map(inc, range(size),
                    key=['inc-%d-%d' % (level, idx) for idx in range(size)])
    while len(futures) > 1:
        level += 1
        pair_count = len(futures) // 2
        futures = c.map(slowadd, *zip(*partition_all(2, futures)),
                        key=['add-%d-%d' % (level, idx)
                             for idx in range(pair_count)])

    yield gen.sleep(random.random() / 20)
    # The comm is expected to be closed abruptly by the exiting worker.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    yield gen.sleep(random.random() / 20)
    while len(s.workers) < 3:
        yield gen.sleep(0.01)

    # The replacement worker may not be reachable yet.
    with ignoring(CommClosedError, EnvironmentError):
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    [total] = yield c.gather(futures)
    assert isinstance(total, int)
    assert total == expected_result

    yield nanny._close()
Пример #47
0
def test_nanny_process_failure(c, s):
    """Check the nanny replaces its worker process after it exits.

    The worker is told to ``sys.exit(0)`` through ``c._run``.  The test
    waits for ``n.process`` to become a different object, for
    ``isalive`` to accept it, and for the worker to be listed in
    ``s.ncores`` with a worker directory set.  After closing the nanny,
    neither the first nor the second worker directory may exist.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()
    first_dir = nanny.worker_dir

    assert os.path.exists(first_dir)

    old_process = nanny.process
    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    payload = valmap(dumps, {'x': 1, 'y': 2})
    yield worker_rpc.update_data(data=payload)
    with ignoring(StreamClosedError):
        yield c._run(sys.exit, 0, workers=[nanny.worker_address])

    # Phase 1: wait while the nanny still holds the original process.
    began = time()
    while nanny.process is old_process:
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 2: wait until the current process counts as alive.
    began = time()
    while not isalive(nanny.process):
        yield gen.sleep(0.01)
        assert time() - began < 5

    # Phase 3: wait for registration and a worker directory.
    began = time()
    while (nanny.worker_address not in s.ncores
           or nanny.worker_dir is None):
        yield gen.sleep(0.01)
        assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir
    worker_rpc.close_rpc()
    s.stop()