def test_update_state_respects_data_in_memory(loop):
    """Resubmitting keys already in memory must reuse them; only keys
    that were released get rescheduled for computation."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int))
    s.mark_task_finished('y', alice, nbytes=10, type=dumps(int))

    # x is no longer wanted once y exists, so it was released
    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'z': {'y', 'x'}},
                   client='client')

    assert s.released == set()
    assert s.waiting == {'z': {'x'}}
    assert s.processing[alice] == {'x'}  # x was released, need to recompute
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    s.stop()
def test_coerce_address():
    """coerce_address / coerce_hostname normalize raw host:port strings,
    resolve localhost, and translate worker-name aliases."""
    with dask.config.set({'distributed.comm.timeouts.connect': '100ms'}):
        s = Scheduler(validate=True)
        s.start(0)
        print("scheduler:", s.address, s.listen_address)
        a = Worker(s.ip, s.port, name='alice')
        b = Worker(s.ip, s.port, name=123)
        c = Worker('127.0.0.1', s.port, name='charlie')
        yield [a._start(), b._start(), c._start()]

        # Bare addresses gain a fully-qualified tcp:// scheme
        assert s.coerce_address('127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
        assert s.coerce_address('[::1]:8000') == 'tcp://[::1]:8000'
        assert s.coerce_address('tcp://127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
        assert s.coerce_address('tcp://[::1]:8000') == 'tcp://[::1]:8000'
        assert s.coerce_address('localhost:8000') in ('tcp://127.0.0.1:8000',
                                                      'tcp://[::1]:8000')
        assert s.coerce_address(u'localhost:8000') in ('tcp://127.0.0.1:8000',
                                                       'tcp://[::1]:8000')
        assert s.coerce_address(a.address) == a.address
        # Aliases
        assert s.coerce_address('alice') == a.address
        assert s.coerce_address(123) == b.address
        assert s.coerce_address('charlie') == c.address

        assert s.coerce_hostname('127.0.0.1') == '127.0.0.1'
        assert s.coerce_hostname('alice') == a.ip
        assert s.coerce_hostname(123) == b.ip
        assert s.coerce_hostname('charlie') == c.ip
        assert s.coerce_hostname('jimmy') == 'jimmy'

        assert s.coerce_address('zzzt:8000', resolve=False) == 'tcp://zzzt:8000'

        yield s.close()
        yield [w._close() for w in [a, b, c]]
def test_update_state_with_processing(loop):
    """A second update_graph while tasks are mid-flight must merge the
    new tasks into existing waiting/processing bookkeeping."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'x': set(), 'z': {'y'}},
                   client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int))

    assert s.waiting == {'z': {'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'z': set()}
    assert list(s.ready) == []
    assert s.who_wants == {'z': {'client'}}
    assert s.wants_what == {'client': {'z'}}
    assert s.who_has == {'x': {alice}}

    s.update_graph(tasks={'a': (inc, 'x'), 'b': (add, 'a', 'y'),
                          'c': (inc, 'z')},
                   keys=['b', 'c'],
                   dependencies={'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}},
                   client='client')

    assert s.waiting == {'z': {'y'}, 'b': {'a', 'y'}, 'c': {'z'}}
    assert s.stacks[alice] == ['a']  # a is runnable right away: x is in memory
    assert not s.ready
    assert s.waiting_data == {'x': {'y', 'a'}, 'y': {'z', 'b'}, 'z': {'c'},
                              'a': {'b'}, 'b': set(), 'c': set()}
    assert s.who_wants == {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'b', 'c', 'z'}}

    s.stop()
def test_coerce_address():
    """coerce_address / coerce_hostname: normalization of raw addresses,
    localhost resolution, and worker-name alias lookup."""
    s = Scheduler(validate=True)
    s.start(0)
    print("scheduler:", s.address, s.listen_address)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker('127.0.0.1', s.port, name='charlie')
    yield [a._start(), b._start(), c._start()]

    # Bare addresses gain a fully-qualified tcp:// scheme
    assert s.coerce_address('127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
    assert s.coerce_address('[::1]:8000') == 'tcp://[::1]:8000'
    assert s.coerce_address('tcp://127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
    assert s.coerce_address('tcp://[::1]:8000') == 'tcp://[::1]:8000'
    assert s.coerce_address('localhost:8000') in ('tcp://127.0.0.1:8000',
                                                  'tcp://[::1]:8000')
    assert s.coerce_address(u'localhost:8000') in ('tcp://127.0.0.1:8000',
                                                   'tcp://[::1]:8000')
    assert s.coerce_address(a.address) == a.address
    # Aliases
    assert s.coerce_address('alice') == a.address
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    assert s.coerce_hostname('127.0.0.1') == '127.0.0.1'
    assert s.coerce_hostname('alice') == a.ip
    assert s.coerce_hostname(123) == b.ip
    assert s.coerce_hostname('charlie') == c.ip
    assert s.coerce_hostname('jimmy') == 'jimmy'

    assert s.coerce_address('zzzt:8000', resolve=False) == 'tcp://zzzt:8000'

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_update_state_respects_data_in_memory(loop):
    """Variant with timing metadata: in-memory keys are reused on
    resubmission while released keys are recomputed."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)
    s.mark_task_finished('y', alice, nbytes=10, type=dumps(int),
                         compute_start=11, compute_stop=12)
    s.ensure_occupied(alice)

    # x is no longer wanted once y exists, so it was released
    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'z': {'y', 'x'}},
                   client='client')

    assert s.released == set()
    assert s.waiting == {'z': {'x'}}
    assert s.processing[alice] == {'x'}  # x was released, need to recompute
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    s.stop()
def test_update_state_supports_recomputing_released_results(loop):
    """A key that was computed and released must be rescheduled when a
    later graph requests it again."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'x': set(), 'z': {'y'}},
                   client='client')
    for key in ['x', 'y', 'z']:
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int),
                             compute_start=10, compute_stop=11)
        s.ensure_occupied(alice)

    assert not s.waiting
    assert not s.ready
    assert s.waiting_data == {'z': set()}
    assert s.who_has == {'z': {alice}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}},
                   client='client')

    # x and y were released; they must run again before y is available
    assert s.waiting == {'y': {'x'}}
    assert s.waiting_data == {'x': {'y'}, 'y': set(), 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert set(s.processing[alice]) == {'x'}

    s.stop()
def test_service_hosts():
    """Service sockets inherit the scheduler's host unless the service
    port spec names a host of its own."""
    pytest.importorskip("bokeh")
    from distributed.bokeh.scheduler import BokehScheduler

    # With a bare port, the service binds wherever the scheduler listens
    port = 0
    for url, expected in [
        ("tcp://0.0.0.0", ("::", "0.0.0.0")),
        ("tcp://127.0.0.1", "127.0.0.1"),
        ("tcp://127.0.0.1:38275", "127.0.0.1"),
    ]:
        services = {("bokeh", port): BokehScheduler}
        s = Scheduler(services=services)
        yield s.start(url)
        sock = first(s.services["bokeh"].server._http._sockets.values())
        if isinstance(expected, tuple):
            assert sock.getsockname()[0] in expected
        else:
            assert sock.getsockname()[0] == expected
        yield s.close()

    # An explicit (host, port) spec overrides the scheduler host
    port = ("127.0.0.1", 0)
    for url in ["tcp://0.0.0.0", "tcp://127.0.0.1", "tcp://127.0.0.1:38275"]:
        services = {("bokeh", port): BokehScheduler}
        s = Scheduler(services=services)
        yield s.start(url)
        sock = first(s.services["bokeh"].server._http._sockets.values())
        assert sock.getsockname()[0] == "127.0.0.1"
        yield s.close()
def test_service_hosts():
    """Same contract as above, single-quoted variant: a bare service port
    follows the scheduler host; a (host, port) spec wins over it."""
    pytest.importorskip('bokeh')
    from distributed.bokeh.scheduler import BokehScheduler

    port = 0
    for url, expected in [('tcp://0.0.0.0', ('::', '0.0.0.0')),
                          ('tcp://127.0.0.1', '127.0.0.1'),
                          ('tcp://127.0.0.1:38275', '127.0.0.1')]:
        services = {('bokeh', port): BokehScheduler}
        s = Scheduler(services=services)
        yield s.start(url)
        sock = first(s.services['bokeh'].server._http._sockets.values())
        if isinstance(expected, tuple):
            assert sock.getsockname()[0] in expected
        else:
            assert sock.getsockname()[0] == expected
        yield s.close()

    port = ('127.0.0.1', 0)
    for url in ['tcp://0.0.0.0', 'tcp://127.0.0.1', 'tcp://127.0.0.1:38275']:
        services = {('bokeh', port): BokehScheduler}
        s = Scheduler(services=services)
        yield s.start(url)
        sock = first(s.services['bokeh'].server._http._sockets.values())
        assert sock.getsockname()[0] == '127.0.0.1'
        yield s.close()
def test_add_worker_is_idempotent(loop):
    """Registering the same worker address twice must leave the
    scheduler's ncores mapping unchanged.

    Bug fix: the original assertion compared ``s.ncores`` with itself
    (``assert s.ncores == s.ncores``), which can never fail; the snapshot
    taken before the duplicate add was never used. Compare against that
    snapshot instead.
    """
    s = Scheduler(loop=loop)
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    ncores = s.ncores.copy()  # snapshot before the duplicate registration
    s.add_worker(address=alice, coerce_address=False)
    assert s.ncores == ncores
    s.stop()  # release the port, consistent with the other tests here
def test_worker_with_port_zero():
    """A worker given no explicit port ends up on a real ephemeral one."""
    s = Scheduler()
    s.start(8007)
    w = Worker(s.ip, s.port)
    yield w._start()
    # Port must have been resolved to a concrete non-privileged number
    assert isinstance(w.port, int)
    assert w.port > 1024
def test_persist_taskstate():
    """Task state persisted via persist_file survives a scheduler restart:
    a fresh scheduler on the same file restores identical keys and states."""
    s = Scheduler(validate=True, persist_file='persist_test')
    s.start(0)
    assert s.persist_scheduler
    s.update_graph(tasks={'x': dumps_task((inc, 1)),
                          'y': dumps_task((inc, 'x')),
                          'z': dumps_task((inc, 2))},
                   keys=['y'],
                   dependencies={'y': 'x', 'x': [], 'z': []},
                   client='client')
    taskstates = s.tasks  # keep a handle on the first scheduler's tasks
    s.close()
    s.stop()
    del s

    # Boot a second scheduler from the persisted file and compare
    s = Scheduler(validate=True, persist_file='persist_test')
    s.start(0)
    assert ([taskstates.keys()] == [s.tasks.keys()]
            and [x.state for x in taskstates.values()]
            == [x.state for x in s.tasks.values()])
    s.close()

    # Clean up the persistence artifacts
    for f in glob.glob("persist_test*"):
        os.remove(f)
def test_update_state_respects_data_in_memory(loop):
    """Older-API variant (string worker address, in_play tracking):
    in-memory y is reused, released x is recomputed."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address='alice', ncores=1)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')
    s.mark_task_finished('x', 'alice', nbytes=10, type=int)
    s.mark_task_finished('y', 'alice', nbytes=10, type=int)
    assert s.who_has == {'y': {'alice'}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'z': {'y', 'x'}},
                   client='client')

    assert s.waiting == {'z': {'x'}}
    assert s.processing['alice'] == {'x'}  # x was released, need to recompute
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert s.in_play == {'x', 'y', 'z'}

    s.stop()
def test_update_state_supports_recomputing_released_results(loop):
    """Released intermediates (x, y) must be rescheduled when a later
    graph asks for y again."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'x': set(), 'z': {'y'}},
                   client='client')
    for key in ['x', 'y', 'z']:
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int))

    assert not s.waiting
    assert not s.ready
    assert s.waiting_data == {'z': set()}
    assert s.who_has == {'z': {alice}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}},
                   client='client')

    assert s.waiting == {'y': {'x'}}
    assert s.waiting_data == {'x': {'y'}, 'y': set(), 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert s.processing[alice] == {'x'}

    s.stop()
def test_scheduler_file():
    """A worker pointed only at the scheduler_file finds the scheduler."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8009)
        w = Worker(scheduler_file=fn)
        yield w._start()
        assert s.workers == {w.address}
        yield w._close()
        s.stop()
def test_scheduler_file():
    """The scheduler writes its address to scheduler_file, and a client
    can connect using only that file."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(0)
        with open(fn) as f:
            data = json.load(f)
        assert data['address'] == s.address
        c = yield Client(scheduler_file=fn, loop=s.loop, asynchronous=True)
        yield s.close()
def test_launch_without_blocked_services():
    """A second scheduler whose service port is already taken starts
    anyway, simply without that service."""
    from distributed.http import HTTPScheduler

    s = Scheduler(services={('http', 3849): HTTPScheduler})
    s.start(0)
    s2 = Scheduler(services={('http', 3849): HTTPScheduler})
    s2.start(0)

    # Port 3849 is occupied by s, so s2 drops the service instead of dying
    assert not s2.services

    yield [s.close(), s2.close()]
def test_scheduler_file():
    """Older-API variant: the scheduler advertises its address through
    scheduler_file and a Client(start=False) can use it to connect."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(0)
        with open(fn) as f:
            data = json.load(f)
        assert data['address'] == s.address
        c = Client(scheduler_file=fn, loop=s.loop, start=False)
        yield c._start()
        yield s.close()
def test_worker_name():
    """Worker names register as aliases; a duplicate name is rejected."""
    s = Scheduler(validate=True)
    s.start(0)
    w = yield Worker(s.ip, s.port, name="alice")
    assert s.workers[w.address].name == "alice"
    assert s.aliases["alice"] == w.address

    # A second worker reusing the name must fail to register
    with pytest.raises(ValueError):
        w2 = yield Worker(s.ip, s.port, name="alice")
        yield w2._close()

    yield s.close()
    yield w._close()
def test_worker_name():
    """worker_info records the worker's name and aliases resolve to its
    address; starting a duplicate name raises ValueError."""
    s = Scheduler()
    s.start(0)
    w = Worker(s.ip, s.port, name='alice')
    yield w._start()
    assert s.worker_info[w.address]['name'] == 'alice'
    assert s.aliases['alice'] == w.address

    with pytest.raises(ValueError):
        w = Worker(s.ip, s.port, name='alice')
        yield w._start()

    yield s.close()
    yield w._close()
def test_worker_name():
    """Validate-mode variant: name is stored in worker_info, aliases map
    back to the address, and duplicate names are rejected."""
    s = Scheduler(validate=True)
    s.start(0)
    w = Worker(s.ip, s.port, name='alice')
    yield w._start()
    assert s.worker_info[w.address]['name'] == 'alice'
    assert s.aliases['alice'] == w.address

    with pytest.raises(ValueError):
        w = Worker(s.ip, s.port, name='alice')
        yield w._start()

    yield s.close()
    yield w._close()
def test_worker_name():
    """WorkerState variant: name lives on s.workers[addr], aliases map
    back, and a duplicate name makes the second worker's start raise."""
    s = Scheduler(validate=True)
    s.start(0)
    w = Worker(s.ip, s.port, name='alice')
    yield w._start()
    assert s.workers[w.address].name == 'alice'
    assert s.aliases['alice'] == w.address

    with pytest.raises(ValueError):
        w2 = Worker(s.ip, s.port, name='alice')
        yield w2._start()
        yield w2._close()

    yield s.close()
    yield w._close()
def f(c, a, b):
    """A SchedulerPlugin's task_finished hook fires once per completed task."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s.sync_center()
    done = s.start(0)
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        # Counts task_finished callbacks delivered by the scheduler
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'tasks': {'x': (inc, 1), 'y': (inc, 'x'),
                                'z': (inc, 'y')},
                      'dependencies': {'y': {'x'}, 'z': {'y'}},
                      'keys': ['z']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    # x, y and z each finished exactly once
    assert counter.count == 3

    sched.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """The scheduler can serve as the data center: it tracks worker
    cores and data locations, and clears them when workers leave."""
    s = Scheduler()
    done = s.start(0)
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {'x': {a.address},
                         'y': {a.address, b.address},
                         'z': {b.address}}

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'], dependencies={'a': []})
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5  # bail out rather than hang forever
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    with ignoring(StreamClosedError):
        yield [w._close() for w in [a, b, c]]

    # All worker state must be gone after the workers closed
    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """Executor wiring: scheduler handle is a local Scheduler when
    constructed in-process, an rpc proxy when addressed remotely."""
    e1 = Executor((c.ip, c.port), start=False, loop=loop)
    yield e1._start()
    assert isinstance(e1.center, rpc)
    assert isinstance(e1.scheduler, Scheduler)

    s = Scheduler((c.ip, c.port))
    yield s.sync_center()
    done = s.start()
    e2 = Executor(s, start=False, loop=loop)
    yield e2._start()
    assert isinstance(e2.center, rpc)
    assert isinstance(e2.scheduler, Scheduler)

    s.listen(8042)
    e3 = Executor(('127.0.0.1', s.port), start=False, loop=loop)
    yield e3._start()
    assert isinstance(e3.center, rpc)
    assert isinstance(e3.scheduler, rpc)  # remote address -> rpc proxy

    s.stop()
    yield e1._shutdown()
    yield e2._shutdown()
    yield e3._shutdown()
def test_scheduler_as_center():
    """Early-API variant (listen + dsk): the scheduler tracks cores and
    data, runs a tiny graph, and forgets workers that close."""
    s = Scheduler()
    s.listen(0)
    done = s.start()
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {'x': {a.address},
                         'y': {a.address, b.address},
                         'z': {b.address}}

    s.update_graph(dsk={'a': (inc, 1)}, keys=['a'])
    while not s.who_has['a']:
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """A plugin whose task_finished raises must not break scheduling."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Bad(SchedulerPlugin):
        # Deliberately broken hook: every callback raises
        def task_finished(self, scheduler, key, worker, nbytes):
            raise Exception()

    bad = Bad()
    s.add_plugin(bad)

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})
    while True:  # normal execution
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """MultiProgressWidget groups requested keys by prefix ('x' from
    x-1/x-2/x-3) and exposes a bar per group."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1), 'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'), 'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'), 'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break

    p = MultiProgressWidget(['x-1', 'x-2', 'x-3'], scheduler=s)
    assert set(concat(p.all_keys.values())).issuperset({'x-1', 'x-2', 'x-3'})
    assert 'x' in p.bars

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """MultiProgressWidget with complete=True pulls in the whole
    dependency tree of 'e' and shows finished bars per prefix."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1), 'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'), 'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'), 'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])
    while True:
        msg = yield report.get()
        if msg['op'] == 'task-erred' and msg['key'] == 'e':
            break

    p = MultiProgressWidget(['e'], scheduler=s, complete=True)
    assert set(concat(p.all_keys.values())) == {'x-1', 'x-2', 'x-3',
                                                'y-1', 'y-2', 'e'}
    assert all(b.value == 1.0 for b in p.bars.values())
    assert p.texts['x'].value == '3 / 3'
    assert p.texts['y'].value == '2 / 2'

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """dsk-API variant: a Counter plugin sees one task_finished per task."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        # Counts task_finished callbacks delivered by the scheduler
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3  # x, y and z each finished once

    sched.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """listen(0) variant: scheduler tracks cores and data locations,
    executes a one-task graph, and forgets closed workers."""
    s = Scheduler()
    s.listen(0)
    done = s.start()
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {'x': {a.address},
                         'y': {a.address, b.address},
                         'z': {b.address}}

    s.update_graph(dsk={'a': (inc, 1)}, keys=['a'])
    while not s.who_has['a']:
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """TextProgressBar reaches 'error' status when its key errs, both
    when attached before and after the failure."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (div, 1, 0)}, keys=['x'])
    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()

    while True:
        msg = yield report.get()
        if msg.get('key') == 'x':
            break
    assert progress.status == 'error'
    assert not progress._timer.is_alive()

    # A bar created after the failure settles into 'error' immediately
    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()
    assert progress.status == 'error'
    assert not progress._timer or not progress._timer.is_alive()

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """TextProgressBar tracks the full dependency set of its keys, empties
    as they complete, and detaches from the scheduler when done."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'])
    progress = TextProgressBar(['z'], scheduler=s)
    progress.start()
    assert progress.all_keys == {'x', 'y', 'z'}
    assert progress.keys == {'x', 'y', 'z'}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break
    assert progress.keys == set()
    check_bar_completed(capsys)
    assert progress not in s.plugins  # unhooked itself on completion

    sched.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """Validate-mode variant: pre-seeded worker data is not reported to
    who_has; a one-task graph still completes on some worker."""
    s = Scheduler(validate=True)
    done = s.start(0)
    a = Worker(s.address, ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker(s.address, ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker(s.address, ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert not s.who_has  # data placed directly on workers isn't registered

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'], dependencies={'a': []})
    start = time()
    while 'a' not in s.who_has:
        assert time() - start < 5  # bail out rather than hang forever
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_scheduler_as_center():
    """start(0) variant: cores and pre-seeded data locations are tracked,
    a one-task graph completes, and closed workers are forgotten."""
    s = Scheduler()
    done = s.start(0)
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {'x': {a.address},
                         'y': {a.address, b.address},
                         'z': {b.address}}

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'], dependencies={'a': []})
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5  # bail out rather than hang forever
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    with ignoring(StreamClosedError):
        yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_service_hosts_match_scheduler():
    """HTTP service sockets bind to the same host the scheduler uses."""
    from distributed.http.scheduler import HTTPScheduler
    services = {('http', 0): HTTPScheduler}

    # Wildcard scheduler host -> wildcard service socket
    s = Scheduler(services=services)
    yield s.start('tcp://0.0.0.0')
    sock = first(s.services['http']._sockets.values())
    assert sock.getsockname()[0] in ('::', '0.0.0.0')
    yield s.close()

    # Specific scheduler host -> service bound to that same host
    for host in ['tcp://127.0.0.2', 'tcp://127.0.0.2:38275']:
        s = Scheduler(services=services)
        yield s.start(host)
        sock = first(s.services['http']._sockets.values())
        assert sock.getsockname()[0] == '127.0.0.2'
        yield s.close()
def test_service_hosts_match_scheduler():
    """Bokeh service sockets bind to the same host the scheduler uses."""
    pytest.importorskip('bokeh')
    from distributed.bokeh.scheduler import BokehScheduler
    services = {('bokeh', 0): BokehScheduler}

    # Wildcard scheduler host -> wildcard service socket
    s = Scheduler(services=services)
    yield s.start('tcp://0.0.0.0')
    sock = first(s.services['bokeh'].server._http._sockets.values())
    assert sock.getsockname()[0] in ('::', '0.0.0.0')
    yield s.close()

    # Specific scheduler host -> service bound to that same host
    for host in ['tcp://127.0.0.2', 'tcp://127.0.0.2:38275']:
        s = Scheduler(services=services)
        yield s.start(host)
        sock = first(s.services['bokeh'].server._http._sockets.values())
        assert sock.getsockname()[0] == '127.0.0.2'
        yield s.close()
def f(c, a, b):
    """End-to-end queue protocol: graph updates, erring tasks, reported
    missing data, and silently-lost data all flow through s.put/report."""
    s = Scheduler((c.ip, c.port))
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    # Test update graph
    s.put({'op': 'update-graph',
           'dsk': {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
           'keys': ['z']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break
    assert a.data.get('x') == 2 or b.data.get('x') == 2

    # Test erring tasks
    s.put({'op': 'update-graph',
           'dsk': {'a': (div, 1, 0), 'b': (inc, 'a')},
           'keys': ['a', 'b']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'task-erred' and msg['key'] == 'b':
            break

    # Test missing data
    s.put({'op': 'missing-data', 'missing': ['z']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    # Test missing data without being informed
    for w in [a, b]:
        if 'z' in w.data:
            del w.data['z']
    s.put({'op': 'update-graph', 'dsk': {'zz': (inc, 'z')}, 'keys': ['zz']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'zz':
            break

    s.put({'op': 'close'})
    yield done
def f(c, a, b):
    """A TextProgressBar over zero keys finishes immediately."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    p = TextProgressBar([], scheduler=s)
    p.start()
    assert p.status == 'finished'
    check_bar_completed(capsys)
def test_coerce_address():
    """Async-worker variant: coerce_address / coerce_hostname handle raw
    addresses, localhost, and worker-name aliases."""
    with dask.config.set({"distributed.comm.timeouts.connect": "100ms"}):
        s = Scheduler(validate=True)
        s.start(0)
        print("scheduler:", s.address, s.listen_address)
        a = Worker(s.ip, s.port, name="alice")
        b = Worker(s.ip, s.port, name=123)
        c = Worker("127.0.0.1", s.port, name="charlie")
        yield [a, b, c]

        # Bare addresses gain a fully-qualified tcp:// scheme
        assert s.coerce_address("127.0.0.1:8000") == "tcp://127.0.0.1:8000"
        assert s.coerce_address("[::1]:8000") == "tcp://[::1]:8000"
        assert s.coerce_address("tcp://127.0.0.1:8000") == "tcp://127.0.0.1:8000"
        assert s.coerce_address("tcp://[::1]:8000") == "tcp://[::1]:8000"
        assert s.coerce_address("localhost:8000") in (
            "tcp://127.0.0.1:8000",
            "tcp://[::1]:8000",
        )
        assert s.coerce_address(u"localhost:8000") in (
            "tcp://127.0.0.1:8000",
            "tcp://[::1]:8000",
        )
        assert s.coerce_address(a.address) == a.address
        # Aliases
        assert s.coerce_address("alice") == a.address
        assert s.coerce_address(123) == b.address
        assert s.coerce_address("charlie") == c.address

        assert s.coerce_hostname("127.0.0.1") == "127.0.0.1"
        assert s.coerce_hostname("alice") == a.ip
        assert s.coerce_hostname(123) == b.ip
        assert s.coerce_hostname("charlie") == c.ip
        assert s.coerce_hostname("jimmy") == "jimmy"

        assert s.coerce_address("zzzt:8000", resolve=False) == "tcp://zzzt:8000"

        yield s.close()
        yield [w._close() for w in [a, b, c]]
def test_file_descriptors_dont_leak(loop):
    """Starting and closing a worker must return the process to its
    original file-descriptor count."""
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()
    before = proc.num_fds()

    s = Scheduler()
    s.start(0)
    w = Worker(s.ip, s.port)

    @gen.coroutine
    def f():
        yield w._start(0)
        yield w._close()

    loop.run_sync(f)
    during = proc.num_fds()
    s.stop()
    s.close()

    # Give the loop time to reap sockets, but fail if fds never drop back
    start = time()
    while proc.num_fds() > before:
        loop.run_sync(lambda: gen.sleep(0.01))
        assert time() < start + 5
def test_update_state(loop):
    """A second update_graph merges new tasks/dependencies into existing
    scheduler state without disturbing in-flight work."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')}, keys=['y'],
                   dependencies={'y': 'x', 'x': set()}, client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)

    assert set(s.processing[alice]) == {'y'}
    assert set(s.rprocessing['y']) == {alice}
    assert not s.ready
    assert s.who_wants == {'y': {'client'}}
    assert s.wants_what == {'client': {'y'}}

    s.update_graph(tasks={'a': 1, 'z': (add, 'y', 'a')}, keys=['z'],
                   dependencies={'z': {'y', 'a'}}, client='client')

    assert s.tasks == {'x': 1, 'y': (inc, 'x'), 'a': 1, 'z': (add, 'y', 'a')}
    assert s.dependencies == {'x': set(), 'a': set(), 'y': {'x'},
                              'z': {'a', 'y'}}
    assert s.dependents == {'z': set(), 'y': {'z'}, 'a': {'z'}, 'x': {'y'}}
    assert s.waiting == {'z': {'a', 'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'a': {'z'}, 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    # a has no dependencies, so it is immediately runnable somewhere
    assert 'a' in s.ready or 'a' in s.processing[alice]

    s.stop()
def test_coerce_address():
    """Legacy-format variant: coerce_address accepts bytes, tuples and
    lists, resolving everything to canonical 'host:port' strings."""
    s = Scheduler(validate=True)
    s.start(0)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker(s.ip, s.port, name='charlie', ip='127.0.0.2')
    yield [a._start(), b._start(), c._start()]

    # bytes, tuple and list inputs all normalize the same way
    assert s.coerce_address(b'127.0.0.1') == '127.0.0.1'
    assert s.coerce_address(('127.0.0.1', 8000)) == '127.0.0.1:8000'
    assert s.coerce_address(['127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address([b'127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address(('127.0.0.1', '8000')) == '127.0.0.1:8000'
    # localhost resolves to the loopback address
    assert s.coerce_address(b'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost') == '127.0.0.1'
    assert s.coerce_address(u'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost:8000') == '127.0.0.1:8000'
    assert s.coerce_address(a.address) == a.address
    assert s.coerce_address(a.address_tuple) == a.address
    # worker aliases
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_coerce_address():
    """Non-validating variant of the legacy coerce_address test: bytes,
    tuples and lists all normalize to canonical 'host:port' strings."""
    s = Scheduler()
    s.start(0)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker(s.ip, s.port, name='charlie', ip='127.0.0.2')
    yield [a._start(), b._start(), c._start()]

    assert s.coerce_address(b'127.0.0.1') == '127.0.0.1'
    assert s.coerce_address(('127.0.0.1', 8000)) == '127.0.0.1:8000'
    assert s.coerce_address(['127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address([b'127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address(('127.0.0.1', '8000')) == '127.0.0.1:8000'
    assert s.coerce_address(b'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost') == '127.0.0.1'
    assert s.coerce_address(u'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost:8000') == '127.0.0.1:8000'
    assert s.coerce_address(a.address) == a.address
    assert s.coerce_address(a.address_tuple) == a.address
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_update_state(loop):
    """Older-API variant (string addresses, ready/in_play): a second
    update_graph merges cleanly into existing scheduler state."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address='alice', ncores=1)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')}, keys=['y'],
                   dependencies={'y': 'x', 'x': set()}, client='client')
    s.mark_task_finished('x', 'alice', nbytes=10, type=int)

    assert s.processing['alice'] == {'y'}
    assert not s.ready
    assert s.who_wants == {'y': {'client'}}
    assert s.wants_what == {'client': {'y'}}

    s.update_graph(tasks={'a': 1, 'z': (add, 'y', 'a')}, keys=['z'],
                   dependencies={'z': {'y', 'a'}}, client='client')

    assert s.tasks == {'x': 1, 'y': (inc, 'x'), 'a': 1, 'z': (add, 'y', 'a')}
    assert s.dependencies == {'x': set(), 'a': set(), 'y': {'x'},
                              'z': {'a', 'y'}}
    assert s.dependents == {'z': set(), 'y': {'z'}, 'a': {'z'}, 'x': {'y'}}
    assert s.waiting == {'z': {'a', 'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'a': {'z'}, 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert list(s.ready) == ['a']  # dependency-free, runnable right away
    assert s.in_play == {'a', 'x', 'y', 'z'}

    s.stop()
def test_monitor_resources():
    """The scheduler polls nanny resource logs on resource_interval and
    bounds them to resource_log_size entries."""
    pytest.importorskip('psutil')
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {('127.0.0.1', a.worker_port): 2,
                            ('127.0.0.1', b.worker_port): 2}
        assert s.nannies == {(n.ip, n.worker_port): n.port for n in [a, b]}

        # Wait for each log to fill, then a bit longer to see it capped
        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)
        yield gen.sleep(0.1)

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        # Best-effort teardown; the nannies may already be gone
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
def f(c, a, b):
    """Exercise Scheduler.handle_queues: submit a graph over one queue pair,
    then attach a second pair and confirm both report streams see results."""
    s = Scheduler((c.ip, c.port))
    yield s._sync_center()
    done = s.start()

    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    # Test update graph
    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1),
                              'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})

    # Drain reports until the final key lands in memory.
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    slen, rlen = len(s.scheduler_queues), len(s.report_queues)
    sched2, report2 = Queue(), Queue()
    s.handle_queues(sched2, report2)
    # Attaching a second pair grows both queue registries by one.
    assert slen + 1 == len(s.scheduler_queues)
    assert rlen + 1 == len(s.report_queues)

    sched2.put_nowait({'op': 'update-graph',
                       'dsk': {'a': (inc, 10)},
                       'keys': ['a']})

    # Both report queues must observe the new key entering memory.
    for q in [report, report2]:
        while True:
            msg = yield q.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'a':
                break

    sched.put_nowait({'op': 'close'})
    yield done
def test_service_hosts():
    """Check which interface the bokeh service socket binds to for various
    combinations of service-port spec and scheduler listen address."""
    pytest.importorskip('bokeh')
    from distributed.bokeh.scheduler import BokehScheduler

    for port in [0, ('127.0.0.3', 0)]:
        for url, expected in [('tcp://0.0.0.0', ('::', '0.0.0.0')),
                              ('tcp://127.0.0.2', '127.0.0.2'),
                              ('tcp://127.0.0.2:38275', '127.0.0.2')]:
            services = {('bokeh', port): BokehScheduler}
            s = Scheduler(services=services)
            yield s.start(url)

            sock = first(s.services['bokeh'].server._http._sockets.values())
            if isinstance(port, tuple):
                # host explicitly overridden
                assert sock.getsockname()[0] == port[0]
            elif isinstance(expected, tuple):
                # wildcard listen address may bind either stack
                assert sock.getsockname()[0] in expected
            else:
                assert sock.getsockname()[0] == expected

            yield s.close()
def test_monitor_resources():
    # NOTE(review): this appears to be a duplicate of an earlier, identical
    # test_monitor_resources definition; if both live in one module the later
    # one shadows the former — confirm and deduplicate.
    """Verify the scheduler gathers bounded per-worker resource logs from two
    Nannies and exposes them via diagnostic_resources."""
    pytest.importorskip('psutil')

    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    # Fast polling interval and a three-entry cap per worker log.
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {('127.0.0.1', a.worker_port): 2,
                            ('127.0.0.1', b.worker_port): 2}
        assert s.nannies == {(n.ip, n.worker_port): n.port for n in [a, b]}

        # Wait for every log to reach its configured capacity.
        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        # Tear down even on failure; shutdown races can raise network errors.
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
def f(c, a, b):
    """Track a MultiProgress widget that groups keys by their 'x'/'y' prefix
    and check each group empties as its keys complete."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1')},
                   keys=['y-2'])

    # Group keys by the part before the dash, e.g. 'x-1' -> 'x'.
    p = MultiProgress(['y-2'], scheduler=s, func=lambda s: s.split('-')[0])
    assert p.keys == {'x': {'x-1', 'x-2', 'x-3'}, 'y': {'y-1', 'y-2'}}

    # Once the last x-key finishes, the 'x' group should be empty.
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'x-3':
            break
    assert p.keys == {'x': set(), 'y': {'y-1', 'y-2'}}

    # When the final target finishes, both groups are empty and we're done.
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break
    assert p.keys == {'x': set(), 'y': set()}
    assert p.status == 'finished'

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Attach many Progress bars to the same key and confirm they all reach
    'finished' once that key lands in memory."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'])

    # Ten independent progress bars all watching the same target key.
    bars = [Progress(keys=['z'], scheduler=s) for _ in range(10)]

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert all(b.status == 'finished' for b in bars)

    sched.put_nowait({'op': 'close'})
    yield done