def test_update_state_with_processing(loop):
    """Extend a graph after only its first task has been reported finished.

    Submits ``x -> y -> z``, marks ``x`` finished on ``alice``, checks the
    scheduler bookkeeping, then submits ``a``, ``b`` and ``c`` on top of the
    existing keys and checks the bookkeeping again.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    initial_tasks = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    initial_deps = {'y': {'x'}, 'x': set(), 'z': {'y'}}
    s.update_graph(tasks=initial_tasks, keys=['z'],
                   dependencies=initial_deps, client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int))

    # State after the first submission, with only 'x' finished.
    assert s.waiting == {'z': {'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'z': set()}
    assert list(s.ready) == []
    assert s.who_wants == {'z': {'client'}}
    assert s.wants_what == {'client': {'z'}}
    assert s.who_has == {'x': {alice}}

    extra_tasks = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    extra_deps = {'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}}
    s.update_graph(tasks=extra_tasks, keys=['b', 'c'],
                   dependencies=extra_deps, client='client')

    # State after the second submission.
    assert s.waiting == {'z': {'y'}, 'b': {'a', 'y'}, 'c': {'z'}}
    assert s.stacks[alice] == ['a']
    assert not s.ready
    assert s.waiting_data == {
        'x': {'y', 'a'},
        'y': {'z', 'b'},
        'z': {'c'},
        'a': {'b'},
        'b': set(),
        'c': set(),
    }
    assert s.who_wants == {
        'b': {'client'},
        'c': {'client'},
        'z': {'client'},
    }
    assert s.wants_what == {'client': {'b', 'c', 'z'}}

    s.stop()
def test_scheduler_as_center():
    """Run one task through a scheduler with three workers, then shut down.

    Generator-style test: it yields futures, so it is presumably driven by a
    coroutine test decorator that sits outside this block.

    Checks that the scheduler records each worker's core count, starts with
    an empty ``who_has``, places the result of ``a`` on some worker within
    five seconds, and forgets all workers and data once they are closed.
    """
    s = Scheduler(validate=True)
    done = s.start(0)

    a = Worker(s.address, ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker(s.address, ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker(s.address, ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert not s.who_has

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'],
                   dependencies={'a': []})

    # Poll until the scheduler learns where 'a' lives; the assert bounds the
    # wait so a lost task fails the test instead of hanging it.
    start = time()
    while 'a' not in s.who_has:
        assert time() - start < 5
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_scheduler_as_center():
    """Run one task through a scheduler with three workers, then shut down.

    Generator-style test: it yields futures, so it is presumably driven by a
    coroutine test decorator that sits outside this block.

    Checks that the scheduler records each worker's core count and the data
    the workers were pre-loaded with, that the result of ``a`` lands on some
    worker, and that all workers and data are forgotten once they are closed.
    """
    # Local import keeps the bounded wait below self-contained.
    from time import time

    s = Scheduler()
    s.listen(0)
    done = s.start()

    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {
        'x': {a.address},
        'y': {a.address, b.address},
        'z': {b.address},
    }

    s.update_graph(dsk={'a': (inc, 1)}, keys=['a'])

    # Poll until 'a' is held somewhere. The assert bounds the wait so that a
    # task which never completes fails the test instead of hanging forever.
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_persist_taskstate():
    """Check that task keys and states survive a scheduler restart.

    A scheduler is created with ``persist_file='persist_test'``, given a
    small graph, closed and stopped. A second scheduler created with the same
    ``persist_file`` must expose the same task keys and, in the same order,
    the same task states.

    Files matching ``persist_test*`` are removed even when an assertion
    fails, so one failed run cannot leave stale state behind for the next.
    """
    try:
        s = Scheduler(validate=True, persist_file='persist_test')
        s.start(0)
        assert s.persist_scheduler
        s.update_graph(
            tasks={
                'x': dumps_task((inc, 1)),
                'y': dumps_task((inc, 'x')),
                'z': dumps_task((inc, 2)),
            },
            keys=['y'],
            # Dependencies are collections of keys; a bare 'x' string would
            # only work by accident because the key is one character long.
            dependencies={'y': ['x'], 'x': [], 'z': []},
            client='client',
        )
        taskstates = s.tasks
        s.close()
        s.stop()
        del s

        s = Scheduler(validate=True, persist_file='persist_test')
        s.start(0)
        assert taskstates.keys() == s.tasks.keys()
        assert ([x.state for x in taskstates.values()]
                == [x.state for x in s.tasks.values()])
        s.close()
    finally:
        for f in glob.glob("persist_test*"):
            os.remove(f)
def test_update_state_supports_recomputing_released_results(loop):
    """Ask again for a key whose result is no longer held.

    After ``x``, ``y`` and ``z`` are all reported finished, only ``z`` is
    still held (``who_has == {'z': {alice}}``). Requesting ``y`` again must
    put ``x`` back into processing on ``alice``.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    # NOTE(review): the task for 'z' names 'x' while its declared dependency
    # is 'y'; kept exactly as in the original -- confirm whether intended.
    graph = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    deps = {'y': {'x'}, 'x': set(), 'z': {'y'}}
    s.update_graph(tasks=graph, keys=['z'], dependencies=deps,
                   client='client')

    # Report each task finished in dependency order, letting the scheduler
    # hand out more work to the worker after each one.
    for key in ('x', 'y', 'z'):
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int),
                             compute_start=10, compute_stop=11)
        s.ensure_occupied(alice)

    assert not s.waiting
    assert not s.ready
    assert s.waiting_data == {'z': set()}
    assert s.who_has == {'z': {alice}}

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}},
                   client='client')

    assert s.waiting == {'y': {'x'}}
    assert s.waiting_data == {'x': {'y'}, 'y': set(), 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert set(s.processing[alice]) == {'x'}

    s.stop()
def test_update_state_respects_data_in_memory(loop):
    """Submit a graph that reuses one held key and one released key.

    Once ``x`` and ``y`` are finished, ``x`` is released and only ``y`` is
    held. Adding ``z``, which depends on both, must leave ``z`` waiting on
    ``x`` alone and send ``x`` back to ``alice`` for recomputation.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(
        tasks={'x': 1, 'y': (inc, 'x')},
        keys=['y'],
        dependencies={'y': {'x'}, 'x': set()},
        client='client',
    )

    for key in ('x', 'y'):
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int))

    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    s.update_graph(
        tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
        keys=['z'],
        dependencies={'y': {'x'}, 'z': {'y', 'x'}},
        client='client',
    )

    assert s.released == set()
    assert s.waiting == {'z': {'x'}}
    # x was released, so it has to be recomputed
    assert s.processing[alice] == {'x'}
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    s.stop()
def test_update_state_respects_data_in_memory(loop):
    """Submit a graph that reuses one held key and one released key.

    Once ``x`` and ``y`` are finished, ``x`` is released and only ``y`` is
    held. Adding ``z``, which depends on both, must leave ``z`` waiting on
    ``x`` alone and send ``x`` back to ``alice`` for recomputation.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    base_tasks = {'x': 1, 'y': (inc, 'x')}
    base_deps = {'y': {'x'}, 'x': set()}
    s.update_graph(tasks=base_tasks, keys=['y'], dependencies=base_deps,
                   client='client')

    # (key, compute_start, compute_stop) for each completed task, in order.
    completions = [('x', 10, 11), ('y', 11, 12)]
    for key, began, ended in completions:
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int),
                             compute_start=began, compute_stop=ended)
        s.ensure_occupied(alice)

    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    more_tasks = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')}
    more_deps = {'y': {'x'}, 'z': {'y', 'x'}}
    s.update_graph(tasks=more_tasks, keys=['z'], dependencies=more_deps,
                   client='client')

    assert s.released == set()
    assert s.waiting == {'z': {'x'}}
    # x was released, so it has to be recomputed
    assert s.processing[alice] == {'x'}
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    s.stop()
def test_scheduler_as_center():
    """Run one task through a scheduler with three workers, then shut down.

    Generator-style test: it yields futures, so it is presumably driven by a
    coroutine test decorator that sits outside this block.

    Checks the scheduler's record of worker cores and pre-loaded data, waits
    at most five seconds for ``a`` to be held somewhere, and verifies that
    the scheduler forgets everything after the workers close.
    """
    s = Scheduler()
    done = s.start(0)

    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    workers = [a, b, c]
    yield [w._start(0) for w in workers]

    expected_ncores = {w.address: w.ncores for w in workers}
    assert s.ncores == expected_ncores
    expected_holders = {
        'x': {a.address},
        'y': {a.address, b.address},
        'z': {b.address},
    }
    assert s.who_has == expected_holders

    s.update_graph(
        tasks={'a': dumps_task((inc, 1))},
        keys=['a'],
        dependencies={'a': []},
    )

    # Poll until some worker holds 'a'; the assert bounds the wait.
    began = time()
    while not s.who_has['a']:
        assert time() - began < 5
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    # Closing may race with the stream shutting down; that error is ignored.
    with ignoring(StreamClosedError):
        yield [w._close() for w in workers]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """Track ``x -> y -> z`` with a TextProgressBar until ``z`` is in memory.

    Checks the bar's key sets right after ``start()``, then that its
    remaining keys are empty, the bar output is complete, and the bar is no
    longer among the scheduler's plugins once ``z`` is reported in memory.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')}
    s.update_graph(dsk=graph, keys=['z'])

    progress = TextProgressBar(['z'], scheduler=s)
    progress.start()

    assert progress.all_keys == {'x', 'y', 'z'}
    assert progress.keys == {'x', 'y', 'z'}

    # Drain reports until 'z' is announced as in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'z'):
        msg = yield report_q.get()

    assert progress.keys == set()
    check_bar_completed(capsys)
    assert progress not in s.plugins

    sched_q.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Check TextProgressBar status when the tracked task is ``(div, 1, 0)``.

    A bar started before the report for ``x`` arrives must end with status
    ``'error'`` and a stopped timer. A second bar started afterwards must
    report ``'error'`` too, with no timer or a stopped one.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    s.update_graph(dsk={'x': (div, 1, 0)}, keys=['x'])

    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()

    # Drain reports until one mentions key 'x'.
    msg = yield report_q.get()
    while msg.get('key') != 'x':
        msg = yield report_q.get()

    assert progress.status == 'error'
    assert not progress._timer.is_alive()

    # A bar created after the failure.
    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()
    assert progress.status == 'error'
    assert not progress._timer or not progress._timer.is_alive()

    sched_q.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """Run one task through a scheduler with three workers, then shut down.

    Generator-style test: it yields futures, so it is presumably driven by a
    coroutine test decorator that sits outside this block.

    Verifies worker core counts and pre-loaded data as seen by the scheduler,
    waits up to five seconds for ``a`` to be held by a worker, and checks the
    scheduler state is empty after the workers are closed.
    """
    host = '127.0.0.1'

    s = Scheduler()
    done = s.start(0)

    a = Worker(host, s.port, ip=host, ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker(host, s.port, ip=host, ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker(host, s.port, ip=host, ncores=3)
    yield [w._start(0) for w in (a, b, c)]

    assert s.ncores == {w.address: w.ncores for w in (a, b, c)}
    assert s.who_has == {
        'x': {a.address},
        'y': {a.address, b.address},
        'z': {b.address},
    }

    task_graph = {'a': dumps_task((inc, 1))}
    s.update_graph(tasks=task_graph, keys=['a'], dependencies={'a': []})

    # Poll until some worker holds 'a'; the assert bounds the wait.
    t0 = time()
    while not s.who_has['a']:
        assert time() - t0 < 5
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    # A stream that is already closed during shutdown is tolerated.
    with ignoring(StreamClosedError):
        yield [w._close() for w in (a, b, c)]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """Create a MultiProgressWidget for the ``x-*`` keys after ``y-2`` is done.

    The widget is built only once ``y-2`` has been reported in memory. It
    must still list ``x-1``, ``x-2`` and ``x-3`` among its keys and have an
    ``'x'`` bar.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {
        'x-1': (inc, 1),
        'x-2': (inc, 'x-1'),
        'x-3': (inc, 'x-2'),
        'y-1': (dec, 'x-3'),
        'y-2': (dec, 'y-1'),
        'e': (throws, 'y-2'),
        'other': (inc, 123),
    }
    s.update_graph(dsk=graph, keys=['e'])

    # Drain reports until 'y-2' is announced as in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'y-2'):
        msg = yield report_q.get()

    p = MultiProgressWidget(['x-1', 'x-2', 'x-3'], scheduler=s)
    tracked = set(concat(p.all_keys.values()))
    assert tracked.issuperset({'x-1', 'x-2', 'x-3'})
    assert 'x' in p.bars

    sched_q.put_nowait({'op': 'close'})
    yield done
def test_update_state_respects_data_in_memory(loop):
    """Submit a graph that reuses one held key and one released key.

    After ``x`` and ``y`` are finished only ``y`` is held. Adding ``z``,
    which depends on both, must leave ``z`` waiting on ``x`` alone and send
    ``x`` back to ``'alice'`` for recomputation.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address='alice', ncores=1)

    s.update_graph(
        tasks={'x': 1, 'y': (inc, 'x')},
        keys=['y'],
        dependencies={'y': {'x'}, 'x': set()},
        client='client',
    )

    for key in ('x', 'y'):
        s.mark_task_finished(key, 'alice', nbytes=10, type=int)

    assert s.who_has == {'y': {'alice'}}

    s.update_graph(
        tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
        keys=['z'],
        dependencies={'y': {'x'}, 'z': {'y', 'x'}},
        client='client',
    )

    assert s.waiting == {'z': {'x'}}
    # x was released, so it has to be recomputed
    assert s.processing['alice'] == {'x'}
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert s.in_play == {'x', 'y', 'z'}

    s.stop()
def test_update_state_supports_recomputing_released_results(loop):
    """Ask again for a key whose result is no longer held.

    After ``x``, ``y`` and ``z`` are all reported finished, only ``z`` is
    still held. Requesting ``y`` again must put ``x`` back into processing on
    ``alice``.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    # NOTE(review): the task for 'z' names 'x' while its declared dependency
    # is 'y'; kept exactly as in the original -- confirm whether intended.
    graph = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    deps = {'y': {'x'}, 'x': set(), 'z': {'y'}}
    s.update_graph(tasks=graph, keys=['z'], dependencies=deps,
                   client='client')

    for key in ('x', 'y', 'z'):
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int))

    assert not s.waiting
    assert not s.ready
    assert s.waiting_data == {'z': set()}
    assert s.who_has == {'z': {alice}}

    s.update_graph(
        tasks={'x': 1, 'y': (inc, 'x')},
        keys=['y'],
        dependencies={'y': {'x'}},
        client='client',
    )

    assert s.waiting == {'y': {'x'}}
    assert s.waiting_data == {'x': {'y'}, 'y': set(), 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert s.processing[alice] == {'x'}

    s.stop()
def f(c, a, b):
    """Create a ``complete=True`` MultiProgressWidget after ``e`` has erred.

    The widget is built only once the scheduler reports ``task-erred`` for
    ``e``. It must list the whole ``x-*``/``y-*``/``e`` chain (not
    ``other``), every bar must read 1.0, and the ``x`` and ``y`` texts must
    read ``'3 / 3'`` and ``'2 / 2'``.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {
        'x-1': (inc, 1),
        'x-2': (inc, 'x-1'),
        'x-3': (inc, 'x-2'),
        'y-1': (dec, 'x-3'),
        'y-2': (dec, 'y-1'),
        'e': (throws, 'y-2'),
        'other': (inc, 123),
    }
    s.update_graph(dsk=graph, keys=['e'])

    # Drain reports until the failure of 'e' is announced.
    msg = yield report_q.get()
    while not (msg['op'] == 'task-erred' and msg['key'] == 'e'):
        msg = yield report_q.get()

    p = MultiProgressWidget(['e'], scheduler=s, complete=True)
    tracked = set(concat(p.all_keys.values()))
    assert tracked == {'x-1', 'x-2', 'x-3', 'y-1', 'y-2', 'e'}
    assert all(bar.value == 1.0 for bar in p.bars.values())
    assert p.texts['x'].value == '3 / 3'
    assert p.texts['y'].value == '2 / 2'

    sched_q.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """Run one task through a scheduler with three workers, then shut down.

    Generator-style test: it yields futures, so it is presumably driven by a
    coroutine test decorator that sits outside this block.

    Checks that the scheduler records each worker's core count and the data
    the workers were pre-loaded with, that the result of ``a`` lands on some
    worker, and that all workers and data are forgotten once they are closed.
    """
    # Local import keeps the bounded wait below self-contained.
    from time import time

    s = Scheduler()
    s.listen(0)
    done = s.start()

    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {
        'x': {a.address},
        'y': {a.address, b.address},
        'z': {b.address},
    }

    s.update_graph(dsk={'a': (inc, 1)}, keys=['a'])

    # Poll until 'a' is held somewhere. The assert bounds the wait so that a
    # task which never completes fails the test instead of hanging forever.
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f(c, a, b):
    """Attach ten Progress objects to the same key and let the graph finish.

    Once ``z`` is reported in memory, every one of the ten Progress objects
    must have status ``'finished'``.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')}
    s.update_graph(dsk=graph, keys=['z'])

    bars = [Progress(keys=['z'], scheduler=s) for _ in range(10)]

    # Drain reports until 'z' is announced as in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'z'):
        msg = yield report_q.get()

    assert all(bar.status == 'finished' for bar in bars)

    sched_q.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Follow a MultiProgress that groups keys by their prefix before ``-``.

    The remaining keys per group are checked at three points: right after
    creation, after ``x-3`` is in memory, and after ``y-2`` is in memory, at
    which point the status must be ``'finished'``.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {
        'x-1': (inc, 1),
        'x-2': (inc, 'x-1'),
        'x-3': (inc, 'x-2'),
        'y-1': (dec, 'x-3'),
        'y-2': (dec, 'y-1'),
    }
    s.update_graph(dsk=graph, keys=['y-2'])

    p = MultiProgress(['y-2'], scheduler=s, func=lambda s: s.split('-')[0])

    assert p.keys == {'x': {'x-1', 'x-2', 'x-3'}, 'y': {'y-1', 'y-2'}}

    # Drain reports until the last 'x' key is in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'x-3'):
        msg = yield report_q.get()

    assert p.keys == {'x': set(), 'y': {'y-1', 'y-2'}}

    # Drain reports until the last 'y' key is in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'y-2'):
        msg = yield report_q.get()

    assert p.keys == {'x': set(), 'y': set()}
    assert p.status == 'finished'

    sched_q.put_nowait({'op': 'close'})
    yield done
def test_update_state(loop):
    """Check scheduler bookkeeping across two successive ``update_graph`` calls.

    First submits ``x -> y`` and reports ``x`` finished, which must put ``y``
    into processing on ``alice``. Then submits ``a`` and ``z`` (``z`` depends
    on ``y`` and ``a``) and checks the merged tasks, dependencies, dependents
    and waiting/wanted maps, with ``a`` either ready or already processing.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   # A set of keys, not the bare string 'x': a string only
                   # iterates correctly while the key is a single character.
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')

    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)

    assert set(s.processing[alice]) == {'y'}
    assert set(s.rprocessing['y']) == {alice}
    assert not s.ready
    assert s.who_wants == {'y': {'client'}}
    assert s.wants_what == {'client': {'y'}}

    s.update_graph(tasks={'a': 1, 'z': (add, 'y', 'a')},
                   keys=['z'],
                   dependencies={'z': {'y', 'a'}},
                   client='client')

    assert s.tasks == {'x': 1, 'y': (inc, 'x'), 'a': 1,
                       'z': (add, 'y', 'a')}
    assert s.dependencies == {'x': set(), 'a': set(), 'y': {'x'},
                              'z': {'a', 'y'}}
    assert s.dependents == {'z': set(), 'y': {'z'}, 'a': {'z'},
                            'x': {'y'}}

    assert s.waiting == {'z': {'a', 'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'a': {'z'},
                              'z': set()}

    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    assert 'a' in s.ready or 'a' in s.processing[alice]

    s.stop()
def f(c, a, b):
    """Track ``x -> y -> z`` with a ProgressWidget until ``z`` is in memory.

    After ``z`` is reported in memory and the widget is refreshed with
    ``_update()``, the bar value must be 1.0 and the bar description must
    contain the character ``'s'``.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')}
    s.update_graph(dsk=graph, keys=['z'])

    progress = ProgressWidget(['z'], scheduler=s)

    # Drain reports until 'z' is announced as in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'z'):
        msg = yield report_q.get()

    progress._update()
    assert progress.bar.value == 1.0
    assert 's' in progress.bar.description

    sched_q.put_nowait({'op': 'close'})
    yield done
def test_update_state(loop):
    """Check scheduler bookkeeping across two successive ``update_graph`` calls.

    First submits ``x -> y`` and reports ``x`` finished, which must put ``y``
    into processing on ``'alice'``. Then submits ``a`` and ``z`` (``z``
    depends on ``y`` and ``a``) and checks the merged tasks, dependencies,
    dependents, waiting/wanted maps, the ready list and the in-play keys.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address='alice', ncores=1)

    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   # A set of keys, not the bare string 'x': a string only
                   # iterates correctly while the key is a single character.
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')

    s.mark_task_finished('x', 'alice', nbytes=10, type=int)

    assert s.processing['alice'] == {'y'}
    assert not s.ready
    assert s.who_wants == {'y': {'client'}}
    assert s.wants_what == {'client': {'y'}}

    s.update_graph(tasks={'a': 1, 'z': (add, 'y', 'a')},
                   keys=['z'],
                   dependencies={'z': {'y', 'a'}},
                   client='client')

    assert s.tasks == {'x': 1, 'y': (inc, 'x'), 'a': 1,
                       'z': (add, 'y', 'a')}
    assert s.dependencies == {'x': set(), 'a': set(), 'y': {'x'},
                              'z': {'a', 'y'}}
    assert s.dependents == {'z': set(), 'y': {'z'}, 'a': {'z'},
                            'x': {'y'}}

    assert s.waiting == {'z': {'a', 'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'a': {'z'},
                              'z': set()}

    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    assert list(s.ready) == ['a']
    assert s.in_play == {'a', 'x', 'y', 'z'}

    s.stop()
def f(c, a, b):
    """Follow a MultiProgressWidget through success of x/y and failure of e.

    The widget's key groups, bar values, text labels and bar styles are
    checked at four points: right after creation, after ``x-3`` is in memory,
    after ``y-2`` is in memory, and after ``e`` is reported as erred.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()

    sched_q = Queue()
    report_q = Queue()
    s.handle_queues(sched_q, report_q)
    first = yield report_q.get()
    assert first['op'] == 'stream-start'

    graph = {
        'x-1': (inc, 1),
        'x-2': (inc, 'x-1'),
        'x-3': (inc, 'x-2'),
        'y-1': (dec, 'x-3'),
        'y-2': (dec, 'y-1'),
        'e': (throws, 'y-2'),
        'other': (inc, 123),
    }
    s.update_graph(dsk=graph, keys=['e'])

    p = MultiProgressWidget(['e'], scheduler=s)

    assert p.keys == {
        'x': {'x-1', 'x-2', 'x-3'},
        'y': {'y-1', 'y-2'},
        'e': {'e'},
    }

    # Stage 1: drain reports until the last 'x' key is in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'x-3'):
        msg = yield report_q.get()

    assert p.keys == {'x': set(), 'y': {'y-1', 'y-2'}, 'e': {'e'}}

    p._update()
    assert p.bars['x'].value == 1.0
    assert p.bars['y'].value == 0.0
    assert p.bars['e'].value == 0.0
    assert '3 / 3' in p.texts['x'].value
    assert '0 / 2' in p.texts['y'].value
    assert '0 / 1' in p.texts['e'].value

    # Stage 2: drain reports until the last 'y' key is in memory.
    msg = yield report_q.get()
    while not (msg['op'] == 'key-in-memory' and msg['key'] == 'y-2'):
        msg = yield report_q.get()

    p._update()
    assert p.bars['x'].value == 1.0
    assert p.bars['y'].value == 1.0
    assert p.bars['e'].value == 0.0

    assert p.keys == {'x': set(), 'y': set(), 'e': {'e'}}

    # Stage 3: drain reports until the failure of 'e' is announced.
    msg = yield report_q.get()
    while not (msg['op'] == 'task-erred' and msg['key'] == 'e'):
        msg = yield report_q.get()

    assert p.bars['x'].bar_style == 'success'
    assert p.bars['y'].bar_style == 'success'
    assert p.bars['e'].bar_style == 'danger'

    assert p.status == 'error'

    sched_q.put_nowait({'op': 'close'})
    yield done