class WorkerTest(unittest.TestCase): def run(self, result=None): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2: self.w = w self.w2 = w2 super(WorkerTest, self).run(result) if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) try: self.w.handle_interrupt(signal.SIGUSR1, None) except AttributeError: raise unittest.SkipTest('signal.SIGUSR1 not found on this system') self.w.run() self.assertFalse(d.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_tracking_url(self): tracking_url = 'http://test_url.com/' class A(Task): has_run = False def complete(self): return self.has_run def run(self, tracking_url_callback=None): if tracking_url_callback is not None: tracking_url_callback(tracking_url) self.has_run = True a = A() self.assertTrue(self.w.add(a)) self.assertTrue(self.w.run()) tasks = self.sch.task_list('DONE', '') self.assertEqual(1, len(tasks)) self.assertEqual(tracking_url, tasks['A()']['tracking_url']) def test_type_error_in_tracking_run(self): class A(Task): num_runs = 0 def complete(self): return False def run(self, tracking_url_callback=None): self.num_runs += 1 raise TypeError('bad type') a = A() self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) # Should only run and fail once, not retry because of the type error self.assertEqual(1, a.num_runs) def test_fail(self): class CustomException(BaseException): def __init__(self, msg): self.msg = msg class A(Task): def run(self): self.has_run = True raise CustomException('bad things') def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse(self.w.run()) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_gets_missed_work(self): class A(Task): done = False def complete(self): return self.done def run(self): self.done = True a = A() self.assertTrue(self.w.add(a)) # simulate a missed get_work response self.assertEqual('A()', self.sch.get_work(worker='X')['task_id']) self.assertTrue(self.w.run()) self.assertTrue(a.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_fails_registering_signal(self): with mock.patch('luigi.worker.signal', spec=['signal']): # mock will raise an attribute error getting signal.SIGUSR1 Worker() def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2: self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class D(DummyTask): pass d = D() class C(DummyTask): def requires(self): return d c = C() class B(DummyTask): def requires(self): return c, a b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertTrue(d.has_run) self.assertFalse(a.has_run)
class CentralPlannerTest(unittest.TestCase): def setUp(self): super(CentralPlannerTest, self).setUp() conf = self.get_scheduler_config() self.sch = CentralPlannerScheduler(**conf) self.time = time.time def get_scheduler_config(self): return { 'retry_delay': 100, 'remove_delay': 1000, 'worker_disconnect_delay': 10, 'disable_persist': 10, 'disable_window': 10, 'disable_failures': 3, 'disable_hard_timeout': 60 * 60, } def tearDown(self): super(CentralPlannerTest, self).tearDown() if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A', runnable=False) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) for t in range(100): self.setTime(t) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) self.sch.ping(worker=WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in range(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in range(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_assistant_get_work(self): self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') # check that the scheduler recognizes tasks as running running_tasks = self.sch.task_list('RUNNING', '') self.assertEqual(len(running_tasks), 1) self.assertEqual(list(running_tasks.keys()), ['A']) self.assertEqual(running_tasks['A']['worker_running'], 'Y') def test_assistant_get_work_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertTrue(self.sch.get_work(worker='Y', assistant=True)['task_id'] is None) def test_task_fails_when_assistant_dies(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') self.assertEqual(list(self.sch.task_list('RUNNING', '').keys()), ['A']) # Y dies for 50 seconds, X stays alive self.setTime(50) self.sch.ping(worker='X') self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['A']) def test_prune_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.get_work(worker='Y', assistant=True) self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True) # worker X stops communicating, A should be marked for removal self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() # A will now be pruned self.setTime(2000) self.sch.prune() self.assertFalse(list(self.sch.task_list('', ''))) def test_fail_job_from_dead_worker_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_worker('Y', [('assistant', True)]) self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() self.assertEqual(['A'], list(self.sch.task_list('FAILED', '').keys())) def test_assistant_request_runnable_task(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A', runnable=True) self.setTime(600) self.sch.prune() self.assertEqual('A', self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_assistant_request_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertIsNone(self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_prune_done_tasks(self, expected=None): self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.sch.add_task(worker=WORKER, task_id='B', deps=['A'], status=DONE) self.sch.add_task(worker=WORKER, task_id='C', deps=['B']) self.setTime(600) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.setTime(2000) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.assertEqual(set(expected or ()), set(self.sch.task_list('', '').keys())) def test_keep_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.test_prune_done_tasks(['B', 'C']) def test_keep_scheduler_disabled_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant # create a scheduler disabled task and a worker disabled task for i in range(10): self.sch.add_task(worker=WORKER, task_id='D', status=FAILED) self.sch.add_task(worker=WORKER, task_id='E', status=DISABLED) # scheduler prunes the worker disabled task self.assertEqual(set(['D', 'E']), set(self.sch.task_list(DISABLED, ''))) self.test_prune_done_tasks(['B', 'C', 'D']) def test_keep_failed_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.sch.add_task(worker=WORKER, task_id='D', status=FAILED, deps='A') self.test_prune_done_tasks(['A', 'B', 'C', 'D']) def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.get_work(worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 2)]) self.sch.add_worker('Y', []) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker='X', task_id='A', priority=4) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_for_second_worker(self): self.sch.get_work(worker='Y') # indicate to the scheduler that Y is active self.sch.add_task(worker='X', task_id='A', resources={'R': 1}) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10) self.sch.add_worker('X', {'workers': 2}) self.sch.add_worker('Y', {'workers': 1}) self.sch.update_resources(R=2) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', priority=0) self.sch.update_resources(R=1) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(worker=WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(worker=WORKER, task_id='B', deps=['A']) self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1, 'r2': 1}) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1, 'r2': 1}) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task(worker='X', task_id='A', resources={'r': 1}) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.get_work(worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r2': 1}) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def test_allow_resource_use_while_scheduling(self): self.sch.update_resources(r1=1) self.sch.add_task(worker='SCHEDULING', task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1}, priority=1) self.assertEqual('B', self.sch.get_work(worker=WORKER)['task_id']) def test_stop_locking_resource_for_uninterested_worker(self): self.setTime(0) self.sch.update_resources(r1=1) self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker='LOW_PRIO', task_id='B', resources={'r1': 1}, priority=1) self.assertIsNone(self.sch.get_work(worker='LOW_PRIO')['task_id']) self.setTime(120) self.assertEqual('B', self.sch.get_work(worker='LOW_PRIO')['task_id']) def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], expected_id) self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=-20) self.sch.add_task(worker=WORKER, task_id='E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['Z'], priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', deps=['Z'], priority=3) self.sch.add_task(worker=WORKER, task_id='D', priority=2) self.sch.add_task(worker=WORKER, task_id='Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disable_by_worker(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_task_list_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '', False).keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '')) def test_task_list_within_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=4) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '').keys())) def test_task_lists_some_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c, status=DONE) for c in 'EFG': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('EFG'), set(sch.task_list('PENDING', '').keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', '')) def test_task_list_filter_by_search(self): self.sch.add_task(worker=WORKER, task_id='test_match_task') self.sch.add_task(worker=WORKER, task_id='test_filter_task') matches = self.sch.task_list('PENDING', '', search='match') self.assertEqual(['test_match_task'], list(matches.keys())) def test_task_list_filter_by_multiple_search_terms(self): self.sch.add_task(worker=WORKER, task_id='abcd') self.sch.add_task(worker=WORKER, task_id='abd') self.sch.add_task(worker=WORKER, task_id='acd') self.sch.add_task(worker=WORKER, task_id='ad') self.sch.add_task(worker=WORKER, task_id='bc') matches = self.sch.task_list('PENDING', '', search='b c') self.assertEqual(set(['abcd', 'bc']), set(matches.keys())) def test_search_results_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) sch.add_task(worker=WORKER, task_id='task_a') sch.add_task(worker=WORKER, task_id='task_b') sch.add_task(worker=WORKER, task_id='task_c') sch.add_task(worker=WORKER, task_id='task_d') self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '', search='a')) self.assertEqual(['task_a'], list(sch.task_list('PENDING', '', search='_a').keys())) def test_priority_update_dependency_chain(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['C']) self.sch.add_task(worker=WORKER, task_id='C', priority=1) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=10, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C') self.sch.add_task(worker=WORKER + "_2", task_id='B') response = self.sch.get_work(worker=WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_pending_downstream_disable(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='C', deps=('B',)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(0, response['n_pending_tasks']) self.assertEqual(0, response['n_unique_pending']) def test_pending_downstream_failure(self): self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='C', deps=('B',)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(2, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_task_list_no_deps(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') task_list = self.sch.task_list('PENDING', '') self.assertFalse('deps' in task_list['A']) def test_task_first_failure_time(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) time_before_failure = time.time() test_task.add_failure() time_after_failure = time.time() self.assertLessEqual(time_before_failure, test_task.failures.first_failure_time) self.assertGreaterEqual(time_after_failure, test_task.failures.first_failure_time) def test_task_first_failure_time_remains_constant(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) test_task.add_failure() first_failure_time = test_task.failures.first_failure_time test_task.add_failure() self.assertEqual(first_failure_time, test_task.failures.first_failure_time) def test_task_has_excessive_failures(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) self.assertFalse(test_task.has_excessive_failures()) test_task.add_failure() self.assertFalse(test_task.has_excessive_failures()) fake_failure_time = (test_task.failures.first_failure_time - 2 * 60 * 60) test_task.failures.first_failure_time = fake_failure_time self.assertTrue(test_task.has_excessive_failures()) def test_quadratic_behavior(self): """ Test that get_work is not taking linear amount of time. This is of course impossible to test, however, doing reasonable assumptions about hardware. This time should finish in a timely manner. """ # For 10000 it takes almost 1 second on my laptop. Prior to these # changes it was being slow already at NUM_TASKS=300 NUM_TASKS = 10000 for i in range(NUM_TASKS): self.sch.add_task(worker=str(i), task_id=str(i), resources={}) for i in range(NUM_TASKS): self.assertEqual(self.sch.get_work(worker=str(i))['task_id'], str(i)) self.sch.add_task(worker=str(i), task_id=str(i), status=DONE) def test_get_work_speed(self): """ Test that get_work is fast for few workers and many DONEs. In #986, @daveFNbuck reported that he got a slowdown. """ # This took almost 4 minutes without optimization. # Now it takes 10 seconds on my machine. NUM_PENDING = 1000 NUM_DONE = 200000 assert NUM_DONE >= NUM_PENDING for i in range(NUM_PENDING): self.sch.add_task(worker=WORKER, task_id=str(i), resources={}) for i in range(NUM_PENDING, NUM_DONE): self.sch.add_task(worker=WORKER, task_id=str(i), status=DONE) for i in range(NUM_PENDING): res = int(self.sch.get_work(worker=WORKER)['task_id']) self.assertTrue(0 <= res < NUM_PENDING) self.sch.add_task(worker=WORKER, task_id=str(res), status=DONE)
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10, disable_persist=10, disable_window=10, disable_failures=3) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(WORKER, task_id='B', deps=['A']) self.sch.add_task(WORKER, task_id='A') self.sch.add_task(WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(WORKER, task_id='A', deps=['B']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task(WORKER, task_id='A', resources={'r': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=1) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) # Should wait for r1 to be available for B before using up r2 self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=5, deps=['A']) self.sch.add_task(WORKER, 'C', priority=10, deps=['B']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_by_worker(self): self.sch.add_task(WORKER, 'A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(WORKER, 'A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_priority_update_dependency_chain(self): self.sch.add_task(WORKER, 'A', priority=10, deps=['B']) self.sch.add_task(WORKER, 'B', priority=5, deps=['C']) self.sch.add_task(WORKER, 'C', priority=1) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=10, deps=['A']) self.sch.add_task(WORKER, 'C', priority=5, deps=['A']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER + "_2", 'B') response = self.sch.get_work(WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_prefer_more_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', deps=['B']) self.sch.add_task(WORKER, 'D', deps=['B']) self.sch.add_task(WORKER, 'E', deps=['A']) self.check_task_order('BACDE') def test_prefer_readier_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D') self.sch.add_task(WORKER, 'F', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'G', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'E', deps=['D']) self.check_task_order('DABCFGE') def test_ignore_done_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D', priority=1) self.sch.add_task(WORKER, 'E', deps=['C', 'D']) self.sch.add_task(WORKER, 'F', deps=['A', 'B']) self.check_task_order('DCABEF')
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)[1], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)[1], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER), (0, None)) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)[1], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual(self.sch.get_work(WORKER)[1], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)[1], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER), (0, None)) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual(self.sch.get_work(WORKER)[1], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)[1], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER), (0, None)) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')[1], 'A') self.assertEqual(self.sch.get_work(worker='Y')[1], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')[1], 'C') self.assertEqual(self.sch.get_work(worker='X')[1], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)[1], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)[1], None) self.sch.ping(WORKER) self.setTime(101) self.assertEqual(self.sch.get_work(WORKER)[1], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')[1], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') self.assertEqual(self.sch.get_work(worker='Y')[1], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')[1], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')[1], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')[1], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')[1], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')[1], None)
class CentralPlannerTest(unittest.TestCase): def setUp(self): super(CentralPlannerTest, self).setUp() conf = self.get_scheduler_config() self.sch = CentralPlannerScheduler(**conf) self.time = time.time def get_scheduler_config(self): return { "retry_delay": 100, "remove_delay": 1000, "worker_disconnect_delay": 10, "disable_persist": 10, "disable_window": 10, "disable_failures": 3, "disable_hard_timeout": 60 * 60, } def tearDown(self): super(CentralPlannerTest, self).tearDown() if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") self.sch.add_task(worker=WORKER, task_id="A", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "B") self.sch.add_task(worker=WORKER, task_id="B", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_failed_dep(self): self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.assertEqual( self.sch.get_work(worker=WORKER)["task_id"], None ) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id="A", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "B") self.sch.add_task(worker=WORKER, task_id="B", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_broken_dep(self): self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="A", runnable=False) self.assertEqual( self.sch.get_work(worker=WORKER)["task_id"], None ) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id="A", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "B") self.sch.add_task(worker=WORKER, task_id="B", status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker="X", task_id="A") self.sch.add_task(worker="Y", task_id="A") self.sch.add_task(task_id="B", deps=("A",), worker="X") self.sch.add_task(task_id="C", deps=("A",), worker="Y") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") self.assertEqual(self.sch.get_work(worker="Y")["task_id"], None) # Worker Y is pending on A to be done self.sch.add_task(worker="X", task_id="A", status=DONE) self.assertEqual(self.sch.get_work(worker="Y")["task_id"], "C") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "B") def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) for t in range(100): self.setTime(t) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) self.sch.ping(worker=WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") def test_resend_task(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="B") for _ in range(10): self.assertEqual("A", self.sch.get_work(worker=WORKER, current_tasks=[])["task_id"]) self.assertEqual("B", self.sch.get_work(worker=WORKER, current_tasks=["A"])["task_id"]) def test_resend_multiple_tasks(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="B") self.sch.add_task(worker=WORKER, task_id="C") # get A and B running self.assertEqual("A", self.sch.get_work(worker=WORKER)["task_id"]) self.assertEqual("B", self.sch.get_work(worker=WORKER)["task_id"]) for _ in range(10): self.assertEqual("A", self.sch.get_work(worker=WORKER, current_tasks=[])["task_id"]) self.assertEqual("A", self.sch.get_work(worker=WORKER, current_tasks=["B"])["task_id"]) self.assertEqual("B", self.sch.get_work(worker=WORKER, current_tasks=["A"])["task_id"]) self.assertEqual("C", self.sch.get_work(worker=WORKER, current_tasks=["A", "B"])["task_id"]) def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id="A", worker="X") self.sch.add_task(task_id="A", worker="Y") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") for t in range(200): self.setTime(t) self.sch.ping(worker="Y") if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker="Y")["task_id"], "A") def test_do_not_overwrite_tracking_url_while_running(self): self.sch.add_task(task_id="A", worker="X", status="RUNNING", tracking_url="trackme") self.assertEqual("trackme", self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) # not wiped out by another working scheduling as pending self.sch.add_task(task_id="A", worker="Y", status="PENDING") self.assertEqual("trackme", self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) def test_do_update_tracking_url_while_running(self): self.sch.add_task(task_id="A", worker="X", status="RUNNING", tracking_url="trackme") self.assertEqual("trackme", self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) self.sch.add_task(task_id="A", worker="X", status="RUNNING", tracking_url="stage_2") self.assertEqual("stage_2", self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) def test_keep_tracking_url_on_done_and_fail(self): for status in ("DONE", "FAILED"): self.sch.add_task(task_id="A", worker="X", status="RUNNING", tracking_url="trackme") self.assertEqual("trackme", self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) self.sch.add_task(task_id="A", worker="X", status=status) self.assertEqual("trackme", self.sch.task_list(status, "")["A"]["tracking_url"]) def test_drop_tracking_url_when_rescheduled_while_not_running(self): for status in ("DONE", "FAILED", "PENDING"): self.sch.add_task(task_id="A", worker="X", status=status, tracking_url="trackme") self.assertEqual("trackme", self.sch.task_list(status, "")["A"]["tracking_url"]) self.sch.add_task(task_id="A", worker="Y", status="PENDING") self.assertIsNone(self.sch.task_list("PENDING", "")["A"]["tracking_url"]) def test_reset_tracking_url_on_new_run(self): self.sch.add_task(task_id="A", worker="X", status="PENDING", tracking_url="trackme") self.assertEqual("trackme", self.sch.task_list("PENDING", "")["A"]["tracking_url"]) self.sch.add_task(task_id="A", worker="Y", status="RUNNING") self.assertIsNone(self.sch.task_list("RUNNING", "")["A"]["tracking_url"]) def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id="A", worker="X", runnable=False) self.sch.add_task(task_id="B", deps=("A",), worker="X") # X can't build anything self.assertEqual(self.sch.get_work(worker="X")["task_id"], None) self.sch.add_task(task_id="B", deps=("C",), worker="Y") # should reset dependencies for A self.sch.add_task(task_id="C", worker="Y", status=DONE) self.assertEqual(self.sch.get_work(worker="Y")["task_id"], "B") def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id="A", worker="X") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") self.setTime(10000) self.sch.add_task(task_id="A", worker="Y") # Will timeout X but not schedule A for removal for i in range(2000): self.setTime(10000 + i) self.sch.ping(worker="Y") self.sch.add_task(task_id="A", status=DONE, worker="Y") # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = "A" self.sch.add_task(task_id=t, worker="X") self.assertEqual(self.sch.get_work(worker="X")["task_id"], t) self.sch.add_task(task_id=t, worker="Y") self.assertEqual(self.sch.get_work(worker="Y")["task_id"], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker="X", task_id="A") self.sch.add_task(worker="Y", task_id="A") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") r = self.sch.get_work(worker="Y") self.assertEqual(r["task_id"], None) # Worker Y is pending on A to be done s = r["running_tasks"][0] self.assertEqual(s["task_id"], "A") self.assertEqual(s["worker"], "X") def test_assistant_get_work(self): self.sch.add_task(worker="X", task_id="A") self.sch.add_worker("Y", []) self.assertEqual(self.sch.get_work(worker="Y", assistant=True)["task_id"], "A") # check that the scheduler recognizes tasks as running running_tasks = self.sch.task_list("RUNNING", "") self.assertEqual(len(running_tasks), 1) self.assertEqual(list(running_tasks.keys()), ["A"]) self.assertEqual(running_tasks["A"]["worker_running"], "Y") def test_assistant_get_work_external_task(self): self.sch.add_task(worker="X", task_id="A", runnable=False) self.assertTrue(self.sch.get_work(worker="Y", assistant=True)["task_id"] is None) def test_task_fails_when_assistant_dies(self): self.setTime(0) self.sch.add_task(worker="X", task_id="A") self.sch.add_worker("Y", []) self.assertEqual(self.sch.get_work(worker="Y", assistant=True)["task_id"], "A") self.assertEqual(list(self.sch.task_list("RUNNING", "").keys()), ["A"]) # Y dies for 50 seconds, X stays alive self.setTime(50) self.sch.ping(worker="X") self.assertEqual(list(self.sch.task_list("FAILED", "").keys()), ["A"]) def test_prune_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker="X", task_id="A") self.sch.get_work(worker="Y", assistant=True) self.sch.add_task(worker="Y", task_id="A", status=DONE, assistant=True) # worker X stops communicating, A should be marked for removal self.setTime(600) self.sch.ping(worker="Y") self.sch.prune() # A will now be pruned self.setTime(2000) self.sch.prune() self.assertFalse(list(self.sch.task_list("", ""))) def test_fail_job_from_dead_worker_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker="X", task_id="A") self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.sch.add_worker("Y", [("assistant", True)]) self.setTime(600) self.sch.ping(worker="Y") self.sch.prune() self.assertEqual(["A"], list(self.sch.task_list("FAILED", "").keys())) def test_assistant_request_runnable_task(self): self.setTime(0) self.sch.add_task(worker="X", task_id="A", runnable=True) self.setTime(600) self.sch.prune() self.assertEqual("A", self.sch.get_work(worker="Y", assistant=True)["task_id"]) def test_assistant_request_external_task(self): self.sch.add_task(worker="X", task_id="A", runnable=False) self.assertIsNone(self.sch.get_work(worker="Y", assistant=True)["task_id"]) def test_prune_done_tasks(self, expected=None): self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A", status=DONE) self.sch.add_task(worker=WORKER, task_id="B", deps=["A"], status=DONE) self.sch.add_task(worker=WORKER, task_id="C", deps=["B"]) self.setTime(600) self.sch.ping(worker="ASSISTANT") self.sch.prune() self.setTime(2000) self.sch.ping(worker="ASSISTANT") self.sch.prune() self.assertEqual(set(expected or ()), set(self.sch.task_list("", "").keys())) def test_keep_tasks_for_assistant(self): self.sch.get_work(worker="ASSISTANT", assistant=True) # tell the scheduler this is an assistant self.test_prune_done_tasks(["B", "C"]) def test_keep_scheduler_disabled_tasks_for_assistant(self): self.sch.get_work(worker="ASSISTANT", assistant=True) # tell the scheduler this is an assistant # create a scheduler disabled task and a worker disabled task for i in range(10): self.sch.add_task(worker=WORKER, task_id="D", status=FAILED) self.sch.add_task(worker=WORKER, task_id="E", status=DISABLED) # scheduler prunes the worker disabled task self.assertEqual(set(["D", "E"]), set(self.sch.task_list(DISABLED, ""))) self.test_prune_done_tasks(["B", "C", "D"]) def test_keep_failed_tasks_for_assistant(self): self.sch.get_work(worker="ASSISTANT", assistant=True) # tell the scheduler this is an assistant self.sch.add_task(worker=WORKER, task_id="D", status=FAILED, deps="A") self.test_prune_done_tasks(["A", "B", "C", "D"]) def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 1}) self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 2}) self.assertFalse(self.sch.get_work(worker="X")["task_id"], "A") def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker="X")["task_id"]) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") def test_scheduler_with_resources_used(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 1}) self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") self.sch.add_task(worker="Y", task_id="B", resources={"R1": 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker="Y")["task_id"]) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker="X", task_id="A", resources={"R1": 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") self.sch.add_task(worker="Y", task_id="B", resources={"R2": 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker="Y")["task_id"], "B") def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker="X", task_id="A") self.assertEqual(self.sch.get_work(worker="X")["task_id"], "A") self.sch.add_task(worker="X", task_id="B", resources={"R": 1}, priority=10) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker="Y")["task_id"]) self.sch.add_task(worker="Y", task_id="D", priority=0) self.assertEqual(self.sch.get_work(worker="Y")["task_id"], "D") def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker="X", task_id="A", priority=10) self.sch.add_task(worker="X", task_id="B", resources={"R": 1}, priority=5) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker("X", [("workers", 1)]) self.assertEqual("C", self.sch.get_work(worker="Y")["task_id"]) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.get_work(worker="X") # indicate to the scheduler that X is active self.sch.add_task(worker="X", task_id="A", priority=10) self.sch.add_task(worker="X", task_id="B", resources={"R": 1}, priority=5) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker("X", [("workers", 2)]) self.sch.add_worker("Y", []) self.assertFalse(self.sch.get_work(worker="Y")["task_id"]) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker="X", task_id="A", priority=10) self.sch.add_task(worker="X", task_id="B", resources={"R": 1}, priority=5) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker("X", [("workers", 1)]) self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.assertEqual("C", self.sch.get_work(worker="Y")["task_id"]) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker="X", task_id="A", priority=4) self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.sch.add_task(worker="X", task_id="B", resources={"R": 1}, priority=5) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker("X", [("workers", 1)]) self.assertFalse(self.sch.get_work(worker="Y")["task_id"]) def test_lock_resources_for_second_worker(self): self.sch.get_work(worker="Y") # indicate to the scheduler that Y is active self.sch.add_task(worker="X", task_id="A", resources={"R": 1}) self.sch.add_task(worker="X", task_id="B", resources={"R": 1}) self.sch.add_task(worker="Y", task_id="C", resources={"R": 1}, priority=10) self.sch.add_worker("X", {"workers": 2}) self.sch.add_worker("Y", {"workers": 1}) self.sch.update_resources(R=2) self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.assertFalse(self.sch.get_work(worker="X")["task_id"]) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker="X", task_id="A", resources={"R": 1}, priority=0) self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.sch.add_task(worker="Y", task_id="B", resources={"R": 1}, priority=10) self.sch.add_task(worker="Y", task_id="C", priority=0) self.sch.update_resources(R=1) self.assertEqual("C", self.sch.get_work(worker="Y")["task_id"]) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id="A", worker="X") self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id="B", deps=["A"], worker="X") self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(worker=WORKER, task_id="C", deps=["B"], priority=100) self.sch.add_task(worker=WORKER, task_id="B", deps=["A"]) self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="D", priority=10) self.check_task_order("ABCD") def test_update_resources(self): self.sch.add_task(worker=WORKER, task_id="A", deps=["B"]) self.sch.add_task(worker=WORKER, task_id="B", resources={"r": 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(worker=WORKER, task_id="B", resources={"r": 1}) # now we have enough resources self.check_task_order(["B", "A"]) def test_hendle_multiple_resources(self): self.sch.add_task(worker=WORKER, task_id="A", resources={"r1": 1, "r2": 1}) self.sch.add_task(worker=WORKER, task_id="B", resources={"r1": 1, "r2": 1}) self.sch.add_task(worker=WORKER, task_id="C", resources={"r1": 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual("A", self.sch.get_work(worker=WORKER)["task_id"]) self.check_task_order("C") def test_single_resource_lock(self): self.sch.add_task(worker="X", task_id="A", resources={"r": 1}) self.assertEqual("A", self.sch.get_work(worker="X")["task_id"]) self.sch.add_task(worker=WORKER, task_id="B", resources={"r": 2}, priority=10) self.sch.add_task(worker=WORKER, task_id="C", resources={"r": 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(worker=WORKER, task_id="A", resources={"r": 2}, priority=10) self.sch.add_task(worker=WORKER, task_id="B", resources={"r": 1}) self.sch.update_resources(r=1) self.check_task_order("B") def test_multiple_resources_lock(self): self.sch.get_work(worker="X") # indicate to the scheduler that X is active self.sch.add_task(worker="X", task_id="A", resources={"r1": 1, "r2": 1}, priority=10) self.sch.add_task(worker=WORKER, task_id="B", resources={"r2": 1}) self.sch.add_task(worker=WORKER, task_id="C", resources={"r1": 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(worker=WORKER, task_id="A", resources={"r1": 1}, priority=10) self.sch.add_task(worker=WORKER, task_id="B", resources={"r1": 1, "r2": 1}, priority=10) self.sch.add_task(worker=WORKER, task_id="C", resources={"r2": 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual("A", self.sch.get_work(worker=WORKER)["task_id"]) # C doesn't block B, so it can go first self.check_task_order("C") def test_allow_resource_use_while_scheduling(self): self.sch.update_resources(r1=1) self.sch.add_task(worker="SCHEDULING", task_id="A", resources={"r1": 1}, priority=10) self.sch.add_task(worker=WORKER, task_id="B", resources={"r1": 1}, priority=1) self.assertEqual("B", self.sch.get_work(worker=WORKER)["task_id"]) def test_stop_locking_resource_for_uninterested_worker(self): self.setTime(0) self.sch.update_resources(r1=1) self.assertIsNone(self.sch.get_work(worker=WORKER)["task_id"]) self.sch.add_task(worker=WORKER, task_id="A", resources={"r1": 1}, priority=10) self.sch.add_task(worker="LOW_PRIO", task_id="B", resources={"r1": 1}, priority=1) self.assertIsNone(self.sch.get_work(worker="LOW_PRIO")["task_id"]) self.setTime(120) self.assertEqual("B", self.sch.get_work(worker="LOW_PRIO")["task_id"]) def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], expected_id) self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_priorities(self): self.sch.add_task(worker=WORKER, task_id="A", priority=10) self.sch.add_task(worker=WORKER, task_id="B", priority=5) self.sch.add_task(worker=WORKER, task_id="C", priority=15) self.sch.add_task(worker=WORKER, task_id="D", priority=9) self.check_task_order(["C", "A", "D", "B"]) def test_priorities_default_and_negative(self): self.sch.add_task(worker=WORKER, task_id="A", priority=10) self.sch.add_task(worker=WORKER, task_id="B") self.sch.add_task(worker=WORKER, task_id="C", priority=15) self.sch.add_task(worker=WORKER, task_id="D", priority=-20) self.sch.add_task(worker=WORKER, task_id="E", priority=1) self.check_task_order(["C", "A", "E", "B", "D"]) def test_priorities_and_dependencies(self): self.sch.add_task(worker=WORKER, task_id="A", deps=["Z"], priority=10) self.sch.add_task(worker=WORKER, task_id="B", priority=5) self.sch.add_task(worker=WORKER, task_id="C", deps=["Z"], priority=3) self.sch.add_task(worker=WORKER, task_id="D", priority=2) self.sch.add_task(worker=WORKER, task_id="Z", priority=1) self.check_task_order(["Z", "A", "B", "C", "D"]) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(worker=WORKER, task_id="A", priority=1) self.sch.add_task(worker=WORKER, task_id="B", priority=5, deps=["A"]) self.sch.add_task(worker=WORKER, task_id="C", priority=10, deps=["B"]) self.sch.add_task(worker=WORKER, task_id="D", priority=6) self.check_task_order(["A", "B", "C", "D"]) def test_disable(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.assertEqual(len(self.sch.task_list("FAILED", "")), 0) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_disable_and_reenable(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.assertEqual(len(self.sch.task_list("FAILED", "")), 0) self.sch.re_enable_task("A") # should be enabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 0) self.assertEqual(len(self.sch.task_list("FAILED", "")), 1) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.assertEqual(len(self.sch.task_list("FAILED", "")), 0) self.sch.re_enable_task("A") # should be enabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 0) self.assertEqual(len(self.sch.task_list("FAILED", "")), 1) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list("DISABLED", "")), 0) self.assertEqual(len(self.sch.task_list("FAILED", "")), 1) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.assertEqual(len(self.sch.task_list("FAILED", "")), 0) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], None) def test_disable_and_done(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.assertEqual(len(self.sch.task_list("FAILED", "")), 0) self.sch.add_task(worker=WORKER, task_id="A", status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 0) self.assertEqual(len(self.sch.task_list("DONE", "")), 1) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") def test_automatic_re_enable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list("", "")["A"]["status"]) def test_automatic_re_enable_with_one_failure_allowed(self): self.sch = CentralPlannerScheduler(disable_failures=1, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list("", "")["A"]["status"]) def test_no_automatic_re_enable_after_manual_disable(self): self.sch = CentralPlannerScheduler(disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A", status=DISABLED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) def test_no_automatic_re_enable_after_auto_then_manual_disable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) # should remain disabled once set self.sch.add_task(worker=WORKER, task_id="A", status=DISABLED) self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list("", "")["A"]["status"]) def test_disable_by_worker(self): self.sch.add_task(worker=WORKER, task_id="A", status=DISABLED) self.assertEqual(len(self.sch.task_list("DISABLED", "")), 1) self.sch.add_task(worker=WORKER, task_id="A") # should be enabled at this point self.assertEqual(len(self.sch.task_list("DISABLED", "")), 0) self.sch.add_task(worker=WORKER, task_id="A") self.assertEqual(self.sch.get_work(worker=WORKER)["task_id"], "A") def test_task_list_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in "ABCD": sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set("ABCD"), set(sch.task_list("PENDING", "", False).keys())) self.assertEqual({"num_tasks": 4}, sch.task_list("PENDING", "")) def test_task_list_within_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=4) for c in "ABCD": sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set("ABCD"), set(sch.task_list("PENDING", "").keys())) def test_task_lists_some_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in "ABCD": sch.add_task(worker=WORKER, task_id=c, status=DONE) for c in "EFG": sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set("EFG"), set(sch.task_list("PENDING", "").keys())) self.assertEqual({"num_tasks": 4}, sch.task_list("DONE", "")) def test_task_list_filter_by_search(self): self.sch.add_task(worker=WORKER, task_id="test_match_task") self.sch.add_task(worker=WORKER, task_id="test_filter_task") matches = self.sch.task_list("PENDING", "", search="match") self.assertEqual(["test_match_task"], list(matches.keys())) def test_task_list_filter_by_multiple_search_terms(self): self.sch.add_task(worker=WORKER, task_id="abcd") self.sch.add_task(worker=WORKER, task_id="abd") self.sch.add_task(worker=WORKER, task_id="acd") self.sch.add_task(worker=WORKER, task_id="ad") self.sch.add_task(worker=WORKER, task_id="bc") matches = self.sch.task_list("PENDING", "", search="b c") self.assertEqual(set(["abcd", "bc"]), set(matches.keys())) def test_search_results_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) sch.add_task(worker=WORKER, task_id="task_a") sch.add_task(worker=WORKER, task_id="task_b") sch.add_task(worker=WORKER, task_id="task_c") sch.add_task(worker=WORKER, task_id="task_d") self.assertEqual({"num_tasks": 4}, sch.task_list("PENDING", "", search="a")) self.assertEqual(["task_a"], list(sch.task_list("PENDING", "", search="_a").keys())) def test_priority_update_dependency_chain(self): self.sch.add_task(worker=WORKER, task_id="A", priority=10, deps=["B"]) self.sch.add_task(worker=WORKER, task_id="B", priority=5, deps=["C"]) self.sch.add_task(worker=WORKER, task_id="C", priority=1) self.sch.add_task(worker=WORKER, task_id="D", priority=6) self.check_task_order(["C", "B", "A", "D"]) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(worker=WORKER, task_id="A", priority=1) self.sch.add_task(worker=WORKER, task_id="B", priority=10, deps=["A"]) self.sch.add_task(worker=WORKER, task_id="C", priority=5, deps=["A"]) self.sch.add_task(worker=WORKER, task_id="D", priority=6) self.check_task_order(["A", "B", "D", "C"]) def test_unique_tasks(self): self.sch.add_task(worker=WORKER, task_id="A") self.sch.add_task(worker=WORKER, task_id="B") self.sch.add_task(worker=WORKER, task_id="C") self.sch.add_task(worker=WORKER + "_2", task_id="B") response = self.sch.get_work(worker=WORKER) self.assertEqual(3, response["n_pending_tasks"]) self.assertEqual(2, response["n_unique_pending"]) def test_pending_downstream_disable(self): self.sch.add_task(worker=WORKER, task_id="A", status=DISABLED) self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="C", deps=("B",)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response["task_id"] is None) self.assertEqual(0, response["n_pending_tasks"]) self.assertEqual(0, response["n_unique_pending"]) def test_pending_downstream_failure(self): self.sch.add_task(worker=WORKER, task_id="A", status=FAILED) self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="C", deps=("B",)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response["task_id"] is None) self.assertEqual(2, response["n_pending_tasks"]) self.assertEqual(2, response["n_unique_pending"]) def test_task_list_no_deps(self): self.sch.add_task(worker=WORKER, task_id="B", deps=("A",)) self.sch.add_task(worker=WORKER, task_id="A") task_list = self.sch.task_list("PENDING", "") self.assertFalse("deps" in task_list["A"]) def test_task_first_failure_time(self): self.sch.add_task(worker=WORKER, task_id="A") test_task = self.sch._state.get_task("A") self.assertIsNone(test_task.failures.first_failure_time) time_before_failure = time.time() test_task.add_failure() time_after_failure = time.time() self.assertLessEqual(time_before_failure, test_task.failures.first_failure_time) self.assertGreaterEqual(time_after_failure, test_task.failures.first_failure_time) def test_task_first_failure_time_remains_constant(self): self.sch.add_task(worker=WORKER, task_id="A") test_task = self.sch._state.get_task("A") self.assertIsNone(test_task.failures.first_failure_time) test_task.add_failure() first_failure_time = test_task.failures.first_failure_time test_task.add_failure() self.assertEqual(first_failure_time, test_task.failures.first_failure_time) def test_task_has_excessive_failures(self): self.sch.add_task(worker=WORKER, task_id="A") test_task = self.sch._state.get_task("A") self.assertIsNone(test_task.failures.first_failure_time) self.assertFalse(test_task.has_excessive_failures()) test_task.add_failure() self.assertFalse(test_task.has_excessive_failures()) fake_failure_time = test_task.failures.first_failure_time - 2 * 60 * 60 test_task.failures.first_failure_time = fake_failure_time self.assertTrue(test_task.has_excessive_failures()) def test_quadratic_behavior(self): """ Test that get_work is not taking linear amount of time. This is of course impossible to test, however, doing reasonable assumptions about hardware. This time should finish in a timely manner. """ # For 10000 it takes almost 1 second on my laptop. Prior to these # changes it was being slow already at NUM_TASKS=300 NUM_TASKS = 10000 for i in range(NUM_TASKS): self.sch.add_task(worker=str(i), task_id=str(i), resources={}) for i in range(NUM_TASKS): self.assertEqual(self.sch.get_work(worker=str(i))["task_id"], str(i)) self.sch.add_task(worker=str(i), task_id=str(i), status=DONE) def test_get_work_speed(self): """ Test that get_work is fast for few workers and many DONEs. In #986, @daveFNbuck reported that he got a slowdown. """ # This took almost 4 minutes without optimization. # Now it takes 10 seconds on my machine. NUM_PENDING = 1000 NUM_DONE = 200000 assert NUM_DONE >= NUM_PENDING for i in range(NUM_PENDING): self.sch.add_task(worker=WORKER, task_id=str(i), resources={}) for i in range(NUM_PENDING, NUM_DONE): self.sch.add_task(worker=WORKER, task_id=str(i), status=DONE) for i in range(NUM_PENDING): res = int(self.sch.get_work(worker=WORKER)["task_id"]) self.assertTrue(0 <= res < NUM_PENDING) self.sch.add_task(worker=WORKER, task_id=str(res), status=DONE)
class CentralPlannerTest(unittest.TestCase): def setUp(self): super(CentralPlannerTest, self).setUp() conf = self.get_scheduler_config() self.sch = CentralPlannerScheduler(**conf) self.time = time.time def get_scheduler_config(self): return { 'retry_delay': 100, 'remove_delay': 1000, 'worker_disconnect_delay': 10, 'disable_persist': 10, 'disable_window': 10, 'disable_failures': 3, } def tearDown(self): super(CentralPlannerTest, self).tearDown() if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in range(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in range(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in range(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_assistant_get_work(self): self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work('Y', assistant=True)['task_id'], 'A') # check that the scheduler recognizes tasks as running running_tasks = self.sch.task_list('RUNNING', '') self.assertEqual(len(running_tasks), 1) self.assertEqual(list(running_tasks.keys()), ['A']) self.assertEqual(running_tasks['A']['worker_running'], 'Y') def test_assistant_get_work_external_task(self): self.sch.add_task('X', task_id='A', runnable=False) self.assertTrue(self.sch.get_work('Y', assistant=True)['task_id'] is None) def test_task_fails_when_assistant_dies(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work('Y', assistant=True)['task_id'], 'A') self.assertEqual(list(self.sch.task_list('RUNNING', '').keys()), ['A']) # Y dies for 50 seconds, X stays alive self.setTime(50) self.sch.ping('X') self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['A']) def test_prune_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.get_work('Y', assistant=True) self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True) # worker X stops communicating, A should be marked for removal self.setTime(600) self.sch.ping('Y') self.sch.prune() # A will now be pruned self.setTime(2000) self.sch.prune() self.assertFalse(list(self.sch.task_list('', ''))) def test_prune_done_tasks(self, expected=None): self.setTime(0) self.sch.add_task(WORKER, task_id='A', status=DONE) self.sch.add_task(WORKER, task_id='B', deps=['A'], status=DONE) self.sch.add_task(WORKER, task_id='C', deps=['B']) self.setTime(600) self.sch.ping('ASSISTANT') self.sch.prune() self.setTime(2000) self.sch.ping('ASSISTANT') self.sch.prune() self.assertEqual(set(expected or ()), set(self.sch.task_list('', '').keys())) def test_keep_tasks_for_assistant(self): self.sch.get_work('ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.test_prune_done_tasks(['B', 'C']) def test_keep_scheduler_disabled_tasks_for_assistant(self): self.sch.get_work('ASSISTANT', assistant=True) # tell the scheduler this is an assistant # create a scheduler disabled task and a worker disabled task for i in range(10): self.sch.add_task(WORKER, 'D', status=FAILED) self.sch.add_task(WORKER, 'E', status=DISABLED) # scheduler prunes the worker disabled task self.assertEqual(set(['D', 'E']), set(self.sch.task_list(DISABLED, ''))) self.test_prune_done_tasks(['B', 'C', 'D']) def test_keep_failed_tasks_for_assistant(self): self.sch.get_work('ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.sch.add_task(WORKER, 'D', status=FAILED, deps='A') self.test_prune_done_tasks(['A', 'B', 'C', 'D']) def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 2)]) self.sch.add_worker('Y', []) self.assertFalse(self.sch.get_work('Y')['task_id']) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.assertEqual('C', self.sch.get_work('Y')['task_id']) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker='X', task_id='A', priority=4) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertFalse(self.sch.get_work('Y')['task_id']) def test_lock_resources_for_second_worker(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10) self.sch.add_worker('X', {'workers': 2}) self.sch.add_worker('Y', {'workers': 1}) self.sch.update_resources(R=2) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.assertFalse(self.sch.get_work('X')['task_id']) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', priority=0) self.sch.update_resources(R=1) self.assertEqual('C', self.sch.get_work('Y')['task_id']) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(WORKER, task_id='B', deps=['A']) self.sch.add_task(WORKER, task_id='A') self.sch.add_task(WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(WORKER, task_id='A', deps=['B']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task('X', task_id='A', resources={'r': 1}) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.add_task('X', task_id='A', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r2': 1}) self.sch.add_task(WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=5, deps=['A']) self.sch.add_task(WORKER, 'C', priority=10, deps=['B']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_by_worker(self): self.sch.add_task(WORKER, 'A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(WORKER, 'A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_task_list_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(WORKER, c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '', False).keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '')) def test_task_list_within_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=4) for c in 'ABCD': sch.add_task(WORKER, c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '').keys())) def test_task_lists_some_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(WORKER, c, 'DONE') for c in 'EFG': sch.add_task(WORKER, c) self.assertEqual(set('EFG'), set(sch.task_list('PENDING', '').keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', '')) def test_priority_update_dependency_chain(self): self.sch.add_task(WORKER, 'A', priority=10, deps=['B']) self.sch.add_task(WORKER, 'B', priority=5, deps=['C']) self.sch.add_task(WORKER, 'C', priority=1) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=10, deps=['A']) self.sch.add_task(WORKER, 'C', priority=5, deps=['A']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER + "_2", 'B') response = self.sch.get_work(WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_pending_downstream_disable(self): self.sch.add_task(WORKER, 'A', status=DISABLED) self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'C', deps=('B',)) response = self.sch.get_work(WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(0, response['n_pending_tasks']) self.assertEqual(0, response['n_unique_pending']) def test_pending_downstream_failure(self): self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'C', deps=('B',)) response = self.sch.get_work(WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(2, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_prefer_more_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', deps=['B']) self.sch.add_task(WORKER, 'D', deps=['B']) self.sch.add_task(WORKER, 'E', deps=['A']) self.check_task_order('BACDE') def test_prefer_readier_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D') self.sch.add_task(WORKER, 'F', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'G', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'E', deps=['D']) self.check_task_order('DABCFGE') def test_ignore_done_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D', priority=1) self.sch.add_task(WORKER, 'E', deps=['C', 'D']) self.sch.add_task(WORKER, 'F', deps=['A', 'B']) self.check_task_order('DCABEF')
class WorkerTest(unittest.TestCase): def run(self, result=None): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2: self.w = w self.w2 = w2 super(WorkerTest, self).run(result) if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) try: self.w.handle_interrupt(signal.SIGUSR1, None) except AttributeError: raise unittest.SkipTest('signal.SIGUSR1 not found on this system') self.w.run() self.assertFalse(d.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_tracking_url(self): tracking_url = 'http://test_url.com/' class A(Task): has_run = False def complete(self): return self.has_run def run(self, tracking_url_callback=None): if tracking_url_callback is not None: tracking_url_callback(tracking_url) self.has_run = True a = A() self.assertTrue(self.w.add(a)) self.assertTrue(self.w.run()) tasks = self.sch.task_list('DONE', '') self.assertEqual(1, len(tasks)) self.assertEqual(tracking_url, tasks[a.task_id]['tracking_url']) def test_type_error_in_tracking_run(self): class A(Task): num_runs = 0 def complete(self): return False def run(self, tracking_url_callback=None): self.num_runs += 1 raise TypeError('bad type') a = A() self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) # Should only run and fail once, not retry because of the type error self.assertEqual(1, a.num_runs) def test_fail(self): class CustomException(BaseException): def __init__(self, msg): self.msg = msg class A(Task): def run(self): self.has_run = True raise CustomException('bad things') def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse( self.w.run() ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_gets_missed_work(self): class A(Task): done = False def complete(self): return self.done def run(self): self.done = True a = A() self.assertTrue(self.w.add(a)) # simulate a missed get_work response self.assertEqual(a.task_id, self.sch.get_work(worker='X')['task_id']) self.assertTrue(self.w.run()) self.assertTrue(a.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_fails_registering_signal(self): with mock.patch('luigi.worker.signal', spec=['signal']): # mock will raise an attribute error getting signal.SIGUSR1 Worker() def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(str(eb), "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(str(eb), "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2: self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], a.task_id) self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class D(DummyTask): pass d = D() class C(DummyTask): def requires(self): return d c = C() class B(DummyTask): def requires(self): return c, a b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertTrue(d.has_run) self.assertFalse(a.has_run)
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_update_resources(self): self.sch.add_task(WORKER, task_id='A', deps=['B']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=5, deps=['A']) self.sch.add_task(WORKER, 'C', priority=10, deps=['B']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_priority_update_dependency_chain(self): self.sch.add_task(WORKER, 'A', priority=10, deps=['B']) self.sch.add_task(WORKER, 'B', priority=5, deps=['C']) self.sch.add_task(WORKER, 'C', priority=1) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=10, deps=['A']) self.sch.add_task(WORKER, 'C', priority=5, deps=['A']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'D', 'C'])
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A', ), worker='X') self.sch.add_task(task_id='C', deps=('A', ), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A', ), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C', ), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task( task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task( task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) for expected_id in ['C', 'A', 'D', 'B']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) for expected_id in ['C', 'A', 'E', 'B', 'D']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) for expected_id in ['B', 'D', 'Z', 'A', 'C']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10, disable_persist=10, disable_window=10, disable_failures=3) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A', ), worker='X') self.sch.add_task(task_id='C', deps=('A', ), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A', ), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C', ), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task( task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task( task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 2)]) self.sch.add_worker('Y', []) self.assertFalse(self.sch.get_work('Y')['task_id']) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.assertEqual('C', self.sch.get_work('Y')['task_id']) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker='X', task_id='A', priority=4) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertFalse(self.sch.get_work('Y')['task_id']) def test_lock_resources_for_second_worker(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10) self.sch.add_worker('X', {'workers': 2}) self.sch.add_worker('Y', {'workers': 1}) self.sch.update_resources(R=2) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.assertFalse(self.sch.get_work('X')['task_id']) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', priority=0) self.sch.update_resources(R=1) self.assertEqual('C', self.sch.get_work('Y')['task_id']) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(WORKER, task_id='B', deps=['A']) self.sch.add_task(WORKER, task_id='A') self.sch.add_task(WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(WORKER, task_id='A', deps=['B']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}) self.sch.add_task(WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task('X', task_id='A', resources={'r': 1}) self.assertEqual('A', self.sch.get_work('X')['task_id']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.add_task('X', task_id='A', resources={ 'r1': 1, 'r2': 1 }, priority=10) self.sch.add_task(WORKER, task_id='B', resources={'r2': 1}) self.sch.add_task(WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(WORKER, task_id='B', resources={ 'r1': 1, 'r2': 1 }, priority=10) self.sch.add_task(WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=5, deps=['A']) self.sch.add_task(WORKER, 'C', priority=10, deps=['B']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) self.sch.add_task(WORKER, 'A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(WORKER, 'A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disable_by_worker(self): self.sch.add_task(WORKER, 'A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(WORKER, 'A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_priority_update_dependency_chain(self): self.sch.add_task(WORKER, 'A', priority=10, deps=['B']) self.sch.add_task(WORKER, 'B', priority=5, deps=['C']) self.sch.add_task(WORKER, 'C', priority=1) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=10, deps=['A']) self.sch.add_task(WORKER, 'C', priority=5, deps=['A']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER + "_2", 'B') response = self.sch.get_work(WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_prefer_more_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', deps=['B']) self.sch.add_task(WORKER, 'D', deps=['B']) self.sch.add_task(WORKER, 'E', deps=['A']) self.check_task_order('BACDE') def test_prefer_readier_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D') self.sch.add_task(WORKER, 'F', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'G', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'E', deps=['D']) self.check_task_order('DABCFGE') def test_ignore_done_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D', priority=1) self.sch.add_task(WORKER, 'E', deps=['C', 'D']) self.sch.add_task(WORKER, 'F', deps=['A', 'B']) self.check_task_order('DCABEF')
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A',)) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assert_(s['worker'].startswith('X on ')) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) for expected_id in ['C', 'A', 'D', 'B']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) for expected_id in ['C', 'A', 'E', 'B', 'D']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) for expected_id in ['B', 'D', 'Z', 'A', 'C']: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)
class CentralPlannerTest(unittest.TestCase): def setUp(self): super(CentralPlannerTest, self).setUp() conf = self.get_scheduler_config() self.sch = CentralPlannerScheduler(**conf) self.time = time.time def get_scheduler_config(self): return { 'retry_delay': 100, 'remove_delay': 1000, 'worker_disconnect_delay': 10, 'disable_persist': 10, 'disable_window': 10, 'disable_failures': 3, 'disable_hard_timeout': 60 * 60, } def tearDown(self): super(CentralPlannerTest, self).tearDown() if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.assertEqual( self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='A', runnable=False) self.assertEqual( self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A', ), worker='X') self.sch.add_task(task_id='C', deps=('A', ), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) for t in range(100): self.setTime(t) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) self.sch.ping(worker=WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_resend_task(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') for _ in range(10): self.assertEqual( 'A', self.sch.get_work(worker=WORKER, current_tasks=[])['task_id']) self.assertEqual( 'B', self.sch.get_work(worker=WORKER, current_tasks=['A'])['task_id']) def test_resend_multiple_tasks(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C') # get A and B running self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.assertEqual('B', self.sch.get_work(worker=WORKER)['task_id']) for _ in range(10): self.assertEqual( 'A', self.sch.get_work(worker=WORKER, current_tasks=[])['task_id']) self.assertEqual( 'A', self.sch.get_work(worker=WORKER, current_tasks=['B'])['task_id']) self.assertEqual( 'B', self.sch.get_work(worker=WORKER, current_tasks=['A'])['task_id']) self.assertEqual( 'C', self.sch.get_work(worker=WORKER, current_tasks=['A', 'B'])['task_id']) def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in range(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_do_not_overwrite_tracking_url_while_running(self): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual( 'trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) # not wiped out by another working scheduling as pending self.sch.add_task(task_id='A', worker='Y', status='PENDING') self.assertEqual( 'trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_do_update_tracking_url_while_running(self): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual( 'trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='stage_2') self.assertEqual( 'stage_2', self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_keep_tracking_url_on_done_and_fail(self): for status in ('DONE', 'FAILED'): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual( 'trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='X', status=status) self.assertEqual( 'trackme', self.sch.task_list(status, '')['A']['tracking_url']) def test_drop_tracking_url_when_rescheduled_while_not_running(self): for status in ('DONE', 'FAILED', 'PENDING'): self.sch.add_task(task_id='A', worker='X', status=status, tracking_url='trackme') self.assertEqual( 'trackme', self.sch.task_list(status, '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='Y', status='PENDING') self.assertIsNone( self.sch.task_list('PENDING', '')['A']['tracking_url']) def test_reset_tracking_url_on_new_run(self): self.sch.add_task(task_id='A', worker='X', status='PENDING', tracking_url='trackme') self.assertEqual( 'trackme', self.sch.task_list('PENDING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='Y', status='RUNNING') self.assertIsNone( self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A', ), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C', ), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task( task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in range(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task( task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_assistant_get_work(self): self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual( self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') # check that the scheduler recognizes tasks as running running_tasks = self.sch.task_list('RUNNING', '') self.assertEqual(len(running_tasks), 1) self.assertEqual(list(running_tasks.keys()), ['A']) self.assertEqual(running_tasks['A']['worker_running'], 'Y') def test_assistant_get_work_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertTrue( self.sch.get_work(worker='Y', assistant=True)['task_id'] is None) def test_task_fails_when_assistant_dies(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual( self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') self.assertEqual(list(self.sch.task_list('RUNNING', '').keys()), ['A']) # Y dies for 50 seconds, X stays alive self.setTime(50) self.sch.ping(worker='X') self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['A']) def test_prune_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.get_work(worker='Y', assistant=True) self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True) # worker X stops communicating, A should be marked for removal self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() # A will now be pruned self.setTime(2000) self.sch.prune() self.assertFalse(list(self.sch.task_list('', ''))) def test_fail_job_from_dead_worker_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_worker('Y', [('assistant', True)]) self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() self.assertEqual(['A'], list(self.sch.task_list('FAILED', '').keys())) def test_assistant_request_runnable_task(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A', runnable=True) self.setTime(600) self.sch.prune() self.assertEqual( 'A', self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_assistant_request_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertIsNone( self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_prune_done_tasks(self, expected=None): self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.sch.add_task(worker=WORKER, task_id='B', deps=['A'], status=DONE) self.sch.add_task(worker=WORKER, task_id='C', deps=['B']) self.setTime(600) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.setTime(2000) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.assertEqual(set(expected or ()), set(self.sch.task_list('', '').keys())) def test_keep_tasks_for_assistant(self): self.sch.get_work( worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.test_prune_done_tasks(['B', 'C']) def test_keep_scheduler_disabled_tasks_for_assistant(self): self.sch.get_work( worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant # create a scheduler disabled task and a worker disabled task for i in range(10): self.sch.add_task(worker=WORKER, task_id='D', status=FAILED) self.sch.add_task(worker=WORKER, task_id='E', status=DISABLED) # scheduler prunes the worker disabled task self.assertEqual(set(['D', 'E']), set(self.sch.task_list(DISABLED, ''))) self.test_prune_done_tasks(['B', 'C', 'D']) def test_keep_failed_tasks_for_assistant(self): self.sch.get_work( worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.sch.add_task(worker=WORKER, task_id='D', status=FAILED, deps='A') self.test_prune_done_tasks(['A', 'B', 'C', 'D']) def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.get_work( worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 2)]) self.sch.add_worker('Y', []) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker='X', task_id='A', priority=4) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_for_second_worker(self): self.sch.get_work( worker='Y') # indicate to the scheduler that Y is active self.sch.add_task(worker='X', task_id='A', resources={'R': 1}) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10) self.sch.add_worker('X', {'workers': 2}) self.sch.add_worker('Y', {'workers': 1}) self.sch.update_resources(R=2) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', priority=0) self.sch.update_resources(R=1) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(worker=WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(worker=WORKER, task_id='B', deps=['A']) self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(worker=WORKER, task_id='A', resources={ 'r1': 1, 'r2': 1 }) self.sch.add_task(worker=WORKER, task_id='B', resources={ 'r1': 1, 'r2': 1 }) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task(worker='X', task_id='A', resources={'r': 1}) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.get_work( worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', resources={ 'r1': 1, 'r2': 1 }, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r2': 1}) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={ 'r1': 1, 'r2': 1 }, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def test_allow_resource_use_while_scheduling(self): self.sch.update_resources(r1=1) self.sch.add_task(worker='SCHEDULING', task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1}, priority=1) self.assertEqual('B', self.sch.get_work(worker=WORKER)['task_id']) def test_stop_locking_resource_for_uninterested_worker(self): self.setTime(0) self.sch.update_resources(r1=1) self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker='LOW_PRIO', task_id='B', resources={'r1': 1}, priority=1) self.assertIsNone(self.sch.get_work(worker='LOW_PRIO')['task_id']) self.setTime(120) self.assertEqual('B', self.sch.get_work(worker='LOW_PRIO')['task_id']) def check_task_order(self, order): for expected_id in order: self.assertEqual( self.sch.get_work(worker=WORKER)['task_id'], expected_id) self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=-20) self.sch.add_task(worker=WORKER, task_id='E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['Z'], priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', deps=['Z'], priority=3) self.sch.add_task(worker=WORKER, task_id='D', priority=2) self.sch.add_task(worker=WORKER, task_id='Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_automatic_re_enable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list('', '')['A']['status']) def test_automatic_re_enable_with_one_failure_allowed(self): self.sch = CentralPlannerScheduler(disable_failures=1, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list('', '')['A']['status']) def test_no_automatic_re_enable_after_manual_disable(self): self.sch = CentralPlannerScheduler(disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) def test_no_automatic_re_enable_after_auto_then_manual_disable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should remain disabled once set self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) def test_disable_by_worker(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_task_list_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '', False).keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '')) def test_task_list_within_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=4) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '').keys())) def test_task_lists_some_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c, status=DONE) for c in 'EFG': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('EFG'), set(sch.task_list('PENDING', '').keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', '')) def test_task_list_filter_by_search(self): self.sch.add_task(worker=WORKER, task_id='test_match_task') self.sch.add_task(worker=WORKER, task_id='test_filter_task') matches = self.sch.task_list('PENDING', '', search='match') self.assertEqual(['test_match_task'], list(matches.keys())) def test_task_list_filter_by_multiple_search_terms(self): self.sch.add_task(worker=WORKER, task_id='abcd') self.sch.add_task(worker=WORKER, task_id='abd') self.sch.add_task(worker=WORKER, task_id='acd') self.sch.add_task(worker=WORKER, task_id='ad') self.sch.add_task(worker=WORKER, task_id='bc') matches = self.sch.task_list('PENDING', '', search='b c') self.assertEqual(set(['abcd', 'bc']), set(matches.keys())) def test_search_results_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) sch.add_task(worker=WORKER, task_id='task_a') sch.add_task(worker=WORKER, task_id='task_b') sch.add_task(worker=WORKER, task_id='task_c') sch.add_task(worker=WORKER, task_id='task_d') self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '', search='a')) self.assertEqual(['task_a'], list( sch.task_list('PENDING', '', search='_a').keys())) def test_priority_update_dependency_chain(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['C']) self.sch.add_task(worker=WORKER, task_id='C', priority=1) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=10, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C') self.sch.add_task(worker=WORKER + "_2", task_id='B') response = self.sch.get_work(worker=WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_pending_downstream_disable(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='C', deps=('B', )) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(0, response['n_pending_tasks']) self.assertEqual(0, response['n_unique_pending']) def test_pending_downstream_failure(self): self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='C', deps=('B', )) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(2, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_task_list_no_deps(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A', )) self.sch.add_task(worker=WORKER, task_id='A') task_list = self.sch.task_list('PENDING', '') self.assertFalse('deps' in task_list['A']) def test_task_first_failure_time(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) time_before_failure = time.time() test_task.add_failure() time_after_failure = time.time() self.assertLessEqual(time_before_failure, test_task.failures.first_failure_time) self.assertGreaterEqual(time_after_failure, test_task.failures.first_failure_time) def test_task_first_failure_time_remains_constant(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) test_task.add_failure() first_failure_time = test_task.failures.first_failure_time test_task.add_failure() self.assertEqual(first_failure_time, test_task.failures.first_failure_time) def test_task_has_excessive_failures(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) self.assertFalse(test_task.has_excessive_failures()) test_task.add_failure() self.assertFalse(test_task.has_excessive_failures()) fake_failure_time = (test_task.failures.first_failure_time - 2 * 60 * 60) test_task.failures.first_failure_time = fake_failure_time self.assertTrue(test_task.has_excessive_failures()) def test_quadratic_behavior(self): """ Test that get_work is not taking linear amount of time. This is of course impossible to test, however, doing reasonable assumptions about hardware. This time should finish in a timely manner. """ # For 10000 it takes almost 1 second on my laptop. Prior to these # changes it was being slow already at NUM_TASKS=300 NUM_TASKS = 10000 for i in range(NUM_TASKS): self.sch.add_task(worker=str(i), task_id=str(i), resources={}) for i in range(NUM_TASKS): self.assertEqual( self.sch.get_work(worker=str(i))['task_id'], str(i)) self.sch.add_task(worker=str(i), task_id=str(i), status=DONE) def test_get_work_speed(self): """ Test that get_work is fast for few workers and many DONEs. In #986, @daveFNbuck reported that he got a slowdown. """ # This took almost 4 minutes without optimization. # Now it takes 10 seconds on my machine. NUM_PENDING = 1000 NUM_DONE = 200000 assert NUM_DONE >= NUM_PENDING for i in range(NUM_PENDING): self.sch.add_task(worker=WORKER, task_id=str(i), resources={}) for i in range(NUM_PENDING, NUM_DONE): self.sch.add_task(worker=WORKER, task_id=str(i), status=DONE) for i in range(NUM_PENDING): res = int(self.sch.get_work(worker=WORKER)['task_id']) self.assertTrue(0 <= res < NUM_PENDING) self.sch.add_task(worker=WORKER, task_id=str(res), status=DONE)
class CentralPlannerTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', status=FAILED) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(WORKER, 'B', deps=('A', )) self.sch.add_task(WORKER, 'A', runnable=False) self.assertEqual( self.sch.get_work(WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(WORKER, 'A', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B') self.sch.add_task(WORKER, 'B', DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A', ), worker='X') self.sch.add_task(task_id='C', deps=('A', ), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(WORKER, 'A') self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') self.sch.add_task(WORKER, 'A', FAILED) for t in xrange(100): self.setTime(t) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) self.sch.ping(WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A') def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in xrange(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A', ), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C', ), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task( task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in xrange(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task( task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_update_resources(self): self.sch.add_task(WORKER, task_id='A', deps=['B']) self.sch.add_task(WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id) self.sch.add_task(WORKER, expected_id, status=DONE) self.assertEqual(self.sch.get_work(WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(WORKER, 'A', priority=10) self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', priority=15) self.sch.add_task(WORKER, 'D', priority=-20) self.sch.add_task(WORKER, 'E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10) self.sch.add_task(WORKER, 'B', priority=5) self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3) self.sch.add_task(WORKER, 'D', priority=2) self.sch.add_task(WORKER, 'Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=5, deps=['A']) self.sch.add_task(WORKER, 'C', priority=10, deps=['B']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_priority_update_dependency_chain(self): self.sch.add_task(WORKER, 'A', priority=10, deps=['B']) self.sch.add_task(WORKER, 'B', priority=5, deps=['C']) self.sch.add_task(WORKER, 'C', priority=1) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(WORKER, 'A', priority=1) self.sch.add_task(WORKER, 'B', priority=10, deps=['A']) self.sch.add_task(WORKER, 'C', priority=5, deps=['A']) self.sch.add_task(WORKER, 'D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER + "_2", 'B') response = self.sch.get_work(WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_prefer_more_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C', deps=['B']) self.sch.add_task(WORKER, 'D', deps=['B']) self.sch.add_task(WORKER, 'E', deps=['A']) self.check_task_order('BACDE') def test_prefer_readier_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D') self.sch.add_task(WORKER, 'F', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'G', deps=['A', 'B', 'C']) self.sch.add_task(WORKER, 'E', deps=['D']) self.check_task_order('DABCFGE') def test_ignore_done_dependents(self): self.sch.add_task(WORKER, 'A') self.sch.add_task(WORKER, 'B') self.sch.add_task(WORKER, 'C') self.sch.add_task(WORKER, 'D', priority=1) self.sch.add_task(WORKER, 'E', deps=['C', 'D']) self.sch.add_task(WORKER, 'F', deps=['A', 'B']) self.check_task_order('DCABEF')
class CentralPlannerTest(unittest.TestCase): def setUp(self): super(CentralPlannerTest, self).setUp() conf = self.get_scheduler_config() self.sch = CentralPlannerScheduler(**conf) self.time = time.time def get_scheduler_config(self): return { 'retry_delay': 100, 'remove_delay': 1000, 'worker_disconnect_delay': 10, 'disable_persist': 10, 'disable_window': 10, 'disable_failures': 3, 'disable_hard_timeout': 60 * 60, } def tearDown(self): super(CentralPlannerTest, self).tearDown() if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_failed_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_broken_dep(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A', runnable=False) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) # can still wait and retry: TODO: do we want this? self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'B') self.sch.add_task(worker=WORKER, task_id='B', status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_two_workers(self): # Worker X wants to build A -> B # Worker Y wants to build A -> C self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.sch.add_task(task_id='B', deps=('A',), worker='X') self.sch.add_task(task_id='C', deps=('A',), worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) # Worker Y is pending on A to be done self.sch.add_task(worker='X', task_id='A', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B') def test_retry(self): # Try to build A but fails, will retry after 100s self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) for t in range(100): self.setTime(t) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) self.sch.ping(worker=WORKER) if t % 10 == 0: self.sch.prune() self.setTime(101) self.sch.prune() self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_resend_task(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') for _ in range(10): self.assertEqual('A', self.sch.get_work(worker=WORKER, current_tasks=[])['task_id']) self.assertEqual('B', self.sch.get_work(worker=WORKER, current_tasks=['A'])['task_id']) def test_resend_multiple_tasks(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C') # get A and B running self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.assertEqual('B', self.sch.get_work(worker=WORKER)['task_id']) for _ in range(10): self.assertEqual('A', self.sch.get_work(worker=WORKER, current_tasks=[])['task_id']) self.assertEqual('A', self.sch.get_work(worker=WORKER, current_tasks=['B'])['task_id']) self.assertEqual('B', self.sch.get_work(worker=WORKER, current_tasks=['A'])['task_id']) self.assertEqual('C', self.sch.get_work(worker=WORKER, current_tasks=['A', 'B'])['task_id']) def test_disconnect_running(self): # X and Y wants to run A. # X starts but does not report back. Y does. # After some timeout, Y will build it instead self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.sch.add_task(task_id='A', worker='Y') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') for t in range(200): self.setTime(t) self.sch.ping(worker='Y') if t % 10 == 0: self.sch.prune() self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A') def test_do_not_overwrite_tracking_url_while_running(self): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual('trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) # not wiped out by another working scheduling as pending self.sch.add_task(task_id='A', worker='Y', status='PENDING') self.assertEqual('trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_do_update_tracking_url_while_running(self): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual('trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='stage_2') self.assertEqual('stage_2', self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_keep_tracking_url_on_done_and_fail(self): for status in ('DONE', 'FAILED'): self.sch.add_task(task_id='A', worker='X', status='RUNNING', tracking_url='trackme') self.assertEqual('trackme', self.sch.task_list('RUNNING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='X', status=status) self.assertEqual('trackme', self.sch.task_list(status, '')['A']['tracking_url']) def test_drop_tracking_url_when_rescheduled_while_not_running(self): for status in ('DONE', 'FAILED', 'PENDING'): self.sch.add_task(task_id='A', worker='X', status=status, tracking_url='trackme') self.assertEqual('trackme', self.sch.task_list(status, '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='Y', status='PENDING') self.assertIsNone(self.sch.task_list('PENDING', '')['A']['tracking_url']) def test_reset_tracking_url_on_new_run(self): self.sch.add_task(task_id='A', worker='X', status='PENDING', tracking_url='trackme') self.assertEqual('trackme', self.sch.task_list('PENDING', '')['A']['tracking_url']) self.sch.add_task(task_id='A', worker='Y', status='RUNNING') self.assertIsNone(self.sch.task_list('RUNNING', '')['A']['tracking_url']) def test_remove_dep(self): # X schedules A -> B, A is broken # Y schedules C -> B: this should remove A as a dep of B self.sch.add_task(task_id='A', worker='X', runnable=False) self.sch.add_task(task_id='B', deps=('A',), worker='X') # X can't build anything self.assertEqual(self.sch.get_work(worker='X')['task_id'], None) self.sch.add_task(task_id='B', deps=('C',), worker='Y') # should reset dependencies for A self.sch.add_task(task_id='C', worker='Y', status=DONE) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_start_time(self): self.setTime(100) self.sch.add_task(worker=WORKER, task_id='A') self.setTime(200) self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.assertEqual(100, self.sch.task_list(DONE, '')['A']['start_time']) def test_last_updated_does_not_change_with_same_status_update(self): for t, status in ((100, PENDING), (300, DONE), (500, DISABLED)): self.setTime(t) self.sch.add_task(worker=WORKER, task_id='A', status=status) self.assertEqual(t, self.sch.task_list(status, '')['A']['last_updated']) self.setTime(t + 100) self.sch.add_task(worker=WORKER, task_id='A', status=status) self.assertEqual(t, self.sch.task_list(status, '')['A']['last_updated']) def test_last_updated_shows_running_start(self): self.setTime(100) self.sch.add_task(worker=WORKER, task_id='A', status=PENDING) self.assertEqual(100, self.sch.task_list(PENDING, '')['A']['last_updated']) self.setTime(200) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.assertEqual(200, self.sch.task_list('RUNNING', '')['A']['last_updated']) self.setTime(300) self.sch.add_task(worker=WORKER, task_id='A', status=PENDING) self.assertEqual(200, self.sch.task_list('RUNNING', '')['A']['last_updated']) def test_last_updated_with_failure_and_recovery(self): self.setTime(100) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.setTime(200) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.assertEqual(200, self.sch.task_list(FAILED, '')['A']['last_updated']) self.setTime(1000) self.sch.prune() self.assertEqual(1000, self.sch.task_list(PENDING, '')['A']['last_updated']) def test_timeout(self): # A bug that was earlier present when restarting the same flow self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.setTime(10000) self.sch.add_task(task_id='A', worker='Y') # Will timeout X but not schedule A for removal for i in range(2000): self.setTime(10000 + i) self.sch.ping(worker='Y') self.sch.add_task(task_id='A', status=DONE, worker='Y') # This used to raise an exception since A was removed def test_disallowed_state_changes(self): # Test that we can not schedule an already running task t = 'A' self.sch.add_task(task_id=t, worker='X') self.assertEqual(self.sch.get_work(worker='X')['task_id'], t) self.sch.add_task(task_id=t, worker='Y') self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None) def test_two_worker_info(self): # Make sure the scheduler returns info that some other worker is running task A self.sch.add_task(worker='X', task_id='A') self.sch.add_task(worker='Y', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') r = self.sch.get_work(worker='Y') self.assertEqual(r['task_id'], None) # Worker Y is pending on A to be done s = r['running_tasks'][0] self.assertEqual(s['task_id'], 'A') self.assertEqual(s['worker'], 'X') def test_assistant_get_work(self): self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') # check that the scheduler recognizes tasks as running running_tasks = self.sch.task_list('RUNNING', '') self.assertEqual(len(running_tasks), 1) self.assertEqual(list(running_tasks.keys()), ['A']) self.assertEqual(running_tasks['A']['worker_running'], 'Y') def test_assistant_get_work_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertTrue(self.sch.get_work(worker='Y', assistant=True)['task_id'] is None) def test_task_fails_when_assistant_dies(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.add_worker('Y', []) self.assertEqual(self.sch.get_work(worker='Y', assistant=True)['task_id'], 'A') self.assertEqual(list(self.sch.task_list('RUNNING', '').keys()), ['A']) # Y dies for 50 seconds, X stays alive self.setTime(50) self.sch.ping(worker='X') self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['A']) def test_prune_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.sch.get_work(worker='Y', assistant=True) self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True) # worker X stops communicating, A should be marked for removal self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() # A will now be pruned self.setTime(2000) self.sch.prune() self.assertFalse(list(self.sch.task_list('', ''))) def test_re_enable_failed_task_assistant(self): self.setTime(0) self.sch.add_worker('X', [('assistant', True)]) self.sch.add_task(worker='X', task_id='A', status=FAILED, assistant=True) # should be failed now self.assertEqual(FAILED, self.sch.task_list('', '')['A']['status']) # resets to PENDING after 100 seconds self.setTime(101) self.sch.ping(worker='X') # worker still alive self.assertEqual('PENDING', self.sch.task_list('', '')['A']['status']) def test_fail_job_from_dead_worker_with_live_assistant(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A') self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_worker('Y', [('assistant', True)]) self.setTime(600) self.sch.ping(worker='Y') self.sch.prune() self.assertEqual(['A'], list(self.sch.task_list('FAILED', '').keys())) def test_assistant_request_runnable_task(self): self.setTime(0) self.sch.add_task(worker='X', task_id='A', runnable=True) self.setTime(600) self.sch.prune() self.assertEqual('A', self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_assistant_request_external_task(self): self.sch.add_task(worker='X', task_id='A', runnable=False) self.assertIsNone(self.sch.get_work(worker='Y', assistant=True)['task_id']) def test_prune_done_tasks(self, expected=None): self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) self.sch.add_task(worker=WORKER, task_id='B', deps=['A'], status=DONE) self.sch.add_task(worker=WORKER, task_id='C', deps=['B']) self.setTime(600) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.setTime(2000) self.sch.ping(worker='ASSISTANT') self.sch.prune() self.assertEqual(set(expected or ()), set(self.sch.task_list('', '').keys())) def test_keep_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.test_prune_done_tasks(['B', 'C']) def test_keep_scheduler_disabled_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant # create a scheduler disabled task and a worker disabled task for i in range(10): self.sch.add_task(worker=WORKER, task_id='D', status=FAILED) self.sch.add_task(worker=WORKER, task_id='E', status=DISABLED) # scheduler prunes the worker disabled task self.assertEqual(set(['D', 'E']), set(self.sch.task_list(DISABLED, ''))) self.test_prune_done_tasks(['B', 'C', 'D']) def test_keep_failed_tasks_for_assistant(self): self.sch.get_work(worker='ASSISTANT', assistant=True) # tell the scheduler this is an assistant self.sch.add_task(worker=WORKER, task_id='D', status=FAILED, deps='A') self.test_prune_done_tasks(['A', 'B', 'C', 'D']) def test_scheduler_resources_none_allow_one(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_resources_none_disallow_two(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.assertFalse(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_insufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=2) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_scheduler_with_sufficient_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 3}) self.sch.update_resources(R1=3) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') def test_scheduler_with_resources_used(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 1}) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1}) self.sch.update_resources(R1=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_scheduler_overprovisioned_on_other_resource(self): self.sch.add_task(worker='X', task_id='A', resources={'R1': 2}) self.sch.update_resources(R1=2) self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2}) self.sch.update_resources(R1=1, R2=2) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B') def test_scheduler_with_priority_and_competing_resources(self): self.sch.add_task(worker='X', task_id='A') self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A') self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) self.sch.add_task(worker='Y', task_id='D', priority=0) self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D') def test_do_not_lock_resources_when_not_ready(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_when_one_of_multiple_workers_is_ready(self): self.sch.get_work(worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 2)]) self.sch.add_worker('Y', []) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_do_not_lock_resources_while_running_higher_priority(self): """ Test to make sure that resources won't go unused waiting on workers """ self.sch.add_task(worker='X', task_id='A', priority=10) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_while_running_lower_priority(self): """ Make sure resources will be made available while working on lower priority tasks """ self.sch.add_task(worker='X', task_id='A', priority=4) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1) self.sch.update_resources(R=1) self.sch.add_worker('X', [('workers', 1)]) self.assertFalse(self.sch.get_work(worker='Y')['task_id']) def test_lock_resources_for_second_worker(self): self.sch.get_work(worker='Y') # indicate to the scheduler that Y is active self.sch.add_task(worker='X', task_id='A', resources={'R': 1}) self.sch.add_task(worker='X', task_id='B', resources={'R': 1}) self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10) self.sch.add_worker('X', {'workers': 2}) self.sch.add_worker('Y', {'workers': 1}) self.sch.update_resources(R=2) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.assertFalse(self.sch.get_work(worker='X')['task_id']) def test_can_work_on_lower_priority_while_waiting_for_resources(self): self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10) self.sch.add_task(worker='Y', task_id='C', priority=0) self.sch.update_resources(R=1) self.assertEqual('C', self.sch.get_work(worker='Y')['task_id']) def test_priority_update_with_pruning(self): self.setTime(0) self.sch.add_task(task_id='A', worker='X') self.setTime(50) # after worker disconnects self.sch.prune() self.sch.add_task(task_id='B', deps=['A'], worker='X') self.setTime(2000) # after remove for task A self.sch.prune() # Here task A that B depends on is missing self.sch.add_task(worker=WORKER, task_id='C', deps=['B'], priority=100) self.sch.add_task(worker=WORKER, task_id='B', deps=['A']) self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='D', priority=10) self.check_task_order('ABCD') def test_update_resources(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}) self.sch.update_resources(r=1) # B requires too many resources, we can't schedule self.check_task_order([]) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) # now we have enough resources self.check_task_order(['B', 'A']) def test_hendle_multiple_resources(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1, 'r2': 1}) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1, 'r2': 1}) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=2, r2=1) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.check_task_order('C') def test_single_resource_lock(self): self.sch.add_task(worker='X', task_id='A', resources={'r': 1}) self.assertEqual('A', self.sch.get_work(worker='X')['task_id']) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r': 1}) self.sch.update_resources(r=2) # Should wait for 2 units of r to be available for B before scheduling C self.check_task_order([]) def test_no_lock_if_too_many_resources_required(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r': 2}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1}) self.sch.update_resources(r=1) self.check_task_order('B') def test_multiple_resources_lock(self): self.sch.get_work(worker='X') # indicate to the scheduler that X is active self.sch.add_task(worker='X', task_id='A', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r2': 1}) self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1}) self.sch.update_resources(r1=1, r2=1) # should preserve both resources for worker 'X' self.check_task_order([]) def test_multiple_resources_no_lock(self): self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='C', resources={'r2': 1}) self.sch.update_resources(r1=1, r2=2) self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) # C doesn't block B, so it can go first self.check_task_order('C') def test_allow_resource_use_while_scheduling(self): self.sch.update_resources(r1=1) self.sch.add_task(worker='SCHEDULING', task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1}, priority=1) self.assertEqual('B', self.sch.get_work(worker=WORKER)['task_id']) def test_stop_locking_resource_for_uninterested_worker(self): self.setTime(0) self.sch.update_resources(r1=1) self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10) self.sch.add_task(worker='LOW_PRIO', task_id='B', resources={'r1': 1}, priority=1) self.assertIsNone(self.sch.get_work(worker='LOW_PRIO')['task_id']) self.setTime(120) self.assertEqual('B', self.sch.get_work(worker='LOW_PRIO')['task_id']) def check_task_order(self, order): for expected_id in order: self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], expected_id) self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE) self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_priorities(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=9) self.check_task_order(['C', 'A', 'D', 'B']) def test_priorities_default_and_negative(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10) self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C', priority=15) self.sch.add_task(worker=WORKER, task_id='D', priority=-20) self.sch.add_task(worker=WORKER, task_id='E', priority=1) self.check_task_order(['C', 'A', 'E', 'B', 'D']) def test_priorities_and_dependencies(self): self.sch.add_task(worker=WORKER, task_id='A', deps=['Z'], priority=10) self.sch.add_task(worker=WORKER, task_id='B', priority=5) self.sch.add_task(worker=WORKER, task_id='C', deps=['Z'], priority=3) self.sch.add_task(worker=WORKER, task_id='D', priority=2) self.sch.add_task(worker=WORKER, task_id='Z', priority=1) self.check_task_order(['Z', 'A', 'B', 'C', 'D']) def test_priority_update_dependency_after_scheduling(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'C', 'D']) def test_disable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_reenable(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disable_and_reenable_and_disable_again(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.re_enable_task('A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be still enabled self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('FAILED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None) def test_disable_and_done(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.assertEqual(len(self.sch.task_list('FAILED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A', status=DONE) # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.assertEqual(len(self.sch.task_list('DONE', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_automatic_re_enable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list('', '')['A']['status']) def test_automatic_re_enable_with_one_failure_allowed(self): self.sch = CentralPlannerScheduler(disable_failures=1, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # re-enables after 100 seconds self.setTime(101) self.assertEqual(FAILED, self.sch.task_list('', '')['A']['status']) def test_no_automatic_re_enable_after_manual_disable(self): self.sch = CentralPlannerScheduler(disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) def test_no_automatic_re_enable_after_auto_then_manual_disable(self): self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100) self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # should be disabled now self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should remain disabled once set self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) # should not re-enable after 100 seconds self.setTime(101) self.assertEqual(DISABLED, self.sch.task_list('', '')['A']['status']) def test_disable_by_worker(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1) self.sch.add_task(worker=WORKER, task_id='A') # should be enabled at this point self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0) self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A') def test_disable_worker(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.disable_worker(worker=WORKER) work = self.sch.get_work(worker=WORKER) self.assertEqual(0, work['n_unique_pending']) self.assertEqual(0, work['n_pending_tasks']) self.assertIsNone(work['task_id']) def test_disable_worker_leaves_jobs_running(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.get_work(worker=WORKER) self.sch.disable_worker(worker=WORKER) self.assertEqual(['A'], list(self.sch.task_list('RUNNING', '').keys())) self.assertEqual(['A'], list(self.sch.worker_list()[0]['running'].keys())) def test_disable_worker_cannot_pick_up_failed_jobs(self): self.setTime(0) self.sch.add_task(worker=WORKER, task_id='A') self.sch.get_work(worker=WORKER) self.sch.disable_worker(worker=WORKER) self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) # increase time and prune to make the job pending again self.setTime(1000) self.sch.ping(worker=WORKER) self.sch.prune() # we won't try the job again self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) # not even if other stuff is pending, changing the pending tasks code path self.sch.add_task(worker='other_worker', task_id='B') self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) def test_disable_worker_cannot_continue_scheduling(self): self.sch.disable_worker(worker=WORKER) self.sch.add_task(worker=WORKER, task_id='A') self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) def test_disable_worker_can_finish_task(self, new_status=DONE, new_deps=[]): self.sch.add_task(worker=WORKER, task_id='A') self.assertEqual('A', self.sch.get_work(worker=WORKER)['task_id']) self.sch.disable_worker(worker=WORKER) self.assertEqual(['A'], list(self.sch.task_list('RUNNING', '').keys())) for dep in new_deps: self.sch.add_task(worker=WORKER, task_id=dep, status='PENDING') self.sch.add_task(worker=WORKER, task_id='A', status=new_status, new_deps=new_deps) self.assertFalse(self.sch.task_list('RUNNING', '').keys()) self.assertEqual(['A'], list(self.sch.task_list(new_status, '').keys())) self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id']) for task in self.sch.task_list('', '').values(): self.assertFalse(task['workers']) def test_disable_worker_can_fail_task(self): self.test_disable_worker_can_finish_task(new_status=FAILED) def test_disable_worker_stays_disabled_on_new_deps(self): self.test_disable_worker_can_finish_task(new_status='PENDING', new_deps=['B', 'C']) def test_prune_worker(self): self.setTime(1) self.sch.add_worker(worker=WORKER, info={}) self.setTime(10000) self.sch.prune() self.setTime(20000) self.sch.prune() self.assertFalse(self.sch.worker_list()) def test_task_list_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '', False).keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '')) def test_task_list_within_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=4) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '').keys())) def test_task_lists_some_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for c in 'ABCD': sch.add_task(worker=WORKER, task_id=c, status=DONE) for c in 'EFG': sch.add_task(worker=WORKER, task_id=c) self.assertEqual(set('EFG'), set(sch.task_list('PENDING', '').keys())) self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', '')) def add_task(self, family, **params): task_id = str(hash((family, str(params)))) # use an unhelpful task id self.sch.add_task(worker=WORKER, family=family, params=params, task_id=task_id) return task_id def search_pending(self, term, expected_keys): actual_keys = set(self.sch.task_list('PENDING', '', search=term).keys()) self.assertEqual(expected_keys, actual_keys) def test_task_list_filter_by_search_family_name(self): task1 = self.add_task('MySpecialTask') task2 = self.add_task('OtherSpecialTask') self.search_pending('Special', {task1, task2}) self.search_pending('Task', {task1, task2}) self.search_pending('My', {task1}) self.search_pending('Other', {task2}) def test_task_list_filter_by_search_long_family_name(self): task = self.add_task('TaskClassWithAVeryLongNameAndDistinctEndingUUDDLRLRAB') self.search_pending('UUDDLRLRAB', {task}) def test_task_list_filter_by_param_name(self): task1 = self.add_task('ClassA', day='2016-02-01') task2 = self.add_task('ClassB', hour='2016-02-01T12') self.search_pending('day', {task1}) self.search_pending('hour', {task2}) def test_task_list_filter_by_long_param_name(self): task = self.add_task('ClassA', a_very_long_param_name_ending_with_uuddlrlrab='2016-02-01') self.search_pending('uuddlrlrab', {task}) def test_task_list_filter_by_param_value(self): task1 = self.add_task('ClassA', day='2016-02-01') task2 = self.add_task('ClassB', hour='2016-02-01T12') self.search_pending('2016-02-01', {task1, task2}) self.search_pending('T12', {task2}) def test_task_list_filter_by_long_param_value(self): task = self.add_task('ClassA', param='a_very_long_param_value_ending_with_uuddlrlrab') self.search_pending('uuddlrlrab', {task}) def test_task_list_filter_by_param_name_value_pair(self): task = self.add_task('ClassA', param='value') self.search_pending('param=value', {task}) def test_task_list_does_not_filter_by_task_id(self): task = self.add_task('Class') self.search_pending(task, set()) def test_task_list_filter_by_multiple_search_terms(self): expected = self.add_task('ClassA', day='2016-02-01', num='5') self.add_task('ClassA', day='2016-03-01', num='5') self.add_task('ClassB', day='2016-02-01', num='5') self.add_task('ClassA', day='2016-02-01', val='5') self.search_pending('ClassA 2016-02-01 num', {expected}) def test_search_results_beyond_limit(self): sch = CentralPlannerScheduler(max_shown_tasks=3) for i in range(4): sch.add_task(worker=WORKER, family='Test', params={'p': str(i)}, task_id='Test_%i' % i) self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', '', search='Test')) self.assertEqual(['Test_0'], list(sch.task_list('PENDING', '', search='0').keys())) def test_priority_update_dependency_chain(self): self.sch.add_task(worker=WORKER, task_id='A', priority=10, deps=['B']) self.sch.add_task(worker=WORKER, task_id='B', priority=5, deps=['C']) self.sch.add_task(worker=WORKER, task_id='C', priority=1) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['C', 'B', 'A', 'D']) def test_priority_no_decrease_with_multiple_updates(self): self.sch.add_task(worker=WORKER, task_id='A', priority=1) self.sch.add_task(worker=WORKER, task_id='B', priority=10, deps=['A']) self.sch.add_task(worker=WORKER, task_id='C', priority=5, deps=['A']) self.sch.add_task(worker=WORKER, task_id='D', priority=6) self.check_task_order(['A', 'B', 'D', 'C']) def test_unique_tasks(self): self.sch.add_task(worker=WORKER, task_id='A') self.sch.add_task(worker=WORKER, task_id='B') self.sch.add_task(worker=WORKER, task_id='C') self.sch.add_task(worker=WORKER + "_2", task_id='B') response = self.sch.get_work(worker=WORKER) self.assertEqual(3, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_pending_downstream_disable(self): self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='C', deps=('B',)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(0, response['n_pending_tasks']) self.assertEqual(0, response['n_unique_pending']) def test_pending_downstream_failure(self): self.sch.add_task(worker=WORKER, task_id='A', status=FAILED) self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='C', deps=('B',)) response = self.sch.get_work(worker=WORKER) self.assertTrue(response['task_id'] is None) self.assertEqual(2, response['n_pending_tasks']) self.assertEqual(2, response['n_unique_pending']) def test_task_list_no_deps(self): self.sch.add_task(worker=WORKER, task_id='B', deps=('A',)) self.sch.add_task(worker=WORKER, task_id='A') task_list = self.sch.task_list('PENDING', '') self.assertFalse('deps' in task_list['A']) def test_task_first_failure_time(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) time_before_failure = time.time() test_task.add_failure() time_after_failure = time.time() self.assertLessEqual(time_before_failure, test_task.failures.first_failure_time) self.assertGreaterEqual(time_after_failure, test_task.failures.first_failure_time) def test_task_first_failure_time_remains_constant(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) test_task.add_failure() first_failure_time = test_task.failures.first_failure_time test_task.add_failure() self.assertEqual(first_failure_time, test_task.failures.first_failure_time) def test_task_has_excessive_failures(self): self.sch.add_task(worker=WORKER, task_id='A') test_task = self.sch._state.get_task('A') self.assertIsNone(test_task.failures.first_failure_time) self.assertFalse(test_task.has_excessive_failures()) test_task.add_failure() self.assertFalse(test_task.has_excessive_failures()) fake_failure_time = (test_task.failures.first_failure_time - 2 * 60 * 60) test_task.failures.first_failure_time = fake_failure_time self.assertTrue(test_task.has_excessive_failures()) def test_quadratic_behavior(self): """ Test that get_work is not taking linear amount of time. This is of course impossible to test, however, doing reasonable assumptions about hardware. This time should finish in a timely manner. """ # For 10000 it takes almost 1 second on my laptop. Prior to these # changes it was being slow already at NUM_TASKS=300 NUM_TASKS = 10000 for i in range(NUM_TASKS): self.sch.add_task(worker=str(i), task_id=str(i), resources={}) for i in range(NUM_TASKS): self.assertEqual(self.sch.get_work(worker=str(i))['task_id'], str(i)) self.sch.add_task(worker=str(i), task_id=str(i), status=DONE) def test_get_work_speed(self): """ Test that get_work is fast for few workers and many DONEs. In #986, @daveFNbuck reported that he got a slowdown. """ # This took almost 4 minutes without optimization. # Now it takes 10 seconds on my machine. NUM_PENDING = 1000 NUM_DONE = 200000 assert NUM_DONE >= NUM_PENDING for i in range(NUM_PENDING): self.sch.add_task(worker=WORKER, task_id=str(i), resources={}) for i in range(NUM_PENDING, NUM_DONE): self.sch.add_task(worker=WORKER, task_id=str(i), status=DONE) for i in range(NUM_PENDING): res = int(self.sch.get_work(worker=WORKER)['task_id']) self.assertTrue(0 <= res < NUM_PENDING) self.sch.add_task(worker=WORKER, task_id=str(res), status=DONE)
class WorkerTest(unittest.TestCase): def run(self, result=None): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.time = time.time with Worker(scheduler=self.sch, worker_id="X") as w, Worker(scheduler=self.sch, worker_id="Y") as w2: self.w = w self.w2 = w2 super(WorkerTest, self).run(result) if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) try: self.w.handle_interrupt(signal.SIGUSR1, None) except AttributeError: raise unittest.SkipTest("signal.SIGUSR1 not found on this system") self.w.run() self.assertFalse(d.complete()) def test_disabled_shutdown_hook(self): w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True) with w: try: # try to kill the worker! os.kill(os.getpid(), signal.SIGUSR1) except AttributeError: raise unittest.SkipTest("signal.SIGUSR1 not found on this system") # try to kill the worker... AGAIN! t = SuicidalWorker(signal.SIGUSR1) w.add(t) w.run() # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals self.assertEqual(list(self.sch.task_list("DONE", "").keys()), [t.task_id]) @with_config({"worker": {"no_install_shutdown_handler": "True"}}) def test_can_run_luigi_in_thread(self): class A(DummyTask): pass task = A() # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread. # So if we do not disable the shutdown handler, this would fail. t = threading.Thread(target=lambda: luigi.build([task], local_scheduler=True)) t.start() t.join() self.assertTrue(task.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_externalized_dep(self): class A(Task): has_run = False def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(A): def requires(self): return luigi.task.externalize(a) b = B() self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_legacy_externalized_dep(self): class A(Task): has_run = False def run(self): self.has_run = True def complete(self): return self.has_run a = A() a.run = NotImplemented class B(A): def requires(self): return a b = B() self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_type_error_in_tracking_run_deprecated(self): class A(Task): num_runs = 0 def complete(self): return False def run(self, tracking_url_callback=None): self.num_runs += 1 raise TypeError("bad type") a = A() self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) # Should only run and fail once, not retry because of the type error self.assertEqual(1, a.num_runs) def test_tracking_url(self): tracking_url = "http://test_url.com/" class A(Task): has_run = False def complete(self): return self.has_run def run(self): self.set_tracking_url(tracking_url) self.has_run = True a = A() self.assertTrue(self.w.add(a)) self.assertTrue(self.w.run()) tasks = self.sch.task_list("DONE", "") self.assertEqual(1, len(tasks)) self.assertEqual(tracking_url, tasks[a.task_id]["tracking_url"]) def test_fail(self): class CustomException(BaseException): def __init__(self, msg): self.msg = msg class A(Task): def run(self): self.has_run = True raise CustomException("bad things") def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse(self.w.run()) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_gets_missed_work(self): class A(Task): done = False def complete(self): return self.done def run(self): self.done = True a = A() self.assertTrue(self.w.add(a)) # simulate a missed get_work response self.assertEqual(a.task_id, self.sch.get_work(worker="X")["task_id"]) self.assertTrue(self.w.run()) self.assertTrue(a.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_fails_registering_signal(self): with mock.patch("luigi.worker.signal", spec=["signal"]): # mock will raise an attribute error getting signal.SIGUSR1 Worker() def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id="X", max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(str(eb), "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="X") as w, Worker(scheduler=sch, worker_id="Y") as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(str(eb), "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="X") as w, Worker(scheduler=sch, worker_id="Y") as w2: self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug("running A") time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug("running B") super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="X", keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id="Y", keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug("running A") time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug("running B") super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="X", keep_alive=True, count_uniques=True) as w: with Worker(scheduler=sch, worker_id="Y", keep_alive=True, count_uniques=True, wait_interval=0.1) as w2: self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], a.task_id) self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class D(DummyTask): pass d = D() class C(DummyTask): def requires(self): return d c = C() class B(DummyTask): def requires(self): return c, a b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) with Worker(scheduler=sch, worker_id="foo") as w: self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertTrue(d.has_run) self.assertFalse(a.has_run)