def test_race_condition_when_parent_queues_child(
        app1, app2, job_id1, log, tasks_json_tmpfile):
    """Child must stay queued while its parent is not yet completed.

    Simulates the race where the parent queues the child and the child runs
    before the parent gets a chance to mark itself as completed.  The parent's
    execute lock is held for the duration of the simulated race.
    """
    qb.set_state(app1, job_id1, pending=True)
    lock = qb.obtain_execute_lock(app1, job_id1)
    assert lock
    # Release the lock even if one of the checks below fails, so a failing
    # assertion cannot leave the parent's execute lock held.
    try:
        # Completing the parent queues the child; the parent is then put back
        # into the pending state while the child is still queued.
        qb.set_state(app1, job_id1, completed=True)
        qb.set_state(app1, job_id1, pending=True)
        validate_one_queued_task(app2, job_id1)
        validate_zero_queued_task(app1)

        # Running the child now should exit gracefully: it must not complete
        # the child and must not queue the parent.  The child stays in its
        # queue until the parent state is completed.
        run_code(log, tasks_json_tmpfile, app2)
        validate_zero_queued_task(app1)
        validate_one_queued_task(app2, job_id1)

        # Mark the parent completed without letting it queue children again.
        qb.set_state(
            app1, job_id1, completed=True,
            _disable_maybe_queue_children_for_testing_only=True)
    finally:
        lock.release()
    validate_one_completed_task(app1, job_id1)
    validate_one_queued_task(app2, job_id1)

    # With the parent completed, the child can finally run to completion.
    run_code(log, tasks_json_tmpfile, app2)
    validate_one_completed_task(app1, job_id1)
    validate_one_completed_task(app2, job_id1)
def test_pull_tasks1(app1, app2, job_id1, log, tasks_json_tmpfile):
    """
    Parents are generated and executed before their children
    (The Bubble Up and then Bubble Down test)

    Given A --> B:
        queueing and running B completes nothing, but leaves A queued;
        running B again changes nothing;
        running A completes A and queues B;
        running B then completes B.
    """
    enqueue(app2, job_id1)

    # Bubble up: however often the child runs, only the parent ends up queued.
    for _attempt in range(2):
        run_code(log, tasks_json_tmpfile, app2)
        validate_one_queued_task(app1, job_id1)
        validate_zero_queued_task(app2)

    # Bubble down: the parent completes and hands work back to the child.
    run_code(log, tasks_json_tmpfile, app1)
    validate_one_completed_task(app1, job_id1)
    validate_one_queued_task(app2, job_id1)

    run_code(log, tasks_json_tmpfile, app2)
    validate_one_completed_task(app2, job_id1)
def test_rerun_push_tasks1(app1, app2, job_id1):
    """Re-adding a completed parent resets its child and re-runs the pipeline.

    Exercises the recursive deletion of parent status on child nodes.
    """

    def _complete_parent_then_child():
        # Complete app 1; this queues its child.
        qb.set_state(app1, job_id1, completed=True)
        consume_queue(app1)
        validate_one_completed_task(app1, job_id1)
        validate_one_queued_task(app2, job_id1)
        # Complete app 2.
        qb.set_state(app2, job_id1, completed=True)
        consume_queue(app2)
        validate_one_completed_task(app1, job_id1)
        validate_one_completed_task(app2, job_id1)

    # First pass through the pipeline.
    enqueue(app1, job_id1)
    _complete_parent_then_child()

    # Re-add app 1: it is queued again and the child goes back to pending
    # without being queued.
    qb.readd_subtask(app1, job_id1)
    validate_one_queued_task(app1, job_id1)
    validate_zero_queued_task(app2)
    nose.tools.assert_true(
        qb.check_state(app2, job_id1, pending=True))

    # Second pass behaves exactly like the first.
    _complete_parent_then_child()
def test_bypass_scheduler(bash1, job_id1, log, tasks_json_tmpfile):
    """Run a job with --bypass_scheduler and check nothing is queued or stored.

    The queue is expected to be empty before and after the run, and
    validate_not_exists is expected to pass for the job afterwards.
    """
    validate_zero_queued_task(bash1)
    cli_args = '--bypass_scheduler --job_id %s --bash_cmd echo 123' % job_id1
    run_code(log, tasks_json_tmpfile, bash1, cli_args)
    validate_zero_queued_task(bash1)
    validate_not_exists(bash1, job_id1)
def test_stolos_submit(app1, job_id1, tasks_json_tmpfile):
    """Submitting the same job twice via stolos-submit leaves one queued task."""
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    submit_cmd = "stolos-submit -a %s -j %s" % (app1, job_id1)

    # Asking for help is expected to surface as a CalledProcessError.
    with nt.assert_raises(CalledProcessError):
        run("stolos-submit -h", tasks_json_tmpfile)
    validate_zero_queued_task(app1)

    # The second submission must not add a second queue entry.
    for _attempt in range(2):
        run(submit_cmd, tasks_json_tmpfile)
        validate_one_queued_task(app1, job_id1)
def test_stolos_submit_readd(app1, job_id1, tasks_json_tmpfile):
    """A failed job is only re-queued by stolos-submit when --readd is given."""
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    plain_submit = "stolos-submit -a %s -j %s" % (app1, job_id1)
    readd_submit = "stolos-submit -a %s -j %s --readd" % (app1, job_id1)

    qb.set_state(app1, job_id1, failed=True)
    validate_zero_queued_task(app1)

    # Without --readd the failed job is left alone.
    run(plain_submit, tasks_json_tmpfile)
    validate_zero_queued_task(app1)

    # With --readd the job lands in the queue.
    run(readd_submit, tasks_json_tmpfile)
    validate_one_queued_task(app1, job_id1)
def test_stolos_submit(app1, job_id1, tasks_json_tmpfile):
    """Submitting the same job twice via stolos-submit leaves one queued task."""
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    submit_cmd = "stolos-submit -a %s -j %s" % (app1, job_id1)

    # Asking for help is expected to surface as a CalledProcessError.
    with nt.assert_raises(CalledProcessError):
        run("stolos-submit -h", tasks_json_tmpfile)
    validate_zero_queued_task(app1)

    # The second submission must not add a second queue entry.
    for _attempt in range(2):
        run(submit_cmd, tasks_json_tmpfile)
        validate_one_queued_task(app1, job_id1)
def test_stolos_submit_readd(app1, job_id1, tasks_json_tmpfile):
    """A failed job is only re-queued by stolos-submit when --readd is given."""
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    plain_submit = "stolos-submit -a %s -j %s" % (app1, job_id1)
    readd_submit = "stolos-submit -a %s -j %s --readd" % (app1, job_id1)

    qb.set_state(app1, job_id1, failed=True)
    validate_zero_queued_task(app1)

    # Without --readd the failed job is left alone.
    run(plain_submit, tasks_json_tmpfile)
    validate_zero_queued_task(app1)

    # With --readd the job lands in the queue.
    run(readd_submit, tasks_json_tmpfile)
    validate_one_queued_task(app1, job_id1)
def test_run_failing_spark_given_specific_job_id(
        bash1, job_id1, log, tasks_json_tmpfile):
    """
    A task launched with an explicit --job_id should still end up queued
    when the task itself fails
    """
    # A plugin failure without a job_id raises and queues nothing.
    with nose.tools.assert_raises(Exception):
        run_code(log, tasks_json_tmpfile, bash1, '--pluginfail')
    validate_zero_queued_task(bash1)

    # A bogus bash command with an explicit job_id leaves that job queued.
    failing_args = '--job_id %s --bash_cmd kasdfkajsdfajaja' % job_id1
    run_code(log, tasks_json_tmpfile, bash1, failing_args)
    validate_one_queued_task(bash1, job_id1)
def test_bash1(bash1, job_id1, log, tasks_json_tmpfile):
    """A bash task stays queued after a failing run and leaves the queue
    after a successful one.
    """
    failing_args = '--bash_cmd thiscommandshouldfail'
    passing_args = '--bash_cmd echo 123'

    # Put the task in the queue.
    enqueue(bash1, job_id1)
    validate_one_queued_task(bash1, job_id1)

    # A failing command keeps the task queued.
    run_code(log, tasks_json_tmpfile, bash1, failing_args)
    validate_one_queued_task(bash1, job_id1)

    # A successful command empties the queue.
    run_code(log, tasks_json_tmpfile, bash1, passing_args)
    validate_zero_queued_task(bash1)
def test_invalid_queued_job_id(app4, depends_on_job_id1,
                               log, tasks_json_tmpfile):
    """A queued job_id that does not match its app is failed and de-queued."""
    # This job_id does not match the app.
    mismatched_job_id = depends_on_job_id1

    # Manually bypass the decorator that validates job_id, then push the
    # job_id straight onto the app's queue.
    qb._set_state_unsafe(app4, mismatched_job_id, pending=True)
    app_queue = qb.get_qbclient().LockingQueue(app4)
    app_queue.put(mismatched_job_id)
    validate_one_queued_task(app4, mismatched_job_id)

    # Running the app should mark the job failed and drain the queue.
    run_code(log, tasks_json_tmpfile, app4, '--bash_cmd echo 123')
    validate_one_failed_task(app4, mismatched_job_id)
    validate_zero_queued_task(app4)
def test_stolos_submit_multiple_jobs(app1, app2, job_id1, job_id2,
                                     tasks_json_tmpfile):
    """stolos-submit queues every job_id for every app, and a repeated
    submission leaves the queues unchanged.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    apps = (app1, app2)
    for app in apps:
        validate_zero_queued_task(app)

    submit_cmd = "stolos-submit -a %s %s -j %s %s" % (
        app1, app2, job_id1, job_id2)
    # Submit twice; the queue contents must be the same after each call.
    for _attempt in range(2):
        run(submit_cmd, tasks_json_tmpfile)
        for app in apps:
            validate_n_queued_task(app, job_id1, job_id2)
def test_stolos_submit_multiple_jobs(app1, app2, job_id1, job_id2,
                                     tasks_json_tmpfile):
    """stolos-submit queues every job_id for every app, and a repeated
    submission leaves the queues unchanged.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    apps = (app1, app2)
    for app in apps:
        validate_zero_queued_task(app)

    submit_cmd = "stolos-submit -a %s %s -j %s %s" % (
        app1, app2, job_id1, job_id2)
    # Submit twice; the queue contents must be the same after each call.
    for _attempt in range(2):
        run(submit_cmd, tasks_json_tmpfile)
        for app in apps:
            validate_n_queued_task(app, job_id1, job_id2)
def test_complex_dependencies_pull_push(
        depends_on1, depends_on_job_id1, log, tasks_json_tmpfile):
    """A child with several parents is queued only once its last parent
    completes, and can then run to completion.
    """
    child_job_id = depends_on_job_id1
    echo_args = '--bash_cmd echo 123'

    # Pull: queue and run the child before any parent has completed.
    enqueue(depends_on1, child_job_id)
    run_code(log, tasks_json_tmpfile, depends_on1, echo_args)

    ordered_parents = list(
        api.topological_sort(api.get_parents(depends_on1, child_job_id)))

    # Push: completing every parent but the last must not queue the child.
    # NOTE(review): the queue check is assumed to run after each parent.
    for parent_app, parent_job_id in ordered_parents[:-1]:
        qb.set_state(parent_app, parent_job_id, completed=True)
        validate_zero_queued_task(depends_on1)

    # Completing the final parent queues the child.
    final_parent = ordered_parents[-1]
    qb.set_state(*final_parent, completed=True)
    validate_one_queued_task(depends_on1, child_job_id)

    run_code(log, tasks_json_tmpfile, depends_on1, echo_args)
    validate_one_completed_task(depends_on1, child_job_id)
def test_readd_subtask(app1, job_id1, job_id2):
    """Re-adding a queued job raises JobAlreadyQueued; re-adding a job that
    is pending but not queued puts it in the queue.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    tt.validate_zero_queued_task(app1)

    # First readd queues the job.
    api.readd_subtask(app1, job_id1)
    tt.validate_one_queued_task(app1, job_id1)

    # Second readd of the same job errors out and leaves one queued job.
    with nt.assert_raises(JobAlreadyQueued):
        api.readd_subtask(app1, job_id1)
    tt.validate_one_queued_task(app1, job_id1)

    # Register a second job without queueing it ...
    api.maybe_add_subtask(app1, job_id2, queue=False)
    tt.validate_one_queued_task(app1, job_id1)

    # ... then queue it through readd.
    api.readd_subtask(app1, job_id2)
    tt.validate_n_queued_task(app1, job_id1, job_id2)
def test_readd_subtask(app1, job_id1, job_id2):
    """Re-adding a queued job raises JobAlreadyQueued; re-adding a job that
    is pending but not queued puts it in the queue.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    tt.validate_zero_queued_task(app1)

    # First readd queues the job.
    api.readd_subtask(app1, job_id1)
    tt.validate_one_queued_task(app1, job_id1)

    # Second readd of the same job errors out and leaves one queued job.
    with nt.assert_raises(JobAlreadyQueued):
        api.readd_subtask(app1, job_id1)
    tt.validate_one_queued_task(app1, job_id1)

    # Register a second job without queueing it ...
    api.maybe_add_subtask(app1, job_id2, queue=False)
    tt.validate_one_queued_task(app1, job_id1)

    # ... then queue it through readd.
    api.readd_subtask(app1, job_id2)
    tt.validate_n_queued_task(app1, job_id1, job_id2)
def test_complex_dependencies_readd(depends_on1, depends_on_job_id1,
                                    log, tasks_json_tmpfile):
    """Re-adding a root parent re-queues the dependent child.

    Runs the whole dependency pipeline once, re-adds the first parent in
    topological order, completes it and its other children, and checks that
    the dependent app is queued and can complete again.
    """
    job_id = depends_on_job_id1
    echo_args = '--bash_cmd echo 123'

    # Mark everything completed; ensure only the last completed parent
    # queues the child.
    # NOTE(review): the queue check is assumed to run after each parent.
    parents = list(api.topological_sort(api.get_parents(depends_on1, job_id)))
    for parent, pjob_id in parents[:-1]:
        qb.set_state(parent, pjob_id, completed=True)
        validate_zero_queued_task(depends_on1)
    last_parent, last_pjob_id = parents[-1][0], parents[-1][1]
    qb.set_state(last_parent, last_pjob_id, completed=True)
    # --> parents should queue our app
    validate_one_queued_task(depends_on1, job_id)
    consume_queue(depends_on1)
    qb.set_state(depends_on1, job_id, completed=True)
    validate_one_completed_task(depends_on1, job_id)

    # Ran through the pipeline once; now repeat with a readd.
    # `warning` is used instead of the deprecated `warn` alias.
    log.warning("OK... Now try complex dependency test with a readd")

    # Re-complete the very first parent.
    # We assume that this parent is a root task.
    parent, pjob_id = parents[0]
    qb.readd_subtask(parent, pjob_id)
    validate_one_queued_task(parent, pjob_id)
    validate_zero_queued_task(depends_on1)
    consume_queue(parent)
    qb.set_state(parent, pjob_id, completed=True)
    validate_one_completed_task(parent, pjob_id)

    # The re-completed parent re-queues children that may themselves be
    # parents of depends_on1, so complete those too.
    for child_app, child_job_id in api.get_children(parent, pjob_id, False):
        if child_app == depends_on1:
            continue
        consume_queue(child_app)
        qb.set_state(child_app, child_job_id, completed=True)

    # Now that last parent should have queued our application.
    validate_n_queued_task(
        depends_on1, job_id,
        job_id.replace('testID1', 'testID3'))  # this replace is a hack

    # Two job_ids are queued, so run the app twice.
    run_code(log, tasks_json_tmpfile, depends_on1, echo_args)
    run_code(log, tasks_json_tmpfile, depends_on1, echo_args)
    validate_one_completed_task(depends_on1, job_id)
def test_create_child_task_after_one_parent_completed(
        app1, app2, app3, job_id1, log, tasks_json_tmpfile, func_name):
    """A child injected into the DAG mid-run is picked up when a parent
    completes again.

    If the tasks.json file is modified in the middle of processing the dag,
    the modifications should be recognized: the new child should run once
    another parent completes, but otherwise should not run until it is
    manually queued.
    """
    new_child = app3
    injected_config = {
        new_child: {
            "depends_on": {"app_name": [app1, app2]},
        },
    }

    # Complete both parents before the child exists in the DAG.
    qb.set_state(app1, job_id1, completed=True)
    validate_one_completed_task(app1, job_id1)
    validate_zero_queued_task(new_child)
    consume_queue(app2)
    qb.set_state(app2, job_id1, completed=True)

    # NOTE(review): all remaining steps are assumed to run inside the
    # injected DAG, since each of them refers to the injected app.
    with inject_into_dag(func_name, injected_config):
        validate_zero_queued_task(new_child)
        # Re-completing app2 does not queue the new child ...
        qb.set_state(app2, job_id1, completed=True)
        validate_zero_queued_task(new_child)
        # ... but re-completing app1 afterwards does.
        qb.set_state(app1, job_id1, completed=True)
        validate_one_queued_task(new_child, job_id1)
        run_code(log, tasks_json_tmpfile, new_child, '--bash_cmd echo 123')
        validate_one_completed_task(new_child, job_id1)
def test_create_parent_task_after_child_completed(app1, app3, job_id1,
                                                  func_name):
    """A parent injected into the DAG queues its injected child once the
    parent is completed.

    If the tasks.json file is modified in the middle of processing the dag,
    the modifications should be recognized appropriately: the child is not
    re-scheduled unless the parent is completed.
    """
    qb.set_state(app1, job_id1, completed=True)
    validate_one_completed_task(app1, job_id1)

    new_parent = app3
    new_child = 'test_stolos/%s/testX' % func_name
    injected_config = {
        new_parent: {},
        new_child: {"depends_on": {"app_name": [new_parent]}},
    }

    # NOTE(review): all remaining steps are assumed to run inside the
    # injected DAG, since each of them refers to an injected app.
    with inject_into_dag(func_name, injected_config):
        validate_zero_queued_task(new_parent)
        # Completing the injected parent queues the injected child.
        qb.set_state(new_parent, job_id1, completed=True)
        validate_one_completed_task(new_parent, job_id1)
        validate_one_queued_task(new_child, job_id1)
def test_skipped_parent_and_queued_child(app1, app2, app3, app4, job_id1,
                                         log, tasks_json_tmpfile):
    """A queued child whose parents are skipped de-queues itself with a
    warning instead of running.
    """
    # Mark app1, app3 and app2 (in that order) as skipped.
    for skipped_app in (app1, app3, app2):
        qb.set_state(skipped_app, job_id1, skipped=True)

    enqueue(app4, job_id1)
    validate_zero_queued_task(app2)
    validate_one_queued_task(app4, job_id1)

    # Ensure the child unqueues itself and raises a warning.
    _stdout, stderr = run_code(log, tasks_json_tmpfile, app4, capture=True)
    expected_warning = (
        "parent_job_id is marked as 'skipped', so should be impossible for me,"
        " the child, to exist")
    nose.tools.assert_in(expected_warning, stderr)
    validate_zero_queued_task(app4)
    validate_zero_queued_task(app2)
def test_maybe_add_subtask(app1, job_id1, job_id2, job_id3):
    """maybe_add_subtask only queues jobs it has no data for, and does not
    re-queue a job that is already queued.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    tt.validate_zero_queued_task(app1)

    # queue=False queues nothing, but creates data for this node if it
    # doesn't exist.
    api.maybe_add_subtask(app1, job_id1, queue=False)
    tt.validate_zero_queued_task(app1)

    # Data for this job_id now exists, so it can't get queued.
    api.maybe_add_subtask(app1, job_id1, priority=4)
    tt.validate_zero_queued_task(app1)

    # A fresh job_id does get queued.
    api.maybe_add_subtask(app1, job_id2, priority=8)
    tt.validate_one_queued_task(app1, job_id2)

    api.maybe_add_subtask(app1, job_id3, priority=5)
    # This second call should have no effect because the job is already
    # queued with priority=5.
    api.maybe_add_subtask(app1, job_id3, priority=9)

    # job_id3 is expected to be the job the queue hands out next.
    next_job_id = tt.cycle_queue(app1)
    nt.assert_equal(job_id3, next_job_id)
def test_maybe_add_subtask(app1, job_id1, job_id2, job_id3):
    """maybe_add_subtask only queues jobs it has no data for, and does not
    re-queue a job that is already queued.
    """
    # NOTE(review): an identical definition of this test appears elsewhere in
    # this file.
    tt.validate_zero_queued_task(app1)

    # queue=False queues nothing, but creates data for this node if it
    # doesn't exist.
    api.maybe_add_subtask(app1, job_id1, queue=False)
    tt.validate_zero_queued_task(app1)

    # Data for this job_id now exists, so it can't get queued.
    api.maybe_add_subtask(app1, job_id1, priority=4)
    tt.validate_zero_queued_task(app1)

    # A fresh job_id does get queued.
    api.maybe_add_subtask(app1, job_id2, priority=8)
    tt.validate_one_queued_task(app1, job_id2)

    api.maybe_add_subtask(app1, job_id3, priority=5)
    # This second call should have no effect because the job is already
    # queued with priority=5.
    api.maybe_add_subtask(app1, job_id3, priority=9)

    # job_id3 is expected to be the job the queue hands out next.
    next_job_id = tt.cycle_queue(app1)
    nt.assert_equal(job_id3, next_job_id)
def test_rerun_pull_tasks1(app1, app2, job_id1, log, tasks_json_tmpfile):
    """Re-adding a completed child re-runs it without re-queueing its
    already completed parent.
    """
    # Queue and complete app 1, which queues its child.
    enqueue(app1, job_id1)
    qb.set_state(app1, job_id1, completed=True)
    consume_queue(app1)
    validate_zero_queued_task(app1)
    validate_one_queued_task(app2, job_id1)

    # Complete app 2.
    qb.set_state(app2, job_id1, completed=True)
    consume_queue(app2)
    validate_zero_queued_task(app2)

    # Re-add app 2: only the child is queued.
    qb.readd_subtask(app2, job_id1)
    validate_zero_queued_task(app1)
    validate_one_queued_task(app2, job_id1)

    # Run app 2; its parent was previously completed and stays that way.
    run_code(log, tasks_json_tmpfile, app2)
    validate_one_completed_task(app1, job_id1)
    validate_one_completed_task(app2, job_id1)
def test_pull_tasks_with_many_children(app1, app2, app3, app4, job_id1,
                                       log, tasks_json_tmpfile):
    """Running app4 first queues app1, app2 and app3; app4 is re-queued only
    after all three of them have completed.
    """
    upstream_apps = (app1, app2, app3)

    enqueue(app4, job_id1)
    validate_one_queued_task(app4, job_id1)
    for upstream in upstream_apps:
        validate_zero_queued_task(upstream)

    # Running app4 de-queues it and queues the three upstream apps instead.
    run_code(log, tasks_json_tmpfile, app4, '--bash_cmd echo app4helloworld')
    validate_zero_queued_task(app4)
    for upstream in upstream_apps:
        validate_one_queued_task(upstream, job_id1)

    # Complete the upstream apps one at a time.  Until the last one is done,
    # app4 must stay out of the queue and the remaining apps stay queued.
    last_index = len(upstream_apps) - 1
    for index, upstream in enumerate(upstream_apps):
        consume_queue(upstream)
        qb.set_state(upstream, job_id1, completed=True)
        if index != last_index:
            validate_zero_queued_task(app4)
        for finished in upstream_apps[:index + 1]:
            validate_one_completed_task(finished, job_id1)
        for waiting in upstream_apps[index + 1:]:
            validate_one_queued_task(waiting, job_id1)

    # With every upstream app completed, app4 is queued again.
    validate_one_queued_task(app4, job_id1)
    consume_queue(app4)
    qb.set_state(app4, job_id1, completed=True)
    for finished in upstream_apps + (app4,):
        validate_one_completed_task(finished, job_id1)
def test_no_tasks(app1, app2, log, tasks_json_tmpfile):
    """Running an app with nothing queued should not fail or queue anything."""
    run_code(log, tasks_json_tmpfile, app1)
    for app in (app1, app2):
        validate_zero_queued_task(app)