def test_job_hierarchy_execution_with_pipes_no_dispatching(tmpdir):
    """Run a two-step piped pipeline in-process and verify the piped result."""
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')
    # build a two-node pipeline where the first job streams into the second
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"')
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file)
    a | b
    p.context(locals())
    # translate the pipeline into executable jobs
    jobs = jip.create_jobs(p)
    assert len(jobs[0].pipe_to) == 1
    assert len(jobs) == 2
    # the piped pair collapses into exactly one execution group
    run_count = 0
    for execution in jip.create_executions(jobs):
        jip.run_job(execution.job)
        run_count += 1
    assert run_count == 1
    # the piped chain must have produced the result file
    assert os.path.exists(target_file)
    for job in jobs:
        assert job.state == jip.db.STATE_DONE
    # "hello world" counts as two words
    assert open(target_file).read().strip() == "2"
def test_job_hierarchy_execution_with_dispatching_fan_out(tmpdir):
    """One producer fans its stream out into two piped consumers."""
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')
    # a streams into both b and c via the dispatcher
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"', output=target_file + ".1")
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file + ".2")
    c = p.job(dir=tmpdir).bash('wc -l', output=target_file + ".3")
    a | (b + c)
    p.context(locals())
    jobs = jip.create_jobs(p)
    # all three jobs are dispatched together as a single execution
    run_count = 0
    for execution in jip.create_executions(jobs):
        jip.run_job(execution.job)
        run_count += 1
    assert run_count == 1
    for job in jobs:
        assert job.state == jip.db.STATE_DONE
    # .1 keeps the raw stream, .2 got the word count, .3 the line count
    assert open(target_file + '.1').read().strip() == "hello world"
    assert open(target_file + '.3').read().strip() == "1"
    assert open(target_file + '.2').read().strip() == "2"
def test_single_job_fail(tmpdir):
    """A job that exits non-zero ends up FAILED and leaves no result file."""
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result.txt')
    db_file = os.path.join(tmpdir, "test.db")
    assert not os.path.exists(target_file)
    # fresh job database for this run
    jip.db.init(db_file)
    # local cluster instance used for submission
    c = cl.LocalCluster()
    # single job that always fails
    p = jip.Pipeline()
    p.job(dir=tmpdir).bash('touch ${input}; exit 1;', outfile=target_file)
    p.context(locals())
    jobs = jip.create_jobs(p)
    # submit and persist all executions, then wait for the cluster to drain
    for execution in jip.create_executions(jobs, save=True):
        jip.submit_job(execution.job, cluster=c)
    c.wait()
    # the failing command never created the target file
    assert not os.path.exists(target_file)
    # but the log files for the submitted job must exist
    assert os.path.exists(os.path.join(tmpdir, "jip-1.out"))
    assert os.path.exists(os.path.join(tmpdir, "jip-1.err"))
    # a fresh database lookup must report the job as failed
    job = jip.db.get(1)
    assert job is not None
    assert job.state == jip.db.STATE_FAILED
def test_job_hierarchy_job_group(tmpdir): tmpdir = str(tmpdir) target_file = os.path.join(tmpdir, 'result') @jip.tool() def merge(): """\ Merge usage: merge --input <input>... [--output <output>] Options: --input <input>... The input [default: stdin] --output <output> The input [default: stdout] """ return "cat ${input|else('-')} ${output|arg('> ')}" # create the pipeline p = jip.Pipeline() a_1 = p.job(dir=tmpdir).bash('echo "hello spain"', output=target_file + ".1") a_2 = p.job(dir=tmpdir).bash('echo "hello world"', output=target_file + ".2") a_3 = p.job(dir=tmpdir).bash('echo "hello universe"', output=target_file + ".3") b = p.job(dir=tmpdir).run('merge', output=target_file) b.input = [a_1, a_2, a_3] (a_1 - a_2 - a_3 - b) p.context(locals()) # create the jobs jobs = jip.create_jobs(p) assert len(jobs) == 4 assert len(jobs[0].dependencies) == 0 assert len(jobs[0].children) == 2 assert len(jobs[1].dependencies) == 1 assert len(jobs[1].children) == 2 assert len(jobs[2].dependencies) == 1 assert len(jobs[2].children) == 1 assert len(jobs[3].dependencies) == 3 print jobs[3].command # iterate the executions and pass the session so all jobs are stored execs = 0 for e in jip.create_executions(jobs): jip.run_job(e.job) execs += 1 assert execs == 1 # now the file should be there for j in jobs: assert j.state == jip.db.STATE_DONE # check the content of the output files assert open(target_file + '.1').read().strip() == "hello spain" assert open(target_file + '.2').read().strip() == "hello world" assert open(target_file + '.3').read().strip() == "hello universe" assert open(target_file).read().strip() == "hello spain\n"\ "hello world\nhello universe"
def test_job_hierarchy_execution_with_pipes_and_dispatching(tmpdir): print ">>>", tmpdir tmpdir = str(tmpdir) target_file = os.path.join(tmpdir, 'result') db_file = os.path.join(tmpdir, "test.db") # create a JIP database and a session jip.db.init(db_file) session = jip.db.create_session() # create the cluster instance c = cl.LocalCluster() # create the pipeline p = jip.Pipeline() a = p.job(dir=tmpdir).bash('echo "hello world"', output="${target_file}.1") b = p.job(dir=tmpdir).bash('wc -w ${input}', input=a, output="${target_file}.2") l = p.job(dir=tmpdir).bash('echo "Other" > ${target_file}.3') a | b p.context(locals()) # create the jobs jobs = jip.create_jobs(p) assert len(jobs) == 3 # iterate the executions and pass the session so all jobs are stored execs = 0 for e in jip.create_executions(jobs, save=True): jip.submit_job(e.job, save=True, cluster=c) execs += 1 assert execs == 2 c.wait() # now the file should be there assert os.path.exists(target_file + ".1") assert os.path.exists(target_file + ".2") assert os.path.exists(target_file + ".3") # we should also have the log files assert os.path.exists(os.path.join(tmpdir, "jip-1.out")) assert os.path.exists(os.path.join(tmpdir, "jip-1.err")) assert os.path.exists(os.path.join(tmpdir, "jip-3.out")) assert os.path.exists(os.path.join(tmpdir, "jip-3.err")) # and we should have one job in Done state in our database # we do the query with a fresh session though find = jip.db.get assert find(1).state == jip.db.STATE_DONE assert find(2).state == jip.db.STATE_DONE assert find(3).state == jip.db.STATE_DONE # check the content of the output files assert open(target_file + ".1").read() == "hello world\n" assert open(target_file + ".2").read().strip() == "2" assert open(target_file + ".3").read() == "Other\n"
def test_multiplex_with_stream():
    """Expanding a multiplexed input keeps the pipe fan-out intact."""
    p = jip.Pipeline()
    first = p.bash("cat ${input}", input=['A', 'B'])
    second = p.bash("wc -l")
    first | second
    # expansion multiplexes over the two inputs -> four nodes in total
    p.expand(validate=False)
    assert len(p) == 4
    jobs = jip.create_jobs(p, validate=False)
    assert len(jobs) == 4
    # each piped pair forms a single execution
    execs = jip.create_executions(jobs)
    assert len(execs) == 2
def test_job_cancelation(tmpdir): tmpdir = str(tmpdir) db_file = os.path.join(tmpdir, "test.db") # create a JIP database and a session jip.db.init(db_file) session = jip.db.create_session() # create the cluster instance c = cl.LocalCluster() # create the pipeline p = jip.Pipeline() first = p.job(dir=tmpdir).bash('sleep 10') p.job(dir=tmpdir).bash('sleep 5').depends_on(first) p.context(locals()) # create the jobs jobs = jip.create_jobs(p) # iterate the executions and pass the session so all jobs are stored for e in jip.create_executions(jobs, save=True): jip.submit_job(e.job, save=True, cluster=c) # sleep for a second to give the job time to start time.sleep(0.1) # cancel the job print jobs, jobs[0].id job = jip.db.get(1) jip.jobs.cancel(job, cluster=c, save=True) c.wait() # and we should have one job in Failed state in our database # we do the query with a fresh session though job_1 = jip.db.get(1) job_2 = jip.db.get(2) # print the log files print ">>>JOB 1 STD ERR LOG" print open(c.resolve_log(job, job_1.stderr)).read() print ">>>JOB 1 STD OUT LOG" print open(c.resolve_log(job, job_1.stdout)).read() assert job_1.state == jip.db.STATE_CANCELED assert job_2.state == jip.db.STATE_CANCELED
def test_single_job_master_termination(tmpdir): tmpdir = str(tmpdir) db_file = os.path.join(tmpdir, "test.db") # create a JIP database and a session jip.db.init(db_file) session = jip.db.create_session() # create the cluster instance c = cl.LocalCluster() # create the pipeline p = jip.Pipeline() p.job(dir=tmpdir).bash('sleep 30') p.context(locals()) # create the jobs jobs = jip.create_jobs(p) # iterate the executions and pass the session so all jobs are stored for e in jip.create_executions(jobs, save=True): jip.submit_job(e.job, save=True, cluster=c) # sleep for a second to give the job time to start time.sleep(1) c.shutdown() # and we should have one job in Failed state in our database # we do the query with a fresh session though job = jip.db.get(1) # print the log files print ">>>STD ERR LOG" print open(c.resolve_log(job, job.stderr)).read() print ">>>STD OUT LOG" print open(c.resolve_log(job, job.stdout)).read() assert job is not None assert job.state == jip.db.STATE_FAILED
c = cl.LocalCluster() # create the pipeline for i in range(num_runs): print "### CREATE RUN", i target_file = "result.%d" % i p = jip.Pipeline() a = p.bash('echo "hello world" > ${outfile}; sleep 1', outfile="${target_file}.1.%d" % i) b = p.bash('wc -w ${input}; sleep 1', input=a, output="${target_file}.2.%d" % i) l = p.bash('echo "Other" > ${outfile}; sleep 1', outfile="${target_file}.3.%d" % i) p.context(locals()) # create the jobs jobs = jip.create_jobs(p) # iterate the executions and pass the session so all jobs are stored for e in jip.create_executions(jobs, save=True): print "### STORED", i if not e.completed: jip.submit_job(e.job, save=True, cluster=c) #print "### QUEUED", len(c.list()) c.wait() session = jip.db.create_session() for j in session.query(Job): print ">>>", j.id, j.state
# NOTE(review): Python-3 twin of the fragment above this chunk boundary --
# `num_runs`, `Job`, and the jip/cl imports are defined outside this chunk;
# structure of the original (collapsed) source was reconstructed.
c = cl.LocalCluster()
# submit num_runs small pipelines against the shared local cluster
for i in range(num_runs):
    print("### CREATE RUN", i)
    target_file = "result.%d" % i
    # ${target_file} below is resolved from the locals captured by
    # p.context(), so that name must not change
    p = jip.Pipeline()
    a = p.bash('echo "hello world" > ${outfile}; sleep 1',
               outfile="${target_file}.1.%d" % i)
    b = p.bash('wc -w ${input}; sleep 1', input=a,
               output="${target_file}.2.%d" % i)
    l = p.bash('echo "Other" > ${outfile}; sleep 1',
               outfile="${target_file}.3.%d" % i)
    p.context(locals())
    jobs = jip.create_jobs(p)
    # persist all executions but submit only the ones not yet completed
    for e in jip.create_executions(jobs, save=True):
        print("### STORED", i)
        if not e.completed:
            jip.submit_job(e.job, save=True, cluster=c)
    #print("### QUEUED", len(c.list()))
c.wait()
# dump the final state of every job in the database
session = jip.db.create_session()
for j in session.query(Job):
    print(">>>", j.id, j.state)