Example #1
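All of the examples below omit their imports and rely on pytest's `tmpdir` fixture. A minimal sketch of the setup they appear to assume (the module paths and the `cl` alias are inferred from how the names are used, so treat them as assumptions):

# Shared setup assumed by the examples (a sketch; the excerpts
# themselves do not show their imports):
import os
import time

import jip
import jip.db
import jip.jobs
import jip.cluster as cl   # cl.LocalCluster() is used to dispatch jobs locally
from jip.db import Job     # queried directly in the final example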
def test_job_hierarchy_execution_with_pipes_no_dispatching(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    # create the pipeline
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"')
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file)
    a | b
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)
    assert len(jobs[0].pipe_to) == 1
    assert len(jobs) == 2

    # iterate the executions and run each one
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # now the file should be there
    assert os.path.exists(target_file)
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file).read().strip() == "2"
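For reference, the piped pair behaves like a single shell pipeline, which is why only one execution is created for the two jobs:

# Rough shell equivalent of the a | b chain above; both jobs run
# inside one execution, so jip.create_executions yields a single entry:
#   echo "hello world" | wc -w > result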
Example #2
def test_job_hierarchy_execution_with_dispatching_fan_out(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    # create the pipeline
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"', output=target_file + ".1")
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file + ".2")
    c = p.job(dir=tmpdir).bash('wc -l', output=target_file + ".3")
    a | (b + c)
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # iterate the executions and run each one
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # all jobs should have finished successfully
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file + '.1').read().strip() == "hello world"
    assert open(target_file + '.3').read().strip() == "1"
    assert open(target_file + '.2').read().strip() == "2"
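The fan-out `a | (b + c)` dispatches a's stdout to both consumers while still writing a's own output file. A rough shell sketch of the same data flow (uses bash process substitution):

#   echo "hello world" | tee result.1 >(wc -w > result.2) | wc -l > result.3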
Example #3
def test_single_job_fail(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result.txt')
    db_file = os.path.join(tmpdir, "test.db")
    assert not os.path.exists(target_file)

    # create the JIP database
    jip.db.init(db_file)
    # create the cluster instance
    c = cl.LocalCluster()

    # create the pipeline
    p = jip.Pipeline()
    p.job(dir=tmpdir).bash('touch ${outfile}; exit 1;', outfile=target_file)
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # create and save the executions, then submit each job to the cluster
    for e in jip.create_executions(jobs, save=True):
        jip.submit_job(e.job, cluster=c)

    c.wait()
    # the job failed, so JIP should have removed its output file
    assert not os.path.exists(target_file)

    # we should also have the log files
    assert os.path.exists(os.path.join(tmpdir, "jip-1.out"))
    assert os.path.exists(os.path.join(tmpdir, "jip-1.err"))
    # and we should have one job in Failed state in our database
    # we do the query with a fresh session though
    job = jip.db.get(1)
    assert job is not None
    assert job.state == jip.db.STATE_FAILED
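After the run, tmpdir should look roughly like this; the output file is absent even though the command touches it, which suggests JIP removes the output files of failed jobs:

# Expected tmpdir contents after the failed run (a sketch):
#   test.db     - the JIP job database
#   jip-1.out   - captured stdout of job 1
#   jip-1.err   - captured stderr of job 1
# result.txt is absent: the job exited non-zero.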
Example #4
def test_job_hierarchy_job_group(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    @jip.tool()
    def merge():
        """\
        Merge

        usage:
            merge --input <input>... [--output <output>]

        Options:
            --input <input>...    The input
                                  [default: stdin]
            --output <output>     The output
                                  [default: stdout]
        """
        return "cat ${input|else('-')} ${output|arg('> ')}"

    # create the pipeline
    p = jip.Pipeline()
    a_1 = p.job(dir=tmpdir).bash('echo "hello spain"',
                                 output=target_file + ".1")
    a_2 = p.job(dir=tmpdir).bash('echo "hello world"',
                                 output=target_file + ".2")
    a_3 = p.job(dir=tmpdir).bash('echo "hello universe"',
                                 output=target_file + ".3")
    b = p.job(dir=tmpdir).run('merge', output=target_file)
    b.input = [a_1, a_2, a_3]
    (a_1 - a_2 - a_3 - b)
    p.context(locals())
    # create the jobs
    jobs = jip.create_jobs(p)
    assert len(jobs) == 4
    assert len(jobs[0].dependencies) == 0
    assert len(jobs[0].children) == 2
    assert len(jobs[1].dependencies) == 1
    assert len(jobs[1].children) == 2
    assert len(jobs[2].dependencies) == 1
    assert len(jobs[2].children) == 1
    assert len(jobs[3].dependencies) == 3
    print(jobs[3].command)

    # iterate the executions and run each one
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # all jobs should have finished successfully
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file + '.1').read().strip() == "hello spain"
    assert open(target_file + '.2').read().strip() == "hello world"
    assert open(target_file + '.3').read().strip() == "hello universe"
    assert open(target_file).read().strip() == "hello spain\n"\
                                               "hello world\nhello universe"
Example #5
def test_job_hierarchy_execution_with_pipes_and_dispatching(tmpdir):
    print ">>>", tmpdir
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')
    db_file = os.path.join(tmpdir, "test.db")

    # create a JIP database and a session
    jip.db.init(db_file)
    session = jip.db.create_session()

    # create the cluster instance
    c = cl.LocalCluster()

    # create the pipeline
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"', output="${target_file}.1")
    b = p.job(dir=tmpdir).bash('wc -w ${input}',
                               input=a,
                               output="${target_file}.2")
    l = p.job(dir=tmpdir).bash('echo "Other" > ${target_file}.3')

    a | b
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)
    assert len(jobs) == 3

    # create and save the executions, then submit each job to the cluster
    execs = 0
    for e in jip.create_executions(jobs, save=True):
        jip.submit_job(e.job, save=True, cluster=c)
        execs += 1
    assert execs == 2
    c.wait()
    # now the file should be there
    assert os.path.exists(target_file + ".1")
    assert os.path.exists(target_file + ".2")
    assert os.path.exists(target_file + ".3")

    # we should also have the log files
    assert os.path.exists(os.path.join(tmpdir, "jip-1.out"))
    assert os.path.exists(os.path.join(tmpdir, "jip-1.err"))
    assert os.path.exists(os.path.join(tmpdir, "jip-3.out"))
    assert os.path.exists(os.path.join(tmpdir, "jip-3.err"))
    # and we should have three jobs in Done state in our database
    # we do the query with a fresh session though
    find = jip.db.get
    assert find(1).state == jip.db.STATE_DONE
    assert find(2).state == jip.db.STATE_DONE
    assert find(3).state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file + ".1").read() == "hello world\n"
    assert open(target_file + ".2").read().strip() == "2"
    assert open(target_file + ".3").read() == "Other\n"
Example #6
def test_multiplex_with_stream():
    p = jip.Pipeline()
    first = p.bash("cat ${input}", input=['A', 'B'])
    second = p.bash("wc -l")
    first | second
    p.expand(validate=False)
    assert len(p) == 4
    jobs = jip.create_jobs(p, validate=False)
    assert len(jobs) == 4
    execs = jip.create_executions(jobs)
    assert len(execs) == 2
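Multiplexing over the two inputs duplicates the whole chain, so there are four jobs but only two executions, one per piped chain:

# Expanded pipeline after p.expand() (a sketch):
#   cat A | wc -l    -> execution 1
#   cat B | wc -l    -> execution 2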
Example #7
def test_job_cancelation(tmpdir):
    tmpdir = str(tmpdir)
    db_file = os.path.join(tmpdir, "test.db")

    # create a JIP database and a session
    jip.db.init(db_file)
    session = jip.db.create_session()

    # create the cluster instance
    c = cl.LocalCluster()

    # create the pipeline
    p = jip.Pipeline()
    first = p.job(dir=tmpdir).bash('sleep 10')
    p.job(dir=tmpdir).bash('sleep 5').depends_on(first)
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # create and save the executions, then submit each job to the cluster
    for e in jip.create_executions(jobs, save=True):
        jip.submit_job(e.job, save=True, cluster=c)
    # sleep briefly to give the first job time to start
    time.sleep(0.1)

    # cancel the job
    print(jobs, jobs[0].id)
    job = jip.db.get(1)
    jip.jobs.cancel(job, cluster=c, save=True)

    c.wait()

    # both jobs should now be in Canceled state in our database
    # we do the query with a fresh session though
    job_1 = jip.db.get(1)
    job_2 = jip.db.get(2)
    # print the log files
    print ">>>JOB 1 STD ERR LOG"
    print open(c.resolve_log(job, job_1.stderr)).read()
    print ">>>JOB 1 STD OUT LOG"
    print open(c.resolve_log(job, job_1.stdout)).read()

    assert job_1.state == jip.db.STATE_CANCELED
    assert job_2.state == jip.db.STATE_CANCELED
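Canceling job 1 also cancels its dependent job 2, so both end up in STATE_CANCELED; cancellation evidently propagates down the dependency chain rather than leaving children queued.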
Example #8
def test_single_job_master_termination(tmpdir):
    tmpdir = str(tmpdir)
    db_file = os.path.join(tmpdir, "test.db")

    # create a JIP database and a session
    jip.db.init(db_file)
    session = jip.db.create_session()

    # create the cluster instance
    c = cl.LocalCluster()

    # create the pipeline
    p = jip.Pipeline()
    p.job(dir=tmpdir).bash('sleep 30')
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # create and save the executions, then submit each job to the cluster
    for e in jip.create_executions(jobs, save=True):
        jip.submit_job(e.job, save=True, cluster=c)
    # sleep for a second to give the job time to start
    time.sleep(1)

    c.shutdown()

    # and we should have one job in Failed state in our database
    # we do the query with a fresh session though
    job = jip.db.get(1)
    # print the log files
    print ">>>STD ERR LOG"
    print open(c.resolve_log(job, job.stderr)).read()
    print ">>>STD OUT LOG"
    print open(c.resolve_log(job, job.stdout)).read()
    assert job is not None
    assert job.state == jip.db.STATE_FAILED
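Here `c.shutdown()` stops the local cluster while the 30-second job is still running; the interrupted job is recorded as STATE_FAILED rather than canceled, since it was the master that went away, not a cancel request.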
Example #9
    c = cl.LocalCluster()

    # create the pipeline

    for i in range(num_runs):
        print("### CREATE RUN", i)
        target_file = "result.%d" % i
        p = jip.Pipeline()
        a = p.bash('echo "hello world" > ${outfile}; sleep 1',
                   outfile="${target_file}.1.%d" % i)
        b = p.bash('wc -w ${input}; sleep 1',
                   input=a, output="${target_file}.2.%d" % i)
        l = p.bash('echo "Other" > ${outfile}; sleep 1',
                   outfile="${target_file}.3.%d" % i)
        p.context(locals())

        # create the jobs
        jobs = jip.create_jobs(p)

        # create and save the executions, submitting jobs that are not yet complete
        for e in jip.create_executions(jobs, save=True):
            print("### STORED", i)
            if not e.completed:
                jip.submit_job(e.job, save=True, cluster=c)
        #print("### QUEUED", len(c.list()))
    c.wait()

    session = jip.db.create_session()
    for j in session.query(Job):
        print(">>>", j.id, j.state)