Example #1
def test_no_duplicated_jobs_after_file_query_direct(db, tmpdir):
    if not db.startswith('mysql'):
        db = os.path.join(str(tmpdir), db)
    jip.db.init(db)
    p = jip.Pipeline()
    a = p.bash('ls ${input}', input='A.txt', output='out.dat')
    p.context(locals())
    jobs = jip.create_jobs(p, validate=False)
    jip.db.save(jobs)
    assert len(list(jip.db.get_all())) == 1
    assert jobs[0].id == 1

    # second pipeline
    p = jip.Pipeline()
    a = p.bash('ls ${input}', input='A.txt', output='out.dat')
    b = p.bash('ls ${input}', input=a)
    p.context(locals())
    jobs = jip.create_jobs(p, validate=False)
    # search for the out.dat job
    existing = jip.db.query_by_files(
        outputs=jobs[1].tool.input.value
    )
    assert len(list(existing)) == 1
    old = list(existing)[0]
    # now replace the dependency
    jobs[1].dependencies = [old]
    # save only job 1
    jip.db.save(jobs[1])
    # we should have 2 jobs in the database
    assert len(list(jip.db.get_all())) == 2
    # and the one we skipped has no ID
    assert jobs[0].id is None
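
The pattern above can be wrapped into a small helper. A minimal sketch, using only the jip.db calls shown in this test; reuse_or_link is an illustrative name, not part of the JIP API:

def reuse_or_link(job):
    # find a stored job that already produces this job's input files
    existing = list(jip.db.query_by_files(outputs=job.tool.input.value))
    if existing:
        # depend on the stored job instead of saving a duplicate
        job.dependencies = [existing[0]]
    jip.db.save(job)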
Example #2
    def testPipelineStructure(self):
        # load the pipeline
        tool = jip.find("examples/bwa/pileup.jip")
        assert tool is not None

        # create a pipeline
        p = jip.Pipeline()
        # create a new pipeline node and configure it
        p.run(tool, input="setup.py", reference="Makefile", output="out.txt")

        # expand the pipeline such that the internal pipeline is resolved
        p.expand(validate=False)

        # after expansion with this setup, the pipeline should have 7 nodes
        assert len(p) == 7
        # the graph should consist of 6 edges
        assert len(p.edges) == 6
        # look up the nodes of the expanded pipeline by name
        ref = p.get("ref")
        align = p.get("align")
        sam = p.get("sam")
        bam = p.get("bam")
        dups = p.get("dups")
        index = p.get("index")
        pileup = p.get("pileup")
        # check the connections
        assert not ref.has_incoming()
        assert align.has_incoming(ref)
        assert sam.has_incoming(align)
        assert bam.has_incoming(sam)
        assert dups.has_incoming(bam)
        assert index.has_incoming(dups)
        assert pileup.has_incoming(index)
        assert not pileup.has_outgoing()
Example #3
def test_embedded_pipelines():
    @jip.tool()
    class produce():
        """Produce a set of files

        Usage:
            produce --prefix <prefix> --number <number>
        """
        def init(self):
            self.add_output('output', '${prefix}.*', nargs="*")

        def get_command(self):
            return """
            for x in $(seq ${number}); do
                echo Hello $x > ${prefix}.$x;
            done
            """

    @jip.tool()
    def consume():
        """Count something

        Usage:
            consume <input>
        """
        return """cat ${input}"""

    p = jip.Pipeline()
    # produce n files
    producer = p.run('produce', prefix='test', number=5)
    # run after success dynamically
    producer.on_success('consume', input=producer)
    jobs = jip.create_jobs(p)
    assert len(jobs) == 1
    assert len(jobs[0].on_success) == 1
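
The embedded jobs can be executed like the other examples here. A sketch, assuming jip.create_executions and jip.run_job behave as in the execution tests below:

for e in jip.create_executions(jobs):
    jip.run_job(e.job)
# 'consume' was registered via on_success, so it should only be
# triggered after 'produce' finishes successfully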
Example #4
def test_gem_pipeline_with_output_dir():
    p = jip.Pipeline()
    p.run('grape_gem_rnapipeline', fastq='reads_1.fastq.gz', genome='index.fa',
          annotation='gencode.gtf', output_dir="mydir", max_matches='10', max_mismatches='4')
    jobs = jip.create_jobs(p, validate=False)
    ldir = os.getcwd()
    j = os.path.join
    assert len(jobs) == 4
    assert jobs[2].configuration['index'].get() == j(ldir, 'index.gem')
    assert jobs[2].configuration['fastq'].get() == j(ldir, 'reads_1.fastq.gz')
    assert jobs[2].configuration['transcript_index'].get() == j(ldir, 'gencode.gtf.gem')
    assert jobs[2].configuration['quality'].get() == '33'
    assert jobs[2].configuration['output_dir'].get() == "mydir"
    assert jobs[2].configuration['name'].get() == 'reads'
    assert jobs[2].configuration['bam'].get() == j(ldir, 'mydir/reads.bam')
    assert jobs[2].configuration['bai'].get() == j(ldir, 'mydir/reads.bam.bai')
    assert jobs[2].configuration['map'].get() == j(ldir, 'mydir/reads.map.gz')

    assert jobs[3].configuration['input'].get() == j(ldir, 'mydir/reads.bam')
    assert jobs[3].configuration['name'].get() == 'reads'
    assert jobs[3].configuration['annotation'].get() == j(ldir, 'gencode.gtf')
    assert jobs[3].configuration['output_dir'].get() == "mydir"
    assert jobs[3].configuration['output'].get() == j(ldir, 'mydir/reads.gtf')

    assert len(jobs[0].children) == 2
    assert len(jobs[1].dependencies) == 1
    assert len(jobs[2].dependencies) == 2
    assert len(jobs[3].dependencies) == 1
    assert jobs[0].children[0] == jobs[1]
Example #5
def test_nested_pipes_stream_setup_stream_multiplex():
    tool = jip.find('joined_pipeline')
    assert tool is not None
    p = jip.Pipeline()
    p.run(tool,
          input=["Makefile", "README.rst", "setup.py"],
          output="${input}.dat")
    p.expand(validate=False)

    # 6 nodes and 3 edges: the three inputs are multiplexed over both tools
    assert len(p) == 6
    assert len(p.edges) == 3
    t1_0 = p.get("TestJob1.0")
    t2_0 = p.get("TestJob2.0")
    assert t1_0.has_outgoing(t2_0, ('output', 'input'), True)
    t1_1 = p.get("TestJob1.1")
    t2_1 = p.get("TestJob2.1")
    assert t1_1.has_outgoing(t2_1, ('output', 'input'), True)
    t1_2 = p.get("TestJob1.2")
    t2_2 = p.get("TestJob2.2")
    assert t1_2.has_outgoing(t2_2, ('output', 'input'), True)

    # test option values
    cwd = os.getcwd()
    join = os.path.join
    assert t1_0.input == join(cwd, 'Makefile')
    assert t1_1.input == join(cwd, 'README.rst')
    assert t1_2.input == join(cwd, 'setup.py')
    assert t2_0.output == join(cwd, 'Makefile.dat')
    assert t2_1.output == join(cwd, 'README.rst.dat')
    assert t2_2.output == join(cwd, 'setup.py.dat')
Example #6
def test_single_job_fail(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result.txt')
    db_file = os.path.join(tmpdir, "test.db")
    assert not os.path.exists(target_file)

    # create a JIP database
    jip.db.init(db_file)
    # create the cluster instance
    c = cl.LocalCluster()

    # create the pipeline
    p = jip.Pipeline()
    p.job(dir=tmpdir).bash('touch ${outfile}; exit 1;', outfile=target_file)
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # iterate the executions with save=True so all jobs are stored
    for e in jip.create_executions(jobs, save=True):
        jip.submit_job(e.job, cluster=c)

    c.wait()
    # the job failed, so the output file should not be there
    assert not os.path.exists(target_file)

    # we should also have the log files
    assert os.path.exists(os.path.join(tmpdir, "jip-1.out"))
    assert os.path.exists(os.path.join(tmpdir, "jip-1.err"))
    # and we should have one job in Failed state in our database
    job = jip.db.get(1)
    assert job is not None
    assert job.state == jip.db.STATE_FAILED
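
With the same database API, failed jobs can also be found by filtering jip.db.get_all(). A small sketch using only the calls shown above:

failed = [j for j in jip.db.get_all() if j.state == jip.db.STATE_FAILED]
assert len(failed) == 1
assert failed[0].id == job.id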
Example #7
def test_setting_working_directory_to_sub():
    cwd = os.getcwd()
    p = jip.Pipeline()
    # produce n files
    p.job(dir="sub").run('produce', prefix='test', number=5)
    jobs = jip.create_jobs(p)
    assert jobs[0].working_directory == cwd + "/sub"
Example #8
def test_job_hierarchy_execution_with_pipes_no_dispatching(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    # create the pipeline
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"')
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file)
    a | b
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)
    assert len(jobs[0].pipe_to) == 1
    assert len(jobs) == 2

    # iterate the executions and run the jobs
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # now the file should be there
    assert os.path.exists(target_file)
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file).read().strip() == "2"
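
Note that a | b joins the two bash nodes into a single streaming group (see the pipe_to assertion above), which is why create_executions yields one execution even though two jobs exist and both end up in Done state.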
Example #9
    def pipeline(self):
        p = jip.Pipeline()
        gem_setup = p.run('grape_gem_setup', input=self.genome)
        gem = p.run('grape_gem_rnatool', index=gem_setup.index,
                    fastq=self.fastq)
        p.context(locals())
        return p
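
Fragments like this one (and the similar pipeline() methods in Examples #15 and #25) are the pipeline() hook of a class decorated with @jip.pipeline(), as shown in Example #14; self.genome and self.fastq resolve to options of the enclosing tool.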
Example #10
def test_job_hierarchy_job_group(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    @jip.tool()
    def merge():
        """\
        Merge

        usage:
            merge --input <input>... [--output <output>]

        Options:
            --input <input>...    The input
                                  [default: stdin]
            --output <output>     The input
                                  [default: stdout]
        """
        return "cat ${input|else('-')} ${output|arg('> ')}"

    # create the pipeline
    p = jip.Pipeline()
    a_1 = p.job(dir=tmpdir).bash('echo "hello spain"',
                                 output=target_file + ".1")
    a_2 = p.job(dir=tmpdir).bash('echo "hello world"',
                                 output=target_file + ".2")
    a_3 = p.job(dir=tmpdir).bash('echo "hello universe"',
                                 output=target_file + ".3")
    b = p.job(dir=tmpdir).run('merge', output=target_file)
    b.input = [a_1, a_2, a_3]
    (a_1 - a_2 - a_3 - b)
    p.context(locals())
    # create the jobs
    jobs = jip.create_jobs(p)
    assert len(jobs) == 4
    assert len(jobs[0].dependencies) == 0
    assert len(jobs[0].children) == 2
    assert len(jobs[1].dependencies) == 1
    assert len(jobs[1].children) == 2
    assert len(jobs[2].dependencies) == 1
    assert len(jobs[2].children) == 1
    assert len(jobs[3].dependencies) == 3
    print(jobs[3].command)

    # iterate the executions and run the jobs
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # all jobs should have finished successfully
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file + '.1').read().strip() == "hello spain"
    assert open(target_file + '.2').read().strip() == "hello world"
    assert open(target_file + '.3').read().strip() == "hello universe"
    assert open(target_file).read().strip() == "hello spain\n"\
                                               "hello world\nhello universe"
Example #11
def test_dynamic_options():
    script = '''#!/usr/bin/env jip
# Touch a number of files with a common prefix
#
# usage:
#   touch --prefix <prefix> --count <count>

#%begin init
add_output('output')
#%end

#%begin setup
options['output'].set(["%s_%s" % (prefix, i) for i in range(1, count.get(int) + 1)])
#%end

#%begin command
for x in ${output}; do
    touch $x
done
    '''
    tool = jip.tools.ScriptTool.from_string(script)
    tool.init()
    assert tool is not None
    p = jip.Pipeline()
    node = p.job('test').run(tool, prefix='test', count=5)
    assert node is not None
    p.expand()
    assert len(p) == 1
    node = p.get('test')
    assert node.prefix == 'test'
    cwd = os.getcwd()
    assert node.output == [
        os.path.join(cwd, x)
        for x in ['test_1', 'test_2', 'test_3', 'test_4', 'test_5']
    ]
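
The same dynamic-output behaviour can also be written as a decorated class instead of a script. A sketch following the init/setup/get_command pattern from the earlier examples; the class name touch_files is illustrative:

@jip.tool()
class touch_files():
    """Touch a number of files with a common prefix

    usage:
        touch_files --prefix <prefix> --count <count>
    """
    def init(self):
        self.add_output('output')

    def setup(self):
        # expand the output option dynamically, as the script's
        # setup block does
        prefix = self.options['prefix'].get()
        count = self.options['count'].get(int)
        self.options['output'].set(
            ["%s_%s" % (prefix, i) for i in range(1, count + 1)]
        )

    def get_command(self):
        return """
        for x in ${output}; do
            touch $x
        done
        """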
Example #12
def jip_prepare(args, submit=False, project=None, datasets=[], validate=True):
    # get the project and the selected datasets
    if not project and not datasets:
        project, datasets = get_project_and_datasets(args)
    # setup jip db
    jip.db.init(project.jip_db)
    p = jip.Pipeline()
    jargs = {}
    if datasets == ['setup']:
        jargs['input'] = project.config.get('genome')
        jargs['annotation'] = project.config.get('annotation')
        p.run('grape_gem_setup', **jargs)
        jobs = jip.jobs.create_jobs(p)
    else:
        input = []
        for d in datasets:
            fqs = sorted(d.fastq.keys())
            input.append(fqs[0])
            if len(fqs) == 1:
                jargs['single-end'] = True
        jargs['fastq'] = input
        jargs['annotation'] = project.config.get('annotation')
        jargs['genome'] = project.config.get('genome')
        jargs['max_mismatches'] = args.max_mismatches
        jargs['max_matches'] = args.max_matches
        jargs['threads'] = args.threads
        p.run('grape_gem_rnapipeline', **jargs)
        jobs = jip.jobs.create_jobs(p, validate=validate)
    if submit:
        jobs = check_jobs_dependencies(jobs)
    return jobs
Example #13
def test_tool_name_with_local_context():
    p = jip.Pipeline()
    a = p.run('foo', input='Makefile')
    p.context(locals())
    jobs = jip.create_jobs(p, validate=False)
    assert len(jobs) == 1
    assert jobs[0].name == 'Makefile'
Example #14
def test_tool_name_in_pipeline_context():
    @jip.tool()
    class MyTool():
        def validate(self):
            self.job.name = "testtool"

        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def validate(self):
            self.name("thepipeline")

        def pipeline(self):
            p = jip.Pipeline()
            p.run('MyTool')
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()

    jobs = jip.create_jobs(p)
    assert len(jobs) == 1
    assert jobs[0].name == "testtool"
    assert jobs[0].pipeline == "thepipeline"
Example #15
    def pipeline(self):
        p = jip.Pipeline()
        index = p.run('grape_gem_index',
                      input=self.input,
                      output=self.index)
        p.context(locals())
        return p
Example #16
def test_pipeline_tool_defaults_global_job():
    @jip.tool()
    class MyTool():
        def setup(self):
            self.profile.threads = 2
            self.profile.queue = "Org"

        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def pipeline(self):
            p = jip.Pipeline()
            p.job(threads=3, queue="Intern").run('MyTool')
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()

    profile = jip.Profile(threads=5, queue="yeah")
    profile.specs['MyTool'] = jip.Profile()
    profile.apply_to_pipeline(p)

    jobs = jip.create_jobs(p)
    assert jobs[0].threads == 3
    assert jobs[0].queue == "Intern"
Example #17
def test_pipeline_tool_spec_regexp():
    @jip.tool()
    class MyTool():
        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def pipeline(self):
            p = jip.Pipeline()
            p.job(threads=3, queue="Intern").run('MyTool')
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()

    profile = jip.Profile(threads=5, queue="yeah", priority="high")
    profile.specs['My*'] = jip.Profile(threads=10, queue="rock")
    profile.apply_to_pipeline(p)

    jobs = jip.create_jobs(p)
    assert jobs[0].threads == 10
    assert jobs[0].queue == "rock"
    assert jobs[0].priority == "high"
Example #18
def test_pipeline_overwrites_pipeline_from_spec():
    @jip.tool()
    class MyTool():
        def setup(self):
            self.profile.threads = 2
            self.profile.queue = "Org"

        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def pipeline(self):
            p = jip.Pipeline()
            p.job(threads=3, queue="Yeah").run('MyTool')
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()

    profile = jip.Profile(threads=10, queue="Test")
    profile.specs['MyTool'] = jip.Profile(threads=5)
    profile.apply_to_pipeline(p)

    jobs = jip.create_jobs(p)
    assert jobs[0].threads == 5
    assert jobs[0].queue == "Yeah"
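
Read together with Examples #16 and #17, these assertions pin down the precedence: a value set in a matching named spec wins; options the spec leaves unset fall back to the job() settings made inside the pipeline; only options unset at both levels are filled in from the global profile. The tool's own setup() values lose to the job() settings in both tests here.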
Example #19
def test_tool_name_in_pipelines_with_multiplexing_and_custom_template_name():
    @jip.tool()
    class MyTool():
        """mytool
        usage:
            mytool <data>
        """
        def validate(self):
            self.job.name = "${data}"

        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def validate(self):
            self.name("thepipeline")

        def pipeline(self):
            p = jip.Pipeline()
            p.run('MyTool', data=["A", "B"])
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()
    profile = jip.Profile(name="customname")
    jobs = jip.create_jobs(p, profile=profile)
    assert len(jobs) == 2
    assert jobs[0].name == "A"
    assert jobs[0].pipeline == "customname"
    assert jobs[1].name == "B"
    assert jobs[1].pipeline == "customname"
Example #20
def test_tool_name_in_pipeline_context_with_custom_profile_and_custom_name():
    @jip.tool()
    class MyTool():
        def validate(self):
            self.job.name = "testtool"

        def get_command(self):
            return "echo"

    @jip.pipeline()
    class MyPipeline():
        def validate(self):
            self.name("thepipeline")

        def pipeline(self):
            p = jip.Pipeline()
            p.job('Tool1').run('MyTool')
            return p

    p = jip.Pipeline()
    p.run('MyPipeline')
    p.expand()

    profile = jip.Profile(name="customname")
    jobs = jip.create_jobs(p, profile=profile)
    assert len(jobs) == 1
    assert jobs[0].name == "Tool1"
    assert jobs[0].pipeline == "customname"
Example #21
def test_job_hierarchy_execution_with_dispatching_fan_out(tmpdir):
    tmpdir = str(tmpdir)
    target_file = os.path.join(tmpdir, 'result')

    # create the pipeline
    p = jip.Pipeline()
    a = p.job(dir=tmpdir).bash('echo "hello world"', output=target_file + ".1")
    b = p.job(dir=tmpdir).bash('wc -w', output=target_file + ".2")
    c = p.job(dir=tmpdir).bash('wc -l', output=target_file + ".3")
    a | (b + c)
    p.context(locals())

    # create the jobs
    jobs = jip.create_jobs(p)

    # iterate the executions and run the jobs
    execs = 0
    for e in jip.create_executions(jobs):
        jip.run_job(e.job)
        execs += 1
    assert execs == 1
    # all jobs should have finished successfully
    for j in jobs:
        assert j.state == jip.db.STATE_DONE

    # check the content of the output files
    assert open(target_file + '.1').read().strip() == "hello world"
    assert open(target_file + '.3').read().strip() == "1"
    assert open(target_file + '.2').read().strip() == "2"
Example #22
def test_gem_name_option_delegation_with_output_dir():
    p = jip.Pipeline()
    p.run('grape_gem_rnapipeline',
          fastq='reads_1.fastq.gz',
          index='index.gem',
          annotation='gencode.gtf',
          output_dir="mydir")
    jobs = jip.create_jobs(p, validate=False)
    ldir = os.getcwd()
    j = os.path.join
    assert len(jobs) == 2
    assert jobs[0].configuration['index'].get() == j(ldir, 'index.gem')
    assert jobs[0].configuration['fastq'].get() == j(ldir, 'reads_1.fastq.gz')
    assert jobs[0].configuration['annotation'].get() == j(ldir, 'gencode.gtf')
    assert jobs[0].configuration['quality'].get() == '33'
    assert jobs[0].configuration['output_dir'].get() == "mydir"
    assert jobs[0].configuration['name'].get() == 'reads'
    assert jobs[0].configuration['bam'].get() == j(ldir, 'mydir/reads.bam')
    assert jobs[0].configuration['bai'].get() == j(ldir, 'mydir/reads.bam.bai')
    assert jobs[0].configuration['map'].get() == j(ldir, 'mydir/reads.map.gz')

    assert jobs[1].configuration['input'].get() == j(ldir, 'mydir/reads.bam')
    assert jobs[1].configuration['name'].get() == 'reads'
    assert jobs[1].configuration['annotation'].get() == j(ldir, 'gencode.gtf')
    assert jobs[1].configuration['output_dir'].get() == "mydir"
    assert jobs[1].configuration['gtf'].get() == j(ldir, 'mydir/reads.gtf')

    assert len(jobs[0].children) == 1
    assert len(jobs[1].dependencies) == 1
    assert jobs[0].children[0] == jobs[1]
Example #23
def test_setting_working_directory_cwd_with_profile():
    cwd = os.getcwd()
    p = jip.Pipeline()
    # produce n files
    p.run('produce', prefix='test', number=5)
    profile = jip.profiles.Profile()
    jobs = jip.create_jobs(p, profile=profile)
    assert jobs[0].working_directory == cwd
Example #24
def test_depends_on_with_multiple_nodes():
    p = jip.Pipeline()
    a = p.bash('hostname')
    b = p.bash('hostname')
    c = p.bash('hostname')
    a.depends_on(c, b)

    assert len(list(a.incoming())) == 2
Example #25
    def pipeline(self):
        p = jip.Pipeline()
        p.name("Test2")
        p.job("TestJob2").run('bash',
                              cmd='cat ${input|else("-")}',
                              input=self.options['input'],
                              output=self.options['output'])
        return p
Example #26
def test_hello_world_py_cls(tmpdir):
    tmpdir = str(tmpdir)
    jip.scanner.add_module('examples/hello_world/hello_world.py')
    jip.scanner.scan_modules()
    p = jip.Pipeline()
    p.job(dir=tmpdir).run('cls_hello_world_py')
    jobs = jip.create_jobs(p)
    assert len(jobs) == 1
Example #27
def test_pipeline_with_local_context_in_expand():
    p = jip.Pipeline()
    a = "Makefile"
    p.job().bash("wc -l ${a}")
    p.expand(locals())
    b = p.get('bash')
    assert b is not None
    assert b.cmd.get() == 'wc -l Makefile'
Example #28
def embedded():
    """Produce and consume"""
    p = jip.Pipeline()
    # produce n files
    producer = p.run('produce', prefix='test', number=5)
    # run after success dynamically
    producer.on_success('consume', input=producer)
    return p
Example #29
def test_embedded_options_are_absolute():
    jip.scanner.add_folder("test/data/makeabs")
    p = jip.Pipeline()
    p.run('makeabs', infile="Makefile", output="result")
    jobs = jip.create_jobs(p)
    assert len(jobs) == 1
    cwd = os.getcwd()
    assert jobs[0].command == "(cat %s/Makefile)> %s/result" % (cwd, cwd)
Example #30
    def subedge_pipe_2(tool):
        """Subedge

        usage:
            subedge --input <input> --output <output>
        """
        p = jip.Pipeline()
        p.job('p2').bash('touch', input=tool.input, output=tool.output)
        return p