def test_auto_expose_all(wf):
    """By default, add_step exposes every tool port on the workflow."""
    tool = CommandLineTool(id='t')
    inp = tool.add_input(cwl.String(), 'input')
    out = tool.add_output(cwl.String(), 'output')
    wf.add_step(tool)

    expected_input = WorkflowInput(
        id='input',
        label='input',
        type=Workflow.set_required(Primitive.STRING, False)
    )
    expected_output = WorkflowOutput(
        id='output',
        label='output',
        output_source='{}/{}'.format(tool.id, out.id),
        type=Workflow.set_required(Primitive.STRING, False)
    )
    expected_step = Step(
        id=tool.id,
        in_=[StepInput(inp.id, source=inp.id)],
        out=[StepOutput(out.id)],
        run=tool
    )

    assert wf.inputs == [expected_input]
    assert wf.outputs == [expected_output]
    assert wf.steps[0] == expected_step
def test_add_connection_wf_input(wf):
    """Connecting a workflow input to a step input records the source."""
    wf.add_input(cwl.String(), 'input_str')

    tool = CommandLineTool(id='t1')
    tool.add_input(cwl.String(), 'input_str')

    # expose=[] suppresses auto-exposure so only the explicit connection counts
    wf.add_step(tool, expose=[])
    wf.add_connection('input_str', 't1.input_str')

    assert wf.steps[0].in_ == [StepInput('input_str', source='input_str')]
def test_add_connection_wf_output(wf):
    """Connecting a step output to a workflow output sets its output_source."""
    wf.add_output(cwl.String(), 'output_str')

    tool = CommandLineTool(id='t1')
    tool.add_output(cwl.String(), 'output_str')

    # expose=[] suppresses auto-exposure so only the explicit connection counts
    wf.add_step(tool, expose=[])
    wf.add_connection('t1.output_str', 'output_str')

    expected = WorkflowOutput(
        id='output_str',
        output_source='t1/output_str',
        type=set_required(Primitive.STRING, False)
    )
    assert wf.outputs[0] == expected
def inputs():
    """Fixture data: one input port of every supported CWL type, keyed by id."""
    return {
        'input_str': cwl.String(),
        'input_int': cwl.Int(),
        'input_bool': cwl.Bool(),
        'input_float': cwl.Float(),
        'input_enum': cwl.Enum(['a', 'b', 'c']),
        'input_record': cwl.Record(dict(k1=cwl.String())),
        'input_array': cwl.Array(cwl.Int()),
        'input_no_type': cwl.Any(),
        'input_union': cwl.Union([cwl.Int(), cwl.String()]),
        'input_nonreq_primitive': cwl.String(),
        'input_nonreq_none': cwl.String(),
        'input_nonreq_object': cwl.Enum(['a', 'b', 'c']),
    }
def test_add_connection_inner_nodes(wf):
    """A tool-to-tool connection adds a step output and a sourced step input."""
    first = CommandLineTool(id='t1')
    second = CommandLineTool(id='t2')

    # both tools get an identical single input/output pair
    for tool in (first, second):
        tool.add_input(cwl.String(), 'input')
        tool.add_output(cwl.String(), 'output')
    for tool in (first, second):
        wf.add_step(tool, expose=[])

    wf.add_connection('t1.output', 't2.input')

    assert wf.steps[0].out == [StepOutput('output')]
    assert wf.steps[1].in_ == [StepInput('input', source='t1/output')]
def test_expose_except(wf):
    """expose_except suppresses auto-exposure only for the listed port ids."""
    tool = CommandLineTool(id='test')
    tool.add_input(cwl.Int(default=10, required=True), id='x')
    tool.add_input(cwl.String(), id='y')
    tool.add_output(cwl.File(glob='something', required=True), id='out')

    # 'y' and 'out' are excluded, so only 'x' becomes a workflow input
    wf.add_step(tool, expose_except=['y', 'out'])

    assert wf.inputs == [WorkflowInput(id='x', label='x', type=Primitive.INT)]
    assert wf.outputs == []
"""Example: wrap `grep` as a CWL CommandLineTool with sbg.cwl."""
from sbg import cwl

with cwl.tool('tool1.cwl', 'w') as grep_tool:
    grep_tool.id = 'tool1'
    grep_tool.base_command = ['grep']
    grep_tool.stdout = '_output_'

    grep_tool.add_input(
        cwl.String(required=True),
        'pattern',
        label='pattern',
        input_binding=cwl.InputBinding(shell_quote=False, position=0)
    )
    grep_tool.add_input(
        cwl.File(required=True),
        'inFile',
        label='inFile',
        input_binding=cwl.InputBinding(shell_quote=False, position=1)
    )
    grep_tool.add_output(
        cwl.File(required=True),
        'out',
        label='Out',
        output_binding=cwl.OutputBinding(glob='_output_')
    )

    grep_tool.add_requirement(cwl.Docker(docker_pull='ubuntu:16.04'))
    # required if we want to disable shell_quote
    grep_tool.add_requirement(cwl.ShellCommand())
"""Example: build a two-step workflow from plain functions via @cwl.to_tool."""
from sbg import cwl


# First node
@cwl.to_tool(inputs=dict(x=cwl.String()),
             outputs=dict(out=cwl.Float(required=True)),
             docker='images.sbgenomics.com/filip_tubic/ubuntu1604py')
def to_float(x):
    return dict(out=float(x))


# Second node
@cwl.to_tool(inputs=dict(x=cwl.Float(), n=cwl.Int()),
             outputs=dict(out=cwl.Float()),
             docker='images.sbgenomics.com/filip_tubic/ubuntu1604py')
def times_n(x, n=10):
    return dict(out=x * n)


with cwl.workflow('wf.cwl', 'w') as wf:
    # create tools
    t1 = to_float()
    t2 = times_n()

    # steps
    wf.add_step(t1, expose=['x'])
    wf.add_step(t2, expose=['n', 'out'])

    # add connections: t1.out feeds t2.x
    source = '{}.out'.format(t1.id)
    target = '{}.x'.format(t2.id)
    wf.add_connection(source, target)
# Example: build a tool from a bash script with cwl.from_bash, showing one
# input of every supported type, with and without default values.
from sbg import cwl
import textwrap

cwl.from_bash(
    label='Example tool',
    inputs=dict(
        HELLO="HELLO WORLD",
        STR=cwl.String(),
        INT=cwl.Int(),
        FLOAT=cwl.Float(),
        BOOL=cwl.Bool(),
        ANY=cwl.Any(),
        FILE=cwl.File(),
        DIR=cwl.Dir(),
        ENUM=cwl.Enum(['opt1', 'opt2']),
        INT_OR_STR=cwl.Union([cwl.Int(), cwl.String()]),
        # with default value
        STR_DEF=cwl.String(default="hello"),
        INT_DEF=cwl.Int(default=123),
        FLOAT_DEF=cwl.Float(default=24.42),
        BOOL_DEF=cwl.Bool(default=True),
        ANY_DEF=cwl.Any(default="whatever"),
        ENUM_DEF=cwl.Enum(['opt1', 'opt2'], default='opt2'),
        INT_OR_STR_DEF=cwl.Union([cwl.Int(), cwl.String()], default=22)),
    outputs=dict(out=cwl.File(glob='stdout')),
    # NOTE(review): truncated chunk — the dedented script string (and the
    # close of this call) continues beyond the visible source; the visible
    # part is left verbatim, whitespace untouched, since it is a string
    # literal.
    script=textwrap.dedent(r""" echo $HELLO echo $STR echo $INT echo $FLOAT echo $BOOL
"""Example: scatter a step over an input, then lowercase the gathered array."""
from sbg import cwl

with cwl.workflow('scatter_single.cwl', 'w') as wf:
    # identity expression tool over a single word
    single_tool = cwl.ExpressionTool('$({"out": inputs.word })',
                                     id='expr_tool1')
    single_tool.add_input(cwl.String(required=True), id='word', label='Word')
    single_tool.add_output(cwl.String(required=True), id='out',
                           label='Word out')

    # lowercases every element of the gathered word array
    array_tool = cwl.ExpressionTool(
        '$({"out": inputs.word.map(function(x){ return x.toLowerCase()}) })',
        id='expr_tool2')
    array_tool.add_input(cwl.Array(cwl.String(), required=True), id='word',
                         label='Word')
    array_tool.add_output(cwl.Array(cwl.String(), required=True), id='out',
                          label='Word out')

    wf.add_step(single_tool, expose=['word'], scatter=['word'])
    wf.add_step(array_tool, expose=['out'])
    wf.add_connection('expr_tool1.out', 'expr_tool2.word')

    wf.add_requirement(cwl.InlineJavascript())
    wf.add_requirement(cwl.ScatterFeature())
"""Example: a minimal workflow wrapping a single ExpressionTool."""
from sbg import cwl

with cwl.workflow('expr_example.cwl', 'w') as wf:
    # identity expression: passes the input word straight through to `out`
    expr = cwl.ExpressionTool('$({"out": inputs.word })', id='expr_tool')
    expr.add_input(cwl.String(required=True), id='word', label='Word')
    expr.add_output(cwl.String(required=True), id='out', label='Word out')

    wf.add_step(expr)
    # JS expressions require the InlineJavascript requirement
    wf.add_requirement(cwl.InlineJavascript())
# NOTE(review): this chunk begins mid-file — the opening of the
# @pytest.mark.parametrize('req', [...]) decorator (and the 'cls'
# parametrization) for test_find_requirement sits above the visible
# source; only the tail of the requirement list is visible.
    ShellCommand(),
    Resource(),
    Docker(docker_pull='something'),
    InlineJavascript(),
    EnvVar(EnvironmentDef('foo', 'bar')),
    InitialWorkDir([Dirent('hello world', 'foo.txt')]),
    # NOTE(review): duplicate of the EnvVar entry above — confirm intentional
    EnvVar(EnvironmentDef('foo', 'bar'))
])
def test_find_requirement(req, cls):
    # A requirement added to a tool/workflow is retrievable via its class_ key.
    obj = cls()
    obj.add_requirement(req)
    assert obj.find_requirement(req.class_) == req


@pytest.mark.parametrize('cls', [CommandLineTool, Workflow])
@pytest.mark.parametrize('type', [
    cwl.String(), cwl.Int(), cwl.Float(), cwl.Bool(), cwl.Record(),
    cwl.Enum(), cwl.Array(cwl.Int())
])
def test_get_port(type, cls):
    # get_port resolves both input and output ports by their id.
    obj = cls()
    i = obj.add_input(type, id='in')
    o = obj.add_output(type, id='out')
    assert obj.get_port('in') == i
    assert obj.get_port('out') == o


@pytest.mark.parametrize('cls', [CommandLineTool, Workflow])
@pytest.mark.parametrize('type', [
    cwl.String(), cwl.Int(), cwl.Float(), cwl.Bool(), cwl.Record(),
    cwl.Enum(), cwl.Array(cwl.Int())
# NOTE(review): truncated — this parametrize list and the test it
# decorates continue beyond the visible source.
# NOTE(review): this chunk begins mid-test — the setup of `tool`, `bundle`,
# `encoded`, `postprocess` and `arg` lies above the visible source.
    tool.unarchive_bundle(bundle, encoded=encoded, postprocess=postprocess)
    assert tool.arguments[0] == arg


def make_f(t, r=inspect._empty):
    """Argument `t` is type hint for argument `x` of function `f`."""
    def f(x):
        pass
    # presumably mirrors the metadata @cwl.to_tool attaches — verify against
    # the decorator's implementation
    f.to_tool_args = {'inputs': dict(x=t), 'outputs': r}
    return f


@pytest.mark.parametrize('hint', [
    cwl.Int(), cwl.String(), cwl.Float(), cwl.Bool(), cwl.Record(),
    cwl.Enum()
])
def test_inputs_from_f(tool, hint):
    # _inputs_from_f should map the hint on parameter `x` to one input port.
    f = make_f(hint)
    inputs = tool._inputs_from_f(f)
    i = inputs[0]
    assert i['id'] == 'x'
    assert i['type'] == hint


@pytest.mark.parametrize('hint', [
    cwl.Int(),
# NOTE(review): truncated — this parametrize list and the test it
# decorates continue beyond the visible source.
def outputs():
    """Fixture data: output ports covering plain, glob-pattern, and fixed-name cases."""
    return {
        'out_str': cwl.String(),
        'out_glob_star': cwl.Array(cwl.File(), glob='*.txt'),
        'out_glob': cwl.File(glob="some_name"),
    }
def strelka(
        normal_bam: cwl.File(secondary_files='.bai',
                             doc='Normal sample BAM or CRAM file.'),
        tumor_bam: cwl.File(secondary_files='.bai',
                            doc='Tumor sample BAM or CRAM file.',
                            required=True),
        reference_fasta: cwl.File(
            secondary_files='.fai',
            doc='samtools-indexed reference fasta file [required]'),
        indel_candidates: cwl.File(
            doc='Specify a VCF of candidate indel alleles. These alleles are always '
                'evaluated but only reported in the output when they are inferred to '
                'exist in the sample. The VCF must be tabix indexed. All indel alleles'
                ' must be left-shifted/normalized, any unnormalized alleles will be '
                'ignored. This option may be specified more than once, multiple input '
                'VCFs will be merged.',
            default='None') = None,
        forced_gt: cwl.File(
            doc="Specify a VCF of candidate alleles. "
                "These alleles are always evaluated and "
                "reported even if they are unlikely to exist in the "
                "sample. The VCF must be tabix indexed. All indel "
                "alleles must be left-shifted/normalized, any unnormalized "
                "allele will trigger a runtime error. This option may "
                "be specified more than once, multiple input VCFs will "
                "be merged. Note that for any SNVs provided in the VCF, "
                "the SNV site will be reported (and for gVCF, excluded "
                "from block compression), but the specific SNV "
                "alleles are ignored.",
            default='None') = None,
        exome: cwl.Bool(
            doc="Set options for exome or other targeted input: note in "
                "particular that this flag turns off high-depth filters") = False,
        call_regions: cwl.File(
            doc="Optionally provide a bgzip-compressed/tabix-indexed BED "
                "file containing the set of regions to call. No VCF "
                "output will be provided outside of these regions. "
                "The full genome will still be used to estimate statistics "
                "from the input (such as expected depth per chromosome). "
                "Only one BED file may be specified.",
            default='Call the entire genome') = None,
        scan_size_mb: cwl.Int(
            doc="Maximum sequence region size (in megabases) scanned by "
                "each task during genome variant calling. (default: 12)",
            default=12) = 12,
        region: cwl.String(
            doc="Limit the analysis to one or more genome region(s) for "
                "debugging purposes. If this argument is provided multiple"
                " times the union of all specified regions will be analyzed. "
                "All regions must be non-overlapping to get a meaningful "
                "result. Examples: '--region chr20' (whole chromosome), "
                "'--region chr2:100-2000 --region chr3:2500-3000' "
                "(two regions)'. If this option is specified (one or more times) "
                "together with the --callRegions BED file, then all "
                "region arguments will be intersected with the "
                "callRegions BED track.",
            default='None') = None):
    """Configure and run the Strelka somatic variant-calling workflow.

    Builds the `configureStrelkaSomaticWorkflow.py` command line from the
    given arguments, runs it, then executes the generated `runWorkflow.py`
    locally with 8 jobs.  The cwl annotations on each parameter carry the
    full option descriptions.

    :param normal_bam: normal sample BAM/CRAM (with .bai secondary file)
    :param tumor_bam: tumor sample BAM/CRAM (with .bai secondary file)
    :param reference_fasta: samtools-indexed reference FASTA (.fai)
    :param indel_candidates: optional tabix-indexed VCF of candidate indels
    :param forced_gt: optional tabix-indexed VCF of forced-genotype alleles
    :param exome: enable exome/targeted-input options
    :param call_regions: optional BED file restricting the calling regions
    :param scan_size_mb: per-task scan size in megabases
    :param region: optional region string(s) limiting the analysis
    :return: None — results are produced as side effects in the run dir
    """
    strelka_config_path = '/opt/bin/configureStrelkaSomaticWorkflow.py'
    strelka_cmd = [strelka_config_path]
    # NOTE(review): file arguments are indexed with ['path'] — presumably CWL
    # File objects arrive as dicts carrying a 'path' key; confirm upstream.
    strelka_cmd += ['--normalBam', normal_bam['path']]
    strelka_cmd += ['--tumorBam', tumor_bam['path']]
    strelka_cmd += ['--referenceFasta', reference_fasta['path']]
    # run in the current working directory
    strelka_cmd += ['--runDir', '.']
    # optional arguments: only forwarded when supplied
    if indel_candidates:
        strelka_cmd += ['--indelCandidates', indel_candidates['path']]
    if forced_gt:
        strelka_cmd += ['--forcedGT', forced_gt['path']]
    if exome:
        strelka_cmd += ['--exome']
    if call_regions:
        strelka_cmd += ['--callRegions', call_regions['path']]
    strelka_cmd += ['--scanSizeMb', str(scan_size_mb)]
    if region:
        strelka_cmd += ['--region', region]
    # configure step writes runWorkflow.py into the run dir, then execute it
    # locally with 8 parallel jobs
    check_output(strelka_cmd)
    check_call(['python', 'runWorkflow.py', '-m', 'local', '-j', '8'])