def test_upstream(self):
    tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    res = force_run(*tups, verbose=0)
    print('[param]', spiper.rcParams)

    tups = (simple_job, self.DIR / 'job2', 'ATG',
            self.DIR / 'root.simple_job.out_txt')
    job2 = force_run(*tups, verbose=0)

    res = spiper.graph.get_upstream_nodes([File('/tmp/digit.txt')], strict=0)
    print('''##### no test for get_upstream_nodes()''')

    res = spiper.graph.get_upstream_files(
        [File(job2.output.out_txt)], strict=0, flat=1)[1:]
    expect = [
        InputFile('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
        InputFile('/tmp/digit.txt'),
    ]
    expect = [x.expand() for x in expect]
    assert sorted(expect) == sorted(res), json.dumps((res, expect), indent=2)
def job_hisat2_index(
        self=Default,
        prefix=File,
        FASTA_FILE=InputFile,
        THREADS_=int,
        _IMAGE="docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4",
        _output=[
            Prefix('index_prefix'),
            File('log'),
            File('cmd'),
        ],
):
    #### build a HISAT2 index from FASTA_FILE under index_prefix,
    #### redirecting stdout/stderr into the log file
    CMD = [
        'hisat2-build',
        File(FASTA_FILE),
        Prefix(self.output.index_prefix),
        '&>', File(self.output.log),
    ]
    res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)
    return self
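#### A hypothetical invocation sketch (not from the source): following the
#### force_run(job, prefix, *args) calling pattern used by the tests in this
#### section, positional args after the prefix fill the job's typed
#### parameters. '/work/run1' and '/data/ref.fa' are placeholder paths.
# from spiper.runner import force_run   # import path assumed
# force_run(job_hisat2_index, File('/work/run1'), InputFile('/data/ref.fa'), 4)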
def http_job1(
        self,
        prefix,
        _response1=HttpResponseContentHeader(
            'http://worldtimeapi.org/api/timezone/Europe/London.txt'),
        _output=[File('cache'), File('cmd')],
):
    print(_response1.text[:20])
def http_job2(
        self,
        prefix,
        _response1=HttpResponse(
            'GET', 'http://worldtimeapi.org/api/timezone/Europe/London.txt'),
        _output=[File('cache'), File('cmd')],
):
    with open(self.output.cache, 'w') as f:
        f.write(_response1.text)
    #### re-fetch with curl, resuming ('-C -') into a sibling '.2' file;
    #### '-o' directs curl's output to that file
    res = LoggedShellCommand(
        ['curl', '-LC-', '-o', self.output.cache + '.2', _response1.url],
        self.output.cmd, 1)
    res = LoggedShellCommand(
        ['curl', '-LC-', '-o', self.output.cache + '.2', _response1.url],
        None, 1)
def make_files_for(cmd):
    '''Classify each path-typed leaf of a command into a bind target and a
    mount mode ('ro'/'rw') for the singularity container.'''
    FS = []
    modes = []
    for F in cmd:
        if isinstance(F, (File, Prefix)):
            F = F.realpath()
        if isinstance(F, InputPrefix):
            #### an input prefix: mount every file under it read-only
            res = F.fileglob('*', 1)
            FS += res
            modes += ['ro'] * len(res)
        elif isinstance(F, Prefix):
            #### an output prefix: mount its parent directory read-write
            F.dirname().makedirs_p().check_writable()
            FS.append(File(F.dirname()))
            modes += ['rw']
        elif isinstance(F, InputFile):
            #### an input file must already exist; mount it read-only
            assert F.exists(), (F, cmd)
            FS.append(F)
            modes += ['ro']
        elif isinstance(F, File):
            #### an output file: touch it so the bind target exists,
            #### then mount it read-write
            F.touch() if not F.exists() else None
            FS.append(F)
            modes += ['rw']
        elif isinstance(F, str):
            #### plain argument strings (executable names, flags) are not mounted
            pass
        else:
            assert 0, (type(F), F)
    assert len(FS) == len(modes)
    return FS, modes
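def _sketch_make_files_for():
    #### A minimal usage sketch, assuming spiper's File/InputFile types
    #### behave as in make_files_for() above. '/etc/hosts' is used only
    #### because it exists on most Unix systems; the output path is a
    #### hypothetical placeholder.
    fs, modes = make_files_for([InputFile('/etc/hosts'),
                                File('/tmp/spiper_demo.out')])
    #### the existing input is mounted 'ro'; the output is touched and 'rw'
    assert modes == ['ro', 'rw']
    return list(zip(fs, modes))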
def job_hisat2_align(
        self=Default,
        prefix=File,
        INDEX_PREFIX=Prefix,
        FASTQ_FILE_1=InputFile,
        FASTQ_FILE_2=InputFile,
        THREADS_=int,
        _IMAGE="docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4",
        _IMAGE_SAMTOOLS="docker://quay.io/biocontainers/samtools:1.10--h9402c20_2",
        _output=[
            File('bam'),
            File('log'),
            File('cmd'),
        ],
):
    #### step 1: align paired-end reads with hisat2, writing SAM + log
    CMD = [
        'hisat2', '-x', Prefix(INDEX_PREFIX),
        '-1', str(FASTQ_FILE_1),
        '-2', str(FASTQ_FILE_2),
        '-S', str(self.output.bam + '.sam'),
        '--threads', str(THREADS_),
        '--no-mixed',
        '--rna-strandness', 'RF',
        '--dta',
        '--fr',
        '&>', str(self.output.log),
    ]
    CMD = list_flatten_strict(CMD)
    res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)

    _ = '''
    samtools view /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1.sam -b --threads 4 -o 809_S1.bam
    '''
    #### step 2: convert SAM to BAM ('-b', as in the reference command above)
    CMD = [
        'samtools', 'view',
        File(self.output.bam + '.sam'),
        '-b',
        '--threads', str(THREADS_),
        '-o', File(self.output.bam + '.unsorted'),
    ]
    CMD = list_flatten_strict(CMD)
    res = SingularityShellCommand(CMD, _IMAGE_SAMTOOLS, self.output.cmd)

    #### step 3: coordinate-sort into the final BAM
    CMD = [
        'samtools', 'sort',
        File(self.output.bam + '.unsorted'),
        '--threads', str(THREADS_),
        '-o', File(self.output.bam),
    ]
    CMD = list_flatten_strict(CMD)
    res = SingularityShellCommand(CMD, _IMAGE_SAMTOOLS, self.output.cmd)
    return self
def simple_job(self=Default, prefix=File, s=str, digitFile=InputFile,
               _output=[File('out_txt')]):
    with open(self.output.out_txt, 'w') as f:
        print(s * 10)
        f.write(s * 10)
    print('do something else')
    return self
def simple_job(self=Default, prefix=File, s=str, digitFile=InputFile,
               _output=[File('out_txt')]):
    _out = get_output_files(self, prefix, _output)
    with open(_out.out_txt, 'w') as f:
        print(s * 10)
        f.write(s * 10)
    print('do something else')
def simple_job(
        self,
        prefix,
        s=str,
        digitFile=InputFile,
        _output=[File('out_txt')]):
    #### no-op list comprehension; presumably kept so this variant's
    #### bytecode differs from the versions above
    [x for x in range(10)]
    _out = get_output_files(self, prefix, _output)
    with open(_out.out_txt, 'w') as f:
        print(s * 10)
        f.write(s * 10)
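#### A minimal invocation sketch (hypothetical, not from the source),
#### following the force_run(job, prefix, *args) pattern used by the tests
#### in this section; output files are addressed as
#### <prefix>.<job_name>.<output_name>.
def _sketch_run_simple_job(DIR):
    res = force_run(simple_job, DIR / 'root', 'ATG', '/tmp/digit.txt')
    #### chain the first job's output file into a second call
    return force_run(simple_job, DIR / 'job2', 'ATG', res.output.out_txt)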
def test_downstream(self):
    dir_layout = 'clean'
    (self.DIR / 'root').dirname().rmtree_p()
    tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*tups, dir_layout=dir_layout, verbose=0)

    tups = (simple_job, self.DIR / 'job2', 'ATG',
            self.DIR / 'root.simple_job.out_txt')
    force_run(*tups, dir_layout=dir_layout, verbose=0)

    import spiper.graph
    res = spiper.graph.get_downstream_nodes(
        [File('/tmp/digit.txt')], strict=0, flat=0, dir_layout=dir_layout)
    print('''##### no test for nodes in get_downstream_nodes()''')

    res = spiper.graph.get_downstream_files(
        [File('/tmp/digit.txt')], strict=0, flat=1,
        dir_layout=dir_layout, verbose=2)[1:]
    expect = [
        File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'),
        File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'),
    ]
    expect = [x.expand() for x in expect]
    assert sorted(expect) == sorted(res), json.dumps(
        (res, expect), indent=2, default=repr)
def test_downstream(self):
    dir_layout = 'clean'
    tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*tups, config=dir_layout, verbose=0)

    tups = (simple_job, self.DIR / 'job2', 'ATG',
            self.DIR / 'root.simple_job.out_txt')
    force_run(*tups, config=dir_layout, verbose=0)

    import spiper.runner
    res = spiper.runner.get_downstream_nodes(
        File('/tmp/digit.txt'), strict=0, flat=0, config=dir_layout)
    print('''##### no test for nodes in get_downstream_nodes()''')

    res = spiper.runner.get_downstream_files(
        File('/tmp/digit.txt'), strict=0, flat=1, config=dir_layout)
    expect = [
        File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'),
        File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'),
    ]
    expect = [x.expand() for x in expect]
    assert sorted(expect) == sorted(res), json.dumps((res, expect), indent=2)
def LoggedSingularityCommand(prefix, cmd, image, log_file,
                             check=1, mode='w', is_exec=1, multiline=0,
                             extra_files=None, debug=0):
    '''
    return a tuple (executed_command, command_stdout)
    cmd:         a list of str-like objects that get concatenated into a shell command
    image:       a singularity image url
    extra_files: to-be-deprecated
    debug:       print debug info
    '''
    if extra_files is None:
        extra_files = []
    #### all output paths derive from Prefix, hence only Prefix needs realpath();
    #### input paths are realised at job-calling time
    leafs = list_flatten_strict(cmd)
    if debug:
        print(json.dumps(list(map(repr, leafs)), indent=4))
    FS, modes = make_files_for(leafs + extra_files)
    if debug:
        print(json.dumps(list(map(repr, FS)), indent=4))
    #### de-duplicate binds per file while keeping each file's mode aligned;
    #### 'rw' wins if a file is requested with both modes
    by_file = {}
    for f, m in zip(FS, modes):
        by_file[f] = 'rw' if 'rw' in (by_file.get(f), m) else 'ro'
    bfs = [':'.join([f, f, m]) for f, m in sorted(by_file.items())]
    if debug:
        print(json.dumps(list(map(repr, bfs)), indent=4))
    _ = '''
    FILE=/tmp/_spiper.$$.script.sh; touch $FILE; chmod +x $FILE
    cat <<EOF >$FILE
    cat - | python3 -c "import sys; print('hi');[print('[line]',line) for line in sys.stdin]"
    EOF
    cat $FILE | singularity exec --bind $FILE:$FILE:ro docker://python:3.5-alpine $FILE
    '''
    cmd_curr = [
        'singularity', 'exec', '--contain', '--writable',
        ['--workdir',
         File(prefix + '.singularity_temp').makedirs_p().check_writable(),
         ] if prefix else [],
        '\\\n',
        ['--bind', ','.join(bfs), '\\\n'] if len(bfs) else [],
        image, '\\\n',
        ['bash', '<<EOF\n', 'set', '-e;',
         [list_to_string([x], strict=1) for x in cmd],
         '\nEOF\n',
         ] if multiline else [list_to_string([x], strict=1) for x in cmd],
    ]
    cmd_curr = list_flatten_strict(cmd_curr)
    if not is_exec:
        return cmd_curr
    stdout = LoggedShellCommand(cmd_curr, log_file, check, mode=mode)
    return (cmd_curr, stdout)
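def _sketch_bind_spec():
    #### Illustrative sketch (stdlib only, not spiper API): how the bfs list
    #### above turns (file, mode) pairs into a singularity --bind argument of
    #### the form 'src:dest:mode,...'. The paths are hypothetical.
    pairs = [('/data/ref.fa', 'ro'), ('/work/out', 'rw')]
    bind_arg = ','.join(':'.join([f, f, m]) for f, m in pairs)
    assert bind_arg == '/data/ref.fa:/data/ref.fa:ro,/work/out:/work/out:rw'
    return ['singularity', 'exec', '--bind', bind_arg]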
def _cache_run(job, args, dir_layout, mock, check_only, check_changed, force,
               verbose):
    '''
    return: job_result
    Check whether a valid cache exists for a job recipe. Load the cache as
    the result if so, otherwise recalculate the job.
    ##### we want to avoid re-calculating outputs that already exist and are intact
    ##### this is done by storing identity information on disk
    ##### this identity information is calculated from the output files,
    ##### which could be an md5sum or a timestamp
    '''
    func_name = get_func_name()
    prefix = args[0]
    runner = partial(cache_run, dir_layout=dir_layout, mock=mock,
                     check_only=check_only, check_changed=check_changed,
                     force=force, verbose=verbose)

    ###### the _input changes if any of func.co_code/func.co_consts/input_args changed
    ###### the prefix is ignored in to_ident() because it would point to a different ident_file
    ###### Caller.from_input() also casts types for inputs
    _input = args
    _caller = Caller.from_input(job, _input, dir_layout)
    _input = [_caller.to_ident()]
    print(repr(_caller)) if verbose >= 3 else None

    input_ident_file = IdentFile(dir_layout, prefix, job.__name__, 'input_json')
    output_ident_file = IdentFile(dir_layout, prefix, job.__name__, 'output_json')
    output_cache_file = _caller.output_cache_file
    File(input_ident_file).dirname().makedirs_p()

    #### calculate output files; the cache_file is a constitutive output
    _output = _caller.get_output_files()

    input_ident_changed = ident_changed(get_identity(_input),
                                        input_ident_file, 'ident')
    output_ident_changed = ident_changed(get_identity(_output),
                                         output_ident_file, 'ident')
    use_cache = not input_ident_changed and not output_ident_changed
    if check_only:
        return bool(use_cache)
    if check_changed:
        if check_changed >= 2:
            input_ident = get_identity(_input)
            input_ident_old = _loads(
                json.load(open(input_ident_file, 'r'))['ident'])
            output_ident = get_identity(_output)
            output_ident_old = _loads(
                json.load(open(output_ident_file, 'r'))['ident'])
            import pdb
            pdb.set_trace()
        return (input_ident_changed, output_ident_changed)

    if verbose:
        print('[{func_name}]'.format(**locals()),
              json.dumps(_dict([
                  ('job_name', job.__name__),
                  ('input_ident_changed', int(input_ident_changed)),
                  ('output_ident_changed', int(output_ident_changed)),
              ]), separators='_='))
    if verbose >= 2:
        import pdb
        pdb.set_trace()

    if force:
        use_cache = False
    if mock:
        use_cache = False
    if (_caller.output_cache_file + '.mock').isfile():
        use_cache = False

    if use_cache:
        with open(output_cache_file, 'rb') as f:
            result = pickle.load(f)
    else:
        if mock:
            #### a mock run must start from uninitialised outputs; leave
            #### '.mock' marker files instead of real outputs
            for k, v in _caller.output.items():
                for f in v.expanded():
                    if f.isfile():
                        raise spiper._types.OverwriteError(
                            'mock_run() must be done with file uninitialised: %r' % v)
                vs = (v + '.mock')
                vs.touch() if not vs.isfile() else None
            if issubclass(_caller.job_type, spiper._types.NodeFunction):
                result = _caller
            else:
                ### recurse if not a terminal node
                result = _caller(runner)
        else:
            #### a real run clears any stale '.mock' markers first
            for k, v in _caller.output.items():
                vs = (v + '.mock')
                vs.unlink() if vs.isfile() else None
            result = _caller(runner)
            for k, v in _caller.output.items():
                func = getattr(v, 'callback_output', lambda *x: None)
                func(_caller, k)

    _input_ident = get_identity(_input)
    _output_ident = get_identity(_output)
    ident_dump(
        [('comment', [[repr(x) for x in _output], _output_ident]),
         ('output_dump', _dumps(_output)),
         ('ident', _dumps(_output_ident))],
        output_ident_file,
    )
    ident_dump(
        [('comment', _caller.to_dict()),
         ('caller_dump', _dumps(_caller)),
         ('ident', _dumps(_input_ident))],
        input_ident_file,
    )

    #### add an edge_file for each input: record the input ident in outward_pk
    outward_dir_list = get_outward_json_list(_caller.arg_tuples, dir_layout)
    for outward_dir in outward_dir_list:
        outward_edge_file = outward_dir.makedirs_p() / str(
            hash_nr(_input_ident)) + '.%s.json' % job.__name__
        ident_dump(
            [('comment', _caller.to_dict()),
             ('caller_dump', _dumps(_caller)),
             ('ident', _dumps(_input_ident))],
            outward_edge_file,
        )

    #### remove edge_files of outputs
    outward_dir_list = get_outward_json_list(_caller._output_dict.items(),
                                             dir_layout)
    for outward_dir in outward_dir_list:
        shutil.move(outward_dir.makedirs_p(), (outward_dir + '_old').rmtree_p())
        outward_dir = outward_dir.makedirs_p()
    return result
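def _sketch_ident_cache():
    '''
    A self-contained sketch (stdlib only, not the spiper implementation) of
    the caching idea documented in _cache_run() above: hash the inputs,
    compare against an ident file on disk, and only recompute when something
    changed. The ident-file path and the input dict are hypothetical.
    '''
    import hashlib
    import json
    import os
    ident_file = '/tmp/spiper_demo.input_json'
    inputs = {'func': 'simple_job', 'args': ['ATG', '/tmp/digit.txt']}
    ident = hashlib.md5(
        json.dumps(inputs, sort_keys=True).encode()).hexdigest()
    old = None
    if os.path.isfile(ident_file):
        with open(ident_file) as f:
            old = json.load(f).get('ident')
    changed = ident != old
    with open(ident_file, 'w') as f:
        json.dump({'ident': ident}, f)
    return changed  # the caller would reuse the cached result when False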
def job_trimmomatic(
        self=Default,
        prefix=File,
        FASTQ_FILE_1=InputFile,
        FASTQ_FILE_2=InputFile,
        THREADS_=int,
        _IMAGE='docker://quay.io/biocontainers/trimmomatic:0.35--6',
        _output=[
            File('fastq_1'),
            File('fastq_2'),
            File('log'),
            File('cmd'),
        ],
):
    _ = '''
    trimmomatic PE -threads 4 -phred33
        /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R1_raw.fastq
        /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R2_raw.fastq
        809_S1_R1_raw_pass.fastq 809_S1_R1_raw_fail.fastq
        809_S1_R2_raw_pass.fastq 809_S1_R2_raw_fail.fastq
        ILLUMINACLIP:/home/Program_NGS_sl-pw-srv01/Trimmomatic-0.32/adapters/TruSeq3-PE-2.fa:6:30:10
        LEADING:3 TRAILING:3 MINLEN:36 SLIDINGWINDOW:4:15
    '''
    CMD = [
        'trimmomatic', 'PE',
        '-threads', str(THREADS_),
        '-phred33',
        File(FASTQ_FILE_1),
        File(FASTQ_FILE_2),
        File(self.output.fastq_1),
        File(self.output.fastq_1 + '.fail'),
        File(self.output.fastq_2),
        File(self.output.fastq_2 + '.fail'),
        'ILLUMINACLIP:'
        '/usr/local/share/trimmomatic-0.35-6/adapters/TruSeq3-PE-2.fa'
        ':6:30:10',
        'LEADING:3', 'TRAILING:3', 'MINLEN:36', 'SLIDINGWINDOW:4:15',
        '&>', File(self.output.log),
    ]
    CMD = list_flatten_strict(CMD)
    res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)
    return self