Example #1
    def test_upstream(self):
        tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
        res = force_run(*tups, verbose=0)
        print('[param]', spiper.rcParams)
        tups = (simple_job, self.DIR / 'job2', 'ATG',
                self.DIR / 'root.simple_job.out_txt')
        job2 = force_run(*tups, verbose=0)

        res = spiper.graph.get_upstream_nodes([File('/tmp/digit.txt')],
                                              strict=0)
        print('''##### no test for get_upstream_nodes()''')
        # print(res)

        # res ==[]
        res = spiper.graph.get_upstream_files([File(job2.output.out_txt)],
                                              strict=0,
                                              flat=1)[1:]
        expect = [
            InputFile(
                '~/.temp/singular-pipe_test_build/root.simple_job.out_txt').
            expand(),
            InputFile('/tmp/digit.txt')
        ]
        expect = [x.expand() for x in expect]
        assert sorted(expect) == sorted(res), json.dumps((res, expect),
                                                         indent=2)
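
Why the test slices off element 0 (an observation drawn from the assertions above, not a documented contract):

res = spiper.graph.get_upstream_files([File(job2.output.out_txt)],
                                      strict=0, flat=1)
# res[0] is the queried file itself; res[1:] are its transitive inputs,
# here root.simple_job.out_txt and /tmp/digit.txt, hence the [1:] slice.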
Example #2
def job_hisat2_index( 
	self = Default,
	prefix = File, 
	FASTA_FILE = InputFile,
	THREADS_  = int,
	_IMAGE    = "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4",
	_output   = [
		Prefix('index_prefix'), 
		File('log'),
		File('cmd'),
	],
	):
	# output paths are resolved onto self.output by the runner, named as
	# "{prefix}.{func_name}.{suffix}"; the older explicit pattern
	# get_output_files(self, prefix, _output) is shown in Example #8

	CMD = [
		'hisat2-build',
		File(FASTA_FILE),
		Prefix(self.output.index_prefix),
		'&>',
		File(self.output.log),
	]
	res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)
	return self
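
A minimal invocation sketch, assuming the force_run(job, prefix, *args) calling pattern from Examples #1 and #10; the import path, file paths, and thread count are hypothetical:

from spiper.runner import force_run  # import path assumed

force_run(
    job_hisat2_index,
    '/data/work/grch38',    # prefix: outputs land at <prefix>.job_hisat2_index.*
    '/data/ref/genome.fa',  # FASTA_FILE
    4,                      # THREADS_
    verbose=1,
)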
Example #3
def http_job1(
    self,
    prefix,
    _response1=HttpResponseContentHeader(
        'http://worldtimeapi.org/api/timezone/Europe/London.txt'),
    _output=[File('cache'), File('cmd')],
):
    print(_response1.text[:20])
Example #4
def http_job2(
    self,
    prefix,
    _response1=HttpResponse(
        'GET', 'http://worldtimeapi.org/api/timezone/Europe/London.txt'),
    _output=[File('cache'), File('cmd')],
):
    with open(self.output.cache, 'w') as f:
        f.write(_response1.text)
    res = LoggedShellCommand(
        ['curl', '-LC-', self.output.cache + '.2', _response1.url],
        self.output.cmd, 1)
    res = LoggedShellCommand(
        ['curl', '-LC-', self.output.cache + '.2', _response1.url], None, 1)
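
A hypothetical run of http_job2; the prefix is made up, and the point (inferred from the declared _response1 default) is that the HTTP response is treated as a job input:

from spiper.runner import force_run  # import path assumed

force_run(http_job2, '/tmp/demo/http', verbose=1)
# _response1 is injected from its default; a changed response upstream
# should invalidate the cached run like any other changed input.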
Example #5
def make_files_for(cmd):
    FS = []
    modes = []
    for F in cmd:
        if isinstance(F, (File, Prefix)):
            F = F.realpath()
            if isinstance(F, InputPrefix):
                #### input prefix: mount every matching file read-only
                res = F.fileglob('*', 1)
                FS += res
                modes += ['ro'] * len(res)
            elif isinstance(F, Prefix):
                #### plain (output) prefix: mount its parent directory read-write
                F.dirname().makedirs_p().check_writable()
                FS.append(File(F.dirname()))
                modes += ['rw']
            elif isinstance(F, InputFile):
                #### input file: must already exist; mount read-only, don't touch
                assert F.exists(), (F, cmd)
                FS.append(F)
                modes += ['ro']
            elif isinstance(F, File):
                #### output file: touch so the bind target exists, mount read-write
                F.touch() if not F.exists() else None
                FS.append(F)
                modes += ['rw']
            else:
                assert 0, (type(F), F)
    assert len(FS) == len(modes)
    return FS, modes
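
A small illustration (not from the source) of how the returned (FS, modes) pairs become singularity bind specs; the join mirrors the bfs construction in LoggedSingularityCommand (Example #12), and the paths are hypothetical:

FS, modes = make_files_for([File('/tmp/work/out.txt'), InputFile('/data/in.fa')])
bfs = [':'.join([f, f, m]) for f, m in zip(FS, modes)]
# e.g. ['/tmp/work/out.txt:/tmp/work/out.txt:rw', '/data/in.fa:/data/in.fa:ro']
# note: the InputFile must already exist, or the assert above fires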
Example #6
def job_hisat2_align(
	self   = Default,
	prefix = File,
	INDEX_PREFIX = Prefix,
	FASTQ_FILE_1 = InputFile,
	FASTQ_FILE_2 = InputFile,
	THREADS_ = int,
	_IMAGE   = "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4",
	_IMAGE_SAMTOOLS = "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2",
	_output = [
		File('bam'),
		File('log'),
		File('cmd'),
	]
	):
	CMD = [
		'hisat2', '-x',
		Prefix(INDEX_PREFIX),
		'-1', str(FASTQ_FILE_1),
		'-2', str(FASTQ_FILE_2),
		'-S', str(self.output.bam + '.sam'),
		'--threads', str(THREADS_),
		'--no-mixed',
		'--rna-strandness', 'RF',
		'--dta',
		'--fr',
		'&>', str(self.output.log),
	]
	CMD = list_flatten_strict(CMD)
	res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)

	_ = '''
	samtools view /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1.sam -b --threads 4 -o 809_S1.bam
	'''
	CMD = [
		'samtools', 'view',
		File(self.output.bam + '.sam'),
		'-b',  # emit BAM, matching the reference command above
		'--threads', str(THREADS_),
		'-o',
		File(self.output.bam + '.unsorted'),
	]
	CMD = list_flatten_strict(CMD)
	res = SingularityShellCommand(CMD, _IMAGE_SAMTOOLS, self.output.cmd)


	CMD = [
		'samtools', 'sort',
		File(self.output.bam + '.unsorted'),
		'--threads', str(THREADS_),
		'-o',
		File(self.output.bam),
	]
	CMD = list_flatten_strict(CMD)
	res = SingularityShellCommand(CMD, _IMAGE_SAMTOOLS, self.output.cmd)
	return self
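
A hypothetical chaining sketch: build the index with Example #2's job, then align against it; the res_index.output.index_prefix access follows the res.output.out_txt pattern from Example #1, and all paths are made up:

res_index = force_run(job_hisat2_index, '/data/work/grch38',
                      '/data/ref/genome.fa', 4)
force_run(job_hisat2_align, '/data/work/sample1',
          res_index.output.index_prefix,   # INDEX_PREFIX
          '/data/reads/sample1_R1.fastq',  # FASTQ_FILE_1
          '/data/reads/sample1_R2.fastq',  # FASTQ_FILE_2
          4)                               # THREADS_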
Example #7
        def simple_job(self=Default,
                       prefix=File,
                       s=str,
                       digitFile=InputFile,
                       _output=[File('out_txt')]):
            with open(self.output.out_txt, 'w') as f:
                print(s * 10)
                f.write(s * 10)

                print('do something else')
            return self
Example #8
        def simple_job(self=Default,
                       prefix=File,
                       s=str,
                       digitFile=InputFile,
                       _output=[File('out_txt')]):
            _out = get_output_files(self, prefix, _output)
            with open(_out.out_txt, 'w') as f:
                print(s * 10)
                f.write(s * 10)

                print('do something else')
Example #9
def simple_job(
        self,
        prefix,
        # self = Default,
        # prefix=Prefix,
        s=str,
        digitFile=InputFile,
        _output=[File('out_txt')]):
    # no-op; editing the function body changes func.co_code, which
    # invalidates the cached input identity (see Example #13)
    [x for x in range(10)]
    _out = get_output_files(self, prefix, _output)
    with open(_out.out_txt, 'w') as f:
        print(s * 10)
        f.write(s * 10)
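
A run sketch for the job above; the output naming <prefix>.simple_job.out_txt is inferred from the tests in Examples #1 and #10, and the paths are hypothetical:

res = force_run(simple_job, '/tmp/build/root', 'ATG', '/tmp/digit.txt')
# writes s * 10 ('ATGATGATG...') to /tmp/build/root.simple_job.out_txt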
Example #10
    def test_downstream(self):
        dir_layout = 'clean'
        # import spiper
        # dir_layout = spiper.DEFAULT_DIR_LAYOUT
        (self.DIR / 'root').dirname().rmtree_p()
        tups = (
            simple_job,
            self.DIR / 'root',
            'ATG',
            '/tmp/digit.txt',
        )
        force_run(*tups, dir_layout=dir_layout, verbose=0)
        tups = (
            simple_job,
            self.DIR / 'job2',
            'ATG',
            self.DIR / 'root.simple_job.out_txt',
        )
        force_run(*tups, dir_layout=dir_layout, verbose=0)

        import spiper.graph
        res = spiper.graph.get_downstream_nodes([File('/tmp/digit.txt')],
                                                strict=0,
                                                flat=0,
                                                dir_layout=dir_layout)
        print('''##### no test for nodes in get_downstream_nodes()''')
        # print(res)

        res = spiper.graph.get_downstream_files([File('/tmp/digit.txt')],
                                                strict=0,
                                                flat=1,
                                                dir_layout=dir_layout,
                                                verbose=2)[1:]
        # res = spiper.runner.get_downstream_targets(File('/tmp/digit.txt'),strict=0,flat=0,target='all',dir_layout=dir_layout)
        expect = [
            File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
            File(
                '~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'
            ),
            File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
            File(
                '~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'
            ),
        ]
        expect = [x.expand() for x in expect]
        assert sorted(expect) == sorted(res), json.dumps((res, expect),
                                                         indent=2,
                                                         default=repr)
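
What the slice and the expected list above imply (an observation, not a documented guarantee):

res = spiper.graph.get_downstream_files([File('/tmp/digit.txt')],
                                        strict=0, flat=1, dir_layout='clean')
# res[0] is the queried file; res[1:] lists, for each affected job, both its
# declared outputs and the per-job cache file _spiper/<prefix>.<job>.cache_pk.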
Example #11
    def test_downstream(self):
        dir_layout = 'clean'
        tups = (
            simple_job,
            self.DIR / 'root',
            'ATG',
            '/tmp/digit.txt',
        )
        force_run(*tups, config=dir_layout, verbose=0)
        tups = (
            simple_job,
            self.DIR / 'job2',
            'ATG',
            self.DIR / 'root.simple_job.out_txt',
        )
        force_run(*tups, config=dir_layout, verbose=0)

        import spiper.runner
        res = spiper.runner.get_downstream_nodes(File('/tmp/digit.txt'),
                                                 strict=0,
                                                 flat=0,
                                                 config=dir_layout)
        print('''##### no test for nodes in get_downstream_nodes()''')
        # print(res)

        res = spiper.runner.get_downstream_files(File('/tmp/digit.txt'),
                                                 strict=0,
                                                 flat=1,
                                                 config=dir_layout)
        expect = [
            File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
            File(
                '~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'
            ),
            File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
            File(
                '~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'
            ),
        ]
        expect = [x.expand() for x in expect]
        assert sorted(expect) == sorted(res), json.dumps((res, expect),
                                                         indent=2)
Example #12
    def LoggedSingularityCommand(prefix,
                                 cmd,
                                 image,
                                 log_file,
                                 check=1,
                                 mode='w',
                                 is_exec=1,
                                 multiline=0,
                                 extra_files=None,
                                 debug=0):
        '''
		return a tuple (executed_command, command_stdout)
			cmd: a list of str-like objects that get concatenated into a shell command
			image: a singularity image url
			extra_files: to-be-deprecated
			debug: print debug info
		'''
        if extra_files is None:
            extra_files = []

        #### note: output paths all derive from Prefix, so only Prefix needs
        #### realpath(); input paths are realised when the job is called.

        leafs = list_flatten_strict(cmd)
        if debug: print(json.dumps(
                list(map(repr, leafs)),
                indent=4,
        ))

        FS, modes = make_files_for(leafs + extra_files)
        FS = list(set(FS))
        if debug: print(json.dumps(
                list(map(repr, FS)),
                indent=4,
        ))

        bfs = [':'.join([f, f, m]) for f, m in zip(FS, modes)]
        # bfs = bind_files( FS + extra_files)
        if debug: print(json.dumps(
                list(map(repr, bfs)),
                indent=4,
        ))
        '''
		FILE=/tmp/_spiper.$$.script.sh; touch $FILE; chmod +x $FILE
		cat <<EOF >$FILE
		cat - | python3 -c "import sys; print('hi');[print('[line]',line) for line in sys.stdin]"
		EOF
		cat $FILE | singularity exec --bind $FILE:$FILE:ro docker://python:3.5-alpine $FILE
		'''

        cmd_curr = [
            # '\n',
            'singularity',
            'exec',
            '--contain',
            '--writable',
            [
                '--workdir',
                File(prefix +
                     '.singularity_temp').makedirs_p().check_writable()
            ] if prefix else [],
            '\\\n',
            # extra_params,
            ['--bind', ','.join(bfs), '\\\n'] if len(bfs) else [],
            # [-1],'--bind','/tmp:/tmp',
            image,
            '\\\n',
            [
                'bash',
                '<<EOF\n',
                'set',
                '-e;',
                [list_to_string([x], strict=1) for x in cmd],
                '\nEOF\n',
            ] if multiline else [list_to_string([x], strict=1) for x in cmd],
            # '\n',
        ]
        cmd_curr = list_flatten_strict(cmd_curr)
        if not is_exec:
            return cmd_curr
        stdout = LoggedShellCommand(cmd_curr, log_file, check, mode=mode)
        return (cmd_curr, stdout)
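
A sketch (image and paths hypothetical) using is_exec=0, which per the code above returns the composed token list instead of executing it; useful for inspecting the generated singularity command:

tokens = LoggedSingularityCommand(
    prefix='/tmp/work/demo',
    cmd=['echo', 'hello'],
    image='docker://python:3.5-alpine',
    log_file=None,
    is_exec=0,
)
print(' '.join(map(str, tokens)))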
Example #13
def _cache_run(job, args, dir_layout, mock, check_only, check_changed, force,
               verbose):
    '''
	return: job_result
		Check whether a valid cache exists for a job recipe.
		Load the cache as the result if so, otherwise recalculate the job.

	##### we want to avoid re-computing outputs that already exist and are intact
	##### this is done by storing identity information on disk
	##### the identity is calculated from the output files,
	##### e.g. an md5sum or a timestamp
	'''
    func_name = get_func_name()
    prefix = args[0]
    runner = partial(cache_run,
                     dir_layout=dir_layout,
                     mock=mock,
                     check_only=check_only,
                     check_changed=check_changed,
                     force=force,
                     verbose=verbose)

    ###### the _input is changed if one of the func.co_code/func.co_consts/input_args changed
    ###### the prefix is ignored in to_ident() because it would point to a different ident_file
    #####  Caller.from_input() would also cast types for inputs
    _input = args
    _caller = Caller.from_input(job, _input, dir_layout)
    _input = [_caller.to_ident()]
    # print(_dump()) if verbose>=2 else None
    print(repr(_caller)) if verbose >= 3 else None

    input_ident_file = IdentFile(dir_layout, prefix, job.__name__,
                                 'input_json')
    output_ident_file = IdentFile(dir_layout, prefix, job.__name__,
                                  'output_json')
    # output_cache_file=  IdentFile(config, prefix, job, 'cache_pk')
    output_cache_file = _caller.output_cache_file
    File(input_ident_file).dirname().makedirs_p()

    #### calculate output files
    ### cast all files all as prefix
    ### here we add cache_file as a constitutive output.
    _output = _caller.get_output_files()
    # _output = get_output_files( job, prefix, job._output_type._typed_fields) + (CacheFile(output_cache_file),)
    # print('[out1]',_output)

    input_ident_changed = ident_changed(get_identity(_input, ),
                                        input_ident_file, 'ident')
    output_ident_changed = ident_changed(get_identity(_output, ),
                                         output_ident_file, 'ident')
    use_cache = not input_ident_changed and not output_ident_changed
    if check_only:
        return use_cache
    if check_changed:
        if check_changed >= 2:
            input_ident = get_identity(_input)
            input_ident_old = _loads(
                json.load(open(input_ident_file, 'r'))['ident'])
            output_ident = get_identity(_output)
            output_ident_old = _loads(
                json.load(open(output_ident_file, 'r'))['ident'])
            import pdb
            pdb.set_trace()
        return (input_ident_changed, output_ident_changed)

    if verbose:
        print('[{func_name}]'.format(**locals()),
              json.dumps(_dict([
                  ('job_name', job.__name__),
                  ('input_ident_changed', int(input_ident_changed)),
                  ('output_ident_changed', int(output_ident_changed))
              ]),
                         separators='_=')
              # .replace('"','')
              )
        if verbose >= 2:
            import pdb
            pdb.set_trace()

    if force:
        use_cache = False
    if mock:
        use_cache = False

    if (_caller.output_cache_file + '.mock').isfile():
        use_cache = False

    if use_cache:
        with open(output_cache_file, 'rb') as f:
            result = pickle.load(f)

    else:
        # if not issubclass(_caller.job_type, spiper._types.NodeFunc):
        # 	mock = 0
        if mock:
            for k, v in _caller.output.items():
                for f in v.expanded():
                    if f.isfile():
                        raise spiper._types.OverwriteError(
                            'mock_run() must be done with file uninitialised: %r'
                            % v)
                    # assert not f.isfile(),('mock_run() must be done with file uninitialised: %r' % v)
                vs = (v + '.mock')
                vs.touch() if not vs.isfile() else None
            # result = _caller
            if issubclass(_caller.job_type, spiper._types.NodeFunction):
                result = _caller
            else:
                ### recurse if not a Terminal Node
                result = _caller(runner)
        else:
            for k, v in _caller.output.items():
                vs = (v + '.mock')
                vs.unlink() if vs.isfile() else None
            result = _caller(runner)

        for k, v in _caller.output.items():
            func = getattr(v, 'callback_output', lambda *x: None)
            func(_caller, k)
            # method(_caller)
            # if hasattr(x,'callback_output'):
            # 	x.output_callback(_caller)
        # ident_dump( result, output_cache_file, )
        _input_ident = get_identity(_input)
        _output_ident = get_identity(_output)

        ident_dump(
            [('comment', [[repr(x) for x in _output], _output_ident]),
             ('output_dump', _dumps(_output)),
             ('ident', _dumps(_output_ident))],
            output_ident_file,
        )
        # comment = [[repr(x) for x in _output],get_identity(_output)] ) ### outputs are all
        ident_dump([
            ('comment', _caller.to_dict()),
            ('caller_dump', _dumps(_caller)),
            ('ident', _dumps(_input_ident)),
        ], input_ident_file)
        # ident_dump( _input_ident  , input_ident_file,  comment = (_caller.to_dict(),  _dumps( _caller)))

        #### add edge_file to inputs
        ### add input and output ident to outward_pk
        # outward_dir_list = get_outward_json_list( _input, config)
        outward_dir_list = get_outward_json_list(_caller.arg_tuples,
                                                 dir_layout)
        # print(outward_dir_list)
        for outward_dir in outward_dir_list:
            outward_edge_file = outward_dir.makedirs_p() / str(
                hash_nr(_input_ident)) + '.%s.json' % job.__name__
            # ident_dump( _input_ident,  outward_edge_file, comment=_caller.to_dict() )

            ident_dump([
                ('comment', _caller.to_dict()),
                ('caller_dump', _dumps(_caller)),
                ('ident', _dumps(_input_ident)),
            ], outward_edge_file)
            # ident_dump( _input_ident  , outward_edge_file,  comment = (_caller.to_dict(), _dumps(_caller) ) )

            # ident_dump( (_caller, get_identity(_caller.to_ident())),
            # 	outward_edge_file, comment=_caller.to_dict() )

        #### remove edge_file of outputs
        outward_dir_list = get_outward_json_list(_caller._output_dict.items(),
                                                 dir_layout)
        for outward_dir in outward_dir_list:
            shutil.move(outward_dir.makedirs_p(),
                        (outward_dir + '_old').rmtree_p())
            outward_dir = outward_dir.makedirs_p()

    return result
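
A hypothetical probe of the cache, relying on the check_only early return above; it reports whether a valid cache exists without executing the job or touching its outputs:

is_cached = _cache_run(simple_job,
                       args=('/tmp/build/root', 'ATG', '/tmp/digit.txt'),
                       dir_layout='clean', mock=0, check_only=1,
                       check_changed=0, force=0, verbose=0)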
Example #14
def job_trimmomatic(
	self=Default,
	prefix = File,
	FASTQ_FILE_1 = InputFile, 
	FASTQ_FILE_2 = InputFile, 
	THREADS_ = int,
	_IMAGE ='docker://quay.io/biocontainers/trimmomatic:0.35--6',
	_output = [
		File('fastq_1'),
		File('fastq_2'),
		File('log'),
		File('cmd'),
		],
	):
	_ = '''
	trimmomatic PE -threads 4 -phred33
	/home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R1_raw.fastq
	/home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R2_raw.fastq
	809_S1_R1_raw_pass.fastq
	809_S1_R1_raw_fail.fastq
	809_S1_R2_raw_pass.fastq
	809_S1_R2_raw_fail.fastq
	ILLUMINACLIP:/home/Program_NGS_sl-pw-srv01/Trimmomatic-0.32/adapters/TruSeq3-PE-2.fa:6:30:10 LEADING:3 TRAILING:3 MINLEN:36 SLIDINGWINDOW:4:15
	'''
	CMD = [
		'trimmomatic', 'PE',
		'-threads', str(THREADS_),
		'-phred33',
		File(FASTQ_FILE_1),
		File(FASTQ_FILE_2),
		File(self.output.fastq_1),
		File(self.output.fastq_1 + '.fail'),
		File(self.output.fastq_2),
		File(self.output.fastq_2 + '.fail'),
		'ILLUMINACLIP:'
		'/usr/local/share/trimmomatic-0.35-6/adapters/TruSeq3-PE-2.fa'
		':6:30:10',
		'LEADING:3',
		'TRAILING:3',
		'MINLEN:36',
		'SLIDINGWINDOW:4:15',
		'&>',
		File(self.output.log),
	]
	CMD = list_flatten_strict(CMD)
	res = SingularityShellCommand(CMD, _IMAGE, self.output.cmd)
	return self
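
A hypothetical end-to-end sketch: trimmed reads from this job feed the aligner from Example #6; the attribute names follow the _output declarations above, and all paths are made up:

trim = force_run(job_trimmomatic, '/data/work/s1',
                 '/data/raw/s1_R1.fastq', '/data/raw/s1_R2.fastq', 4)
force_run(job_hisat2_align, '/data/work/s1_aln',
          '/data/work/grch38',  # INDEX_PREFIX, e.g. from Example #2
          trim.output.fastq_1, trim.output.fastq_2, 4)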