def job_picard_dedup(
        self,
        prefix,
        bam_file=File,
        THREADS_=int,
        _IMAGE=Depend('docker://quay.io/biocontainers/picard:2.21.9--0'),
        _IMAGE_SAMTOOLS=Depend(
            "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
        _output=['bam', 'log', 'cmd_log'],
):
    '''
    Remove duplicate reads from `bam_file` with picard, then index the result.

    Step 1 runs `picard MarkDuplicates` with `REMOVE_DUPLICATES=true` inside
    `_IMAGE`; the output BAM goes to `self.output.bam` and the `M=` file to
    `self.output.log`.
    Step 2 runs `samtools index` on `self.output.bam` inside `_IMAGE_SAMTOOLS`,
    declaring `<bam>.bai` as an extra file.

    Both steps pass `self.output.cmd_log` as the log target; the second call
    uses mode='a' (presumably append, so the first entry is kept -- confirm).

    `prefix` and `THREADS_` are accepted but not used in the body.
    '''
    dedup_cmd = [
        'picard',
        'MarkDuplicates',
        Concat('I=', File(bam_file)),
        Concat('O=', File(self.output.bam)),
        Concat('M=', File(self.output.log)),
        'REMOVE_DUPLICATES=true',
    ]
    LoggedSingularityCommand(
        self.prefix_named,
        dedup_cmd,
        _IMAGE,
        self.output.cmd_log,
    )

    index_cmd = ['samtools', 'index', self.output.bam]
    LoggedSingularityCommand(
        self.prefix_named,
        index_cmd,
        _IMAGE_SAMTOOLS,
        self.output.cmd_log,
        mode='a',
        extra_files=[self.output.bam + '.bai'],
    )
def job_hisat2_align(
        self, prefix,
        INDEX_PREFIX=Prefix,
        FASTQ_FILE_1=InputFile,
        FASTQ_FILE_2=InputFile,
        THREADS_=int,
        _IMAGE=Depend("docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
        _IMAGE_SAMTOOLS=Depend("docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
        _output=[
            File('bam'),
            File('log'),
            File('cmd'),
        ],
):
    '''
    Align a pair of FASTQ files with hisat2 and sort the result with samtools.

    Three commands are run one after another, each given `self.output.cmd`:
      1. `hisat2` (in `_IMAGE`) writes alignments to `<bam>.sam`; its output
         is redirected with `&>` to `self.output.log`.
      2. `samtools view` (in `_IMAGE_SAMTOOLS`) reads `<bam>.sam` and writes
         `<bam>.unsorted`.
      3. `samtools sort` (in `_IMAGE_SAMTOOLS`) reads `<bam>.unsorted` and
         writes `self.output.bam`.

    The intermediate `<bam>.sam` and `<bam>.unsorted` files are not removed.
    `prefix` is accepted but not used in the body. Returns `self`.

    NOTE(review): step 2 does not pass `-b`, unlike this earlier manual run:
        samtools view /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1.sam -b --threads 4 -o 809_S1.bam
    so the `.unsorted` intermediate is presumably SAM text -- confirm intent.
    '''
    n_threads = str(THREADS_)
    sam_path = self.output.bam + '.sam'
    unsorted_path = self.output.bam + '.unsorted'

    # Step 1: alignment; strandness/pairing options are fixed here.
    align_cmd = [
        'hisat2', '-x', Prefix(INDEX_PREFIX),
        '-1', File(FASTQ_FILE_1),
        '-2', File(FASTQ_FILE_2),
        '-S', File(sam_path),
        '--threads', n_threads,
        '--no-mixed',
        '--rna-strandness', 'RF',
        '--dta',
        '--fr',
        '&>', File(self.output.log),
    ]
    SingularityShellCommand(align_cmd, _IMAGE, self.output.cmd)

    # Step 2: pass the alignments through samtools view.
    view_cmd = [
        'samtools', 'view',
        File(sam_path),
        '--threads', n_threads,
        '-o', File(unsorted_path),
    ]
    SingularityShellCommand(view_cmd, _IMAGE_SAMTOOLS, self.output.cmd)

    # Step 3: sort into the final output.
    sort_cmd = [
        'samtools', 'sort',
        File(unsorted_path),
        '--threads', n_threads,
        '-o', File(self.output.bam),
    ]
    SingularityShellCommand(sort_cmd, _IMAGE_SAMTOOLS, self.output.cmd)
    return self
def get_fasta(
        self,
        prefix,
        _depends=[Depend('curl'), Depend('gzip')],
        _resp=spiper.types.HttpResponseContentHeader(
            'https://hgdownload.soe.ucsc.edu/goldenPath/currentGenomes/Wuhan_seafood_market_pneumonia_virus/bigZips/chromFa.tar.gz'),
        _output=['fasta', 'cmd'],
):
    '''
    Download the chromFa tarball at `_resp.url` and keep its single FASTA file.

    A `_temp` directory is created under `self.prefix_named` and used as a
    context manager while `curl ... | tar -xvzf-` runs (presumably the path
    object changes the working directory on enter, so the archive unpacks
    there -- confirm). Exactly one `*.fa` file must then exist in that
    directory; it is moved to `self.output.fasta` and the temporary directory
    is removed.

    Raises AssertionError when the number of extracted `*.fa` files is not 1.

    NOTE(review): `cmd` is declared in `_output`, but the shell command is run
    without a log-file argument, so nothing here writes `self.output.cmd`.
    `_depends` lists curl and gzip while the command itself invokes curl and tar.
    `prefix` is accepted but not used in the body.
    '''
    with (self.prefix_named / '_temp').makedirs_p() as workdir:
        download_cmd = [
            'curl', '-LC0', _resp.url,
            '|',
            'tar', '-xvzf-',
        ]
        spiper.types.LoggedShellCommand(download_cmd)

        extracted = workdir.glob('*.fa')
        assert len(extracted) == 1
        extracted[0].move(self.output.fasta)
    workdir.rmtree_p()
def job_stringtie_count(
        self,
        prefix,
        BAM_FILE=File,
        GTF_FILE=File,
        THREADS_=int,
        _IMAGE=Depend(
            'docker://quay.io/biocontainers/stringtie:2.1.1--hc900ff6_0'),
        _output=['count', 'cmd'],
):
    '''
    Run stringtie on `BAM_FILE` against the annotation `GTF_FILE`.

    The command is `stringtie -p <THREADS_> <BAM_FILE> --rf -G <GTF_FILE>
    -A <self.output.count>`, executed inside `_IMAGE` with `self.output.cmd`
    passed as the command log target. No `-o` option is given.

    `prefix` is accepted but not used in the body.

    Example of an earlier manual run:
        stringtie -p 4 --rf 809_S1.bam -G /home/feng/ref/Arabidopsis_thaliana_TAIR10/annotation/genes.gtf -o 809_S1.stringtie.gtf -A 809_S1.stringtie.count &> 809_S1.stringtie.log
    '''
    stringtie_cmd = ['stringtie']
    stringtie_cmd += ['-p', str(THREADS_)]
    stringtie_cmd += [File(BAM_FILE), '--rf']
    stringtie_cmd += ['-G', File(GTF_FILE)]
    stringtie_cmd += ['-A', File(self.output.count)]
    SingularityShellCommand(stringtie_cmd, _IMAGE, self.output.cmd)
def get_genepred(
        self,
        prefix,
        _resp=spiper.types.HttpResponseContentHeader(
            'https://hgdownload.soe.ucsc.edu/goldenPath/currentGenomes/Wuhan_seafood_market_pneumonia_virus/database/ncbiGene.txt.gz'
        ),
        _IMAGE=Depend(
            'docker://quay.io/biocontainers/ucsc-genepredtogtf:377--h35c10e6_2'),
        _output=['genepred', 'gtf', 'cmd'],
):
    '''
    Download the ncbiGene table at `_resp.url` and convert it to GTF.

    Step 1 (host shell): `curl | gzip -d | cut -f2-` redirected into
    `self.output.genepred`, i.e. the decompressed table with its first
    tab-separated column dropped. Logged to `self.output.cmd` with mode='w'.
    Step 2 (inside `_IMAGE`): `genePredToGtf file <genepred> <gtf>` writes
    `self.output.gtf`. Logged to the same file with mode='a'.

    `prefix` is accepted but not used in the body.
    '''
    genepred_path = self.output.genepred
    cmd_log = self.output.cmd

    fetch_cmd = [
        'curl', '-LC0', _resp.url,
        '|',
        'gzip -d | cut -f2- >',
        genepred_path,
    ]
    LoggedShellCommand(fetch_cmd, cmd_log, mode='w')

    convert_cmd = ['genePredToGtf', 'file', genepred_path, self.output.gtf]
    LoggedSingularityCommand(
        self.prefix_named, convert_cmd, _IMAGE, cmd_log, mode='a')
def job_bam2bw_cpm(
        self,
        prefix,
        bam_file=File,
        bam_qc_file=File,
        THREADS_=int,
        _image=Depend('docker://quay.io/shouldsee/cgpbigwig:b024993'),
        # _image = Depend('docker://quay.io/wtsicgp/cgpbigwig:1.2.0'),
        _output=['bw', 'cmd'],
):
    '''
    Convert `bam_file` to a bigWig track with `bam2bw`, scaled by read count.

    #### set scale_log10==0. to disable rescaling

    The scale passed to `bam2bw -S` is log10(1e6 / N), where N is the
    'counts.uniq_mapped.sum' entry of the JSON file `bam_qc_file`. N is
    clamped to at least 1 so an empty BAM does not divide by zero.

    The command runs inside `_image`, writes `self.output.bw`, and passes
    `self.output.cmd` as the command log target; `<bam_file>.bai` is declared
    as an extra file.

    `prefix` and `THREADS_` are accepted but not used in the body.

    Raises:
        AssertionError: if `<bam_file>.bai` does not exist.
        KeyError: if the QC JSON has no 'counts.uniq_mapped.sum' entry.
    '''
    bam_index = bam_file + '.bai'
    assert bam_index.isfile()

    # Read the QC summary through a context manager so the handle is closed
    # deterministically (the previous open(...).read() never closed it).
    with open(bam_qc_file, 'r') as f:
        qc_data = json.load(f)
    uniq_mapped = qc_data['counts.uniq_mapped.sum']

    scale_log10 = math.log10(1.E6 / max(1, uniq_mapped))

    CMD = [
        'bam2bw',
        '-S', str(scale_log10),
        '-i', bam_file,
        '-o', self.output.bw,
    ]
    LoggedSingularityCommand(self.prefix_named,
                             CMD,
                             _image,
                             self.output.cmd,
                             extra_files=[bam_index])
def job_hisat2_index(
        self,
        prefix,
        FASTA_FILE=File,
        THREADS_=int,
        _IMAGE=Depend(
            "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
        _output=[
            Prefix('index_prefix'),
            File('log'),
            File('cmd'),
        ],
):
    '''
    Build a hisat2 index for `FASTA_FILE`.

    Runs `hisat2-build -p <THREADS_> <FASTA_FILE> <index_prefix>` inside
    `_IMAGE`, redirecting the tool's output with `&>` to `self.output.log`
    and passing `self.output.cmd` as the command log target.

    `prefix` is accepted but not used in the body. Returns `self`.
    '''
    build_cmd = ['hisat2-build']
    build_cmd += ['-p', str(THREADS_)]
    build_cmd += [File(FASTA_FILE), Prefix(self.output.index_prefix)]
    build_cmd += ['&>', File(self.output.log)]

    LoggedSingularityCommand(
        self.prefix_named, build_cmd, _IMAGE, self.output.cmd)
    return self
def job_trimmomatic(
        self, prefix,
        FASTQ_FILE_1=InputFile,
        FASTQ_FILE_2=InputFile,
        THREADS_=int,
        _IMAGE=Depend('docker://quay.io/biocontainers/trimmomatic:0.35--6'),
        _output=[
            File('fastq1'),
            File('fastq2'),
            File('log'),
            File('cmd'),
        ],
):
    '''
    Trim a pair of FASTQ files with `trimmomatic PE`.

    Positional arguments to trimmomatic, in order: the two inputs, then
    `self.output.fastq1`, `<fastq1>.fail`, `self.output.fastq2`,
    `<fastq2>.fail`. The trimming steps are fixed:
    ILLUMINACLIP with the TruSeq3-PE-2 adapter file at a hard-coded path
    (presumably inside the container image -- confirm if `_IMAGE` changes),
    LEADING:3, TRAILING:3, MINLEN:36, SLIDINGWINDOW:4:15.

    The tool's output is redirected with `&>` to `self.output.log`;
    `self.output.cmd` is passed as the command log target.
    `prefix` is accepted but not used in the body. Returns `self`.

    Example of an earlier manual run (paths are illustrative):
        trimmomatic PE -threads 4 -phred33 809_S1_R1_raw.fastq 809_S1_R2_raw.fastq
            809_S1_R1_raw_pass.fastq 809_S1_R1_raw_fail.fastq
            809_S1_R2_raw_pass.fastq 809_S1_R2_raw_fail.fastq
            ILLUMINACLIP:<adapters>/TruSeq3-PE-2.fa:6:30:10
            LEADING:3 TRAILING:3 MINLEN:36 SLIDINGWINDOW:4:15
    '''
    adapter_fasta = '/usr/local/share/trimmomatic-0.35-6/adapters/TruSeq3-PE-2.fa'
    clip_step = 'ILLUMINACLIP:' + adapter_fasta + ':6:30:10'

    inputs = [File(FASTQ_FILE_1), File(FASTQ_FILE_2)]
    outputs = [
        File(self.output.fastq1),
        File(self.output.fastq1 + '.fail'),
        File(self.output.fastq2),
        File(self.output.fastq2 + '.fail'),
    ]
    trim_steps = [
        clip_step,
        'LEADING:3',
        'TRAILING:3',
        'MINLEN:36',
        'SLIDINGWINDOW:4:15',
    ]

    trim_cmd = (
        ['trimmomatic', 'PE', '-threads', str(THREADS_), '-phred33']
        + inputs
        + outputs
        + trim_steps
        + ['&>', File(self.output.log)]
    )
    SingularityShellCommand(trim_cmd, _IMAGE, self.output.cmd)
    return self
def job_bam_qc(
        self,
        prefix,
        bam_file=File,
        THREADS_=int,
        _image=Depend(
            "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
        _output=['cmd', 'data_json'],
):
    '''
    Count reads in `bam_file` with samtools and write a JSON summary.

    A single `bash -euc` invocation inside `_image` runs three
    `samtools view -c` commands; the job expects exactly three lines of
    output, parsed as integers in this order:
      1. unmapped reads              (-f 0x4)
      2. forward uniquely mapped     (-F0x10 -F0x100 -F0x4)
      3. reverse uniquely mapped     (-f0x10 -F0x100 -F0x4)

    The summary written to `self.output.data_json` holds the keys 'version',
    'counts.unmapped', 'counts.uniq_mapped.fwd', 'counts.uniq_mapped.rev',
    'counts.uniq_mapped.sum' (fwd + rev) and 'filename', in that order.

    `prefix` and `THREADS_` are accepted but not used in the body.

    Raises:
        AssertionError: if the command output is not exactly three lines.
        ValueError: if one of those lines is not an integer.
    '''
    count_cmd = [
        'bash -euc "{ ',
        # UNMAPPED
        'samtools view -c -f 0x4 ', bam_file, ';',
        # FWD_UNIQ_MAPPED
        'samtools view -c -F0x10 -F0x100 -F0x4 ', bam_file, ';',
        # REV_UNIQ_MAPPED
        'samtools view -c -f0x10 -F0x100 -F0x4 ', bam_file, ';',
        '}"',
    ]
    cmd_runned, stdout = LoggedSingularityCommand(
        self.prefix_named,
        count_cmd,
        _image,
        self.output.cmd,
        extra_files=[bam_file + '.bai'],
    )

    count_lines = stdout.splitlines()
    assert len(count_lines) == 3
    n_unmapped, n_fwd, n_rev = (int(line) for line in count_lines)

    summary = collections.OrderedDict([
        ('version', '0.0.1'),
        ('counts.unmapped', n_unmapped),
        ('counts.uniq_mapped.fwd', n_fwd),
        ('counts.uniq_mapped.rev', n_rev),
        ('counts.uniq_mapped.sum', n_fwd + n_rev),
        ('filename', str(bam_file)),
    ])
    with open(self.output.data_json, 'w') as f:
        json.dump(summary, f, indent=2)
def job_hisat2_align(
        self,
        prefix,
        INDEX_PREFIX=Prefix,
        FASTQ_FILE_1=File,
        FASTQ_FILE_2=File,
        hisat2_args=list,
        THREADS_=int,
        _IMAGE=Depend(
            "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
        _IMAGE_SAMTOOLS=Depend(
            "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
        _output=[
            File('bam'),
            File('log'),
            File('cmd'),
        ],
):
    '''
    Align a pair of FASTQ files with hisat2, streaming straight into samtools.

    One shell line is assembled and run via `LoggedShellCommand`, with
    `self.output.cmd` as the command log target:

        <hisat2 ... -S /dev/stdout 2> log> | <samtools view -bS /dev/stdin>
            && <samtools sort>

    - hisat2 (in `_IMAGE`) uses max(1, THREADS_ - 1) threads and writes its
      stderr to `self.output.log`. When `hisat2_args` is empty/falsy the
      options `--no-mixed --rna-strandness RF --dta --fr` are used instead.
    - samtools view (in `_IMAGE_SAMTOOLS`) uses 1 thread and writes
      `<bam>.unsorted`.
    - samtools sort (in `_IMAGE_SAMTOOLS`) uses THREADS_ threads, writes
      `self.output.bam`, and is given `<bam>.sort_temp/` as `-T`; that
      directory is created and checked for writability while the command
      list is being built, i.e. before anything runs.

    The `<bam>.unsorted` file and the `<bam>.sort_temp/` directory are not
    removed afterwards. `prefix` is accepted but not used in the body.
    Returns `self`.
    '''
    fallback_args = ['--no-mixed', '--rna-strandness', 'RF', '--dta', '--fr']
    unsorted_path = self.output.bam + '.unsorted'

    # Alignment: SAM goes to stdout so it can be piped without a temp file.
    align_cmd = [
        'hisat2',
        '-x', Prefix(INDEX_PREFIX),
        '-1', File(FASTQ_FILE_1),
        '-2', File(FASTQ_FILE_2),
        '-S', '/dev/stdout',
        '--threads', str(max(1, THREADS_ - 1)),
        hisat2_args or fallback_args,
        '2>', File(self.output.log),
    ]

    # SAM -> BAM on the receiving end of the pipe.
    to_bam_cmd = [
        'samtools', 'view',
        '-bS', '/dev/stdin',
        '--threads', str(1),
        '-o', unsorted_path,
    ]

    # Final sort; the temp directory is prepared here as a side effect.
    sort_temp_dir = File(
        self.output.bam + '.sort_temp/').makedirs_p().check_writable()
    sort_cmd = [
        'samtools', 'sort',
        unsorted_path,
        '--threads', str(THREADS_),
        '-o', self.output.bam,
        '-T', sort_temp_dir,
    ]

    pipeline = [
        LoggedSingularityCommandList(self.prefix_named, align_cmd, _IMAGE),
        '|',
        LoggedSingularityCommandList(
            self.prefix_named, to_bam_cmd, _IMAGE_SAMTOOLS),
        '&&',
        LoggedSingularityCommandList(
            self.prefix_named, sort_cmd, _IMAGE_SAMTOOLS),
    ]
    LoggedShellCommand(pipeline, self.output.cmd)
    return self