def job_picard_dedup(
    self,
    prefix,
    bam_file=File,
    THREADS_=int,
    _IMAGE=Depend('docker://quay.io/biocontainers/picard:2.21.9--0'),
    _IMAGE_SAMTOOLS=Depend(
        "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
    _output=['bam', 'log', 'cmd_log'],
):
    """Drop duplicate reads from ``bam_file`` with Picard and index the result.

    Step 1 issues ``picard MarkDuplicates`` with ``REMOVE_DUPLICATES=true``,
    writing the BAM to ``self.output.bam`` (``O=``) and the metrics to
    ``self.output.log`` (``M=``).  Step 2 issues ``samtools index`` on that
    BAM.  Both steps are handed ``self.output.cmd_log``; the second one is
    called with ``mode='a'`` and lists the ``.bai`` path in ``extra_files``.

    ``prefix`` and ``THREADS_`` are accepted but not used in this body.
    NOTE(review): the defaults in the signature look like type/dependency
    declarations consumed by the surrounding framework -- confirm.
    """
    # A dedicated TMP_DIR= argument was tried previously and is left disabled.
    dedup_cmd = ['picard', 'MarkDuplicates']
    dedup_cmd += [
        Concat('I=', File(bam_file)),
        Concat('O=', File(self.output.bam)),
        Concat('M=', File(self.output.log)),
        'REMOVE_DUPLICATES=true',
    ]
    LoggedSingularityCommand(
        self.prefix_named, dedup_cmd, _IMAGE, self.output.cmd_log)

    # Second step targets the same command log, this time with mode='a'.
    index_cmd = ['samtools', 'index', self.output.bam]
    LoggedSingularityCommand(
        self.prefix_named,
        index_cmd,
        _IMAGE_SAMTOOLS,
        self.output.cmd_log,
        mode='a',
        extra_files=[self.output.bam + '.bai'],
    )
# Example #2
# 0
def job_hisat2_align(
    self, prefix,
    INDEX_PREFIX=Prefix,
    FASTQ_FILE_1=InputFile,
    FASTQ_FILE_2=InputFile,
    THREADS_=int,
    _IMAGE=Depend("docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
    _IMAGE_SAMTOOLS=Depend("docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
    _output=[
        File('bam'),
        File('log'),
        File('cmd'),
    ],
):
    """Align a FASTQ pair with hisat2, then convert and sort with samtools.

    Three shell commands are issued one after another, each given
    ``self.output.cmd``:

    1. ``hisat2`` writes ``<bam>.sam`` and redirects its output (``&>``)
       to ``self.output.log``.
    2. ``samtools view`` reads ``<bam>.sam`` and writes ``<bam>.unsorted``.
    3. ``samtools sort`` reads ``<bam>.unsorted`` and writes
       ``self.output.bam``.

    The two intermediate files are not removed by this function, and
    ``prefix`` is not used in the body.
    NOTE(review): ``samtools view`` is called without ``-b``, so the
    ``.unsorted`` intermediate is presumably SAM text rather than BAM --
    confirm that this is intended.

    Returns ``self``.
    """
    thread_count = str(THREADS_)
    sam_path = self.output.bam + '.sam'
    unsorted_path = self.output.bam + '.unsorted'

    # Step 1: alignment (paired-end, RF strandness, --dta).
    align_cmd = [
        'hisat2', '-x', Prefix(INDEX_PREFIX),
        '-1', File(FASTQ_FILE_1),
        '-2', File(FASTQ_FILE_2),
        '-S', File(sam_path),
        '--threads', thread_count,
        '--no-mixed',
        '--rna-strandness', 'RF',
        '--dta',
        '--fr',
        '&>', File(self.output.log),
    ]
    SingularityShellCommand(align_cmd, _IMAGE, self.output.cmd)

    # Step 2: pass the alignment through `samtools view`.
    view_cmd = [
        'samtools', 'view',
        File(sam_path),
        '--threads', thread_count,
        '-o', File(unsorted_path),
    ]
    SingularityShellCommand(view_cmd, _IMAGE_SAMTOOLS, self.output.cmd)

    # Step 3: sort into the declared BAM output.
    sort_cmd = [
        'samtools', 'sort',
        File(unsorted_path),
        '--threads', thread_count,
        '-o', File(self.output.bam),
    ]
    SingularityShellCommand(sort_cmd, _IMAGE_SAMTOOLS, self.output.cmd)
    return self
# Example #3
# 0
def get_fasta(self, prefix,
    _depends = [Depend('curl'),Depend('gzip')],
    _resp = spiper.types.HttpResponseContentHeader('https://hgdownload.soe.ucsc.edu/goldenPath/currentGenomes/Wuhan_seafood_market_pneumonia_virus/bigZips/chromFa.tar.gz'),
    _output = ['fasta','cmd']):
    """Download the chromFa tarball and keep its single ``*.fa`` member.

    A ``_temp`` directory is created under ``self.prefix_named`` and entered
    as a context manager; ``curl ... | tar -xvzf-`` is run inside that
    context, the directory is globbed for ``*.fa``, and the one match is
    moved to ``self.output.fasta``.  The temporary directory is removed once
    the ``with`` block has finished normally.

    NOTE(review): entering the path object presumably switches the working
    directory to it, which is what makes ``tar`` extract there -- confirm.
    NOTE(review): ``self.output.cmd`` is declared in ``_output`` but nothing
    in this body writes it -- confirm whether the command should be logged.

    Raises:
        AssertionError: if the extraction does not yield exactly one
            ``*.fa`` file (the temporary directory is then left in place).
    """
    scratch_dir = (self.prefix_named / '_temp').makedirs_p()
    with scratch_dir as workdir:
        download_cmd = ['curl', '-LC0', _resp.url, '|', 'tar', '-xvzf-']
        spiper.types.LoggedShellCommand(download_cmd)

        fasta_hits = workdir.glob('*.fa')
        assert len(fasta_hits) == 1
        fasta_hits[0].move(self.output.fasta)
    workdir.rmtree_p()
# Example #4
# 0
def job_stringtie_count(
        self,
        prefix,
        BAM_FILE=File,
        GTF_FILE=File,
        THREADS_=int,
        _IMAGE=Depend(
            'docker://quay.io/biocontainers/stringtie:2.1.1--hc900ff6_0'),
        _output=['count', 'cmd']):
    """Run ``stringtie`` on ``BAM_FILE`` against the ``GTF_FILE`` annotation.

    The command uses ``THREADS_`` threads (``-p``), ``--rf``, the reference
    annotation (``-G``) and writes the ``-A`` table to ``self.output.count``.
    It is issued through ``SingularityShellCommand`` with ``_IMAGE`` and
    ``self.output.cmd``.  ``prefix`` is not used in the body.

    Reference invocation kept from the original author:

        stringtie -p 4 --rf 809_S1.bam
            -G /home/feng/ref/Arabidopsis_thaliana_TAIR10/annotation/genes.gtf
            -o 809_S1.stringtie.gtf
            -A 809_S1.stringtie.count &> 809_S1.stringtie.log

    NOTE(review): unlike that reference invocation, the command built here
    passes no ``-o`` and no log redirection -- confirm this is intended.
    """
    stringtie_cmd = ['stringtie', '-p', str(THREADS_)]
    stringtie_cmd.extend([
        File(BAM_FILE),
        '--rf',
        '-G', File(GTF_FILE),
        '-A', File(self.output.count),
    ])
    SingularityShellCommand(stringtie_cmd, _IMAGE, self.output.cmd)
def get_genepred(
    self,
    prefix,
    _resp=spiper.types.HttpResponseContentHeader(
        'https://hgdownload.soe.ucsc.edu/goldenPath/currentGenomes/Wuhan_seafood_market_pneumonia_virus/database/ncbiGene.txt.gz'
    ),
    _IMAGE=Depend(
        'docker://quay.io/biocontainers/ucsc-genepredtogtf:377--h35c10e6_2'),
    _output=['genepred', 'gtf', 'cmd'],
):
    """Fetch the ncbiGene table and convert it to GTF.

    First a shell pipeline downloads ``_resp.url`` with ``curl``,
    decompresses it with ``gzip -d``, keeps fields 2 onward (``cut -f2-``)
    and redirects the result to ``self.output.genepred``; that call is given
    ``self.output.cmd`` with ``mode='w'``.  Then ``genePredToGtf file`` is
    run in ``_IMAGE`` to produce ``self.output.gtf``, with the same command
    file passed using ``mode='a'``.  ``prefix`` is not used in the body.
    """
    cmd_file = self.output.cmd
    genepred_path = self.output.genepred

    fetch_cmd = ['curl', '-LC0', _resp.url]
    fetch_cmd += ['|', 'gzip -d | cut -f2- >', genepred_path]
    LoggedShellCommand(fetch_cmd, cmd_file, mode='w')

    convert_cmd = ['genePredToGtf', 'file', genepred_path, self.output.gtf]
    LoggedSingularityCommand(
        self.prefix_named, convert_cmd, _IMAGE, cmd_file, mode='a')
def job_bam2bw_cpm(
    self,
    prefix,
    bam_file=File,
    bam_qc_file=File,
    THREADS_=int,
    _image=Depend('docker://quay.io/shouldsee/cgpbigwig:b024993'),
    # _image = Depend('docker://quay.io/wtsicgp/cgpbigwig:1.2.0'),
    _output=['bw', 'cmd'],
):
    """Convert ``bam_file`` to a bigWig scaled to counts per million.

    The scale passed to ``bam2bw -S`` is
    ``log10(1e6 / max(1, n))`` where ``n`` is the
    ``'counts.uniq_mapped.sum'`` entry of the JSON in ``bam_qc_file``;
    the ``max(1, ...)`` guard avoids dividing by zero when no reads were
    counted.  The original author's note says that setting ``scale_log10``
    to ``0.`` disables rescaling.

    The command is issued through ``LoggedSingularityCommand`` with
    ``_image`` and ``self.output.cmd``, listing ``<bam_file>.bai`` in
    ``extra_files``.  ``prefix`` and ``THREADS_`` are not used in the body.

    Raises:
        AssertionError: if ``<bam_file>.bai`` is not an existing file.
        KeyError: if the QC JSON lacks ``'counts.uniq_mapped.sum'``.
    """
    assert (bam_file + '.bai').isfile()

    # Read the QC summary through a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(bam_qc_file, 'r') as qc_handle:
        qc_stats = json.load(qc_handle)

    uniq_mapped = qc_stats['counts.uniq_mapped.sum']
    scale_log10 = math.log10(1.E6 / max(1, uniq_mapped))

    CMD = [
        'bam2bw', '-S',
        str(scale_log10), '-i', bam_file, '-o', self.output.bw
    ]
    LoggedSingularityCommand(self.prefix_named,
                             CMD,
                             _image,
                             self.output.cmd,
                             extra_files=[bam_file + '.bai'])
def job_hisat2_index(
    self,
    prefix,
    FASTA_FILE=File,
    THREADS_=int,
    _IMAGE=Depend(
        "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
    _output=[
        Prefix('index_prefix'),
        File('log'),
        File('cmd'),
    ],
):
    """Build a hisat2 index for ``FASTA_FILE``.

    Issues ``hisat2-build -p THREADS_ <fasta> <index_prefix>`` with its
    output redirected (``&>``) to ``self.output.log``.  The command goes
    through ``LoggedSingularityCommand`` with ``_IMAGE`` and
    ``self.output.cmd``.  ``prefix`` is not used in the body.

    Returns ``self``.
    """
    build_cmd = ['hisat2-build', '-p', str(THREADS_)]
    build_cmd.append(File(FASTA_FILE))
    build_cmd.append(Prefix(self.output.index_prefix))
    build_cmd.extend(['&>', File(self.output.log)])

    LoggedSingularityCommand(
        self.prefix_named, build_cmd, _IMAGE, self.output.cmd)
    return self
# Example #8
# 0
def job_trimmomatic(
    self, prefix,
    FASTQ_FILE_1=InputFile,
    FASTQ_FILE_2=InputFile,
    THREADS_=int,
    _IMAGE=Depend('docker://quay.io/biocontainers/trimmomatic:0.35--6'),
    _output=[
        File('fastq1'),
        File('fastq2'),
        File('log'),
        File('cmd'),
    ],
):
    """Trim a FASTQ pair with ``trimmomatic PE``.

    For each mate the command is given two output paths: the declared
    output (``self.output.fastq1`` / ``self.output.fastq2``) and that path
    with a ``.fail`` suffix.  Trimming steps are ILLUMINACLIP with the
    TruSeq3-PE-2 adapters at ``:6:30:10``, ``LEADING:3``, ``TRAILING:3``,
    ``MINLEN:36`` and ``SLIDINGWINDOW:4:15``.  Output is redirected
    (``&>``) to ``self.output.log``, and the command is issued through
    ``SingularityShellCommand`` with ``_IMAGE`` and ``self.output.cmd``.
    ``prefix`` is not used in the body.

    Returns ``self``.
    """
    # Reference invocation kept from the original author:
    #   trimmomatic PE -threads 4 -phred33
    #     /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R1_raw.fastq
    #     /home/feng/temp/187R/187R-S1-2018_06_27_14:02:08/809_S1_R2_raw.fastq
    #     809_S1_R1_raw_pass.fastq 809_S1_R1_raw_fail.fastq
    #     809_S1_R2_raw_pass.fastq 809_S1_R2_raw_fail.fastq
    #     ILLUMINACLIP:/home/Program_NGS_sl-pw-srv01/Trimmomatic-0.32/adapters/TruSeq3-PE-2.fa:6:30:10
    #     LEADING:3 TRAILING:3 MINLEN:36 SLIDINGWINDOW:4:15

    # The adapter file path is the one hard-coded for the 0.35-6 image.
    adapter_clip = (
        'ILLUMINACLIP:'
        '/usr/local/share/trimmomatic-0.35-6/adapters/TruSeq3-PE-2.fa'
        ':6:30:10'
    )

    trim_cmd = ['trimmomatic', 'PE', '-threads', str(THREADS_), '-phred33']
    trim_cmd += [
        File(FASTQ_FILE_1),
        File(FASTQ_FILE_2),
        File(self.output.fastq1),
        File(self.output.fastq1 + '.fail'),
        File(self.output.fastq2),
        File(self.output.fastq2 + '.fail'),
    ]
    trim_cmd += [
        adapter_clip,
        'LEADING:3',
        'TRAILING:3',
        'MINLEN:36',
        'SLIDINGWINDOW:4:15',
        '&>',
        File(self.output.log),
    ]
    SingularityShellCommand(trim_cmd, _IMAGE, self.output.cmd)
    return self
def job_bam_qc(self,
               prefix,
               bam_file=File,
               THREADS_=int,
               _image=Depend(
                   "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
               _output=['cmd', 'data_json']):
    """Count unmapped and uniquely mapped reads and save them as JSON.

    One ``bash -euc`` script runs three ``samtools view -c`` counts on
    ``bam_file``: unmapped (``-f 0x4``), forward uniquely mapped
    (``-F0x10 -F0x100 -F0x4``) and reverse uniquely mapped
    (``-f0x10 -F0x100 -F0x4``).  The captured stdout must contain exactly
    three lines, which are parsed as integers and written, together with a
    version tag, their sum and the BAM file name, to
    ``self.output.data_json``.  ``prefix`` and ``THREADS_`` are not used in
    the body.

    Raises:
        AssertionError: if stdout does not split into exactly three lines.
        ValueError: if one of those lines is not an integer.
    """
    summary = collections.OrderedDict()

    count_script = ['bash -euc "{ ']
    for samtools_count in (
            'samtools view -c -f 0x4 ',  # unmapped
            'samtools view -c -F0x10 -F0x100 -F0x4 ',  # forward, uniquely mapped
            'samtools view -c -f0x10 -F0x100 -F0x4 ',  # reverse, uniquely mapped
    ):
        count_script += [samtools_count, bam_file, ';']
    count_script.append('}"')

    _cmd_runned, stdout = LoggedSingularityCommand(
        self.prefix_named,
        count_script,
        _image,
        self.output.cmd,
        extra_files=[bam_file + '.bai'],
    )

    count_lines = stdout.splitlines()
    assert len(count_lines) == 3

    summary['version'] = '0.0.1'
    unmapped = int(count_lines[0])
    forward = int(count_lines[1])
    reverse = int(count_lines[2])
    for key, value in (
            ('counts.unmapped', unmapped),
            ('counts.uniq_mapped.fwd', forward),
            ('counts.uniq_mapped.rev', reverse),
            ('counts.uniq_mapped.sum', forward + reverse),
            ('filename', str(bam_file)),
    ):
        summary[key] = value

    with open(self.output.data_json, 'w') as json_out:
        json.dump(summary, json_out, indent=2)
def job_hisat2_align(
        self,
        prefix,
        INDEX_PREFIX=Prefix,
        FASTQ_FILE_1=File,
        FASTQ_FILE_2=File,
        hisat2_args=list,
        THREADS_=int,
        _IMAGE=Depend(
            "docker://quay.io/biocontainers/hisat2:2.1.0--py36hc9558a2_4"),
        _IMAGE_SAMTOOLS=Depend(
            "docker://quay.io/biocontainers/samtools:1.10--h9402c20_2"),
        _output=[
            File('bam'),
            File('log'),
            File('cmd'),
        ]):
    """Align a FASTQ pair with hisat2 and pipe the result into samtools.

    Three container command lists are composed into one shell pipeline,
    ``hisat2 | samtools view && samtools sort``, which is issued once
    through ``LoggedShellCommand`` with ``self.output.cmd``:

    * ``hisat2`` writes SAM to ``/dev/stdout`` using
      ``max(1, THREADS_ - 1)`` threads and sends stderr to
      ``self.output.log``.  ``hisat2_args`` is inserted as a nested list;
      when it is falsy the defaults ``--no-mixed --rna-strandness RF --dta
      --fr`` are used instead.
    * ``samtools view -bS`` reads ``/dev/stdin`` with one thread and writes
      ``<bam>.unsorted``.
    * ``samtools sort`` writes ``self.output.bam`` using ``THREADS_``
      threads and ``<bam>.sort_temp/`` as its ``-T`` location.

    The ``.sort_temp/`` directory is created while the command list is being
    built, i.e. before anything runs.  The ``.unsorted`` intermediate is not
    removed here, and ``prefix`` is not used in the body.

    Returns ``self``.
    """
    # Debugging aids kept from the original author:
    #   singularity --verbose --debug exec docker://python:2.7.17-alpine python -V
    #   singularity shell docker://python:2.7.17-alpine python -V
    unsorted_bam = self.output.bam + '.unsorted'

    align_cmd = [
        'hisat2',
        '-x', Prefix(INDEX_PREFIX),
        '-1', File(FASTQ_FILE_1),
        '-2', File(FASTQ_FILE_2),
        '-S', '/dev/stdout',
        # One thread is held back; the piped `samtools view` uses one.
        '--threads', str(max(1, THREADS_ - 1)),
        hisat2_args
        or ['--no-mixed', '--rna-strandness', 'RF', '--dta', '--fr'],
        '2>', File(self.output.log),
    ]

    to_bam_cmd = [
        'samtools', 'view', '-bS', '/dev/stdin',
        '--threads', '1',
        '-o', unsorted_bam,
    ]

    sort_cmd = [
        'samtools', 'sort', unsorted_bam,
        '--threads', str(THREADS_),
        '-o', self.output.bam,
        '-T',
        File(self.output.bam + '.sort_temp/').makedirs_p().check_writable(),
    ]

    pipeline = [
        LoggedSingularityCommandList(self.prefix_named, align_cmd, _IMAGE),
        '|',
        LoggedSingularityCommandList(
            self.prefix_named, to_bam_cmd, _IMAGE_SAMTOOLS),
        '&&',
        LoggedSingularityCommandList(
            self.prefix_named, sort_cmd, _IMAGE_SAMTOOLS),
    ]
    LoggedShellCommand(pipeline, self.output.cmd)
    return self