def run(self, fastq1, fastq2, index):
    """Align a pair of FASTQ files with ``bwa mem`` and write a BAM file.

    The ``bwa mem`` output is piped into ``samtools view -b``, which writes
    ``<self.out>/20_bam/<index>.bam``.  The redirection at the end of the
    pipeline appends to ``<self.out>/log/bwa.log``.

    Args:
        fastq1: First FASTQ path handed to ``bwa mem``.
        fastq2: Second FASTQ path handed to ``bwa mem``.
        index: Label used for the BAM file name and the progress messages.

    Raises:
        SystemExit: With status 1 when the shell pipeline returns non-zero
            (after ``call_log`` has been invoked for the command).
    """
    print(time_stamp(),
          'start to align reads of {} by BWA.'.format(index),
          flush=True)

    cmd = 'bwa mem -t {0} {1} {2} {3} | \
           samtools view -b \
                         -o {4}/20_bam/{5}.bam \
                         >> {4}/log/bwa.log \
                         2>&1'.format(self.args.threads,
                                      self.args.ref,
                                      fastq1,
                                      fastq2,
                                      self.out,
                                      index)
    # NOTE(review): the continuation-line padding is part of the string;
    # clean_cmd (defined elsewhere) presumably squeezes it - confirm.
    cmd = clean_cmd(cmd)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'bwa', cmd)
        # A bare sys.exit() would report status 0 (success) to the caller
        # even though the alignment failed.
        sys.exit(1)

    print(time_stamp(),
          'alignment {} successfully finished.'.format(index),
          flush=True)
def mpileup(self, chr_name):
    """Call variants for one region and index the resulting VCF.

    Runs ``bcftools mpileup | bcftools call | bcftools filter`` on
    ``cultivar.bam`` and ``bulk.bam`` restricted to ``chr_name`` (``-r``),
    writing ``<self.out>/30_vcf/mutmap.<chr_name>.vcf.gz``, and then runs
    ``tabix`` on that file.  Each step logs to its own per-region log file.

    Args:
        chr_name: Region string passed to ``bcftools mpileup -r``; also
            used in the output and log file names.

    Raises:
        SystemExit: With status 1 as soon as either step returns non-zero.
    """
    silent_shell = {
        'stdout': sbp.DEVNULL,
        'stderr': sbp.DEVNULL,
        'shell': True,
        'check': True,
    }

    # Both templates are formatted first and cleaned afterwards, in order.
    raw_steps = (
        ('bcftools',
         'bcftools mpileup -a AD,ADF,ADR \
                           -B \
                           -q {0}\
                           -Q {1} \
                           -C {2} \
                           -O u \
                           -r {3} \
                           -f {4} \
                           {5}/20_bam/cultivar.bam \
                           {5}/20_bam/bulk.bam | \
          bcftools call -vm \
                        -f GQ,GP \
                        -O u | \
          bcftools filter -i "INFO/MQ>={0}" \
                          -O z \
                          -o {5}/30_vcf/mutmap.{3}.vcf.gz \
                          >> {5}/log/bcftools.{3}.log \
                          2>&1'.format(self.args.min_MQ,
                                       self.args.min_BQ,
                                       self.args.adjust_MQ,
                                       chr_name,
                                       self.args.ref,
                                       self.out)),
        ('tabix',
         'tabix -f \
                -p vcf \
                {0}/30_vcf/mutmap.{1}.vcf.gz \
                >> {0}/log/tabix.{1}.log \
                2>&1'.format(self.out, chr_name)),
    )
    steps = [(tool, clean_cmd(raw)) for tool, raw in raw_steps]

    # Stop at the first failing step; tabix is never attempted if the
    # bcftools pipeline fails.
    for tool, command in steps:
        try:
            sbp.run(command, **silent_shell)
        except sbp.CalledProcessError:
            call_log(self.out, tool, command)
            sys.exit(1)
def mkindex(self):
    """Index ``<self.out>/30_vcf/mutmap.vcf.gz`` with ``tabix -f -p vcf``.

    Command output is appended to ``<self.out>/log/tabix.log``.

    Raises:
        SystemExit: With status 1 when ``tabix`` returns non-zero.
    """
    index_cmd = clean_cmd(
        'tabix -f \
               -p vcf \
               {0}/30_vcf/mutmap.vcf.gz \
               >> {0}/log/tabix.log \
               2>&1'.format(self.out)
    )

    try:
        sbp.run(index_cmd,
                shell=True,
                check=True,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL)
    except sbp.CalledProcessError:
        # Hand the failing command to the project logger before aborting.
        call_log(self.out, 'tabix', index_cmd)
        sys.exit(1)
def filt(self, label_with_flags):
    """Extract the reads carrying a given SAM flag into a separate BAM.

    Runs ``samtools view -b -f <flag>`` on
    ``<self.out>/20_bam/<label>.bam`` and writes
    ``<self.out>/20_bam/<label>.f<flag>.bam``.  Output is appended to
    ``<self.out>/log/samtools.<label>.f<flag>.log``.

    Args:
        label_with_flags: Indexable pair; element 0 is the flag value and
            element 1 is the sample label.  (Packed into one argument,
            presumably so the method can be mapped over a list - confirm
            against the caller.)

    Raises:
        SystemExit: With status 1 when ``samtools`` returns non-zero.
    """
    flag = label_with_flags[0]
    label = label_with_flags[1]

    cmd = 'samtools view -b \
                         -f {0} \
                         -o {1}/20_bam/{2}.f{0}.bam \
                         {1}/20_bam/{2}.bam \
                         >> {1}/log/samtools.{2}.f{0}.log \
                         2>&1'.format(flag, self.out, label)
    cmd = clean_cmd(cmd)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'samtools', cmd)
        # Exit non-zero: a bare sys.exit() would signal success despite
        # the failed filter step.
        sys.exit(1)
def run(self):
    """Index the reference FASTA with ``bwa index`` and ``samtools faidx``.

    ``bwa index`` output is appended to ``<self.out>/log/bwa.log`` and
    ``samtools faidx`` output to ``<self.out>/log/samtools.log``.

    Raises:
        SystemExit: With status 1 when either indexing command returns
            non-zero; ``samtools faidx`` is not run if ``bwa index`` fails.
    """
    print(time_stamp(),
          'start to index reference fasta.',
          flush=True)

    cmd1 = 'bwa index {} \
            >> {}/log/bwa.log \
            2>&1'.format(self.args.ref, self.out)

    cmd2 = 'samtools faidx {} \
            >> {}/log/samtools.log \
            2>&1'.format(self.args.ref, self.out)

    cmd1 = clean_cmd(cmd1)
    cmd2 = clean_cmd(cmd2)

    try:
        sbp.run(cmd1,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'bwa', cmd1)
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)

    try:
        sbp.run(cmd2,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        # Report the command that actually failed (cmd2, not cmd1).
        call_log(self.out, 'samtools', cmd2)
        sys.exit(1)

    print(time_stamp(),
          'indexing of reference successfully finished.',
          flush=True)
def run(self, fastq1, fastq2, index):
    """Align a FASTQ pair and write a duplicate-removed, filtered BAM.

    One shell pipeline is executed:
    ``bwa mem | samtools fixmate -m | samtools sort | samtools markdup -r |
    samtools view -b -f 2 -F 2048``.  The result is written to
    ``<self.out>/20_bam/<index>.bam`` and the trailing redirection appends
    to ``<self.out>/log/alignment.log``.

    Args:
        fastq1: First FASTQ path handed to ``bwa mem``.
        fastq2: Second FASTQ path handed to ``bwa mem``.
        index: Label used for the BAM file name and the progress messages.

    Raises:
        SystemExit: With status 1 when the pipeline returns non-zero.
    """
    start_msg = 'start to align reads of {} by BWA.'.format(index)
    print(time_stamp(), start_msg, flush=True)

    pipeline = 'bwa mem -t {0} \
                        {1} {2} {3} | \
                samtools fixmate -m \
                                 - \
                                 - | \
                samtools sort -m {4} \
                              -@ {0} | \
                samtools markdup -r \
                                 - \
                                 - | \
                samtools view -b \
                              -f 2 \
                              -F 2048 \
                              -o {5}/20_bam/{6}.bam \
                              >> {5}/log/alignment.log \
                              2>&1'.format(self.args.threads,
                                           self.args.ref,
                                           fastq1,
                                           fastq2,
                                           self.args.mem,
                                           self.out,
                                           index)
    pipeline = clean_cmd(pipeline)

    run_options = dict(stdout=sbp.DEVNULL,
                       stderr=sbp.DEVNULL,
                       shell=True,
                       check=True)
    try:
        sbp.run(pipeline, **run_options)
    except sbp.CalledProcessError:
        call_log(self.out, 'alignment', pipeline)
        sys.exit(1)

    done_msg = 'alignment {} successfully finished.'.format(index)
    print(time_stamp(), done_msg, flush=True)
def merge(self):
    """Produce one sorted, indexed BAM per sample label.

    For each of ``'cultivar'`` and ``'bulk'``:

    1. gather the BAMs returned by ``self.get_bams(label)`` into
       ``<label>.unsorted.bam`` (a symlink when there is exactly one BAM,
       otherwise ``samtools merge`` over ``<label>*.bam``);
    2. ``samtools sort`` it into ``<label>.bam``;
    3. ``samtools index`` the sorted file;
    4. remove ``<label>.*.bam`` intermediates.

    Raises:
        SystemExit: With status 1 when step 1, 2 or 3 returns non-zero.
        subprocess.CalledProcessError: When the final ``rm`` fails (that
            call is not wrapped).
    """
    shell_kwargs = dict(stdout=sbp.DEVNULL,
                        stderr=sbp.DEVNULL,
                        shell=True,
                        check=True)

    for label in ['cultivar', 'bulk']:
        bams = self.get_bams(label)

        if len(bams) != 1:
            gather = 'samtools merge -f {0}/20_bam/{1}.unsorted.bam \
                                        {0}/20_bam/{1}*.bam \
                                        >> {0}/log/samtools.log \
                                        2>&1'.format(self.out, label)
        else:
            # A single input needs no merging; just link it into place.
            single_bam = os.path.abspath(bams[0])
            gather = 'ln -s {} {}/20_bam/{}.unsorted.bam'.format(
                single_bam, self.out, label)

        sort = 'samtools sort -m {0} \
                              -@ {1} \
                              -o {2}/20_bam/{3}.bam \
                              {2}/20_bam/{3}.unsorted.bam \
                              >> {2}/log/samtools.log \
                              2>&1'.format(self.args.mem,
                                           self.args.threads,
                                           self.out,
                                           label)

        index = 'samtools index {0}/20_bam/{1}.bam \
                                >> {0}/log/samtools.log \
                                2>&1'.format(self.out, label)

        cleanup = 'rm -f {}/20_bam/{}.*.bam'.format(self.out, label)

        gather, sort, index, cleanup = [
            clean_cmd(raw) for raw in (gather, sort, index, cleanup)
        ]

        # The three producing steps abort the program on failure.
        for step in (gather, sort, index):
            try:
                sbp.run(step, **shell_kwargs)
            except sbp.CalledProcessError:
                call_log(self.out, 'samtools', step)
                sys.exit(1)

        # Cleanup is intentionally left unguarded, as in the other steps'
        # absence of a handler: a failure propagates as CalledProcessError.
        sbp.run(cleanup, **shell_kwargs)
def concat(self):
    """Concatenate the per-region VCFs and logs, then delete the pieces.

    Steps, in order:

    1. ``cat`` the ``bcftools.*.log`` files into ``bcftools.log`` and the
       ``tabix.*.log`` files into ``tabix.log``;
    2. ``bcftools concat`` every ``mutmap.*.vcf.gz`` into
       ``<self.out>/30_vcf/mutmap.vcf.gz``;
    3. remove the per-region VCFs, their ``.tbi`` indexes and the
       per-region logs.

    Raises:
        SystemExit: With status 1 when ``bcftools concat`` returns non-zero.
        subprocess.CalledProcessError: When any ``cat`` or ``rm`` command
            fails (those calls are not wrapped).
    """
    out = self.out

    log_joins = [
        'cat {0}/log/bcftools.*.log > {0}/log/bcftools.log'.format(out),
        'cat {0}/log/tabix.*.log > {0}/log/tabix.log'.format(out),
    ]
    vcf_concat = 'bcftools concat -O z \
                                  -o {0}/30_vcf/mutmap.vcf.gz \
                                  {0}/30_vcf/mutmap.*.vcf.gz \
                                  >> {0}/log/bcftools.log \
                                  2>&1'.format(out)
    removals = [
        'rm -f {}/30_vcf/mutmap.*.vcf.gz'.format(out),
        'rm -f {}/30_vcf/mutmap.*.vcf.gz.tbi'.format(out),
        'rm -f {}/log/bcftools.*.log'.format(out),
        'rm -f {}/log/tabix.*.log'.format(out),
    ]

    # Clean every command up front, in the same order they will run.
    log_joins = [clean_cmd(raw) for raw in log_joins]
    vcf_concat = clean_cmd(vcf_concat)
    removals = [clean_cmd(raw) for raw in removals]

    def _shell(command):
        # Run silently through the shell; non-zero exit raises.
        sbp.run(command,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)

    for command in log_joins:
        _shell(command)

    try:
        _shell(vcf_concat)
    except sbp.CalledProcessError:
        call_log(out, 'bcftools', vcf_concat)
        sys.exit(1)

    for command in removals:
        _shell(command)
def merge(self, label):
    """Merge the four flag-filtered BAMs of ``label`` into one file.

    Steps, in order:

    1. ``cat`` the ``samtools.<label>.f{83,99,147,163}.log`` files into
       ``samtools.<label>.log``;
    2. remove ``<label>.bam`` and the per-flag log files;
    3. ``samtools merge -f`` the four ``<label>.f<flag>.bam`` files into
       ``<self.out>/20_bam/<label>.filt.bam``;
    4. remove the four per-flag BAM files.

    Args:
        label: Sample label used in all BAM and log file names.

    Raises:
        SystemExit: With status 1 when ``samtools merge`` returns non-zero.
        subprocess.CalledProcessError: When any ``cat`` or ``rm`` command
            fails (those calls are not wrapped).
    """
    flags = (83, 99, 147, 163)
    out = self.out

    flag_logs = ' '.join(
        '{0}/log/samtools.{1}.f{2}.log'.format(out, label, flag)
        for flag in flags
    )
    flag_bams = [
        '{0}/20_bam/{1}.f{2}.bam'.format(out, label, flag)
        for flag in flags
    ]

    before_merge = [
        'cat {0} > {1}/log/samtools.{2}.log'.format(flag_logs, out, label),
        'rm -f {0}/20_bam/{1}.bam'.format(out, label),
        'rm -f {0}/log/samtools.{1}.f*.log'.format(out, label),
    ]
    merge_cmd = ('samtools merge -f {0}/20_bam/{1}.filt.bam {2} '
                 '>> {0}/log/samtools.{1}.log 2>&1'
                 .format(out, label, ' '.join(flag_bams)))
    after_merge = ['rm -f {}'.format(bam) for bam in flag_bams]

    before_merge = [clean_cmd(cmd) for cmd in before_merge]
    merge_cmd = clean_cmd(merge_cmd)
    after_merge = [clean_cmd(cmd) for cmd in after_merge]

    shell_kwargs = dict(stdout=sbp.DEVNULL,
                        stderr=sbp.DEVNULL,
                        shell=True,
                        check=True)

    for cmd in before_merge:
        sbp.run(cmd, **shell_kwargs)

    try:
        sbp.run(merge_cmd, **shell_kwargs)
    except sbp.CalledProcessError:
        call_log(out, 'samtools', merge_cmd)
        # Exit non-zero: a bare sys.exit() would report success even
        # though the merged BAM was not produced.
        sys.exit(1)

    # Per-flag BAMs are only removed once the merge has succeeded.
    for cmd in after_merge:
        sbp.run(cmd, **shell_kwargs)
def run(self, fastq1, fastq2, index):
    """Trim a FASTQ pair with ``trimmomatic PE`` and align the result.

    Trimmed and unpaired reads are written under
    ``<self.out>/00_fastq/`` as ``<index>.{1,2}.trim.fastq.gz`` and
    ``<index>.{1,2}.unpaired.fastq.gz``; command output is appended to
    ``<self.out>/log/trimmomatic.log``.  The ``ILLUMINACLIP`` step is only
    added when ``self.trim_params['ILLUMINACLIP']`` is non-empty and does
    not contain the ``<ADAPTER_FASTA>`` placeholder.  After trimming, the
    two trimmed files are passed to ``Alignment(self.args).run``.

    Args:
        fastq1: First input FASTQ path.
        fastq2: Second input FASTQ path.
        index: Label used in the output file names and passed on to the
            alignment step.

    Raises:
        SystemExit: With status 1 when ``trimmomatic`` returns non-zero.
    """
    print(time_stamp(),
          'start trimming for {} and {}.'.format(fastq1, fastq2),
          flush=True)

    trim1 = '{}/00_fastq/{}.1.trim.fastq.gz'.format(self.out, index)
    trim2 = '{}/00_fastq/{}.2.trim.fastq.gz'.format(self.out, index)
    unpaired1 = '{}/00_fastq/{}.1.unpaired.fastq.gz'.format(
        self.out, index)
    unpaired2 = '{}/00_fastq/{}.2.unpaired.fastq.gz'.format(
        self.out, index)

    # Trimming steps in the order they are given on the command line;
    # ILLUMINACLIP goes first and only when an adapter spec is configured.
    steps = []
    clip = self.trim_params['ILLUMINACLIP']
    if clip and '<ADAPTER_FASTA>' not in clip:
        steps.append('ILLUMINACLIP:{}'.format(clip))
    steps.extend(
        '{}:{}'.format(name, self.trim_params[name])
        for name in ('LEADING', 'TRAILING', 'SLIDINGWINDOW', 'MINLEN')
    )

    cmd = ('trimmomatic PE -threads {} '
           '-phred{} {} {} {} {} {} {} '
           '{} '
           '>> {}/log/trimmomatic.log '
           '2>&1').format(self.args.threads,
                          self.trim_params['phred'],
                          fastq1,
                          fastq2,
                          trim1,
                          unpaired1,
                          trim2,
                          unpaired2,
                          ' '.join(steps),
                          self.out)
    cmd = clean_cmd(cmd)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'trimmomatic', cmd)
        # Exit non-zero so a failed trim is not reported as success.
        sys.exit(1)

    print(time_stamp(),
          'trimming for {} and {} successfully finished.'.format(
              fastq1, fastq2),
          flush=True)

    aln = Alignment(self.args)
    aln.run(trim1, trim2, index)