示例#1
0
    def run(self, fastq1, fastq2, index):
        """Align one pair of FASTQ files with BWA and store the result as BAM.

        Pipes ``bwa mem`` into ``samtools view -b``, writing
        ``<self.out>/20_bam/<index>.bam`` and appending the tools' output to
        ``<self.out>/log/bwa.log``.

        Args:
            fastq1: First FASTQ path handed to ``bwa mem``.
            fastq2: Second FASTQ path handed to ``bwa mem``.
            index: Label used for the BAM file name and progress messages.

        Raises:
            SystemExit: With status 1 when the shell command returns a
                non-zero exit status (after reporting it via ``call_log``).
        """
        print(time_stamp(),
              'start to align reads of {} by BWA.'.format(index),
              flush=True)

        # NOTE: the shell reports the exit status of the last pipeline stage
        # only, so ``check=True`` reacts to the status of ``samtools view``.
        cmd = 'bwa mem -t {0} {1} {2} {3} | \
               samtools view -b \
                             -o {4}/20_bam/{5}.bam \
                             >> {4}/log/bwa.log \
                             2>&1'.format(self.args.threads, self.args.ref,
                                          fastq1, fastq2, self.out, index)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'bwa', cmd)
            # A bare sys.exit() would terminate with status 0 and make the
            # failed alignment look successful to the calling process.
            sys.exit(1)

        print(time_stamp(),
              'alignment {} successfully finished.'.format(index),
              flush=True)
示例#2
0
    def mpileup(self, chr_name):
        """Call variants for one region and index the resulting VCF.

        Runs ``bcftools mpileup | bcftools call | bcftools filter`` on
        ``cultivar.bam`` and ``bulk.bam`` under ``<self.out>/20_bam``,
        restricted to ``chr_name`` via ``-r``, and writes
        ``<self.out>/30_vcf/mutmap.<chr_name>.vcf.gz``. The file is then
        indexed with ``tabix``. Each tool appends to its own per-region log.

        Args:
            chr_name: Region passed to ``bcftools mpileup -r``; also used in
                the output and log file names.

        Raises:
            SystemExit: With status 1 if either command fails (the failure is
                first reported via ``call_log``).
        """
        calling_cmd = 'bcftools mpileup -a AD,ADF,ADR \
                                 -B \
                                 -q {0}\
                                 -Q {1} \
                                 -C {2} \
                                 -O u \
                                 -r {3} \
                                 -f {4} \
                                 {5}/20_bam/cultivar.bam \
                                 {5}/20_bam/bulk.bam | \
                bcftools call -vm \
                              -f GQ,GP \
                              -O u | \
                bcftools filter -i "INFO/MQ>={0}" \
                                -O z \
                                -o {5}/30_vcf/mutmap.{3}.vcf.gz \
                                >> {5}/log/bcftools.{3}.log \
                                2>&1'.format(
            self.args.min_MQ, self.args.min_BQ, self.args.adjust_MQ,
            chr_name, self.args.ref, self.out)

        indexing_cmd = 'tabix -f \
                      -p vcf \
                      {0}/30_vcf/mutmap.{1}.vcf.gz \
                      >> {0}/log/tabix.{1}.log \
                      2>&1'.format(self.out, chr_name)

        # Both commands are normalized up front, then executed in order.
        steps = (('bcftools', clean_cmd(calling_cmd)),
                 ('tabix', clean_cmd(indexing_cmd)))

        quiet_shell = dict(stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=True)

        for tool, shell_cmd in steps:
            try:
                sbp.run(shell_cmd, **quiet_shell)
            except sbp.CalledProcessError:
                call_log(self.out, tool, shell_cmd)
                sys.exit(1)
示例#3
0
    def mkindex(self):
        """Index ``<self.out>/30_vcf/mutmap.vcf.gz`` with ``tabix``.

        Tool output is appended to ``<self.out>/log/tabix.log``.

        Raises:
            SystemExit: With status 1 if ``tabix`` fails (the failure is
                first reported via ``call_log``).
        """
        tabix_cmd = clean_cmd('tabix -f \
                     -p vcf \
                     {0}/30_vcf/mutmap.vcf.gz \
                     >> {0}/log/tabix.log \
                     2>&1'.format(self.out))

        quiet_shell = dict(stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=True)

        try:
            sbp.run(tabix_cmd, **quiet_shell)
        except sbp.CalledProcessError:
            call_log(self.out, 'tabix', tabix_cmd)
            sys.exit(1)
示例#4
0
    def filt(self, label_with_flags):
        """Extract the reads of one BAM that carry a given SAM flag.

        Runs ``samtools view -b -f <flag>`` on
        ``<self.out>/20_bam/<label>.bam`` and writes
        ``<self.out>/20_bam/<label>.f<flag>.bam``; tool output is appended to
        ``<self.out>/log/samtools.<label>.f<flag>.log``.

        Args:
            label_with_flags: Indexable pair whose first item is the flag
                value and whose second item is the BAM label.

        Raises:
            SystemExit: With status 1 if ``samtools`` fails (the failure is
                first reported via ``call_log``).
        """
        flag = label_with_flags[0]
        label = label_with_flags[1]
        cmd = 'samtools view -b \
                             -f {0} \
                             -o {1}/20_bam/{2}.f{0}.bam \
                             {1}/20_bam/{2}.bam \
                             >> {1}/log/samtools.{2}.f{0}.log \
                             2>&1'.format(flag, self.out, label)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'samtools', cmd)
            # Exit non-zero: a bare sys.exit() reports success (status 0).
            sys.exit(1)
示例#5
0
    def run(self):
        """Index the reference FASTA with ``bwa index`` and ``samtools faidx``.

        Output of the two tools is appended to ``<self.out>/log/bwa.log`` and
        ``<self.out>/log/samtools.log`` respectively.

        Raises:
            SystemExit: With status 1 if either command fails (the failing
                command is first reported via ``call_log``).
        """
        print(time_stamp(),
              'start to index reference fasta.',
              flush=True)

        cmd1 = 'bwa index {} \
                >> {}/log/bwa.log \
                2>&1'.format(self.args.ref, self.out)

        cmd2 = 'samtools faidx {} \
                >> {}/log/samtools.log \
                2>&1'.format(self.args.ref, self.out)

        cmd1 = clean_cmd(cmd1)
        cmd2 = clean_cmd(cmd2)

        # Run in order; report the command that actually failed and stop with
        # a non-zero status so callers can detect the failure.
        for tool, cmd in (('bwa', cmd1), ('samtools', cmd2)):
            try:
                sbp.run(cmd,
                        stdout=sbp.DEVNULL,
                        stderr=sbp.DEVNULL,
                        shell=True,
                        check=True)
            except sbp.CalledProcessError:
                call_log(self.out, tool, cmd)
                sys.exit(1)

        print(time_stamp(),
              'indexing of reference successfully finished.',
              flush=True)
示例#6
0
    def run(self, fastq1, fastq2, index):
        """Align a FASTQ pair and write a sorted, de-duplicated BAM.

        The shell pipeline is ``bwa mem`` -> ``samtools fixmate -m`` ->
        ``samtools sort`` -> ``samtools markdup -r`` ->
        ``samtools view -b -f 2 -F 2048``; the result is written to
        ``<self.out>/20_bam/<index>.bam`` and output is appended to
        ``<self.out>/log/alignment.log``.

        Args:
            fastq1: First FASTQ path handed to ``bwa mem``.
            fastq2: Second FASTQ path handed to ``bwa mem``.
            index: Label used for the BAM file name and progress messages.

        Raises:
            SystemExit: With status 1 if the pipeline command fails (the
                failure is first reported via ``call_log``).
        """
        print(time_stamp(),
              'start to align reads of {} by BWA.'.format(index),
              flush=True)

        pipeline = 'bwa mem -t {0} \
                       {1} {2} {3} | \
               samtools fixmate -m \
                                - \
                                - | \
               samtools sort -m {4} \
                             -@ {0} | \
               samtools markdup -r \
                                - \
                                - | \
               samtools view -b \
                             -f 2 \
                             -F 2048 \
                             -o {5}/20_bam/{6}.bam \
                             >> {5}/log/alignment.log \
                             2>&1'.format(
            self.args.threads, self.args.ref, fastq1, fastq2,
            self.args.mem, self.out, index)

        pipeline = clean_cmd(pipeline)

        quiet_shell = dict(stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=True)

        try:
            sbp.run(pipeline, **quiet_shell)
        except sbp.CalledProcessError:
            call_log(self.out, 'alignment', pipeline)
            sys.exit(1)

        print(time_stamp(),
              'alignment {} successfully finished.'.format(index),
              flush=True)
示例#7
0
    def merge(self):
        """Build one sorted, indexed BAM per label ('cultivar' and 'bulk').

        For each label the BAMs returned by ``self.get_bams(label)`` are
        combined into ``<label>.unsorted.bam`` (a symlink when there is
        exactly one BAM, ``samtools merge`` otherwise), sorted into
        ``<label>.bam``, and indexed. Afterwards every file matching
        ``<label>.*.bam`` under ``<self.out>/20_bam`` is removed.

        Raises:
            SystemExit: With status 1 if the link/merge, sort or index step
                fails (the failure is first reported via ``call_log``).
            subprocess.CalledProcessError: If the final ``rm`` fails; that
                step is not wrapped in an error handler.
        """
        quiet_shell = dict(stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=True)

        for label in ['cultivar', 'bulk']:
            bams = self.get_bams(label)
            if len(bams) == 1:
                # A single input needs no merge; link it under the
                # expected name instead.
                only_bam = os.path.abspath(bams[0])
                combine_cmd = 'ln -s {} {}/20_bam/{}.unsorted.bam'.format(
                    only_bam, self.out, label)
            else:
                combine_cmd = 'samtools merge -f {0}/20_bam/{1}.unsorted.bam \
                                          {0}/20_bam/{1}*.bam \
                                          >> {0}/log/samtools.log \
                                          2>&1'.format(self.out, label)

            sort_cmd = 'samtools sort -m {0} \
                                  -@ {1} \
                                  -o {2}/20_bam/{3}.bam \
                                  {2}/20_bam/{3}.unsorted.bam \
                                  >> {2}/log/samtools.log \
                                  2>&1'.format(
                self.args.mem, self.args.threads, self.out, label)

            index_cmd = 'samtools index {0}/20_bam/{1}.bam \
                                   >> {0}/log/samtools.log \
                                   2>&1'.format(self.out, label)

            cleanup_cmd = 'rm -f {}/20_bam/{}.*.bam'.format(self.out, label)

            # Normalize every command before anything is executed.
            checked_steps = [clean_cmd(raw)
                             for raw in (combine_cmd, sort_cmd, index_cmd)]
            cleanup_cmd = clean_cmd(cleanup_cmd)

            for step in checked_steps:
                try:
                    sbp.run(step, **quiet_shell)
                except sbp.CalledProcessError:
                    call_log(self.out, 'samtools', step)
                    sys.exit(1)

            sbp.run(cleanup_cmd, **quiet_shell)
示例#8
0
    def concat(self):
        """Concatenate the per-region VCFs and logs, then remove the parts.

        Steps, in order:
          1. ``cat`` the per-region bcftools and tabix logs into
             ``bcftools.log`` and ``tabix.log`` under ``<self.out>/log``.
          2. ``bcftools concat`` all ``mutmap.*.vcf.gz`` files under
             ``<self.out>/30_vcf`` into ``mutmap.vcf.gz``.
          3. Delete the per-region VCFs, their ``.tbi`` indexes and the
             per-region logs.

        Raises:
            SystemExit: With status 1 if ``bcftools concat`` fails (the
                failure is first reported via ``call_log``).
            subprocess.CalledProcessError: If one of the ``cat``/``rm``
                commands fails; those are not wrapped in an error handler.
        """
        log_templates = (
            'cat {0}/log/bcftools.*.log > {0}/log/bcftools.log',
            'cat {0}/log/tabix.*.log > {0}/log/tabix.log',
        )
        gather_logs = [tmpl.format(self.out) for tmpl in log_templates]

        concat_cmd = 'bcftools concat -O z \
                                -o {0}/30_vcf/mutmap.vcf.gz \
                                {0}/30_vcf/mutmap.*.vcf.gz \
                                >> {0}/log/bcftools.log \
                                2>&1'.format(self.out)

        rm_templates = (
            'rm -f {}/30_vcf/mutmap.*.vcf.gz',
            'rm -f {}/30_vcf/mutmap.*.vcf.gz.tbi',
            'rm -f {}/log/bcftools.*.log',
            'rm -f {}/log/tabix.*.log',
        )
        remove_parts = [tmpl.format(self.out) for tmpl in rm_templates]

        # Normalize every command before anything is executed.
        gather_logs = [clean_cmd(raw) for raw in gather_logs]
        concat_cmd = clean_cmd(concat_cmd)
        remove_parts = [clean_cmd(raw) for raw in remove_parts]

        quiet_shell = dict(stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=True)

        for shell_cmd in gather_logs:
            sbp.run(shell_cmd, **quiet_shell)

        try:
            sbp.run(concat_cmd, **quiet_shell)
        except sbp.CalledProcessError:
            call_log(self.out, 'bcftools', concat_cmd)
            sys.exit(1)

        for shell_cmd in remove_parts:
            sbp.run(shell_cmd, **quiet_shell)
示例#9
0
    def merge(self, label):
        """Merge the per-flag BAMs of ``label`` into ``<label>.filt.bam``.

        Steps, in order:
          1. Concatenate the f83/f99/f147/f163 samtools logs into
             ``<self.out>/log/samtools.<label>.log``.
          2. Delete ``<self.out>/20_bam/<label>.bam`` and the per-flag logs.
          3. ``samtools merge`` the four per-flag BAMs into
             ``<self.out>/20_bam/<label>.filt.bam``.
          4. Delete the four per-flag BAMs.

        Args:
            label: BAM label used in all file names above.

        Raises:
            SystemExit: With status 1 if ``samtools merge`` fails (the
                failure is first reported via ``call_log``).
            subprocess.CalledProcessError: If one of the ``cat``/``rm``
                commands fails; those are not wrapped in an error handler.
        """
        flags = ('83', '99', '147', '163')

        cmd1 = 'cat {0}/log/samtools.{1}.f83.log \
                    {0}/log/samtools.{1}.f99.log \
                    {0}/log/samtools.{1}.f147.log \
                    {0}/log/samtools.{1}.f163.log \
                    > {0}/log/samtools.{1}.log'.format(self.out, label)

        cmd2 = 'rm -f {0}/20_bam/{1}.bam'.format(self.out, label)
        cmd3 = 'rm -f {0}/log/samtools.{1}.f*.log'.format(self.out, label)

        cmd4 = 'samtools merge -f {0}/20_bam/{1}.filt.bam \
                                  {0}/20_bam/{1}.f83.bam \
                                  {0}/20_bam/{1}.f99.bam \
                                  {0}/20_bam/{1}.f147.bam \
                                  {0}/20_bam/{1}.f163.bam \
                                  >> {0}/log/samtools.{1}.log \
                                  2>&1'.format(self.out, label)

        # One removal command per per-flag BAM, in the same order as before.
        remove_parts = ['rm -f {}/20_bam/{}.f{}.bam'.format(self.out, label,
                                                            flag)
                        for flag in flags]

        prepare = [clean_cmd(cmd) for cmd in (cmd1, cmd2, cmd3)]
        cmd4 = clean_cmd(cmd4)
        remove_parts = [clean_cmd(cmd) for cmd in remove_parts]

        run_opts = dict(stdout=sbp.DEVNULL,
                        stderr=sbp.DEVNULL,
                        shell=True,
                        check=True)

        for cmd in prepare:
            sbp.run(cmd, **run_opts)

        try:
            sbp.run(cmd4, **run_opts)
        except sbp.CalledProcessError:
            call_log(self.out, 'samtools', cmd4)
            # Exit non-zero: a bare sys.exit() reports success (status 0).
            sys.exit(1)

        for cmd in remove_parts:
            sbp.run(cmd, **run_opts)
示例#10
0
    def run(self, fastq1, fastq2, index):
        """Trim a FASTQ pair with trimmomatic, then align the trimmed reads.

        Paired and unpaired outputs are written under
        ``<self.out>/00_fastq`` as ``<index>.{1,2}.trim.fastq.gz`` and
        ``<index>.{1,2}.unpaired.fastq.gz``; tool output is appended to
        ``<self.out>/log/trimmomatic.log``. The ``ILLUMINACLIP`` step is left
        out when ``self.trim_params['ILLUMINACLIP']`` is empty or still
        contains the ``<ADAPTER_FASTA>`` placeholder. After trimming, the
        paired outputs are passed to ``Alignment(self.args).run``.

        Args:
            fastq1: First input FASTQ path.
            fastq2: Second input FASTQ path.
            index: Label used for output file names and for the alignment.

        Raises:
            SystemExit: With status 1 if trimmomatic fails (the failure is
                first reported via ``call_log``).
        """
        print(time_stamp(),
              'start trimming for {} and {}.'.format(fastq1, fastq2),
              flush=True)

        trim1 = '{}/00_fastq/{}.1.trim.fastq.gz'.format(self.out, index)
        trim2 = '{}/00_fastq/{}.2.trim.fastq.gz'.format(self.out, index)
        unpaired1 = '{}/00_fastq/{}.1.unpaired.fastq.gz'.format(
            self.out, index)
        unpaired2 = '{}/00_fastq/{}.2.unpaired.fastq.gz'.format(
            self.out, index)

        clip = self.trim_params['ILLUMINACLIP']

        if (not clip) or ('<ADAPTER_FASTA>' in clip):
            # No usable adapter setting: run without ILLUMINACLIP.
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(
                self.args.threads, self.trim_params['phred'], fastq1, fastq2,
                trim1, unpaired1, trim2, unpaired2,
                self.trim_params['LEADING'], self.trim_params['TRAILING'],
                self.trim_params['SLIDINGWINDOW'], self.trim_params['MINLEN'],
                self.out)
        else:
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  ILLUMINACLIP:{} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(
                self.args.threads, self.trim_params['phred'], fastq1, fastq2,
                trim1, unpaired1, trim2, unpaired2,
                clip, self.trim_params['LEADING'],
                self.trim_params['TRAILING'],
                self.trim_params['SLIDINGWINDOW'], self.trim_params['MINLEN'],
                self.out)
        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'trimmomatic', cmd)
            # Exit non-zero: a bare sys.exit() reports success (status 0).
            sys.exit(1)

        print(time_stamp(),
              'trimming for {} and {} successfully finished.'.format(
                  fastq1, fastq2),
              flush=True)

        aln = Alignment(self.args)
        aln.run(trim1, trim2, index)