Exemplo n.º 1
0
    def get_field(self):
        """Locate the GT/AD/ADF/ADR keys in the FORMAT column of the VCF.

        Only the first non-blank, non-header record of ``self.vcf`` is
        inspected. A path whose extension is ``.gz`` is read through
        ``gzip``; anything else is opened as plain text.

        Returns:
            A tuple ``(GT_pos, AD_pos, ADF_pos, ADR_pos)`` of indices into
            the colon-separated FORMAT column (the 9th whitespace-separated
            column). ``ADF_pos`` and ``ADR_pos`` are both ``None`` unless
            both keys are present.

        Raises:
            SystemExit: with status 1 when the file holds no record with a
                FORMAT column, or when the GT or AD key is missing.
        """
        _, ext = os.path.splitext(self.vcf)
        opener = gzip.open if ext == '.gz' else open

        fields = None
        # The context manager closes the handle even when parsing fails.
        with opener(self.vcf, 'rt') as vcf:
            for line in vcf:
                # Header lines and blank lines carry no FORMAT column.
                if line.startswith('#') or not line.strip():
                    continue
                columns = line.split()
                if len(columns) > 8:
                    fields = columns[8].split(':')
                # Only the first record is examined.
                break

        if fields is None:
            sys.stderr.write(('{} No variant record with a FORMAT column'
                              ' in your VCF!!\n').format(time_stamp()))
            sys.exit(1)

        # GT and AD are mandatory; report the first missing one and stop.
        for key in ('GT', 'AD'):
            if key not in fields:
                sys.stderr.write(('{} No {} field'
                                  ' in your VCF!!\n').format(time_stamp(),
                                                            key))
                sys.exit(1)

        GT_pos = fields.index('GT')
        AD_pos = fields.index('AD')

        if 'ADF' in fields and 'ADR' in fields:
            ADF_pos = fields.index('ADF')
            ADR_pos = fields.index('ADR')
        else:
            # Both strand-specific depths are required together.
            ADF_pos = None
            ADR_pos = None
            sys.stderr.write(('{} no ADF or ADR field'
                              ' in your VCF.\n').format(time_stamp()))
            sys.stderr.write(('{} strand bias filter '
                              'will be skipped.\n').format(time_stamp()))

        return GT_pos, AD_pos, ADF_pos, ADR_pos
Exemplo n.º 2
0
    def run(self, fastq1, fastq2, index):
        """Align a read pair with ``bwa mem`` and store the result as BAM.

        The shell pipeline pipes ``bwa mem`` into ``samtools view -b``,
        writes ``<out>/20_bam/<index>.bam`` and appends tool output to
        ``<out>/log/bwa.log``.

        Args:
            fastq1: first FASTQ path handed to ``bwa mem``.
            fastq2: second FASTQ path handed to ``bwa mem``.
            index: label used in the BAM file name and progress messages.

        Raises:
            SystemExit: with status 1 when the pipeline exits non-zero.
        """
        print(time_stamp(),
              'start to align reads of {} by BWA.'.format(index),
              flush=True)

        cmd = 'bwa mem -t {0} {1} {2} {3} | \
               samtools view -b \
                             -o {4}/20_bam/{5}.bam \
                             >> {4}/log/bwa.log \
                             2>&1'.format(self.args.threads,
                                          self.args.ref,
                                          fastq1,
                                          fastq2,
                                          self.out,
                                          index)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'bwa', cmd)
            # A bare sys.exit() would report success (status 0) to the caller.
            sys.exit(1)

        print(time_stamp(),
              'alignment {} successfully finished.'.format(index),
              flush=True)
Exemplo n.º 3
0
    def run(self):
        """Calculate the SNP-index and then smooth it.

        The result of ``get_field()`` is stored on ``self.field_pos`` before
        ``calculate_SNPindex()`` is called; afterwards a ``Smooth`` object
        built from ``self.args`` is run. Progress is printed around each
        stage.
        """
        # Stage 1: SNP-index calculation.
        print(time_stamp(), 'start to calculate SNP-index.', flush=True)
        self.field_pos = self.get_field()
        self.calculate_SNPindex()
        print(time_stamp(), 'SNP-index successfully finished.', flush=True)

        # Stage 2: smoothing.
        print(time_stamp(), 'start to smooth SNP-index.', flush=True)
        Smooth(self.args).run()
        print(time_stamp(),
              'smoothing process successfully finished.',
              flush=True)
Exemplo n.º 4
0
    def run(self):
        """Filter reads and merge the results using worker pools.

        ``self.filt`` is mapped over the flagged labels first, then
        ``self.merge`` over the plain labels, each in its own pool of
        ``self.args.threads`` workers. ``self.clean_log()`` runs once both
        stages are done.
        """
        labels, label_with_flags = self.get_labels()

        print(time_stamp(), 'start to filter reads.', flush=True)
        # The merge stage must not start before every filter job finished,
        # so each stage gets its own pool and is waited on in turn.
        for worker, jobs in ((self.filt, label_with_flags),
                             (self.merge, labels)):
            pool = Pool(self.args.threads)
            try:
                pool.map(worker, jobs)
            finally:
                # Release the worker processes even when a job raised.
                pool.close()
                pool.join()

        self.clean_log()
        print(time_stamp(),
              'filtering process successfully finished.',
              flush=True)
Exemplo n.º 5
0
 def check_outdir(self):
     """Create the output directory ``self.out`` unless it already exists.

     When the path is already present a notice naming it is printed and
     nothing else happens.
     """
     # Try the mkdir directly instead of checking first, so there is no
     # window between the existence test and the creation.
     try:
         os.mkdir(self.out)
     except FileExistsError:
         print(time_stamp(),
               'output directory {} already exists.'.format(self.out),
               flush=True)
Exemplo n.º 6
0
    def run_snpEff(self):
        """Annotate ``self.vcf`` with SnpEff and switch ``self.args.vcf``.

        The shell command writes ``snpEff_summary.html``,
        ``qtlseq.snpEff.vcf`` and ``snpEff.log`` into ``self.out``. After a
        successful run ``self.args.vcf`` points at the annotated VCF. A
        non-zero exit status propagates as ``CalledProcessError``.
        """
        template = 'snpEff ann -s {0}/snpEff_summary.html \
                          {1} \
                          {2} \
                          1> {0}/qtlseq.snpEff.vcf \
                          2> {0}/snpEff.log'
        command = clean_cmd(template.format(self.out, self.snpEff, self.vcf))

        print(time_stamp(), 'start to run SnpEff.', flush=True)
        # The command redirects its own output; the Python-side streams
        # are simply discarded.
        sbp.run(command,
                shell=True,
                check=True,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL)
        print(time_stamp(), 'SnpEff successfully finished.', flush=True)

        annotated_vcf = '{}/qtlseq.snpEff.vcf'.format(self.out)
        self.args.vcf = annotated_vcf
Exemplo n.º 7
0
    def run(self, fastq1, fastq2, index):
        """Align a read pair and write a filtered, duplicate-free BAM.

        The shell pipeline chains ``bwa mem``, ``samtools fixmate``,
        ``samtools sort``, ``samtools markdup -r`` and ``samtools view``
        (``-f 2 -F 2048``). It writes ``<out>/20_bam/<index>.bam`` and
        appends tool output to ``<out>/log/alignment.log``.

        Args:
            fastq1: first FASTQ path handed to ``bwa mem``.
            fastq2: second FASTQ path handed to ``bwa mem``.
            index: label used in the BAM file name and progress messages.

        Raises:
            SystemExit: with status 1 when the pipeline exits non-zero.
        """
        start_msg = 'start to align reads of {} by BWA.'.format(index)
        print(time_stamp(), start_msg, flush=True)

        template = 'bwa mem -t {0} \
                       {1} {2} {3} | \
               samtools fixmate -m \
                                - \
                                - | \
               samtools sort -m {4} \
                             -@ {0} | \
               samtools markdup -r \
                                - \
                                - | \
               samtools view -b \
                             -f 2 \
                             -F 2048 \
                             -o {5}/20_bam/{6}.bam \
                             >> {5}/log/alignment.log \
                             2>&1'
        command = clean_cmd(template.format(self.args.threads,
                                            self.args.ref,
                                            fastq1,
                                            fastq2,
                                            self.args.mem,
                                            self.out,
                                            index))

        try:
            sbp.run(command,
                    shell=True,
                    check=True,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL)
        except sbp.CalledProcessError:
            call_log(self.out, 'alignment', command)
            sys.exit(1)

        done_msg = 'alignment {} successfully finished.'.format(index)
        print(time_stamp(), done_msg, flush=True)
Exemplo n.º 8
0
 def check_max_threads(self, args):
     """Clamp ``args.threads`` to the number of CPUs on this machine.

     Args:
         args: object with an integer ``threads`` attribute; it is
             modified in place.

     Returns:
         The same ``args`` object. ``threads`` is replaced by the CPU count
         when it exceeds that count (with a warning on stderr) or when it
         is smaller than 1.
     """
     max_cpu = multi.cpu_count()
     print(time_stamp(),
           'maximum number of threads which you can use is up to {}.'.format(max_cpu),
           flush=True)
     # Warn only when the request really exceeds the limit; asking for
     # exactly max_cpu threads is valid and needs no adjustment.
     if max_cpu < args.threads:
         sys.stderr.write(('!!WARNING!! You can use up to {0} threads. '
                           'This program will use {0} threads.\n').format(max_cpu))
         sys.stderr.flush()
         args.threads = max_cpu
     elif args.threads < 1:
         # Values below 1 are replaced by "use every CPU".
         args.threads = max_cpu
     return args
Exemplo n.º 9
0
    def run(self):
        """Index the reference FASTA with ``bwa index`` and ``samtools faidx``.

        Tool output is appended to ``<out>/log/bwa.log`` and
        ``<out>/log/samtools.log`` respectively.

        Raises:
            SystemExit: with status 1 when either command exits non-zero.
        """
        print(time_stamp(), 'start to index reference fasta.', flush=True)

        cmd1 = 'bwa index {} \
                >> {}/log/bwa.log \
                2>&1'.format(self.args.ref, self.out)

        cmd2 = 'samtools faidx {} \
                >> {}/log/samtools.log \
                2>&1'.format(self.args.ref, self.out)

        cmd1 = clean_cmd(cmd1)
        cmd2 = clean_cmd(cmd2)

        # Run the tools in order; each failure is reported with the command
        # that actually failed and ends the program with a non-zero status.
        for tool, cmd in (('bwa', cmd1), ('samtools', cmd2)):
            try:
                sbp.run(cmd,
                        stdout=sbp.DEVNULL,
                        stderr=sbp.DEVNULL,
                        shell=True,
                        check=True)
            except sbp.CalledProcessError:
                call_log(self.out, tool, cmd)
                sys.exit(1)

        print(time_stamp(),
              'indexing of reference successfully finished.',
              flush=True)
Exemplo n.º 10
0
    def run(self):
        """Run the plotting workflow from VCF to outlier tables.

        Steps, in order: ensure the output directory, optionally run SnpEff
        (only when ``self.snpEff`` is not ``None``), run ``Vcf2Index`` and
        ``Plot`` built from ``self.args``, collect outlier SNP-indices and
        windows, and write the IGV file when ``self.args.igv`` is truthy.
        """
        self.check_outdir()

        annotate = self.snpEff is not None
        if annotate:
            self.run_snpEff()

        Vcf2Index(self.args).run()

        print(time_stamp(), 'plotting now...', flush=True)
        Plot(self.args).run()

        self.get_outlier_SNPindex()
        self.get_outlier_windows()

        if self.args.igv:
            self.make_igv_file()
Exemplo n.º 11
0
    def run(self):
        """Merge BAMs, call variants in parallel, and index the VCF.

        ``self.mpileup`` is mapped over the names returned by
        ``self.get_header()`` in a pool of ``self.args.threads`` workers;
        ``self.concat()`` runs afterwards, followed by ``self.mkindex()``.
        """
        print(time_stamp(), 'start to merge BAMs.', flush=True)
        self.merge()
        print(time_stamp(),
              'merging process successfully finished.',
              flush=True)

        print(time_stamp(), 'start to call variants.', flush=True)
        chr_names = self.get_header()

        pool = Pool(self.args.threads)
        try:
            pool.map(self.mpileup, chr_names)
        finally:
            # Release the worker processes even when a job raised.
            pool.close()
            pool.join()

        self.concat()
        print(time_stamp(),
              'variant calling successfully finished.',
              flush=True)

        print(time_stamp(), 'start to index VCF.', flush=True)
        self.mkindex()
        print(time_stamp(), 'indexing VCF successfully finished.', flush=True)
Exemplo n.º 12
0
    def run(self, fastq1, fastq2, index):
        """Trim a read pair with Trimmomatic, then align the trimmed reads.

        Trimmed and unpaired reads are written under ``<out>/00_fastq`` and
        tool output is appended to ``<out>/log/trimmomatic.log``. The
        ``ILLUMINACLIP`` step is left out when its parameter is empty or
        still contains the ``<ADAPTER_FASTA>`` placeholder. On success the
        trimmed pair is passed to ``Alignment(self.args).run``.

        Args:
            fastq1: first input FASTQ path.
            fastq2: second input FASTQ path.
            index: label used in output file names.

        Raises:
            SystemExit: with status 1 when Trimmomatic exits non-zero.
        """
        print(time_stamp(),
              'start trimming for {} and {}.'.format(fastq1, fastq2),
              flush=True)

        trim1 = '{}/00_fastq/{}.1.trim.fastq.gz'.format(self.out, index)
        trim2 = '{}/00_fastq/{}.2.trim.fastq.gz'.format(self.out, index)
        unpaired1 = '{}/00_fastq/{}.1.unpaired.fastq.gz'.format(
            self.out, index)
        unpaired2 = '{}/00_fastq/{}.2.unpaired.fastq.gz'.format(
            self.out, index)

        # Arguments shared by both command variants, in template order.
        head = (self.args.threads, self.trim_params['phred'], fastq1, fastq2,
                trim1, unpaired1, trim2, unpaired2)
        tail = (self.trim_params['LEADING'], self.trim_params['TRAILING'],
                self.trim_params['SLIDINGWINDOW'], self.trim_params['MINLEN'],
                self.out)

        if (len(self.trim_params['ILLUMINACLIP']) == 0) or \
           ('<ADAPTER_FASTA>' in self.trim_params['ILLUMINACLIP']):
            # No usable adapter setting: run without the ILLUMINACLIP step.
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(*head, *tail)
        else:
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  ILLUMINACLIP:{} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(
                *head, self.trim_params['ILLUMINACLIP'], *tail)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'trimmomatic', cmd)
            # A bare sys.exit() would report success (status 0) to the caller.
            sys.exit(1)

        print(time_stamp(),
              'trimming for {} and {} successfully finished.'.format(
                  fastq1, fastq2),
              flush=True)

        aln = Alignment(self.args)
        aln.run(trim1, trim2, index)
Exemplo n.º 13
0
 def run(self):
     """Draw the bulk1, bulk2 and delta SNP-index plots, in that order."""
     print(time_stamp(), 'plotting now...', flush=True)
     plotters = (self.plot_bulk1_SNPindex,
                 self.plot_bulk2_SNPindex,
                 self.plot_delta_SNPindex)
     for draw in plotters:
         draw()
Exemplo n.º 14
0
def main():
    """Run the QTL-seq pipeline using the module-level ``args``."""
    print(time_stamp(), 'start to run QTL-seq.', flush=True)
    pipeline = QTLseq(args)
    pipeline.run()
    print(time_stamp(), 'QTL-seq successfully finished.\n', flush=True)
Exemplo n.º 15
0
def main():
    """Run the QTL-plot workflow using the module-level ``args``."""
    print(time_stamp(), 'start to run QTL-plot.', flush=True)
    QTLplot(args).run()
    print(time_stamp(), 'QTL-plot successfully finished.', flush=True)