Exemplo n.º 1
0
    def sam_retrieval(self):
        """Filter every BAM in the input directory with ``samtools view -f 130`` and index it.

        SAM flag mask 130 keeps alignments that are properly paired (2) and
        second in pair (128). Each result is written to
        ``<home_dir>/sam_retrieved_r2/<name>_R2`` followed by the suffix stored
        in ``self.extensions[4]``, then indexed with ``samtools index``.

        Raises:
            subprocess.CalledProcessError: if a samtools command exits non-zero.
        """
        r2_dir = os.path.join(self.home_dir, 'sam_retrieved_r2')
        if not os.path.isdir(r2_dir):
            os.mkdir(r2_dir)

        # input_dir is concatenated as-is; no path separator is inserted here.
        alignments = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in alignments:
            bam_name = os.path.basename(bam_path)
            print(ctw.CRED + 'Retrieving R2 mapped reads from ' + ctw.CBLUE + bam_name + ctw.CRED + ' ...' + ctw.CEND + '\n')

            r2_bam = r2_dir + '/' + bam_name.split('.bam')[0] + '_R2' + self.extensions[4]

            view_cmd = ' '.join([
                'samtools', 'view', '-h', '-b', '-f', '130',
                '-@', self.threads, bam_path,
                '-o', r2_bam,
            ])
            sp.check_call(view_cmd, shell=True)

            print('\n' + ctw.CRED + 'Indexing: ' + ctw.CBLUE + os.path.basename(r2_bam) + ctw.CRED + ' ...' + ctw.CEND + '\n')

            index_cmd = ' '.join(['samtools index', r2_bam])
            sp.check_call(index_cmd, shell=True)

        print(ctw.CBEIGE + ctw.CBOLD + 'R2 Retrieval and Indexing Completed!!!' + ctw.CEND + '\n')
Exemplo n.º 2
0
    def refgen(self):
        """Build a STAR genome index in ``<home_dir>/star_genome`` unless it already has content.

        The directory is created when missing. If it contains any entry the
        generation step is skipped and a notice is printed instead.

        Raises:
            subprocess.CalledProcessError: if STAR exits non-zero.
        """
        genome_dir = os.path.join(self.home_dir, 'star_genome')
        if not os.path.isdir(genome_dir):
            os.mkdir(genome_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        # Guard clause: never overwrite a directory that already holds files.
        if len(os.listdir(genome_dir + '/')) != 0:
            print(
                ctw.CRED +
                "Destination directory contain files. Ignoring STAR Reference Genome generation!!!"
                + ctw.CEND + '\n')
            return

        print(ctw.CBEIGE + ctw.CBOLD +
              'Creating the STAR Reference Genome ...' + ctw.CEND + '\n')

        star_cmd = ' '.join([
            'STAR', '--runThreadN', self.threads,
            '--runMode', 'genomeGenerate',
            '--genomeSAindexNbases', '12',
            '--genomeDir', genome_dir + '/',
            '--genomeFastaFiles', self.genome_fasta,
            '--sjdbGTFfile', self.genes_gtf,
        ])
        sp.check_call(star_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD +
              'Reference Genome Created!!!' + ctw.CEND)
Exemplo n.º 3
0
    def aligner(self):
        """Align every ``*tagdustout.fq`` file in the input directory with Bowtie2.

        Alignments are written to ``<home_dir>/bt2_aligned`` using the suffix in
        ``self.extensions[4]``; Bowtie2's stderr is redirected to a sibling file
        using the suffix in ``self.extensions[3]``.

        Raises:
            subprocess.CalledProcessError: if bowtie2 exits non-zero.
        """
        aligned_dir = os.path.join(self.home_dir, 'bt2_aligned')
        if not os.path.isdir(aligned_dir):
            os.mkdir(aligned_dir)

        reads = sorted(glob.glob(self.input_dir + '*tagdustout.fq'))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running Bowtie2 Aligner ...' + ctw.CEND + '\n')

        for fq_path in reads:
            fq_name = os.path.basename(fq_path)
            print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Mapping: ' + ctw.CBLUE + fq_name + ctw.CBEIGE + ctw.CBOLD + ' ...' + ctw.CEND + '\n')

            aligned_path = aligned_dir + '/' + fq_name.split('tagdustout.fq')[0] + 'aligned' + self.extensions[4]
            # Same stem as the alignment file, but with the log/text suffix.
            log_path = aligned_path.split(self.extensions[4])[0] + self.extensions[3]

            bowtie_cmd = ' '.join([
                'bowtie2', '-p', self.threads,
                '-x', self.bt2_index + 'bt2_index',
                self.bt2_parameter,
                '-q', fq_path,
                '-S', aligned_path,
                '2>', log_path,
            ])
            sp.check_call(bowtie_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'Sequence Alignment Completed!!!' + ctw.CEND + '\n')
Exemplo n.º 4
0
    def multiqc(self):
        """Run MultiQC once per sub-directory of ``self.home_dir``.

        Reports are written into ``<home_dir>/MultiQC_Summary`` and named
        ``<sub-directory>_MultiQC_Report.html``.

        Raises:
            subprocess.CalledProcessError: if multiqc exits non-zero.
        """
        summary_dir = os.path.join(self.home_dir, 'MultiQC_Summary')
        if not os.path.isdir(summary_dir):
            os.mkdir(summary_dir)

        # NOTE(review): this glob runs after the summary directory is created,
        # so the summary directory itself is among the scanned folders --
        # confirm whether that is intended.
        sub_dirs = sorted(glob.glob(self.home_dir + '*/'))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running MultiQC ...' + ctw.CEND + '\n')

        for sub_dir in sub_dirs:
            print('\n' + ctw.CBEIGE + ctw.CBOLD +
                  'Generating MultiQC Reports for ' + sub_dir + ' ...' + ctw.CEND +
                  '\n')

            # First path component after home_dir, i.e. the folder's own name.
            folder_name = sub_dir.split(self.home_dir)[1].split('/')[0]
            report_name = folder_name + '_MultiQC_Report.html'

            multiqc_cmd = ' '.join(
                ['multiqc', sub_dir, '-o', summary_dir, '-n', report_name])
            sp.check_call(multiqc_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running MultiQC done!!!' +
              ctw.CEND + '\n')
    def bigwig(self):
        """Create a CPM-normalised coverage track for every BAM in the input directory.

        Runs ``bamCoverage`` with a bin size of 1 and bigwig output format.
        Files are written to ``<home_dir>/bedgraphs`` with the suffix stored in
        ``self.extensions[5]``.

        Raises:
            subprocess.CalledProcessError: if bamCoverage exits non-zero.
        """
        track_dir = os.path.join(self.home_dir, 'bedgraphs')
        if not os.path.isdir(track_dir):
            os.mkdir(track_dir)

        alignments = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in alignments:
            bam_name = os.path.basename(bam_path)

            print('\n' + ctw.CRED + 'Generating BigWig file for ' + ctw.CBLUE +
                  bam_name + ctw.CRED + ' ...' + ctw.CEND + '\n')

            track_path = track_dir + '/' + bam_name.split('.bam')[0] + self.extensions[5]

            coverage_cmd = ' '.join([
                'bamCoverage', '--normalizeUsing', 'CPM', '--binSize', '1',
                '-b', bam_path,
                '--outFileFormat', 'bigwig',
                '-o', track_path,
            ])
            sp.check_call(coverage_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD +
              'BigWig files were deposited!!!' + ctw.CEND + '\n')
Exemplo n.º 6
0
    def ss_aligner(self):
        """Run ShortStack on every ``*.fastq`` file in the input directory.

        Each read file gets its own output directory,
        ``<home_dir>/shortstack_aligned/<read file name without .fastq>/``.

        Raises:
            subprocess.CalledProcessError: if ShortStack exits non-zero.
        """
        outdir = os.path.join(self.home_dir, 'shortstack_aligned')
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        read_suffix = '.fastq'
        fastq_list = sorted(glob.glob(self.input_dir + '*' + read_suffix))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running ShortStack Aligner ...' +
              ctw.CEND + '\n')

        for fastq in fastq_list:
            fastq_name = os.path.basename(fastq)

            print(ctw.CBEIGE + ctw.CBOLD + 'ShortStacking: ' + ctw.CBLUE +
                  fastq_name + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                  ctw.CEND + '\n')

            # The glob only yields names ending in '.fastq', so strip exactly
            # that suffix. The previous split on '.fq' never matched these
            # names and left the extension in the directory name.
            sample_name = fastq_name[:-len(read_suffix)]
            sub_directory = outdir + '/' + sample_name + '/'

            command = ' '.join([
                'ShortStack', '--genomefile', self.genome_fa,
                '--readfile', fastq,
                '--outdir', sub_directory
            ])
            sp.check_call(command, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD +
              'Mapping with ShortStack is Completed!!!' + ctw.CEND + '\n')
Exemplo n.º 7
0
    def dedup(self):
        """Remove UMI duplicates from every BAM in the input directory with ``umi_tools dedup``.

        Output is written to ``<home_dir>/umi_dedup/<name>_dupRm`` followed by
        the suffix in ``self.extensions[4]``, with a ``.log`` file next to it.
        ``--paired`` is added when ``self.seq_method`` equals ``'paired'``.

        Raises:
            subprocess.CalledProcessError: if umi_tools exits non-zero.
        """
        dedup_dir = os.path.join(self.home_dir, 'umi_dedup')
        if not os.path.isdir(dedup_dir):
            os.mkdir(dedup_dir)

        alignments = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in alignments:
            bam_name = os.path.basename(bam_path)

            print(ctw.CRED + 'Removing duplicates: ' + ctw.CBLUE +
                  bam_name + ctw.CRED + ' ...' + ctw.CEND + '\n')

            dedup_bam = dedup_dir + '/' + bam_name.split('.bam')[0] + '_dupRm' + self.extensions[4]
            log_path = dedup_bam.split(self.extensions[4])[0] + '.log'

            tokens = ['umi_tools', 'dedup', '-I', bam_path]
            if self.seq_method == 'paired':
                tokens.append('--paired')
            tokens += ['-S', dedup_bam, '-L', log_path, '--method', 'unique']

            sp.check_call(' '.join(tokens), shell=True)

        print(ctw.CBEIGE + ctw.CBOLD + 'Deduplicate Removal Completed!!!' +
              ctw.CEND + '\n')
Exemplo n.º 8
0
    def bt2_index_maker(self):
        """Build a Bowtie index in ``<home_dir>/bt_index`` unless the directory already has content.

        The index prefix is ``<home_dir>/bt_index/bt_index`` and is built from
        ``self.genes_fa`` with ``bowtie-build``.

        Raises:
            subprocess.CalledProcessError: if bowtie-build exits non-zero.
        """
        index_dir = os.path.join(self.home_dir, 'bt_index')
        if not os.path.isdir(index_dir):
            os.mkdir(index_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        # Guard clause: leave an already populated index directory untouched.
        if len(os.listdir(index_dir + '/')) != 0:
            print(
                ctw.CRED +
                "Destination directory contain files. Ignoring Bowtie Genome Index generation!!!"
                + ctw.CEND + '\n')
            return

        print(ctw.CBEIGE + ctw.CBOLD +
              'Generating Bowtie Genome Indices ...' + ctw.CEND + '\n')

        index_prefix = index_dir + '/' + 'bt_index'

        build_cmd = ' '.join([
            'bowtie-build', '--threads', self.threads,
            '-q', self.genes_fa, index_prefix,
        ])
        sp.check_call(build_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD +
              'Bowtie Genome Indices Generated!!!' + ctw.CEND + '\n')
    def download(self):
        """Download ``self.download_file`` and unpack it, unless the target directory has content.

        The file is fetched with ``wget`` from ``self.input_path`` into the
        directory containing this source file, then handed to ``TarExtractor``.
        A ``.tar.gz`` archive is deleted after extraction; any other download
        has the text from the first ``.gz`` onward removed from its name and
        is moved into ``<home_dir>/<output_dir>``.

        Raises:
            subprocess.CalledProcessError: if ``wget`` or ``mv`` exits non-zero.
        """
        outdir = os.path.join(self.home_dir, self.output_dir)
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        ctw = ColorTextWriter.ColorTextWriter()

        if os.listdir(outdir):
            print(ctw.CRED +
                  "Destination directory contain files. Ignore download!!!" +
                  ctw.CEND + '\n')
            return

        print(ctw.CRED + 'Downloading ' + ctw.CBLUE + self.download_file +
              ctw.CRED + ' ...' + ctw.CEND + '\n')

        # The extractor below opens the file from this module's directory, so
        # download there explicitly rather than relying on the working
        # directory being the same place.
        dir_path = os.path.dirname(os.path.realpath(__file__))
        downloaded = dir_path + '/' + self.download_file

        # Argument list (no shell) keeps URL characters from being interpreted;
        # check_call stops here on a failed download instead of going on to
        # extract a missing or partial file.
        sp.check_call(
            ['wget', '-P', dir_path, self.input_path + self.download_file])

        print(ctw.CBLUE + self.download_file + ctw.CRED +
              ' Downloaded!!!' + ctw.CEND)

        te = TarExtractor.TarExtractor(downloaded, self.output_dir)
        te.tar_extractor()

        if self.download_file.endswith('.tar.gz'):
            # The archive has been unpacked; drop the downloaded tarball.
            os.remove(downloaded)
        else:
            # Everything from the first '.gz' onward is dropped from the name;
            # the resulting file sits next to the download.
            new_file = self.download_file.split('.gz')[0]
            sp.check_call(
                ['mv', dir_path + '/' + new_file, outdir + '/' + new_file])
Exemplo n.º 10
0
    def tagdust(self):
        """Run TagDust (inside a Singularity container) on every ``*_trimmed.fastq`` file.

        Output prefixes are ``<home_dir>/tagdust_out/<name>_tagdustout``. The
        container image comes from ``self.tagdust_sing`` and the reference
        passed to ``-ref`` from ``self.rrna_list``.

        Raises:
            subprocess.CalledProcessError: if the shell command exits non-zero.
        """
        dust_dir = os.path.join(self.home_dir, 'tagdust_out')
        if not os.path.isdir(dust_dir):
            os.mkdir(dust_dir)

        trimmed_reads = sorted(glob.glob(self.input_dir + '*_trimmed.fastq'))

        ctw = ColorTextWriter.ColorTextWriter()

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running TagDust ...' + ctw.CEND)

        for fq_path in trimmed_reads:
            fq_name = os.path.basename(fq_path)
            print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Tagdusting: ' + ctw.CBLUE +
                  fq_name + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                  ctw.CEND + '\n')

            out_prefix = dust_dir + '/' + fq_name.split('_trimmed')[0] + '_tagdustout'

            # home_dir is used both as the bind mount (-B) and the container home (-H).
            container_part = ' '.join([
                'module load singularity;singularity exec -e -C -B',
                self.home_dir, '-H', self.home_dir, self.tagdust_sing,
            ])
            tagdust_part = ' '.join([
                'tagdust -1 O:N -2 R:N', '-o', out_prefix,
                '-ref', self.rrna_list, '-fe 3', fq_path,
            ])
            sp.check_call(container_part + ' ' + tagdust_part, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'Running TagDust Completed!!!' +
              ctw.CEND + '\n')
Exemplo n.º 11
0
    def multiqc(self):
        """Run MultiQC on each entry that ``ListMaker`` returns for ``self.home_dir``.

        Every entry ``d`` is scanned as ``<home_dir><d>/`` and reported as
        ``<d>_MultiQC_Report.html`` inside ``<home_dir>/MultiQC_Summary``.

        Raises:
            subprocess.CalledProcessError: if multiqc exits non-zero.
        """
        summary_dir = os.path.join(self.home_dir, 'MultiQC_Summary')
        if not os.path.isdir(summary_dir):
            os.mkdir(summary_dir)

        # presumably list_files() yields directory names relative to home_dir;
        # verify against ListMaker.
        folder_names = ListMaker.ListMaker(self.home_dir).list_files()

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running MultiQC ...' + ctw.CEND + '\n')

        for folder in folder_names:
            print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Generating MultiQC Reports for ' + folder + ' ...' + ctw.CEND + '\n')

            report_name = folder + '_MultiQC_Report.html'
            scan_dir = self.home_dir + folder + '/'

            multiqc_cmd = ' '.join(
                ['multiqc', scan_dir, '-o', summary_dir, '-n', report_name])
            sp.check_call(multiqc_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running MultiQC done!!!' + ctw.CEND)
Exemplo n.º 12
0
    def cutadapt(self):
        """Trim every ``*R1.fastq`` file in the input directory with cutadapt.

        Uses ``-m 18 -M 30 -u 4`` and the 3' adapter ``NNNN`` + Illumina adapter
        sequence. Trimmed reads go to ``<home_dir>/cutadapt/<name>_trimmed`` plus
        ``self.extensions[0]``; cutadapt's stdout is redirected to
        ``<name>_trim.matrics`` plus ``self.extensions[3]``.

        Raises:
            subprocess.CalledProcessError: if cutadapt exits non-zero.
        """
        trim_dir = os.path.join(self.home_dir, 'cutadapt')
        if not os.path.isdir(trim_dir):
            os.mkdir(trim_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running CutAdapt ...' + ctw.CEND +
              '\n')

        r1_files = sorted(glob.glob(self.input_dir + '*R1.fastq'))

        for fq_path in r1_files:
            fq_name = os.path.basename(fq_path)
            print('\n' + ctw.CBEIGE + ctw.CBOLD + 'CutAdapting: ' + ctw.CBLUE +
                  fq_name + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                  ctw.CEND + '\n')

            trimmed_path = trim_dir + '/' + fq_name.split('.fastq')[0] + '_trimmed' + self.extensions[0]
            adapter = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
            report_path = trimmed_path.split('_trimmed')[0] + '_trim.matrics' + self.extensions[3]

            trim_cmd = ' '.join([
                'cutadapt', '-m', '18', '-M', '30', '-u', '4',
                '-a', 'NNNN' + adapter,
                '-o', trimmed_path, fq_path,
                '>', report_path,
            ])
            sp.check_call(trim_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'CutAdapt Trimming Completed!!!' +
              ctw.CEND)
Exemplo n.º 13
0
    def sam_sorting(self):
        """Sort, filter and index input BAMs, then run Qualimap on the results.

        Step 1: each input BAM is sorted and piped through
        ``samtools view -F 4`` (drops unmapped reads) into
        ``<home_dir>/sam_sorted``, then indexed.
        Step 2: every ``*.bam`` found in that directory is passed to
        ``qualimap rnaseq`` and ``qualimap bamqc`` with ``self.genes_gtf``.

        Raises:
            subprocess.CalledProcessError: if any external command exits non-zero.
        """
        sorted_dir = os.path.join(self.home_dir, 'sam_sorted')
        if not os.path.isdir(sorted_dir):
            os.mkdir(sorted_dir)

        inputs = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in inputs:
            # Skip anything whose name does not end with the configured suffix.
            if not bam_path.endswith(self.extensions[4]):
                continue

            bam_name = os.path.basename(bam_path)
            print(ctw.CRED + 'Filtering and Sorting: ' + ctw.CBLUE +
                  bam_name + ctw.CRED + ' ...' + ctw.CEND +
                  '\n')

            sorted_bam = sorted_dir + '/' + bam_name.split('.bam')[0] + 'sorted' + self.extensions[4]

            sort_stage = ' '.join(['samtools sort -@', self.threads, bam_path])
            filter_stage = ' '.join([
                'samtools view -F 4 -O BAM', '-@', self.threads,
                '-o', sorted_bam,
            ])
            sp.check_call(sort_stage + ' | ' + filter_stage, shell=True)

            print('\n' + ctw.CRED + 'Indexing: ' + ctw.CBLUE +
                  os.path.basename(sorted_bam) + ctw.CRED + ' ...' +
                  ctw.CEND + '\n')

            sp.check_call(' '.join(['samtools index', sorted_bam]), shell=True)

        print(ctw.CBEIGE + ctw.CBOLD +
              'Filtering, Sorting and Indexing Completed!!!' + ctw.CEND + '\n')

        # Mapping quality control using Qualimap, over everything now present
        # in the output directory.
        for sorted_bam in sorted(glob.glob(sorted_dir + '/' + '*.bam')):
            rnaseq_cmd = ' '.join(
                ['qualimap rnaseq', '-bam', sorted_bam, '-gtf', self.genes_gtf])
            sp.check_call(rnaseq_cmd, shell=True)

            bamqc_cmd = ' '.join([
                'qualimap bamqc', '-bam', sorted_bam,
                '-gff', self.genes_gtf, '-sd -c',
            ])
            sp.check_call(bamqc_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD +
              'Alignment Quality Assessment Completed!!!' + ctw.CEND + '\n')
Exemplo n.º 14
0
    def cutadapt(self):
        """Quality-trim reads with cutadapt in single- or paired-end mode.

        ``self.seq_method`` selects the mode: ``'single'`` processes each
        ``*R1.fastq`` file on its own; ``'paired'`` walks the sorted R1 and R2
        lists in lockstep. Any other value trims nothing. Output goes to
        ``<home_dir>/cutadapt`` and cutadapt's stdout is redirected to a
        ``_trim.matrics`` report per sample.

        Raises:
            subprocess.CalledProcessError: if cutadapt exits non-zero.
        """
        trim_dir = os.path.join(self.home_dir, 'cutadapt')
        if not os.path.isdir(trim_dir):
            os.mkdir(trim_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running CutAdapt ...' +
              ctw.CEND + '\n')

        r1_reads = sorted(glob.glob(self.input_dir + '*R1.fastq'))
        r2_reads = sorted(glob.glob(self.input_dir + '*R2.fastq'))

        base_options = 'cutadapt -f fastq -m 20 --quality-cutoff 5'

        if self.seq_method == 'single':
            for fq_path in r1_reads:
                fq_name = os.path.basename(fq_path)
                print('\n' + ctw.CBEIGE + ctw.CBOLD + 'CutAdapting: ' +
                      ctw.CBLUE + fq_name + ctw.CBEIGE +
                      ctw.CBOLD + ' ...' + ctw.CEND + '\n')

                trimmed = trim_dir + '/' + fq_name.split('.fastq')[0] + '_trimmed' + self.extensions[0]
                report = trimmed.split('_trimmed')[0] + '_trim.matrics' + self.extensions[3]

                trim_cmd = ' '.join(
                    [base_options, '-o', trimmed, fq_path, '>', report])
                sp.check_call(trim_cmd, shell=True)

        elif self.seq_method == 'paired':
            for fq_r1, fq_r2 in zip(r1_reads, r2_reads):
                name_r1 = os.path.basename(fq_r1)
                name_r2 = os.path.basename(fq_r2)
                print('\n' + ctw.CBEIGE + ctw.CBOLD + 'CutAdapting: ' +
                      ctw.CBLUE + name_r1 + ' and ' +
                      name_r2 + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                      ctw.CEND + '\n')

                trimmed_r1 = trim_dir + '/' + name_r1.split('.fastq')[0] + '_trimmed' + self.extensions[0]
                trimmed_r2 = trim_dir + '/' + name_r2.split('.fastq')[0] + '_trimmed' + self.extensions[0]
                # One report per pair, named after the R1 file.
                report = trimmed_r1.split('_trimmed')[0] + '_trim.matrics' + self.extensions[3]

                trim_cmd = ' '.join([
                    base_options, '-o', trimmed_r1, '-p', trimmed_r2,
                    fq_r1, fq_r2, '>', report,
                ])
                sp.check_call(trim_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'CutAdapt Trimming Completed!!!' +
              ctw.CEND)
Exemplo n.º 15
0
    def extract_UMI(self):
        """Extract UMIs with ``umi_tools extract`` using the pattern in ``self.umi``.

        In ``'single'`` mode each ``*R2*.fastq`` file is processed on its own.
        In ``'paired'`` mode R2 is fed as the primary input (``--stdin``) and R1
        as ``--read2-in``. Any other ``self.seq_method`` value extracts nothing.
        Output goes to ``<home_dir>/umi_extract``.

        Raises:
            subprocess.CalledProcessError: if umi_tools exits non-zero.
        """
        umi_dir = os.path.join(self.home_dir, 'umi_extract')
        if not os.path.isdir(umi_dir):
            os.mkdir(umi_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Extracting UMIs ...' + ctw.CEND + '\n')

        r1_reads = sorted(glob.glob(self.input_dir + '*R1*.fastq'))
        r2_reads = sorted(glob.glob(self.input_dir + '*R2*.fastq'))

        extract_base = 'umi_tools extract --extract-method=string'

        if self.seq_method == 'single':
            for fq_path in r2_reads:
                fq_name = os.path.basename(fq_path)
                print(ctw.CBEIGE + ctw.CBOLD + 'UMI Extraction: ' + ctw.CBLUE +
                      fq_name + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                      ctw.CEND + '\n')

                umi_fq = umi_dir + '/' + fq_name.split('.fastq')[0] + '_UMI' + self.extensions[0]
                log_path = umi_fq.split('_R2')[0] + '_UMI_extract.log'

                extract_cmd = ' '.join([
                    extract_base,
                    '--stdin=' + fq_path,
                    '--bc-pattern=' + self.umi,
                    '--stdout=' + umi_fq,
                    '-L', log_path,
                ])
                sp.check_call(extract_cmd, shell=True)

        elif self.seq_method == 'paired':
            for fq_r1, fq_r2 in zip(r1_reads, r2_reads):
                name_r1 = os.path.basename(fq_r1)
                name_r2 = os.path.basename(fq_r2)
                print('\n' + ctw.CBEIGE + ctw.CBOLD + 'UMI Extraction: ' +
                      ctw.CBLUE + name_r1 + ' and ' +
                      name_r2 + ctw.CBEIGE + ctw.CBOLD + ' ...' +
                      ctw.CEND + '\n')

                umi_r1 = umi_dir + '/' + name_r1.split('.fastq')[0] + '_UMI' + self.extensions[0]
                umi_r2 = umi_dir + '/' + name_r2.split('.fastq')[0] + '_UMI' + self.extensions[0]
                log_path = umi_r1.split('_R1')[0] + '_UMI_extract.log'

                # R2 is the stdin read here; R1 is passed as "read2".
                extract_cmd = ' '.join([
                    extract_base,
                    '--stdin=' + fq_r2,
                    '--stdout=' + umi_r2,
                    '--bc-pattern=' + self.umi,
                    '--read2-in=' + fq_r1,
                    '--read2-out=' + umi_r1,
                    '-L', log_path,
                ])
                sp.check_call(extract_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'UMI Extraction Completed!!!' +
              ctw.CEND + '\n')
    def feature(self):
        """Count reads per feature type with featureCounts and write a trimmed table.

        For every entry of ``self.gfeature`` a directory
        ``<home_dir>/featureCounts_<feature>`` is created, featureCounts is run
        over all input BAMs, and the result is re-read with pandas. The first
        four columns after the index are dropped and the table is saved as
        ``<feature>_DESeq2_Input`` plus ``self.extensions[3]``.

        Raises:
            subprocess.CalledProcessError: if featureCounts exits non-zero.
        """
        bam_list = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Feature counting started ...' +
              ctw.CEND + '\n')

        for feature_type in self.gfeature:
            count_dir = os.path.join(self.home_dir,
                                     'featureCounts' + '_' + feature_type)
            if not os.path.isdir(count_dir):
                os.mkdir(count_dir)

            print('\n' + 'Quantifying ' + feature_type + 's ...' + '\n')

            counts_path = count_dir + '/' + feature_type + self.extensions[3]
            annotation = self.feature_dir + self.feature_file.split('.gz')[0]
            # All BAMs are passed to a single featureCounts invocation.
            bam_args = ' '.join(
                [self.input_dir + bam.split(self.input_dir)[1] for bam in bam_list])

            tokens = [
                'featureCounts -t', feature_type,
                '-F GTF -g gene_name -O -M -s', self.stranded,
            ]
            if self.seq_method == 'paired':
                tokens.append('-p -B -C')
            tokens += ['-a', annotation, '-o', counts_path, bam_args]

            sp.check_call(' '.join(tokens), shell=True)

            # Post-process the featureCounts table: the first line of the file
            # is skipped and the first column becomes the index.
            counts = pd.read_csv(counts_path,
                                 sep='\t',
                                 header=0,
                                 index_col=0,
                                 skiprows=1)

            # presumably Chr/Start/End/Strand in featureCounts' layout -- TODO confirm
            counts = counts.drop(counts.iloc[:, 0:4], axis=1)

            counts.to_csv(count_dir + '/' + feature_type + '_DESeq2_Input' +
                          self.extensions[3],
                          sep='\t')

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Feature counting done!!!' +
              ctw.CEND + '\n')
    def tagdust(self):
        """Run TagDust (inside a Singularity container) in single- or paired-end mode.

        ``'single'`` mode processes every ``*_R1_trimmed.fastq`` file; ``'paired'``
        mode walks the sorted R1/R2 lists in lockstep and passes both files to
        one TagDust call. Any other ``self.seq_method`` value runs nothing.
        Output prefixes live in ``<home_dir>/tagdust_out``.

        Raises:
            subprocess.CalledProcessError: if the shell command exits non-zero.
        """
        dust_dir = os.path.join(self.home_dir, 'tagdust_out')
        if not os.path.isdir(dust_dir):
            os.mkdir(dust_dir)

        r1_reads = sorted(glob.glob(self.input_dir + '*_R1_trimmed.fastq'))
        r2_reads = sorted(glob.glob(self.input_dir + '*_R2_trimmed.fastq'))

        ctw = ColorTextWriter.ColorTextWriter()

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running TagDust ...' + ctw.CEND)

        if self.seq_method == 'single':
            for fq_path in r1_reads:
                fq_name = os.path.basename(fq_path)
                print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Tagdusting: ' +
                      ctw.CBLUE + fq_name + ctw.CBEIGE +
                      ctw.CBOLD + ' ...' + ctw.CEND + '\n')

                out_prefix = dust_dir + '/' + fq_name.split('_trimmed')[0] + '_tagdustout'

                tokens = [
                    'module load singularity;singularity exec -e -C -B',
                    self.home_dir, '-H', self.home_dir, self.tagdust_sing,
                ]
                tokens += [
                    'tagdust -1 R:N', '-o', out_prefix,
                    '-ref', self.rrna_list, '-fe 3', fq_path,
                ]
                sp.check_call(' '.join(tokens), shell=True)

        elif self.seq_method == 'paired':
            for fq_r1, fq_r2 in zip(r1_reads, r2_reads):
                # Both mates must carry the expected trimmed suffix.
                if not (fq_r1.endswith('R1_trimmed.fastq')
                        and fq_r2.endswith('R2_trimmed.fastq')):
                    continue

                name_r1 = os.path.basename(fq_r1)
                print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Tagdusting: ' +
                      ctw.CBLUE + name_r1 + ' and ' +
                      os.path.basename(fq_r2) + ctw.CBEIGE + ctw.CBOLD +
                      ' ...' + ctw.CEND + '\n')

                out_prefix = dust_dir + '/' + name_r1.split('_R1_trimmed')[0] + '_tagdustout'

                tokens = [
                    'module load singularity;singularity exec -e -C -B',
                    self.home_dir, '-H', self.home_dir, self.tagdust_sing,
                ]
                tokens += [
                    'tagdust -1 R:N', '-o', out_prefix,
                    '-ref', self.rrna_list, '-fe 3', fq_r1, fq_r2,
                ]
                sp.check_call(' '.join(tokens), shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'Running TagDust Completed!!!' +
              ctw.CEND + '\n')
Exemplo n.º 18
0
    def editing_prediction(self):
        """Run the program at ``self.sailor_path`` on every BAM in the input directory.

        For each BAM a small YAML job file is written to the current working
        directory, listing the BAM, ``self.genome_fasta`` and ``self.snp_file``.
        The program is invoked with that job file, then the BAM is moved next
        to the program and the job file is deleted.

        Raises:
            subprocess.CalledProcessError: if the SAILOR command exits non-zero.
        """
        bam_files = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in bam_files:
            if not bam_path.endswith(self.extensions[4]):
                continue

            print('\n' + ctw.CRED +
                  'Searching for A-to-I editing sites: ' + ctw.CBLUE +
                  os.path.basename(bam_path) + ctw.CRED + ' ...' + ctw.CEND +
                  '\n')

            # Job description consumed by the program: every input is a file entry.
            kv_pair = {
                'input_bam': {
                    'class': 'File',
                    'path': bam_path
                },
                'reference': {
                    'class': 'File',
                    'path': self.genome_fasta
                },
                'known_snp': {
                    'class': 'File',
                    'path': self.snp_file
                }
            }

            # NOTE(review): inputs are BAM files, so splitting on '.fastq'
            # strips nothing and the job file is '<bam name>.yml'. The name is
            # left unchanged in case the program derives output names from it.
            yml_file = os.path.basename(bam_path).split('.fastq')[0] + '.yml'

            try:
                with open(yml_file, 'w') as f:
                    yaml.dump(kv_pair, f, default_flow_style=False)

                # Run the program with its default settings.
                command = ' '.join(
                    ['module load singularity;', self.sailor_path, yml_file])
                sp.check_call(command, shell=True)

                # Move the processed BAM into the directory that holds the program.
                sp.call(['mv', bam_path,
                         os.path.dirname(self.sailor_path) + '/'])
            finally:
                # Always remove the temporary job file, including when the
                # program fails, so aborted runs do not leave stale .yml files.
                if os.path.exists(yml_file):
                    os.remove(yml_file)

        print(ctw.CBEIGE + ctw.CBOLD +
              'A-to-I Editing Site Identification Completed!!!' + ctw.CEND)
    def tar_extractor(self):
        """Unpack ``self.zip_file``.

        Files ending in ``.tar`` or ``.tar.gz`` are extracted into
        ``self.output_dir``; anything else is handed to ``gunzip``. The list of
        tar suffixes is stored on the instance as ``self.zip_formats``.
        """
        ctw = ColorTextWriter.ColorTextWriter()

        print('\n' + ctw.CRED + 'Unzipping ' + ctw.CBLUE + os.path.basename(self.zip_file) + ctw.CRED +' ...' + ctw.CEND + '\n')

        self.zip_formats = ['.tar', '.tar.gz']

        if self.zip_file.endswith(tuple(self.zip_formats)):
            # The context manager closes the archive even if extraction fails.
            # NOTE(security): extractall() trusts member paths inside the
            # archive; only use this on archives from a trusted source.
            with tarfile.open(self.zip_file) as tar:
                tar.extractall(self.output_dir)
        else:
            # gunzip's exit status is not checked here (best effort).
            sp.call(['gunzip', self.zip_file])
Exemplo n.º 20
0
    def feature(self):
        """Count reads per feature type with featureCounts and write a trimmed table.

        For every entry of ``self.gfeature`` a directory
        ``<home_dir>/featureCounts_<feature>`` is created and featureCounts is
        run once over all input BAMs (``-g gene_symbol``, minimum overlap 10,
        largest overlap, primary alignments only). The result is re-read with
        pandas, the first five columns after the index are dropped, and the
        table is saved as ``<feature>_DESeq2_Input`` plus ``self.extensions[3]``.

        Raises:
            subprocess.CalledProcessError: if featureCounts exits non-zero.
        """
        bam_paths = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Feature counting started ...' +
              ctw.CEND + '\n')

        for feature_type in self.gfeature:
            count_dir = os.path.join(self.home_dir,
                                     'featureCounts' + '_' + feature_type)
            if not os.path.isdir(count_dir):
                os.mkdir(count_dir)

            counts_path = count_dir + '/' + feature_type + self.extensions[3]
            annotation = self.feature_dir + self.feature_file.split('.gz')[0]
            bam_args = ' '.join(
                [self.input_dir + bam.split(self.input_dir)[1] for bam in bam_paths])

            count_cmd = ' '.join([
                'featureCounts -t', feature_type,
                '-F GTF -g gene_symbol --minOverlap 10 --largestOverlap --primary -s',
                self.stranded,
                '-a', annotation,
                '-o', counts_path,
                bam_args,
            ])
            sp.check_call(count_cmd, shell=True)

            # Post-process the featureCounts table: the first line of the file
            # is skipped and the first column becomes the index.
            counts = pd.read_csv(counts_path,
                                 sep='\t',
                                 header=0,
                                 index_col=0,
                                 skiprows=1)

            # presumably Chr/Start/End/Strand/Length in featureCounts' layout -- TODO confirm
            counts = counts.drop(counts.iloc[:, 0:5], axis=1)

            counts.to_csv(count_dir + '/' + feature_type + '_DESeq2_Input' +
                          self.extensions[3],
                          sep='\t')

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Feature counting done!!!' +
              ctw.CEND + '\n')
    def sam_sorting(self):
        """Mark duplicates, filter and index every input BAM via a samtools pipeline.

        Pipeline per file: name-sort -> ``fixmate -m`` -> coordinate-sort ->
        ``markdup`` -> ``view -F 3844``. Mask 3844 excludes unmapped (4),
        secondary (256), QC-fail (512), duplicate (1024) and supplementary
        (2048) alignments. Results are written to ``<home_dir>/sam_sorted``
        and indexed.

        Raises:
            subprocess.CalledProcessError: if a shell command exits non-zero.
        """
        sorted_dir = os.path.join(self.home_dir, 'sam_sorted')
        if not os.path.isdir(sorted_dir):
            os.mkdir(sorted_dir)

        inputs = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in inputs:
            # Skip anything whose name does not end with the configured suffix.
            if not bam_path.endswith(self.extensions[4]):
                continue

            bam_name = os.path.basename(bam_path)
            print(ctw.CRED + 'Filtering and Sorting: ' + ctw.CBLUE +
                  bam_name + ctw.CRED + ' ...' + ctw.CEND +
                  '\n')

            sorted_bam = sorted_dir + '/' + bam_name.split('.bam')[0] + 'sorted' + self.extensions[4]

            # Each stage reads the previous stage's stdout ("-").
            stages = [
                ' '.join(['module load samtools;samtools sort -n -@',
                          self.threads, bam_path]),
                'samtools fixmate -m - -',
                ' '.join(['samtools sort -@', self.threads, '-']),
                'samtools markdup - -',
                ' '.join(['samtools view -F 3844 -O BAM', '-@',
                          self.threads, '-o', sorted_bam]),
            ]
            sp.check_call(' | '.join(stages), shell=True)

            print('\n' + ctw.CRED + 'Indexing: ' + ctw.CBLUE +
                  os.path.basename(sorted_bam) + ctw.CRED + ' ...' +
                  ctw.CEND + '\n')

            sp.check_call(' '.join(['samtools index', sorted_bam]), shell=True)

        print(ctw.CBEIGE + ctw.CBOLD +
              'Filtering, Sorting and Indexing Completed!!!' + ctw.CEND)
Exemplo n.º 22
0
    def refgen(self):
        """Build a STAR genome index in ``<home_dir>/star_genome``.

        The directory is created when missing; STAR is always run, whether or
        not the directory already has content.

        Raises:
            subprocess.CalledProcessError: if STAR exits non-zero.
        """
        genome_dir = os.path.join(self.home_dir, 'star_genome')
        if not os.path.isdir(genome_dir):
            os.mkdir(genome_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Creating the STAR Reference Genome ...' + ctw.CEND + '\n')

        star_cmd = ' '.join([
            'STAR', '--runThreadN', self.threads,
            '--runMode', 'genomeGenerate',
            '--genomeSAindexNbases', '12',
            '--genomeDir', genome_dir + '/',
            '--genomeFastaFiles', self.genome_fasta,
            '--sjdbGTFfile', self.genes_gtf,
        ])
        sp.check_call(star_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Reference Genome Created!!!' + ctw.CEND)
Exemplo n.º 23
0
    def bt2_index_maker(self):
        """Build a Bowtie2 index with prefix ``<home_dir>/bt2_index/bt2_index``.

        The index is built from ``self.genes_fa`` with ``bowtie2-build``; the
        directory is created when missing.

        Raises:
            subprocess.CalledProcessError: if bowtie2-build exits non-zero.
        """
        index_dir = os.path.join(self.home_dir, 'bt2_index')
        if not os.path.isdir(index_dir):
            os.mkdir(index_dir)

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD +
              'Generating Bowtie2 Genome Indices ...' + ctw.CEND + '\n')

        index_prefix = index_dir + '/' + 'bt2_index'

        build_cmd = ' '.join([
            'bowtie2-build', '--threads', self.threads,
            '-q', self.genes_fa, index_prefix,
        ])
        sp.check_call(build_cmd, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD +
              'Bowtie2 Genome Indices Generated!!!' + ctw.CEND + '\n')
Exemplo n.º 24
0
    def fastqc(self):
        """Run FastQC on every ``*.fastq`` file in the input directory.

        Reports are written to ``<home_dir>/<output_dir>``, which is created
        when missing.

        Raises:
            subprocess.CalledProcessError: if fastqc exits non-zero.
        """
        report_dir = os.path.join(self.home_dir, self.output_dir)
        if not os.path.isdir(report_dir):
            os.mkdir(report_dir)

        fastq_files = sorted(glob.glob(self.input_dir + '*.fastq'))

        ctw = ColorTextWriter.ColorTextWriter()

        print(ctw.CBEIGE + ctw.CBOLD + 'Running FastQC ...' + ctw.CEND)

        for fq_path in fastq_files:
            # Blank separator between the outputs of consecutive FastQC runs.
            print('\n')

            fastqc_cmd = ' '.join(['fastqc', fq_path, '-o', report_dir])
            sp.check_call(fastqc_cmd, shell=True)

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running FastQC done!!!' +
              ctw.CEND)
Exemplo n.º 25
0
    def sam_filtering(self):
        """Sort, filter and index every input BAM into ``<home_dir>/sam_sorted``.

        The sorted stream is piped into ``samtools view``: plain BAM conversion
        in ``'single'`` mode, ``-f 3`` (paired and properly paired) in
        ``'paired'`` mode. For any other ``self.seq_method`` no ``view`` stage
        is added to the command.

        Raises:
            subprocess.CalledProcessError: if a samtools command exits non-zero.
        """
        sorted_dir = os.path.join(self.home_dir, 'sam_sorted')
        if not os.path.isdir(sorted_dir):
            os.mkdir(sorted_dir)

        inputs = sorted(glob.glob(self.input_dir + '*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        for bam_path in inputs:
            # Skip anything whose name does not end with the configured suffix.
            if not bam_path.endswith(self.extensions[4]):
                continue

            bam_name = os.path.basename(bam_path)
            print(ctw.CRED + 'Filtering and Sorting: ' + ctw.CBLUE + bam_name + ctw.CRED + ' ...' + ctw.CEND + '\n')

            sorted_bam = sorted_dir + '/' + bam_name.split('.bam')[0] + '_sorted' + self.extensions[4]

            # Temporary sort files use the output directory as prefix (-T).
            tokens = ['samtools sort -@', self.threads, '-T', sorted_dir + '/', bam_path, '|']

            if self.seq_method == 'single':
                tokens += ['samtools view -O BAM -@', self.threads]
            elif self.seq_method == 'paired':
                tokens += ['samtools view -f 3 -O BAM -@', self.threads]

            tokens += ['-o', sorted_bam]

            sp.check_call(' '.join(tokens), shell=True)

            print('\n' + ctw.CRED + 'Indexing: ' + ctw.CBLUE + os.path.basename(sorted_bam) + ctw.CRED + ' ...' + ctw.CEND + '\n')

            sp.check_call(' '.join(['samtools index', sorted_bam]), shell=True)

        print(ctw.CBEIGE + ctw.CBOLD + 'Filtering, Sorting and Indexing Completed!!!' + ctw.CEND)
Exemplo n.º 26
0
    def crosslink(self):
        """Derive cross-link site tracks from every ``*UV*.bam`` input file.

        For each matching BAM in ``self.input_dir`` the following files are
        written to ``<home_dir>/crosslink_data``:

        * ``<name>_shifted.bed``  - BED12 intervals after ``bedtools shift``
        * ``<name>_shifted.bam``  - the shifted intervals as a sorted BAM
        * per strand: a bedGraph, a sorted bedGraph and a bigWig track

        ``samtools faidx`` is run on ``self.genome_fasta`` first; the
        resulting ``.fai`` file is used as the genome/chromosome-size file
        for the bedtools and bedGraphToBigWig steps.

        Raises:
            subprocess.CalledProcessError: if an external command reports
                failure.
        """
        outdir = os.path.join(self.home_dir, 'crosslink_data')
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        uv_bams = sorted(glob.glob(self.input_dir + '*UV*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        #### Generate a chromosome length file using SAMTools
        sp.check_call('samtools faidx ' + self.genome_fasta, shell=True)
        fai_file = self.genome_fasta + '.fai'

        #### (strand flag, bedGraph suffix, bigWig suffix) for each strand
        strand_tracks = (
            ('+', '_plus.bedgraph', '_plus.bw'),
            ('-', '_minus.bedgraph', '_minus.bw'),
        )

        for bam_path in uv_bams:

            print('\n' + ctw.CRED + 'Detection of cross-linked nucleotides: ' +
                  ctw.CBLUE + os.path.basename(bam_path) + ctw.CRED + ' ...' +
                  ctw.CEND + '\n')

            sample = os.path.basename(bam_path).split('.bam')[0]
            shifted_bed = outdir + '/' + sample + '_shifted.bed'

            #### Every derived file shares the path up to the first '.bed'
            stem = shifted_bed.split('.bed')[0]
            shifted_bam = stem + '.bam'

            #### Convert bam to bed and shift intervals 1bp upstream to identify the cross-linked nucleotide
            to_shifted_bed = ' '.join([
                'bamToBed -i', bam_path, '-bed12', '|',
                'bedtools shift -m 1 -p -1 -g', fai_file, '>', shifted_bed
            ])
            sp.check_call(to_shifted_bed, shell=True)

            #### Convert shifted bed to bam
            to_shifted_bam = ' '.join([
                'bedToBam -i', shifted_bed, '-g', fai_file, '-bed12', '>',
                shifted_bam
            ])
            sp.check_call(to_shifted_bam, shell=True)

            #### Sort the shifted bam; input and output are the same path
            sort_bam = ' '.join([
                'samtools sort -@', self.threads, '-T', outdir + '/',
                shifted_bam, '-o', shifted_bam
            ])
            sp.check_call(sort_bam, shell=True)

            #### Make coverage tracks for plus and minus strands
            for strand_flag, bedgraph_suffix, bigwig_suffix in strand_tracks:

                bedgraph = stem + bedgraph_suffix
                sorted_bedgraph = stem + '_sorted' + bedgraph_suffix

                coverage = ' '.join([
                    'bedtools genomecov -bg -strand', strand_flag, '-5 -i',
                    shifted_bed, '-g', fai_file, '>', bedgraph
                ])
                sp.check_call(coverage, shell=True)

                sort_bedgraph = ' '.join([
                    'LC_COLLATE=C sort -k1,1 -k2,2n', bedgraph, '>',
                    sorted_bedgraph
                ])
                sp.check_call(sort_bedgraph, shell=True)

                to_bigwig = ' '.join([
                    'bedGraphToBigWig', sorted_bedgraph, fai_file,
                    stem + bigwig_suffix
                ])
                sp.check_call(to_bigwig, shell=True)

        print(ctw.CBEIGE + ctw.CBOLD +
              'Identification of cross-linked nucleotides is completed!!!' +
              ctw.CEND + '\n')
Exemplo n.º 27
0
    def ins_del_finder(self):
        """Count reads mapped with insertions/deletions in each ``*UV*.bam``.

        Every matching BAM in ``self.input_dir`` is converted with
        ``samtools view`` into a text file written next to it (the
        ``self.extensions[4]`` suffix replaced by ``self.extensions[1]``).
        Column 6 of that file (the CIGAR string in the SAM format) is then
        scanned with ``cut | grep | wc -l`` to count the lines containing
        ``I`` and the lines containing ``D``.

        The table is saved as
        ``<home_dir>/summary_files/Insertion_Deletion_Data_Summary.csv``.

        Returns:
            pandas.DataFrame: two rows (insertions, deletions) with a
            ``Summary`` label column followed by one column per BAM file,
            named after the file's basename without ``self.extensions[4]``.
            When no BAM file matches, only the ``Summary`` column is present.

        Raises:
            subprocess.CalledProcessError: if ``samtools view`` or a counting
                pipeline reports failure.
        """
        outdir_summary = os.path.join(self.home_dir, 'summary_files')
        if not os.path.isdir(outdir_summary): os.mkdir(outdir_summary)

        bam_list = sorted(glob.glob(self.input_dir + '*UV*.bam'))

        ctw = ColorTextWriter.ColorTextWriter()

        summary = pd.DataFrame(
            {'Summary': ['Mapped with Insertions', 'Mapped with Deletions']})

        # Seeding the list with the label column keeps pd.concat() from
        # raising "No objects to concatenate" when no BAM file matches.
        frames = [summary]
        column_headers = ['Summary']

        for i in bam_list:

            print(ctw.CRED + 'Detection of Insertions/Deletions: ' +
                  ctw.CBLUE + os.path.basename(i) + ctw.CRED + ' ...' +
                  ctw.CEND + '\n')

            column_headers.append(
                os.path.basename(i).split(self.extensions[4])[0])

            #### Convert from bam to sam format
            output_file = i.split(self.extensions[4])[0] + self.extensions[1]
            sp.check_call(' '.join(['samtools view', i, '-o', output_file]),
                          shell=True)

            #### Total number of reads mapped with Insertions (I), then
            #### Deletions (D) - same row order as the Summary labels
            count_list = []
            for cigar_op in ('I', 'D'):
                command = ' '.join(
                    ['cut -f 6', output_file, '| grep', cigar_op, '| wc -l'])
                results = sp.check_output(command,
                                          shell=True,
                                          universal_newlines=True)
                count_list.append(int(results))

            frames.append(pd.DataFrame({i: count_list}))

        df = pd.concat(frames, axis=1)

        # NOTE(review): assigning a list wrapped in another list appears to
        # give the frame one-level MultiIndex columns rather than a flat
        # Index.  Kept as-is so the returned frame keeps the shape existing
        # callers receive - confirm whether a flat header was intended.
        df.columns = [column_headers]
        df.to_csv(outdir_summary + '/' + 'Insertion_Deletion_Data_Summary.csv',
                  index=False)
        return df
Exemplo n.º 28
0
    def peak_caller(self):
        """Merge UV replicates and call peaks with PureCLIP against an input.

        ``*UV1*.bam`` and ``*UV2*.bam`` files from ``self.input_dir`` are
        paired in sorted order; the n-th pair is matched with the n-th
        ``*IN*.bam`` file.  For every pair the method

        1. merges the two replicates into
           ``<home_dir>/pure_clip_pc/<sample>_UV_aligned_sorted_dupRm_merged.bam``
           and indexes the merged file,
        2. indexes the matching input BAM,
        3. runs ``pureclip`` with the input BAM as control, writing
           ``*_INnorm_sites.bed`` and ``*_INnorm_regions.bed``,
        4. writes ``*_INnorm_sites_short.bed`` holding the first six columns
           of the sites file.

        Finally ``BigWigFileMaker.bigwig()`` is run on the output directory.

        Raises:
            IndexError: if there are fewer ``*IN*.bam`` files than UV pairs
                (raised after the pair has been merged and indexed).
            subprocess.CalledProcessError: if an external command reports
                failure.
        """
        outdir = os.path.join(self.home_dir, 'pure_clip_pc')
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        ctw = ColorTextWriter.ColorTextWriter()

        uv1_list = sorted(glob.glob(self.input_dir + '*UV1*.bam'))
        uv2_list = sorted(glob.glob(self.input_dir + '*UV2*.bam'))
        input_list = sorted(glob.glob(self.input_dir + '*IN*.bam'))

        for pair_no, (rep1, rep2) in enumerate(zip(uv1_list, uv2_list)):

            rep1_name = os.path.basename(rep1)

            print(ctw.CRED + 'Merging replicates and indexing:' + ctw.CBLUE +
                  rep1_name + ctw.CRED + ' and ' + ctw.CBLUE +
                  os.path.basename(rep2) + ctw.CRED + ' ...' + ctw.CEND + '\n')

            merged_bam = (outdir + '/' + rep1_name.split('_UV')[0] +
                          '_UV_aligned_sorted_dupRm_merged.bam')

            sp.check_call(' '.join(['samtools merge', merged_bam, rep1, rep2]),
                          shell=True)
            sp.check_call(' '.join(['samtools index', merged_bam]), shell=True)

            #### Peak Calling with Input Normalization

            control_bam = input_list[pair_no]
            sp.check_call(' '.join(['samtools index', control_bam]),
                          shell=True)

            outfile_prefix = (outdir + '/' +
                              rep1_name.split(self.extensions[4])[0] +
                              '_pureclip_crosslink')
            sites_bed = outfile_prefix + '_INnorm_sites.bed'

            pureclip_args = [
                'pureclip', '-i', merged_bam, '-bai', merged_bam + '.bai',
                '-g', self.genome_fa, '-ld -nt 8'
            ]

            # Only run modes '0' and '1' add a -bc option.
            if self.run_mode in ('0', '1'):
                pureclip_args.append('-bc ' + self.run_mode)

            pureclip_args += [
                '-o', sites_bed, '-or',
                outfile_prefix + '_INnorm_regions.bed', '-ibam', control_bam,
                '-ibai', control_bam + '.bai'
            ]

            sp.check_call(' '.join(pureclip_args), shell=True)

            #### Keep only the first six columns of the sites file
            shorten = ' '.join([
                'cat', sites_bed, '|', 'cut -f 1,2,3,4,5,6 >',
                outfile_prefix + '_INnorm_sites_short.bed'
            ])
            sp.check_call(shorten, shell=True)

        #### Generate wiggle files for merged bam files
        bigwig_maker = BigWigFileMaker.BigWigFileMaker(self.home_dir,
                                                       outdir + '/',
                                                       self.extensions)
        bigwig_maker.bigwig()

        print(ctw.CBEIGE + ctw.CBOLD + 'Peak Calling Completed!!!' + ctw.CEND)
    def aligner(self):
        """Align trimmed reads with STAR, then run Qualimap on the results.

        Reads are taken from ``self.input_dir``: ``*R1_trimmed.fastq`` files
        alone when ``self.seq_method`` is ``'single'``, or paired in sorted
        order with ``*R2_trimmed.fastq`` files when it is ``'paired'``.  Any
        other value skips the alignment step.  STAR writes its sorted BAM to
        standard output, which is redirected to
        ``<home_dir>/star_aligned/<name>aligned<extensions[4]>``.

        Afterwards ``qualimap rnaseq`` and ``qualimap bamqc`` are run on
        every ``*.bam`` file in the output directory.

        Raises:
            subprocess.CalledProcessError: if STAR or Qualimap exits non-zero.
        """
        outdir = os.path.join(self.home_dir, 'star_aligned')
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        #### Sequence Alignment using STAR
        r1_reads = sorted(glob.glob(self.input_dir + '*R1_trimmed.fastq'))
        r2_reads = sorted(glob.glob(self.input_dir + '*R2_trimmed.fastq'))

        ctw = ColorTextWriter.ColorTextWriter()

        print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Running Star Aligner ...' +
              ctw.CEND)

        #### One tuple of fastq paths per STAR run: (R1,) or (R1, R2)
        if self.seq_method == 'single':
            read_sets = [(r1,) for r1 in r1_reads]
        elif self.seq_method == 'paired':
            read_sets = list(zip(r1_reads, r2_reads))
        else:
            read_sets = []

        mate_suffixes = ('R1_trimmed.fastq', 'R2_trimmed.fastq')

        for reads in read_sets:

            output_file = outdir + '/' + os.path.basename(reads[0]).split(
                'trimmed.fastq')[0] + 'aligned' + self.extensions[4]

            #### Each mate must carry its expected file-name suffix
            suffixes_ok = all(
                path.endswith(suffix)
                for path, suffix in zip(reads, mate_suffixes))
            if not suffixes_ok:
                continue

            read_names = ' and '.join(os.path.basename(p) for p in reads)

            print('\n' + ctw.CBEIGE + ctw.CBOLD + 'Mapping: ' + ctw.CBLUE +
                  read_names + ctw.CBEIGE + ctw.CBOLD + ' ...' + ctw.CEND +
                  '\n')

            star_args = [
                'STAR --runThreadN', self.threads, '--genomeDir',
                self.genome_dir, '--outFileNamePrefix',
                output_file.split(self.extensions[4])[0], '>', output_file,
                '--outReadsUnmapped Fastx --outSJfilterReads Unique --outFilterMultimapNmax 1',
                '--outStd BAM_SortedByCoordinate --outSAMtype BAM SortedByCoordinate',
                '--readFilesIn'
            ]
            star_args.extend(reads)

            sp.check_call(' '.join(star_args), shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD + 'Star Alignment Completed!!!' +
              ctw.CEND + '\n')

        #### Mapping quality control using Qualimap
        for bam_path in sorted(glob.glob(outdir + '/' + '*.bam')):

            rnaseq_qc = ' '.join([
                'qualimap rnaseq', '-bam', bam_path, '-gtf', self.genes_gtf,
                '--java-mem-size=4G'
            ])
            sp.check_call(rnaseq_qc, shell=True)

            bam_qc = ' '.join([
                'qualimap bamqc', '-bam', bam_path, '-gff', self.genes_gtf,
                '-sd -c', '--java-mem-size=4G'
            ])
            sp.check_call(bam_qc, shell=True)

        print('\n' + ctw.CRED + ctw.CBOLD +
              'Alignment Quality Assessment Completed!!!' + ctw.CEND + '\n')
Exemplo n.º 30
0
import Tagduster
import TDSummaryProcessor
import CutAdapt
import WebDownloader
import Bt2IndexMaker
import Bt2Aligner
import ShortStack
import SamTools
import BigWigFileMaker
import FeatureCounter
import MultiQCRunner
import ColorTextWriter

#### Executing the Program
# Top-level driver: the steps below run in order as soon as the module is
# executed, each one configured from attributes of the CommonVariables object.

# Source of the colour/format codes used in the console messages.
ctw = ColorTextWriter.ColorTextWriter()
print('\n' + ctw.CRED + ctw.CBOLD + 'Initiating sRNA data analyzer ...' + ctw.CEND + '\n')
# NOTE(review): unlike the message above, this one is not terminated with
# ctw.CEND - confirm whether the colour is meant to carry over to later output.
print(ctw.CRED + 'This script can take minutes to hours to analyze your data based on the number of libraries to be analyzed ...' + '\n')

# NOTE(review): GeneralVariables, CommonVariables and FastQCRunner are not
# among the imports visible in this part of the file - presumably imported
# further up; verify.
gv = GeneralVariables.GeneralVariables()
cv = CommonVariables.CommonVariables()

# FastQC over cv.raw_sequences_dir, configured with cv.fastqc_raw and the first
# entry of cv.file_type.
qc_raw = FastQCRunner.FastQCRunner(cv.home_dir, cv.fastqc_raw, cv.raw_sequences_dir, cv.file_type[0])
qc_raw.fastqc()

# CutAdapt step: takes cv.raw_sequences_dir and cv.cutadapt_dir
# (presumably input and output directories - verify against CutAdapt).
ca = CutAdapt.CutAdapt(cv.home_dir, cv.raw_sequences_dir, cv.extensions, cv.cutadapt_dir)
ca.cutadapt()

# Tagduster step: takes cv.cutadapt_dir together with cv.tagdust_singu and
# cv.rRNA_path (presumably consumes the CutAdapt output - verify).
td = Tagduster.Tagduster(cv.home_dir, cv.tagdust_singu, cv.cutadapt_dir, cv.rRNA_path, cv.extensions)
td.tagdust()