Example No. 1
    def convert_sort(self, sort_gene_origin):
        """
        Function creates a sorted and indexed bam file from given bam file

        Parameters
        ----------
        sort_gene_origin: str
            Bam file's name that created by mapping algorithm

        """

        if self.map_type == "Novoalign":
            # novosort sorts, removes duplicates (--removeduplicates) and
            # writes an index alongside the output (-i) in a single pass.
            convert_sort = self.get_paths.novoalign + "novosort -m 16g -t . -c " + self.threads + " --removeduplicates --keeptags " + \
                           sort_gene_origin + " -i -o SortedBAM_" + sort_gene_origin
            log_command(convert_sort, "Convert Sort", self.threads, "Mapping")
            self.file_list.append("SortedBAM_" + sort_gene_origin)
            self.file_list.append("SortedBAM_" + sort_gene_origin + ".bai")
        else:
            # Convert to BAM (-bS) and pipe straight into a coordinate
            # sort; both steps use self.threads worker threads.
            convert_sort = "samtools view -@" + self.threads + " -bS " + sort_gene_origin + " | samtools sort -@" + \
                           self.threads + " -o SortedBAM_" + sort_gene_origin
            log_command(convert_sort, "Convert Sort", self.threads, "Mapping")
            self.file_list.append("SortedBAM_" + sort_gene_origin)
            indexed = helpers.create_index("SortedBAM_" + sort_gene_origin,
                                           "Create Index", self.threads,
                                           "Mapping")
            self.file_list.append(indexed)
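helpers.create_index and log_command come from elsewhere in this project and are not shown in these snippets. As a rough sketch of what such an index helper might look like, assuming it simply shells out to samtools index and returns the name of the .bai file (only the call signature is taken from the examples; the body below is hypothetical):

import subprocess

def create_index(bam_file, step_name, threads, stage):
    # Hypothetical sketch: index a coordinate-sorted BAM with
    # "samtools index" and return the resulting .bai file name.
    # step_name and stage would presumably be passed to log_command
    # for logging; they are unused in this stripped-down version.
    subprocess.run("samtools index -@ " + str(threads) + " " + bam_file,
                   shell=True, check=True)
    return bam_file + ".bai"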
Example No. 2
    def gatk4_applybsqr(self, lastbam, recaltable):
        # Apply the recalibration table from BaseRecalibrator to rewrite
        # the base quality scores in the BAM.
        afterbqsrbam = "GATK4_" + lastbam
        apply_command = self.get_paths.gatk4_path + " ApplyBQSR -R " + self.bundle_dir + \
                        "Homo_sapiens_assembly38.fasta -I " + lastbam + " --bqsr-recal-file " + \
                        recaltable + " -O " + afterbqsrbam
        log_command(apply_command, "ApplyBQSR", self.threads,
                    "Gatk4PreProcessing")
        self.file_list.append(afterbqsrbam)
        indexed = helpers.create_index(afterbqsrbam,
                                       "Create Index by GATK_ApplyBSQR",
                                       self.threads, "GatkPreProcess")
        self.file_list.append(indexed)
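Stripped of the path and logging plumbing, the string assembled above amounts to a standard GATK4 ApplyBQSR invocation. A self-contained equivalent, assuming a gatk launcher on PATH and with placeholder file names:

import subprocess

# Standalone equivalent of the command built above; "sample.bam",
# "recal.table" and the reference FASTA are placeholders.
subprocess.run([
    "gatk", "ApplyBQSR",
    "-R", "Homo_sapiens_assembly38.fasta",  # reference genome
    "-I", "sample.bam",                     # BAM to recalibrate
    "--bqsr-recal-file", "recal.table",     # table from BaseRecalibrator
    "-O", "GATK4_sample.bam",               # recalibrated output
], check=True)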
Example No. 3
    def gatk3_print_reads(self, lastbam, bqsr):
        nct = " -nct " + str(self.threads)

        # GATK3 equivalent of ApplyBQSR: PrintReads rewrites the BAM using
        # the recalibration table passed via --BQSR.
        aftercalibratorBam = "GATK_PR" + lastbam
        bcal = "java -jar " + self.get_paths.gatk_path + nct + " -T PrintReads -R " + self.bundle_dir + \
               "/ucsc.hg19.fasta -I " + lastbam + " --BQSR " + bqsr + " -o " + aftercalibratorBam
        log_command(bcal, "Print Reads", self.threads, "GatkPreProcessing")
        self.file_list.append(aftercalibratorBam)
        indexed = helpers.create_index(aftercalibratorBam,
                                       "Create Index by GATK_PrintReads",
                                       self.threads, "GatkPreProcess")
        self.file_list.append(indexed)
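The bqsr table consumed via --BQSR above is produced by an earlier GATK3 BaseRecalibrator run that is not part of this snippet. A minimal sketch of that preceding step, with placeholder paths:

import subprocess

# Hypothetical sketch of the GATK3 step that produces the recalibration
# table used by PrintReads; every path below is a placeholder.
subprocess.run([
    "java", "-jar", "GenomeAnalysisTK.jar",
    "-T", "BaseRecalibrator",
    "-R", "ucsc.hg19.fasta",       # reference genome
    "-I", "sample.bam",            # input alignments
    "-knownSites", "dbsnp.vcf",    # known variant sites to mask
    "-o", "recal.table",           # table later passed as --BQSR
], check=True)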
Example No. 4
    def convert_sort(self, sort_gene_origin):
        """
        Function creates a sorted and indexed bam file from given bam file

        Parameters
        ----------
        sort_gene_origin: str
            Bam file's name that created by mapping algorithm

        """
        # Convert to BAM and coordinate-sort in a single samtools pipeline.
        convert_sort = "samtools view -@" + self.threads + " -bS " + sort_gene_origin + " | samtools sort -@" + \
                       self.threads + " -o SortedBAM_" + sort_gene_origin
        log_command(convert_sort, "Convert Sort", self.threads, "Mapping")
        self.file_list.append("SortedBAM_" + sort_gene_origin)
        indexed = helpers.create_index("SortedBAM_" + sort_gene_origin, "Create Index", self.threads, "Mapping")
        self.file_list.append(indexed)
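The same sort-and-index step can also be done without building shell strings, for example with pysam, which exposes the samtools commands as Python functions. A minimal sketch (pysam is not used by the snippets themselves):

import pysam

bam = "sample.bam"                            # placeholder input
sorted_bam = "SortedBAM_" + bam
pysam.sort("-@", "4", "-o", sorted_bam, bam)  # coordinate-sort the BAM
pysam.index(sorted_bam)                       # writes sorted_bam + ".bai"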
Example No. 5
    def pre_process(self, info_dict, all_bam_files):
        if self.split_chr == "After":
            # Merge all per-lane BAMs first, then split the merged BAM by
            # chromosome and mark duplicates per chromosome.
            merged_file = self.merge_bams(info_dict, all_bam_files)
            self.file_list.append(merged_file)
            indexed = helpers.create_index(merged_file, "Create Index by Merge", self.threads, "Pre Processing")
            self.file_list.append(indexed)
            splitted_files = split_bam_by_chr(merged_file)
            for splitted_file in splitted_files:
                index_start = splitted_file.find("_Chr_")
                chr_a = splitted_file[index_start:]
                mark_duplicate_file = self.mark_duplicate(splitted_file, chr_a)
                self.file_list.append(mark_duplicate_file)
                indexed = helpers.create_index(mark_duplicate_file, "Create Index by MarkDuplicate", self.threads,
                                               "Pre Processing")
                self.file_list.append(indexed)
            helpers.create_folder(self.working_directory, self.file_list, map_type=self.map_type, step="PreProcess",
                                  folder_directory=self.folder_directory)
            return_files = [a for a in self.file_list if "MDUP" in a and "bam" in a]
            return return_files

        elif self.split_chr == "Before":
            # Split each mapped BAM by chromosome first; the per-chromosome
            # files are then collected from disk by get_bam_by_chr().
            for bam_file in all_bam_files:
                split_bam_by_chr(bam_file)
            all_chr_files = get_bam_by_chr()
            print("preprocess: per-chromosome files")
            print(all_chr_files)
            for i in all_chr_files:
                merged_file = self.merge_bams(info_dict, all_chr_files[i])
                self.file_list.append(merged_file)
                indexed = helpers.create_index(merged_file, "Create Index by Merge", self.threads, "Pre Processing")
                self.file_list.append(indexed)
                index_start = all_chr_files[i][0].find("_Chr_")
                chr_a = all_chr_files[i][0][index_start:]
                mark_duplicate_file = self.mark_duplicate(merged_file, chr_a)
                self.file_list.append(mark_duplicate_file)
                indexed = helpers.create_index(mark_duplicate_file, "Create Index by MarkDuplicate", self.threads,
                                               "Pre Processing")
                self.file_list.append(indexed)
                helpers.create_folder(self.working_directory, self.file_list, map_type=self.map_type, step="PreProcess",
                                      folder_directory=self.folder_directory)
            return_files = [a for a in self.file_list if "MDUP" in a and "bam" in a]
            return return_files

        # self.split_chr == "No":
        else:
            if self.map_type == "Novoalign":
                # Novoalign path: sorting and duplicate marking are handled
                # together by novoalign_sort_markduplicate().
                mark_duplicate_file = self.novoalign_sort_markduplicate(info_dict, all_bam_files)
                helpers.create_folder(self.working_directory, self.file_list, map_type=self.map_type, step="PreProcess",
                                      folder_directory=self.folder_directory)
                return mark_duplicate_file

            merged_file = self.merge_bams(info_dict, all_bam_files)
            indexed = helpers.create_index(merged_file, "Create Index by Merge", self.threads, "Pre Processing")
            self.file_list.append(merged_file)
            self.file_list.append(indexed)
            mark_duplicate_file = self.mark_duplicate(merged_file, "")
            print("preprocess mark duplicate file ")
            print(mark_duplicate_file)
            self.file_list.append(mark_duplicate_file)
            indexed = helpers.create_index(mark_duplicate_file, "Create Index by MarkDuplicate", self.threads,
                                           "Pre Processing")
            self.file_list.append(indexed)
            helpers.create_folder(self.working_directory, self.file_list, map_type=self.map_type, step="PreProcess",
                                  folder_directory=self.folder_directory)
            return mark_duplicate_file
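split_bam_by_chr and get_bam_by_chr are imported helpers that are not shown in this example. A rough sketch of what the split might look like with pysam, under the assumption that the output names embed the "_Chr_" tag that pre_process() later locates with find("_Chr_"):

import pysam

def split_bam_by_chr(bam_file):
    # Hypothetical sketch: write one BAM per reference contig and return
    # the new file names. Requires bam_file to be indexed, since fetch()
    # needs random access; pre_process() indexes its BAMs before splitting.
    out_files = []
    with pysam.AlignmentFile(bam_file, "rb") as bam:
        for contig in bam.references:
            out_name = bam_file[:-len(".bam")] + "_Chr_" + contig + ".bam"
            with pysam.AlignmentFile(out_name, "wb", template=bam) as out:
                for read in bam.fetch(contig):
                    out.write(read)
            out_files.append(out_name)
    return out_files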


# if __name__ == "__main__":
#     pre_processing_step = PreProcessing(working_directory="/home/bioinformaticslab/Desktop/GitHub_Repos/Genomics_Pipeline_Test/test_files",
#                            map_type="Bwa", sample_type="Tumor", library_matching_id="203", thrds="1", issplitchr="Before")
#
#     mapping_step = mapping.Mapping(working_directory=pre_processing_step.main_directory,
#         map_type="Bwa", sample_type="Tumor", library_matching_id="203", thrds="3")
#
#     fastq_list = mapping_step.get_fastq()
#     info_dict = mapping_step.get_info(fastq_list)
#     os.chdir(pre_processing_step.working_directory)
#     bam_files = glob.glob("SortedBAM*.bam")
#     mark_duplicate_file = pre_processing_step.pre_process(info_dict, bam_files)
#     print(mark_duplicate_file)
Example No. 6
    def pre_process(self, info_dict, all_bam_files):
        if self.split_chr == "After":
            merged_file = self.merge_bams(info_dict, all_bam_files)
            self.file_list.append(merged_file)
            indexed = helpers.create_index(merged_file,
                                           "Create Index by Merge",
                                           self.threads, "Pre Processing")
            self.file_list.append(indexed)
            splitted_files = split_bam_by_chr(merged_file)
            for splitted_file in splitted_files:
                index_start = splitted_file.find("_Chr_")
                chr_a = splitted_file[index_start:]
                mark_duplicate_file = self.mark_duplicate(splitted_file, chr_a)
                self.file_list.append(mark_duplicate_file)
                indexed = helpers.create_index(
                    mark_duplicate_file, "Create Index by MarkDuplicate",
                    self.threads, "Pre Processing")
                self.file_list.append(indexed)
            helpers.create_folder(self.working_directory,
                                  self.file_list,
                                  map_type=self.map_type,
                                  step="PreProcess",
                                  folder_directory=self.folder_directory)
            return_files = [
                a for a in self.file_list if "MDUP" in a and "bam" in a
            ]
            return return_files

        elif self.split_chr == "Before":
            # Split each mapped BAM by chromosome first; the per-chromosome
            # files are then collected from disk by get_bam_by_chr().
            for bam_file in all_bam_files:
                split_bam_by_chr(bam_file)
            all_chr_files = get_bam_by_chr()
            print("preprocess: per-chromosome files")
            print(all_chr_files)
            for i in all_chr_files:
                merged_file = self.merge_bams(info_dict, all_chr_files[i])
                self.file_list.append(merged_file)
                indexed = helpers.create_index(merged_file,
                                               "Create Index by Merge",
                                               self.threads, "Pre Processing")
                self.file_list.append(indexed)
                index_start = all_chr_files[i][0].find("_Chr_")
                chr_a = all_chr_files[i][0][index_start:]
                mark_duplicate_file = self.mark_duplicate(merged_file, chr_a)
                self.file_list.append(mark_duplicate_file)
                indexed = helpers.create_index(
                    mark_duplicate_file, "Create Index by MarkDuplicate",
                    self.threads, "Pre Processing")
                self.file_list.append(indexed)
                helpers.create_folder(self.working_directory,
                                      self.file_list,
                                      map_type=self.map_type,
                                      step="PreProcess",
                                      folder_directory=self.folder_directory)
            return_files = [
                a for a in self.file_list if "MDUP" in a and "bam" in a
            ]
            return return_files

        # self.split_chr == "No":
        else:
            merged_file = self.merge_bams(info_dict, all_bam_files)
            indexed = helpers.create_index(merged_file,
                                           "Create Index by Merge",
                                           self.threads, "Pre Processing")
            self.file_list.append(merged_file)
            self.file_list.append(indexed)
            mark_duplicate_file = self.mark_duplicate(merged_file, "")
            print("preprocess mark duplicate file ")
            print(mark_duplicate_file)
            self.file_list.append(mark_duplicate_file)
            indexed = helpers.create_index(mark_duplicate_file,
                                           "Create Index by MarkDuplicate",
                                           self.threads, "Pre Processing")
            self.file_list.append(indexed)
            helpers.create_folder(self.working_directory,
                                  self.file_list,
                                  map_type=self.map_type,
                                  step="PreProcess",
                                  folder_directory=self.folder_directory)
            return mark_duplicate_file
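mark_duplicate is likewise defined elsewhere in the class. Picard MarkDuplicates (bundled with GATK4) is the usual tool for this step; a hedged sketch of what such a method might reduce to, with the "MDUP" prefix chosen to match the filter applied to file_list above (an illustration, not the author's implementation):

import subprocess

def mark_duplicate(merged_bam):
    # Hypothetical sketch: run Picard MarkDuplicates via the gatk launcher
    # and return an output name carrying the "MDUP" tag that pre_process()
    # filters on. The metrics file (-M) is required by MarkDuplicates.
    out_bam = "MDUP_" + merged_bam
    subprocess.run([
        "gatk", "MarkDuplicates",
        "-I", merged_bam,
        "-O", out_bam,
        "-M", out_bam + ".metrics.txt",
    ], check=True)
    return out_bam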