def mutect_select_variant_other(self, mutect_output):
    """Select the variants that are neither SNP nor INDEL from a Mutect2 VCF.

    A GATK4 ``SelectVariants`` command excluding the INDEL and SNP types is
    assembled, printed and passed to ``log_command``.  The output file is
    named ``OTHER_<last path component of mutect_output>``; that name is
    printed as well.  Nothing is returned.

    Parameters
    ----------
    mutect_output : str
        Path of the VCF file to filter.
    """
    other_vcf = "OTHER_" + mutect_output.rsplit("/", 1)[-1]
    command = "".join([
        self.get_paths.gatk4_path,
        " SelectVariants -R ",
        self.ref_dir,
        " -V ",
        mutect_output,
        " --select-type-to-exclude INDEL --select-type-to-exclude SNP -O ",
        other_vcf,
    ])
    print(command)
    log_command(command, "Mutect2", self.threads, "Select OTHER Variants")
    print(other_vcf)
def qc_trim(self):
    """Trim every paired-end FASTQ pair with fastp and record the outputs.

    For each (lane, sequence number) combination in ``self.info_dict`` the
    matching R1/R2 entries of ``self.fastq_list`` are looked up, a fastp
    command is built and passed to ``log_command``, and the names of the
    HTML/JSON reports and of the two trimmed FASTQ files are appended to
    ``self.file_list``.

    Combinations without a matching R1 or R2 entry are reported and skipped.
    Any other error is reported and ends the run early; it is not re-raised,
    so the method stays best-effort for its callers.  Unlike a bare
    ``except``, ``KeyboardInterrupt``/``SystemExit`` are no longer swallowed.
    """
    try:
        for lane in self.info_dict["Lanes"]:
            for seq_no in self.info_dict["Number_of_seq"]:
                r1_pattern = re.compile(".*" + lane + "_R1_" + seq_no)
                r2_pattern = re.compile(".*" + lane + "_R2_" + seq_no)
                read1 = [
                    name + ".fastq.gz"
                    for name in self.fastq_list
                    if r1_pattern.match(name)
                ]
                read2 = [
                    name + ".fastq.gz"
                    for name in self.fastq_list
                    if r2_pattern.match(name)
                ]
                if not read1 or not read2:
                    # Previously this raised IndexError, which was silently
                    # swallowed and aborted all remaining pairs.
                    print("qc_trim: no R1/R2 pair found for " + lane + "_" +
                          seq_no + ", skipping")
                    continue

                gene_origin = (self.info_dict["Sample_ID"][0] + "_" +
                               self.info_dict["Index"][0] + "_" + lane + "_" +
                               seq_no)
                # Only the first match of each pattern is trimmed.
                command = (self.paths.fastp + " -w " + self.thread +
                           " --in1 " + read1[0] + " --in2 " + read2[0] +
                           " --out1 trim_" + read1[0] +
                           " --out2 trim_" + read2[0] +
                           " --html " + gene_origin + ".html --json " +
                           gene_origin + ".json")
                log_command(command, "Fastp Trim", self.thread,
                            "Quality Control")
                self.file_list.extend([
                    gene_origin + ".html",
                    gene_origin + ".json",
                    "trim_" + str(read1[0]),
                    "trim_" + str(read2[0]),
                ])
                print("---------------------------------------------------")
                print(self.file_list)
    except Exception as err:
        # Best-effort step: report the problem instead of hiding it.
        print("qc_trim failed: " + repr(err))
def annovar_for_strelka(self, input_fs):
    """Pre-process Strelka VCF files for Annovar annotation (hg38 build).

    For every file name in ``input_fs`` a ``grep | awk`` command is built
    that drops the ``##`` lines and writes 11 tab-separated columns to a
    ``.txt`` file (with "Strelka" renamed to "Strelka2"); that command is
    printed and passed to ``log_command``.  The Annovar command line is then
    assembled and printed.

    Parameters
    ----------
    input_fs : list
        VCF file names relative to ``self.working_directory``.  Any value
        that is not a list is ignored.

    Returns
    -------
    None
    """
    print(input_fs)
    if not isinstance(input_fs, list):
        return None

    for input_f in input_fs:
        input_file = self.working_directory + "/" + input_f
        header_f = input_f.replace("Strelka", "Strelka2")
        header_output_file = (self.working_directory + "/" +
                              header_f.replace(".vcf", ".txt"))
        # Column 2 is printed twice and column 3 is not printed at all;
        # presumably start == end position for Annovar input - confirm.
        header_remove_comand = (
            'grep -v "##" ' + input_file + " | awk '" +
            '{print $1"\\t"$2"\\t"$2"\\t"$4"\\t"$5"\\t"$6"\\t"$7"\\t"$8"\\t"$9"\\t"$10"\\t"$11}' +
            "' > " + header_output_file
        )
        print(header_remove_comand)
        log_command(header_remove_comand, "Annovar", self.threads,
                    "Variant Annotation Preprocess")

        output_f = "Annovar_" + "_".join(header_f.split(".")[:-1])
        output_file = self.working_directory + "/" + output_f
        # One operation per protocol entry (11 of each).
        command = (
            self.annovar_dir + " " + input_file + " " + self.humandb +
            " -buildver hg38 -out " + output_file +
            " -remove -protocol refGene,ensGene,knownGene,cytoBand"
            ",exac03,avsnp150,dbnsfp35c,gme,gnomad_exome,"
            "clinvar_20180603,cosmic -operation "
            "gx,gx,gx,r,f,f,f,f,f,f,f -nastring . -polish "
            "-xreffile " + self.xref
        )
        # NOTE(review): the Annovar command is only printed here; it is not
        # passed to log_command in this function - confirm this is intended.
        print(command)
    return None
def mutect_select_variant_snp(self, mutect_output):
    """Select only the SNP records from a Mutect2 VCF.

    A GATK4 ``SelectVariants`` command restricted to the SNP type is
    assembled, printed and passed to ``log_command``.  The output file is
    named ``SNP_<last path component of mutect_output>``; that name is
    printed as well.  Nothing is returned.

    Parameters
    ----------
    mutect_output : str
        Path of the VCF file to filter.
    """
    snp_vcf = "SNP_" + mutect_output.rsplit("/", 1)[-1]
    command = "".join([
        self.get_paths.gatk4_path,
        " SelectVariants -R ",
        self.ref_dir,
        " -V ",
        mutect_output,
        " --select-type-to-include SNP -O ",
        snp_vcf,
    ])
    print(command)
    log_command(command, "Mutect2", self.threads, "Select SNP Variants")
    print(snp_vcf)
def merge_bams(self, info_dict, all_bam_files):
    """Merge several BAM files with Picard ``MergeSamFiles``.

    When ``self.split_chr`` equals ``"Before"`` the output name ends with
    the part of the first input name starting at ``_Chr_``; otherwise it
    ends with ``.bam``.  The command is passed to ``log_command``.

    Parameters
    ----------
    info_dict : dict
        Must provide ``info_dict["Sample_ID"][0]``.
    all_bam_files : list of str
        BAM files to merge.

    Returns
    -------
    str
        Name of the merged BAM file.
    """
    print("preprocess merge bams ")
    print(all_bam_files)

    # Every input becomes an "I=<file> " token (trailing space included).
    picard_inputs = "".join("I=" + bam + " " for bam in all_bam_files)

    if self.split_chr == "Before":
        first_bam = all_bam_files[0]
        name_suffix = first_bam[first_bam.find("_Chr_"):]
        log_label = "Merge Bams(Split Before)"
    else:
        name_suffix = ".bam"
        log_label = "Merge Bams"

    merged_name = "".join([
        self.map_type, "_", info_dict["Sample_ID"][0], "_MergedBAM",
        name_suffix,
    ])
    merge_command = "".join([
        "java -XX:ParallelGCThreads=", self.threads,
        " -jar ", self.get_paths.picard_path,
        " MergeSamFiles ", picard_inputs,
        " O=", merged_name, " USE_THREADING=true",
    ])
    log_command(merge_command, log_label, self.threads, "PreProcessing")
    return merged_name
def convert_sort(self, sort_gene_origin):
    """Build the sort/index commands for a freshly mapped BAM file.

    For ``self.map_type == "Novoalign"`` a single ``novosort`` command
    (with ``--removeduplicates``) is logged and both the sorted BAM and its
    ``.bai`` name are recorded.  For every other mapper a
    ``samtools view | samtools sort`` pipeline is logged and the index is
    requested through ``helpers.create_index``.

    Parameters
    ----------
    sort_gene_origin : str
        Name of the BAM file produced by the mapping step.
    """
    sorted_bam = "SortedBAM_" + sort_gene_origin

    if self.map_type != "Novoalign":
        command = "".join([
            "samtools view -@", self.threads, " -bS ", sort_gene_origin,
            " | samtools sort -@", self.threads, " -o ", sorted_bam,
        ])
        log_command(command, "Convert Sort", self.threads, "Mapping")
        self.file_list.append(sorted_bam)
        index_name = helpers.create_index(sorted_bam, "Create Index",
                                          self.threads, "Mapping")
        self.file_list.append(index_name)
        return

    command = "".join([
        self.get_paths.novoalign, "novosort -m 16g -t . -c ", self.threads,
        " --removeduplicates --keeptags ", sort_gene_origin,
        " -i -o ", sorted_bam,
    ])
    log_command(command, "Convert Sort", self.threads, "Mapping")
    self.file_list.append(sorted_bam)
    self.file_list.append(sorted_bam + ".bai")
def somaticsniper_caller(self):
    """Build the SomaticSniper command for the tumor/germline BAM pair.

    The VCF output path is ``<working_directory>/<output_name>.vcf``.  The
    command is passed to ``log_command``; nothing is returned.
    """
    vcf_path = "".join([self.working_directory, "/", self.output_name, ".vcf"])
    fixed_options = (" -q 1 -L -G -Q 15 -s 0.01 -T 0.85 -N 2 -r 0.001"
                     " -n NORMAL -t TUMOR -F vcf -f ")
    command = "".join([
        self.get_paths.somaticsniper,
        fixed_options,
        self.ref_dir, " ",
        self.tumor_bam, " ",
        self.germline_bam, " ",
        vcf_path,
    ])
    log_command(command, "Somatic Sniper", self.threads, "Variant Calling")
def annovar_for_g37(self, input_fs):
    """Build the Annovar (hg19 build) annotation command for each VCF file.

    Every command is printed and passed to ``log_command``; files in the
    current directory whose name contains the Annovar output prefix are
    appended to ``self.file_list``.  After all inputs are handled
    ``helpers.create_folder`` is called for the "Annovar" step.

    Parameters
    ----------
    input_fs : list
        VCF file names relative to ``self.working_directory``.

    Returns
    -------
    False or None
        ``False`` when ``input_fs`` is not a list, otherwise ``None``.
    """
    print(input_fs)
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses.
    if not isinstance(input_fs, list):
        return False

    for input_f in input_fs:
        input_file = self.working_directory + "/" + input_f
        output_f = "Annovar_" + "_".join(input_f.split(".")[:-1])
        output_file = self.working_directory + "/" + output_f
        # One operation per protocol entry (8 of each).
        command = (
            self.annovar_dir + " --vcfinput " + input_file + " " +
            self.humandb + " -buildver hg19 -out " + output_file +
            " -remove -protocol refGene,cytoBand"
            ",exac03,gnomad211_exome,avsnp150,dbnsfp35a,"
            "clinvar_20190305,intervar_20180118 -operation "
            "gx,r,f,f,f,f,f,f -nastring . -polish "
            "-xreffile " + self.xref
        )
        print(command)
        log_command(command, "Annovar", self.threads, "Variant Annotation")
        self.file_list.extend(glob.glob("*" + output_f + "*"))

    helpers.create_folder(self.working_directory, self.file_list,
                          step="Annovar",
                          folder_directory=self.working_directory)
def strelka_caller(self):
    """Build the two Strelka commands: workflow configuration, then the run.

    Both commands are passed to ``log_command`` under the same
    "Strelka Create Workflow" label.  The second one invokes
    ``runWorkflow.py`` by relative name.  Nothing is returned.
    """
    configure_command = "".join([
        self.get_paths.strelka,
        " --normalBam ", self.germline_bam,
        " --tumorBam ", self.tumor_bam,
        " --referenceFasta ", self.ref_dir,
        " --runDir ", self.working_directory,
        " --exome --disableEVS",
    ])
    log_command(configure_command, "Strelka Create Workflow", self.threads,
                "Variant Calling")

    run_command = "python runWorkflow.py -m local -j " + self.threads
    log_command(run_command, "Strelka Create Workflow", self.threads,
                "Variant Calling")
def varscan_caller_step2(self, intermediate_varscan_somatic):
    """Build a VarScan ``processSomatic`` command for each intermediate file.

    Parameters
    ----------
    intermediate_varscan_somatic : iterable of str
        File names to process.

    Returns
    -------
    list of str
        Every name in the current directory matching ``*vcf*``.
    """
    print(intermediate_varscan_somatic)
    thresholds = " --min-tumor-freq 0.10 --max-normal-freq 0.05 --p-value 0.07"
    for somatic_file in intermediate_varscan_somatic:
        command = "".join([
            "java -jar ", self.get_paths.varscan_path,
            " processSomatic ", somatic_file,
            thresholds,
        ])
        log_command(command, "Varscan Step Process Somatic", self.threads,
                    "Variant Calling")
    return glob.glob("*vcf*")
def gatk_haplotype(self):
    """Build the GATK (``-T`` style) HaplotypeCaller command for the germline BAM.

    The ``-o`` target is ``<working_directory>/<output_name>.vcf`` with
    ``.raw.snps.indels.vcf`` appended.  The command is printed and passed
    to ``log_command``; nothing is returned.
    """
    vcf_base = "".join([self.working_directory, "/", self.output_name, ".vcf"])
    command = "".join([
        "java -jar ", self.get_paths.gatk_path,
        " -R ", self.ref_dir,
        " -T HaplotypeCaller -I ", self.germline_bam,
        " --dbsnp ", self.get_paths.dbsnp,
        " -o ", vcf_base, ".raw.snps.indels.vcf",
    ])
    print(command)
    log_command(command, "Haplotype", self.threads, "Haplotype Variant Calling")
def fastqc(self):
    """Build a FastQC command for every ``*fastq.gz`` file in the current directory.

    Each command is passed to ``log_command``.  Afterwards every name
    matching ``*fastqc*`` is appended to ``self.file_list``.
    """
    for fastq_name in glob.glob("*fastq.gz"):
        fastq_path = self.working_directory + "/" + fastq_name
        fastqc_command = self.paths.fastqc + " " + fastq_path
        log_command(fastqc_command, "FastQC Quality Control", self.thread,
                    "Quality Control")

    report_files = glob.glob("*fastqc*")
    self.file_list.extend(report_files)
def mutect_caller_gatk3(self):
    """Build the GATK (``-T`` style) MuTect2 command for the tumor/normal BAM pair.

    Output goes to ``<working_directory>/<output_name>``.  The command is
    printed and passed to ``log_command``; nothing is returned.
    """
    # Output name
    output_path = self.working_directory + "/" + self.output_name
    thread_option = " -nct " + self.threads

    # Mutect variant caller command
    command = "".join([
        "java -jar ", self.get_paths.gatk_path,
        " -T MuTect2 ", thread_option,
        " -R ", self.ref_dir,
        " -I:tumor ", self.tumor_bam,
        " -I:normal ", self.germline_bam,
        " -o ", output_path,
    ])
    print(command)
    log_command(command, "Mutect2", self.threads, "Variant Calling")
def gatk3_base_recalibrator(self, lastbam):
    """Build the GATK (``-T`` style) BaseRecalibrator command (hg19 bundle files).

    Parameters
    ----------
    lastbam : str
        BAM file to analyse.

    Returns
    -------
    str
        Name of the recalibration report: the part of ``lastbam`` before
        its first ``.`` followed by ``_bqsr.grp``.  It is also appended to
        ``self.file_list``.
    """
    report_name = str(lastbam).partition(".")[0] + "_bqsr.grp"
    thread_option = " -nct " + str(self.threads)
    command = "".join([
        "java -jar ", self.get_paths.gatk_path, thread_option,
        " -T BaseRecalibrator -R ", self.bundle_dir, "/ucsc.hg19.fasta",
        " -I ", lastbam,
        " -knownSites ", self.bundle_dir,
        "/Mills_and_1000G_gold_standard.indels.hg19.vcf",
        " -o ", report_name,
    ])
    log_command(command, "Base Recalibrator", self.threads, "GatkPreProcessing")
    self.file_list.append(report_name)
    return report_name
def split_bam_by_chr(file):
    """Build a shell loop that splits a BAM file into per-chromosome BAM files.

    The loop runs ``samtools view -bh`` for chr1..chr22, chrX and chrY and
    writes ``<name before first dot>_Chr_<chrom>.bam``.  The command is
    printed and passed to ``log_command``.

    Parameters
    ----------
    file : str
        BAM file name (or shell word list) placed after ``for file in``.

    Returns
    -------
    list of str
        Names in the current directory matching ``*_Chr_*.bam``.
    """
    loop_body = (
        '; do filename=`echo $file | cut -d "." -f 1`; '
        'for chrom in `seq 1 22` X Y; do '
        'samtools view -bh $file chr${chrom} > ${filename}_Chr_${chrom}.bam; '
        'done; done'
    )
    split_command = "for file in " + file + loop_body
    print(split_command)
    log_command(split_command, "split by chrommose", "0", "PreProcessing")
    return glob.glob("*_Chr_*.bam")
def gatk4_applybsqr(self, lastbam, recaltable):
    """Build the GATK4 ``ApplyBQSR`` command and request an index for its output.

    The output BAM is named ``GATK4_<lastbam>``.  Both that name and the
    value returned by ``helpers.create_index`` are appended to
    ``self.file_list``.  Nothing is returned.

    Parameters
    ----------
    lastbam : str
        Input BAM file.
    recaltable : str
        Recalibration table passed as ``--bqsr-recal-file``.
    """
    recalibrated_bam = "GATK4_" + lastbam
    # bundle_dir is joined without a separator, so it must end with "/".
    apply_command = "".join([
        self.get_paths.gatk4_path,
        " ApplyBQSR -R ", self.bundle_dir, "Homo_sapiens_assembly38.fasta",
        " -I ", lastbam,
        " --bqsr-recal-file ", recaltable,
        " -O ", recalibrated_bam,
    ])
    log_command(apply_command, "ApplyBQSR", self.threads, "Gatk4PreProcessing")
    self.file_list.append(recalibrated_bam)

    index_name = helpers.create_index(recalibrated_bam,
                                      "Create Index by GATK_ApplyBSQR",
                                      self.threads, "GatkPreProcess")
    self.file_list.append(index_name)
def gatk4_base_recalibrator(self, lastbam):
    """Build the GATK4 ``BaseRecalibrator`` command with three known-sites files.

    Parameters
    ----------
    lastbam : str
        BAM file to analyse.

    Returns
    -------
    str
        Name of the recalibration table: the part of ``lastbam`` before its
        first ``.`` followed by ``_RECAL.table``.  It is also appended to
        ``self.file_list``.
    """
    table_name = str(lastbam).partition(".")[0] + "_RECAL.table"
    known_sites = (self.get_paths.mills_indel, self.get_paths.dbsnp,
                   self.get_paths.one_thousand_g)
    # bundle_dir is joined without a separator, so it must end with "/".
    command = "".join([
        self.get_paths.gatk4_path,
        " BaseRecalibrator -R ", self.bundle_dir,
        "Homo_sapiens_assembly38.fasta -I ", lastbam,
        " --known-sites ", known_sites[0],
        " --known-sites ", known_sites[1],
        " --known-sites ", known_sites[2],
        " -O ", table_name,
    ])
    log_command(command, "Base Recalibrator", self.threads, "Gatk4PreProcessing")
    self.file_list.append(table_name)
    return table_name
def gatk3_indel_realigner(self, lastbam, realign_target):
    """Build the GATK (``-T`` style) IndelRealigner command (hg19 bundle files).

    Parameters
    ----------
    lastbam : str
        Input BAM file.
    realign_target : str
        Intervals file passed as ``-targetIntervals``.

    Returns
    -------
    str
        ``IR_<lastbam>``, the realigned BAM name; it is also appended to
        ``self.file_list``.
    """
    realigned_bam = "IR_" + lastbam
    command = "".join([
        "java -jar ", self.get_paths.gatk_path,
        " -T IndelRealigner -R ", self.bundle_dir, "/ucsc.hg19.fasta",
        " -known ", self.bundle_dir,
        "/Mills_and_1000G_gold_standard.indels.hg19.vcf",
        " -targetIntervals ", realign_target,
        " --noOriginalAlignmentTags -I ", lastbam,
        " -o ", realigned_bam,
    ])
    log_command(command, "Indel Realigner", self.threads, "GatkPreProcessing")
    self.file_list.append(realigned_bam)
    return realigned_bam
def gatk3_print_reads(self, lastbam, bqsr):
    """Build the GATK (``-T`` style) PrintReads command and request an index for its output.

    The output BAM is named ``GATK_PR<lastbam>`` (no separator).  Both that
    name and the value returned by ``helpers.create_index`` are appended to
    ``self.file_list``.  Nothing is returned.

    Parameters
    ----------
    lastbam : str
        Input BAM file.
    bqsr : str
        Recalibration report passed as ``--BQSR``.
    """
    thread_option = " -nct " + str(self.threads)
    recalibrated_bam = "GATK_PR" + lastbam
    command = "".join([
        "java -jar ", self.get_paths.gatk_path, thread_option,
        " -T PrintReads -R ", self.bundle_dir, "/ucsc.hg19.fasta",
        " -I ", lastbam,
        " --BQSR ", bqsr,
        " -o ", recalibrated_bam,
    ])
    log_command(command, "Print Reads", self.threads, "GatkPreProcessing")
    self.file_list.append(recalibrated_bam)

    index_name = helpers.create_index(recalibrated_bam,
                                      "Create Index by GATK_PrintReads",
                                      self.threads, "GatkPreProcess")
    self.file_list.append(index_name)
def gatk3_realign_target_creator(self, lastbam):
    """Build the GATK (``-T`` style) RealignerTargetCreator command (hg19 bundle files).

    Parameters
    ----------
    lastbam : str
        Input BAM file.

    Returns
    -------
    str
        Name of the intervals file: the part of ``lastbam`` before its
        first ``.`` followed by ``_realign_target.intervals``.  It is also
        appended to ``self.file_list``.
    """
    intervals_name = str(lastbam).partition(".")[0] + "_realign_target.intervals"
    command = "".join([
        "java -jar ", self.get_paths.gatk_path,
        " -T RealignerTargetCreator -nt ", self.threads,
        " -R ", self.bundle_dir, "/ucsc.hg19.fasta",
        " -known ", self.bundle_dir,
        "/Mills_and_1000G_gold_standard.indels.hg19.vcf -I ", lastbam,
        " -o ", intervals_name,
    ])
    print(command)
    log_command(command, "Realign Target Creator", self.threads,
                "GatkPreProcessing")
    self.file_list.append(intervals_name)
    return intervals_name
def novoalign_sort_markduplicate(self, info_dict, all_bam_files):
    """Build one ``novosort`` command that takes all given BAM files as input.

    Parameters
    ----------
    info_dict : dict
        Must provide ``info_dict["Sample_ID"][0]``.
    all_bam_files : iterable of str
        BAM files passed to ``novosort``.

    Returns
    -------
    str
        ``MDUP_<map_type>_<sample id>_MergedBAM.bam``.  This name and the
        same name with ``.bai`` appended are added to ``self.file_list``.
    """
    merged_name = "".join([
        "MDUP_", self.map_type, "_", info_dict["Sample_ID"][0],
        "_MergedBAM.bam",
    ])
    # Each input is preceded by a single space.
    novosort_inputs = "".join(" " + bam for bam in all_bam_files)
    command = "".join([
        self.get_paths.novoalign, "novosort -m 16g -t . -c ", self.threads,
        " ", novosort_inputs,
        " -i -o ", merged_name,
    ])
    log_command(command, "Merge&Mark Duplicate", self.threads, "PreProcessing")
    self.file_list.append(merged_name)
    self.file_list.append(merged_name + ".bai")
    return merged_name
def mutect_tumor_only(self):
    """Build the GATK4 Mutect2 command for the tumor BAM alone.

    Output goes to ``<working_directory>/TumorOnly_<output_name>.vcf``.  The
    command is printed and passed to ``log_command``; the output path is
    then handed to ``self.mutect_select_variant``.  Nothing is returned.
    """
    # Output name
    vcf_path = "".join([
        self.working_directory, "/", "TumorOnly_", self.output_name, ".vcf",
    ])

    # Sample name as reported by helpers.get_sample_name for the tumor BAM
    tumor_sample = helpers.get_sample_name(self.tumor_bam)

    # Mutect variant caller command
    command = "".join([
        self.get_paths.gatk4_path,
        " Mutect2 -R ", self.ref_dir,
        " -I ", self.tumor_bam,
        " -tumor ", tumor_sample,
        " -O ", vcf_path,
    ])
    print(command)
    log_command(command, "Mutect2", self.threads, "Variant Calling Tumor Only")

    # Split the result by variant type
    self.mutect_select_variant(vcf_path)
def mutect_caller(self):
    """Build the GATK4 Mutect2 command for the tumor/normal BAM pair.

    Output goes to ``<working_directory>/<output_name>.vcf``.  The command
    is printed and passed to ``log_command``; the output path is then
    handed to ``self.mutect_select_variant``.  Nothing is returned.
    """
    # Output name
    mutect_output = self.working_directory + "/" + self.output_name + ".vcf"

    # Sample names as reported by helpers.get_sample_name for each BAM
    normal_s_name = helpers.get_sample_name(self.germline_bam)
    tumor_s_name = helpers.get_sample_name(self.tumor_bam)
    print(tumor_s_name)

    # The JVM options flag and its value must be separate shell words;
    # the previous `--javaOptions"-Xmx4g"` collapsed into a single token.
    command = (self.get_paths.gatk4_path + ' --java-options "-Xmx4g" Mutect2' +
               " -R " + self.ref_dir +
               " -I " + self.tumor_bam + " -tumor " + tumor_s_name +
               " -I " + self.germline_bam + " -normal " + normal_s_name +
               " -O " + mutect_output)
    print(command)
    log_command(command, "Mutect2", self.threads, "Variant Calling")

    # Split the result by variant type
    self.mutect_select_variant(mutect_output)
def convert_sort(self, sort_gene_origin):
    """Build the sort command for a mapped BAM file and request its index.

    A ``samtools view | samtools sort`` pipeline writing
    ``SortedBAM_<sort_gene_origin>`` is passed to ``log_command``.  The
    sorted name and the value returned by ``helpers.create_index`` are
    appended to ``self.file_list``.

    Parameters
    ----------
    sort_gene_origin : str
        Name of the BAM file produced by the mapping step.
    """
    sorted_bam = "SortedBAM_" + sort_gene_origin
    sort_command = "".join([
        "samtools view -@", self.threads, " -bS ", sort_gene_origin,
        " | samtools sort -@", self.threads, " -o ", sorted_bam,
    ])
    log_command(sort_command, "Convert Sort", self.threads, "Mapping")
    self.file_list.append(sorted_bam)

    index_name = helpers.create_index(sorted_bam, "Create Index",
                                      self.threads, "Mapping")
    self.file_list.append(index_name)
def varscan_caller_step1(self):
    """Build the ``samtools mpileup | VarScan somatic`` pipeline command.

    SNP and INDEL outputs are prefixed ``SNP_`` / ``INDEL_`` inside
    ``self.working_directory``.  The command is printed and passed to
    ``log_command``.

    Returns
    -------
    list of str
        Names in the current directory matching ``*<output_name>*vcf*``.
    """
    snp_path = self.working_directory + "/SNP_" + self.output_name
    indel_path = self.working_directory + "/INDEL_" + self.output_name
    varscan_options = (
        " --mpileup 1 --min-coverage 8 --min-coverage-normal 8"
        " --min-coverage-tumor 6 --min-var-freq 0.10"
        " --min-freq-for-hom 0.75 --normal-purity 1.0 --tumor-purity 1.00"
        " --p-value 0.99 --somatic-p-value 0.05"
        " --strand-filter 0 --output-vcf"
    )
    command = "".join([
        "samtools mpileup -f ", self.ref_dir,
        " -q 1 -B ", self.germline_bam, " ", self.tumor_bam,
        " | java -jar ", self.get_paths.varscan_path,
        " somatic --output-snp ", snp_path,
        " --output-indel ", indel_path,
        varscan_options,
    ])
    print(command)
    log_command(command, "Varscan Step Pileup", self.threads, "Variant Calling")
    return glob.glob("*" + self.output_name + "*vcf*")
def mark_duplicate(self, merged_bam, chr):
    """Build the Picard ``MarkDuplicates`` command (with ``REMOVE_DUPLICATES=true``).

    When ``self.split_chr`` is ``"After"`` or ``"Before"`` the metrics file
    name embeds ``chr`` minus its last four characters; otherwise it is
    ``marked_dup_metrics.txt`` and ``chr`` is not used.  The metrics file
    name is appended to ``self.file_list``.

    Parameters
    ----------
    merged_bam : str
        Input BAM file.
    chr : str
        Per-chromosome suffix used only in the split modes.

    Returns
    -------
    str
        ``MDUP_<merged_bam>``, the output BAM name.
    """
    split_mode = self.split_chr
    if split_mode == "After":
        log_label = "Mark Duplicate Split After"
    elif split_mode == "Before":
        log_label = "Mark Duplicate Split Before"
    else:
        log_label = "Mark Duplicate"

    deduped_bam = "MDUP" + "_" + merged_bam
    if log_label == "Mark Duplicate":
        metrics_file = "marked_dup_metrics.txt"
    else:
        metrics_file = "marked_dup_metrics" + chr[:-4] + ".txt"

    picard_command = "".join([
        "java -XX:ParallelGCThreads=", self.threads,
        " -jar ", self.get_paths.picard_path,
        " MarkDuplicates I=", merged_bam,
        " O=", deduped_bam,
        " M=", metrics_file,
        " REMOVE_DUPLICATES=true CREATE_INDEX=true",
    ])
    log_command(picard_command, log_label, self.threads, "PreProcessing")
    self.file_list.append(metrics_file)
    return deduped_bam
def create_index(lastbam, function, threads, step):
    """Build the Picard ``BuildBamIndex`` command for ``lastbam``.

    Parameters
    ----------
    lastbam : str
        BAM file to index.
    function, threads, step
        Forwarded unchanged to ``log_command``.

    Returns
    -------
    str
        ``lastbam`` with its last three characters replaced by ``bai``.
    """
    picard_jar = GetPaths().picard_path
    index_command = "".join([
        "java -Dpicard.useLegacyParser=false -jar ", picard_jar,
        " BuildBamIndex -I ", lastbam,
    ])
    log_command(index_command, function, threads, step)
    return lastbam[:-3] + "bai"
def mapping(self):
    """
    Map every paired-end FASTQ pair with the mapper named by ``self.map_type``
    ("Bwa", "Bowtie2" or "Novoalign").

    Steps:
        - Read the first line of the first FASTQ file to obtain the flow cell
          field used in the read group.
        - Pair the FASTQ files per lane and per sequence number.
        - Build custom read group information and add it to the mapper
          command, so it ends up in the BAM files created here.
        - Build the complete command as a string.
        - Pass the command to ``log_command``; the tools are invoked by bare
          name, so they must be on the PATH.

    Returns
    -------
    list of str
        Names matching ``SortedBAM*bam`` in the current directory.  For an
        unknown ``self.map_type`` a message string is returned instead, from
        inside the loop.
    """
    print(os.getcwd())
    fastq_list = helpers.get_fastq()  # Get list of fastq files
    print(fastq_list)
    info_dict = helpers.get_info(
        self.sample_type, fastq_list,
        self.trim)  # Get necessary information from the file names

    # RG_{..} variables hold the read group fields.
    RG_SM = info_dict["Sample_ID"][0]
    RG_PL = "Illumina"
    RG_LB = self.library_matching_id

    # Every fastq file carries the flow cell information, so reading the first
    # line of one file is enough.
    first_fastq_file_dir = self.working_directory + "/" + fastq_list[
        0] + ".fastq.gz"
    with gzip.open(first_fastq_file_dir) as f:
        first_line = f.readline()
    # gzip.open defaults to binary mode, so first_line is bytes and str() gives
    # its repr; the third ':'-separated field is not affected by the b' prefix.
    flowcell_info = str(first_line).split(":")[2]

    # Fastq files are paired per lane and per sequence number, e.g.
    #   (SampleName_S1_L001_R1_001.fastq.gz, SampleName_S1_L001_R2_001.fastq.gz),
    #   (SampleName_S1_L002_R1_001.fastq.gz, SampleName_S1_L002_R2_001.fastq.gz),
    #   (SampleName_S1_L001_R1_002.fastq.gz, SampleName_S1_L001_R2_002.fastq.gz),
    #   (SampleName_S1_L002_R1_002.fastq.gz, SampleName_S1_L002_R2_002.fastq.gz)
    for i in info_dict["Lanes"]:
        for k in info_dict["Number_of_seq"]:
            r1 = re.compile(".*" + i + "_R1_" + k)
            read1 = [s + ".fastq.gz" for s in fastq_list if r1.match(s)]
            r2 = re.compile(".*" + i + "_R2_" + k)
            read2 = [s + ".fastq.gz" for s in fastq_list if r2.match(s)]
            # Only the last character of the lane name goes into ID and PU.
            RG_ID = flowcell_info + "." + i[-1]
            RG_PU = flowcell_info + "." + info_dict["Index"][0] + "." + i[
                -1]
            map_bam = ""
            # Create output name of bam file after mapping
            gene_origin = self.map_type + "_" + info_dict["Sample_ID"][
                0] + "_" + info_dict["Index"][
                    0] + "_" + i + "_" + k + ".bam"

            # Only the first match of each pattern (read1[0] / read2[0]) is used.
            if self.map_type == "Bwa":  # If selected algorithm is Bwa
                add_read_group = ' -R "@RG\\tID:' + RG_ID + '\\tSM:' + RG_SM + '\\tLB:' + RG_LB + '\\tPL:' + \
                                 RG_PL + '\\tPU:' + RG_PU + '" '  # Read group that will be added to the bam file
                map_bam = "bwa mem -t " + self.threads + " " + add_read_group + self.get_paths.ref_dir + \
                          "Bwa/Homo_sapiens_assembly38.fasta " + read1[0] + " " + read2[0] + \
                          " | samtools view -@" + self.threads + " -bS - > " + gene_origin
                print("mapping =>" + map_bam)
            elif self.map_type == "Bowtie2":  # If selected algorithm is Bowtie2
                add_read_group = " --rg-id " + RG_ID + " --rg SM:" + RG_SM + " --rg LB:" + RG_LB + " --rg PL:" + \
                                 RG_PL + " --rg PU:" + RG_PU  # Read group that will be added to the bam file
                map_bam = "bowtie2 -p" + self.threads + add_read_group + " -x " + self.get_paths.ref_dir + \
                          "Bowtie2/Homo_sapiens_assembly38 -1 " + read1[0] + " -2 " + read2[0] + \
                          " | samtools view -@" + self.threads + " -bS - > " + gene_origin
                print("mapping =>" + map_bam)
            elif self.map_type == "Novoalign":
                add_read_group = ' "@RG\\tID:' + RG_ID + '\\tSM:' + RG_SM + '\\tLB:' + RG_LB + '\\tPL:' + \
                                 RG_PL + '\\tPU:' + RG_PU + '" '  # Read group that will be added to the bam file
                # stderr of novoalign is redirected to this file (note the trailing space).
                stats_txt = gene_origin.split(".")[0] + "_stats.txt "
                map_bam = self.get_paths.novoalign + "novoalign -k -d " + self.get_paths.ref_dir + "NovoAlign/Homo_sapiens_assembly38 -f " + \
                          read1[0] + " " +read2[0] + " -a -c " + self.threads + " -o SAM " + add_read_group + " 2> " + stats_txt + \
                          " | samtools view -@" + self.threads + " -bS - > " + gene_origin
                print("mapping =>" + map_bam)
            else:
                # Unknown mapper: a message string is returned, no exception is raised.
                return "Please specify the map type Bwa/Bowtie "

            # Pass the command string built above to log_command together with
            # the step name, number of threads and class name for logging.
            log_command(map_bam, "Mapping", self.threads, "Mapping")
            self.file_list.append(
                gene_origin)  # Output file's name added to list
            self.convert_sort(
                gene_origin
            )  # Each output bam file is sorted and indexed by this method

    all_sortedbam_files = glob.glob(
        "SortedBAM*bam")  # Get all sorted bam files

    # The helper receives the working directory, the files recorded in this
    # step, the mapping type and the step name in order to create the folder
    # for this step.
    helpers.create_folder(self.working_directory,
                          self.file_list,
                          map_type=self.map_type,
                          step="Mapping",
                          folder_directory=self.folder_directory)
    print("print sorted all bam files ")
    print(all_sortedbam_files)
    return all_sortedbam_files  # Return list of sorted bam files