def variantCalling_mutect2_partial(chromosome):
    """Call variants with Mutect2 on the per-chromosome tumor/normal BAM pair.

    Builds the argument dict for the "gatk_mutect2" command and hands it to
    ``gatk_docker``. Relies on ``self``, ``reference_fasta`` and
    ``gatk_docker`` from the enclosing scope.

    Args:
        chromosome: Chromosome label; concatenated into file and sample
            names, so it must be a string.
    """
    out_prefix = self.output_folder + self.sample_name
    tumor_stem = self.sample_name + "_r_tumor.chr" + chromosome
    normal_stem = self.sample_name + "_r_normal.chr" + chromosome

    result_vcf = out_prefix + ".chr" + chromosome + ".mutect2.vcf.gz"

    # Key order is kept as-is in case gatk_docker iterates the dict in order.
    # Option stubs that used to be commented out here: disable-read-filter,
    # dbsnp, emit-reference-confidence, max-alternate-alleles.
    mutect2_args = {
        "input_tumor": out_prefix + "_r_tumor.chr" + chromosome + "_RG.bam",
        "input_normal": out_prefix + "_r_normal.chr" + chromosome + "_RG.bam",
        "normal-sample": normal_stem + "_RG",
        "tumor-sample": tumor_stem + "_RG",
        "output": result_vcf,
        "reference": reference_fasta,
    }

    log_path = out_prefix + ".chr" + chromosome + ".Mutect2.log"
    gatk_image = self.docker_images_dict["gatk"]
    gatk_docker("gatk_mutect2", mutect2_args, log_path, self.ram, gatk_image)
def filterMutectCalls(vcf_file):
    """Run the "gatk_filter_mutect" command on a Mutect2 VCF.

    The filtered output path is derived from ``vcf_file`` by swapping the
    ``.vcf.gz`` suffix for ``.filtered.vcf.gz``. Relies on ``self`` and
    ``gatk_docker`` from the enclosing scope.

    Args:
        vcf_file: Path of the VCF to filter.
    """
    filter_args = {
        "variant": vcf_file,
        "output": vcf_file.replace(".vcf.gz", ".filtered.vcf.gz"),
    }
    # The log name depends only on the sample, not on vcf_file.
    log_path = self.output_folder + self.sample_name + ".filter.log"
    gatk_docker(
        "gatk_filter_mutect",
        filter_args,
        log_path,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def validateSam():
    """Validate the integrity of the BWA alignment file in SUMMARY mode.

    Runs the "gatk_validate_sam" command through ``gatk_docker`` on
    ``<output_folder><sample_name>.bwa.bam`` and logs to
    ``<output_folder><sample_name>.validateSamFile.log``. Relies on
    ``self``, ``output_folder`` and ``gatk_docker`` from the enclosing scope.
    """
    parameters_dict = {
        "input": output_folder + self.sample_name + ".bwa.bam",
        "MODE": "SUMMARY",
    }
    validate_log = output_folder + self.sample_name + ".validateSamFile.log"
    # Fixes: the command key is passed as a string literal like every other
    # gatk_docker call (it was a bare, undefined-looking name), and the log
    # is the validate_log built above rather than HaplotypeCaller_log, which
    # belongs to a different step and is not defined in this function.
    gatk_docker(
        "gatk_validate_sam",
        parameters_dict,
        validate_log,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def buildRecalibrator():
    """Build the base-quality recalibration table (GATK).

    Sends the "gatk_build_recalibrator" command to ``gatk_docker`` using
    ``sorted_bam``, ``BaseRecalibrator_metrics``, ``reference_fasta`` and
    ``dbsnp_vcf`` from the enclosing scope, along with ``self`` and
    ``output_folder``.
    """
    recal_args = dict(
        [
            ("input", sorted_bam),
            ("output", BaseRecalibrator_metrics),
            ("reference", reference_fasta),
            ("known-sites", dbsnp_vcf),
            ("use-original-qualities", "true"),
        ]
    )
    log_path = output_folder + self.sample_name + ".BaseRecalibrator.log"
    gatk_image = self.docker_images_dict["gatk"]
    gatk_docker("gatk_build_recalibrator", recal_args, log_path, self.ram, gatk_image)
def variantCalling_HaplotypeCaller(input_bam):
    """Run the "gatk_haplotype_caller" command on a BAM file.

    The output VCF path is ``input_bam`` with ``.bam`` replaced by
    ``.vcf.gz``; the log is written next to the input as
    ``<input_bam>.HaplotypeCaller.log``. Relies on ``self``,
    ``reference_fasta`` and ``gatk_docker`` from the enclosing scope.

    Args:
        input_bam: Path of the BAM file to call variants on.
    """
    output_vcf = input_bam.replace(".bam", ".vcf.gz")
    # Option stubs that used to be commented out here:
    # emit-reference-confidence, max-alternate-alleles.
    caller_args = {
        "input": input_bam,
        "output": output_vcf,
        "reference": reference_fasta,
    }
    log_path = input_bam + ".HaplotypeCaller.log"
    gatk_docker(
        "gatk_haplotype_caller",
        caller_args,
        log_path,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def addReadGroups():
    """Add read-group tags to the unsorted BAM (GATK).

    Sends the "gatk_add_read_groups" command to ``gatk_docker``, reading
    ``unsorted_bam`` and writing ``ReadGroups_bam`` (both taken from the
    enclosing scope). Library, platform, platform-unit and sample values
    come from ``self``.
    """
    read_group_tags = {
        "RGLB": self.lib_ID,
        "RGPL": self.pl_ID,
        "RGPU": self.pu_ID,
        "RGSM": self.sample_name,
    }
    # Input/output first, then the RG* tags, matching the original key order.
    rg_args = {"input": unsorted_bam, "output": ReadGroups_bam, **read_group_tags}
    log_path = self.output_folder + self.sample_name + ".ReadGroups.log"
    gatk_docker(
        "gatk_add_read_groups",
        rg_args,
        log_path,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def addReadGroups(t_n):
    """Add read-group tags to the duplicate-marked BAM of one sample type.

    Sends the "gatk_add_read_groups" command to ``gatk_docker``. Relies on
    ``self``, ``markDuplicates_bam``, ``ReadGroups_bam``, ``t_n_sample_name``
    and ``gatk_docker`` from the enclosing scope.

    Args:
        t_n: Sample-type label appended to the sample name (as
            ``<sample_name>_r_<t_n>``) and to the log file name; presumably
            "tumor" or "normal" -- confirm against the caller.
    """
    sample_tag = self.sample_name + "_r_" + t_n
    rg_args = {
        "input": markDuplicates_bam,
        "output": ReadGroups_bam,
        "RGLB": self.lib_ID,
        "RGPL": self.pl_ID,
        "RGPU": self.pu_ID,
        "RGSM": sample_tag,
    }
    log_path = self.output_folder + t_n_sample_name + ".ReadGroups." + t_n + ".log"
    gatk_image = self.docker_images_dict["gatk"]
    gatk_docker("gatk_add_read_groups", rg_args, log_path, self.ram, gatk_image)
def addReadGroups_partial_bam(chromosome, input_bam):
    """Add read-group tags to a partial (per-chromosome) BAM file.

    The output is ``input_bam`` with ``.bam`` replaced by ``_RG.bam``; the
    RGSM value is the file name of ``input_bam`` (text after the last "/")
    with ``.bam`` replaced by ``_RG``. Relies on ``self``,
    ``t_n_sample_name`` and ``gatk_docker`` from the enclosing scope.

    Args:
        chromosome: Not used by this function; kept for the existing
            call signature.
        input_bam: Path of the BAM file to tag.
    """
    tagged_bam = input_bam.replace(".bam", "_RG.bam")
    sample_tag = input_bam.replace(".bam", "_RG").rsplit("/", 1)[-1]
    rg_args = {
        "input": input_bam,
        "output": tagged_bam,
        "RGLB": self.lib_ID,
        "RGPL": self.pl_ID,
        "RGPU": self.pu_ID,
        "RGSM": sample_tag,
    }
    # NOTE(review): the log name does not include the chromosome, so calls
    # for different chromosomes share one log path -- confirm this is wanted.
    log_path = self.output_folder + t_n_sample_name + ".ReadGroups.log"
    gatk_docker(
        "gatk_add_read_groups",
        rg_args,
        log_path,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def applyRecalibrator():
    """Apply the base-quality recalibration model to the sorted BAM (GATK).

    Sends the "gatk_apply_recalibrator" command to ``gatk_docker``, reading
    ``sorted_bam`` and ``BaseRecalibrator_metrics`` and writing
    ``BaseRecalibrator_bam``. Relies on those names plus ``self``,
    ``output_folder`` and ``gatk_docker`` from the enclosing scope.
    """
    parameters_dict = {
        "input": sorted_bam,
        "output": BaseRecalibrator_bam,
        "bqsr": BaseRecalibrator_metrics,
        "use-original-qualities": "true",
        # The original literal listed "static-quantized-quals" three times
        # ("10", "20", "30"). A dict keeps only the last value for a repeated
        # key, so "30" is the only level that was ever passed on; the dead
        # duplicates are removed to make that explicit.
        # TODO(review): the intent was presumably all three levels
        # (10, 20, 30); passing them needs a gatk_docker convention for
        # repeated options, which is not visible from this function.
        "static-quantized-quals": "30",
    }
    ApplyBQSR_log = output_folder + self.sample_name + ".ApplyBQSR.log"
    gatk_docker(
        "gatk_apply_recalibrator",
        parameters_dict,
        ApplyBQSR_log,
        self.ram,
        self.docker_images_dict["gatk"],
    )
def markDuplicates(input_bam):
    """Mark duplicate reads in a BAM file (GATK).

    Sends the "gatk_mark_duplicates" command to ``gatk_docker``. Relies on
    ``self``, ``markDuplicates_metrics`` and ``gatk_docker`` from the
    enclosing scope.

    Args:
        input_bam: Path of the BAM file to process.
    """
    # NOTE(review): this replaces every "bam" substring in the path, not only
    # the trailing extension (a directory or sample name containing "bam" is
    # rewritten too). Kept unchanged because other code may derive the same
    # path the same way -- confirm before tightening.
    deduped_bam = input_bam.replace("bam", "MarkDuplicates.bam")
    dedup_args = {
        "input": input_bam,
        "output": deduped_bam,
        "mark_dupl_metrics": markDuplicates_metrics,
        "optical_duplicate_pixel_dist": "2500",
        "assume_sort_order": "queryname",
        "clear_DT": "false",
        "add_pg_tag_to_reads": "false",
    }
    log_path = self.output_folder + self.sample_name + ".MarkDuplicates.log"
    gatk_image = self.docker_images_dict["gatk"]
    gatk_docker("gatk_mark_duplicates", dedup_args, log_path, self.ram, gatk_image)
def addReadGroups():
    """Print a start banner, then add read-group tags to the unsorted BAM.

    Sends the "gatk_add_read_groups" command to ``gatk_docker``, reading
    ``unsorted_bam`` and writing ``ReadGroups_bam`` (both taken from the
    enclosing scope). Library, platform, platform-unit and sample values
    come from ``self``.
    """
    # Banner: blank line, message, blank line.
    for banner_line in ("", "STARTING ADDING READGROUPS", ""):
        print(banner_line)

    rg_args = {
        "input": unsorted_bam,
        "output": ReadGroups_bam,
        "RGLB": self.lib_ID,
        "RGPL": self.pl_ID,
        "RGPU": self.pu_ID,
        "RGSM": self.sample_name,
    }
    log_path = self.output_folder + self.sample_name + ".ReadGroups.log"
    # NOTE(review): the image is looked up under "broadinstitute/gatk" here,
    # whereas similar helpers use the key "gatk" -- confirm which key this
    # docker_images_dict actually defines.
    gatk_image = self.docker_images_dict["broadinstitute/gatk"]
    gatk_docker("gatk_add_read_groups", rg_args, log_path, self.ram, gatk_image)