def post_align(out_sam):
    """Prepare the sorted BAM for an alignment and build its coverage tracks.

    Reads ``args``, ``logger``, ``files_to_delete``, ``reference`` and
    ``Config`` from outside this function; none of them is defined here.

    :param out_sam: alignment file handed to ``prepare_bam``.
    :return: the value returned by ``prepare_bam`` (the sorted BAM).
    """
    post_align_banner = 'Post-Alignment using SAMTOOLS, PICARD etc...'
    coverage_banner = 'Creating BedGraph Coverage'

    keep_logging('START: ' + post_align_banner, 'START: ' + post_align_banner,
                 logger, 'info')
    out_dir = args.output_folder
    sample_name = args.analysis_name
    sorted_bam = prepare_bam(out_sam, out_dir, sample_name, files_to_delete,
                             logger, Config)
    keep_logging('END: ' + post_align_banner, 'END: ' + post_align_banner,
                 logger, 'info')

    keep_logging('START: ' + coverage_banner, 'START: ' + coverage_banner,
                 logger, 'info')
    bedgraph_coverage(sorted_bam, out_dir, sample_name, reference, logger,
                      Config)
    # The bedtools result is not needed here; the call is kept because the
    # original ran it as part of this stage.
    bedtools(sorted_bam, out_dir, sample_name, logger, Config)
    keep_logging('END: ' + coverage_banner, 'END: ' + coverage_banner,
                 logger, 'info')

    return sorted_bam
def bedgraph(out_sorted_bam):
    """Run ``bedtools`` on a sorted BAM and hand back its result.

    Reads ``args``, ``logger`` and ``Config`` from outside this function.

    :param out_sorted_bam: sorted BAM passed straight through to ``bedtools``.
    :return: whatever ``bedtools`` returns (the original stored it as
        ``final_coverage_file``).
    """
    return bedtools(out_sorted_bam, args.output_folder, args.analysis_name,
                    logger, Config)
def pipeline(args, logger):
    """Run the variant calling pipeline starting from the step in ``args.steps``.

    ``args.steps`` is a comma-separated string.

    * A single entry runs a stand-alone action: ``coverage_depth_stats``,
      ``filter``, ``stats``, ``All``, ``bedtools`` or ``varcall``.
    * Several entries mean "resume from the first entry": only the first
      entry is inspected and every later stage of
      clean -> align -> post-align -> varcall -> filter -> stats is run.

    Attributes read from ``args``: ``index``, ``type``, ``forward_raw``,
    ``reverse_raw``, ``steps``, ``output_folder``, ``analysis_name``,
    ``croplength`` and ``downsample``.

    :param args: parsed command-line options (see attributes above).
    :param logger: logger object forwarded to ``keep_logging`` and the stages.
    :return: None. May terminate the process through ``exit()`` when required
        inputs are missing or the configured variant caller is unknown.
    """
    keep_logging('START: Pipeline', 'START: Pipeline', logger, 'info')

    # ------------------------------------------------------------------
    # Sanitation checks: input reads, reference index, Java.
    # ------------------------------------------------------------------
    keep_logging('START: Checking Dependencies...', 'Checking Dependencies',
                 logger, 'info')

    # Reference genome path, built from the config section named by args.index.
    reference = ConfigSectionMap(args.index, Config)['ref_path'] + "/" + \
        ConfigSectionMap(args.index, Config)['ref_name']
    keep_logging(
        'Getting Reference Genome name from config file: {}'.format(reference),
        'Getting Reference Genome name from config file: {}'.format(reference),
        logger, 'info')

    # Check that the input read files exist.
    if args.type == "PE":
        file_exists(args.forward_raw, args.reverse_raw, reference)
    elif args.type == "BAM":
        print("BAM type... Not Integrated... continue")
    else:
        # Single-end: the forward file is passed in both positions.
        file_exists(args.forward_raw, args.forward_raw, reference)

    java_check()
    keep_logging('END: Checking Dependencies...', 'END: Checking Dependencies',
                 logger, 'info')

    # Values given with -steps decide where the pipeline starts.
    steps_list = args.steps.split(',')

    # Conventional locations of the intermediate files written by the stages.
    sorted_bam_path = "%s/%s_aln_sort.bam" % (args.output_folder,
                                              args.analysis_name)
    raw_vcf_path = "%s/%s_aln_mpileup_raw.vcf" % (args.output_folder,
                                                  args.analysis_name)
    indel_removed_vcf_path = \
        "%s/%s_aln_mpileup_raw.vcf_5bp_indel_removed.vcf" % (
            args.output_folder, args.analysis_name)
    depth_summary_path = "%s/%s_depth_of_coverage.sample_summary" % (
        args.output_folder, args.analysis_name)

    # ------------------------------------------------------------------
    # Individual stages.
    #
    # The nested functions below read ``out_sorted_bam`` and
    # ``final_raw_vcf`` from this function's scope, so the dispatch code at
    # the bottom must bind them BEFORE calling a stage that needs them.
    # NOTE(review): ``files_to_delete`` is not defined in this function; it
    # is presumably a module-level list - confirm.
    # ------------------------------------------------------------------

    def clean():
        """Stage 1: pre-process the raw reads with Trimmomatic."""
        keep_logging('START: Pre-Processing Raw reads using Trimmomatic',
                     'START: Pre-Processing Raw reads using Trimmomatic',
                     logger, 'info')
        # Non paired-end runs pass the literal string "None" as reverse reads.
        reverse_raw = args.reverse_raw if args.type == "PE" else "None"
        trimmomatic(args.forward_raw, reverse_raw, args.output_folder,
                    args.croplength, logger, Config)
        keep_logging('END: Pre-Processing Raw reads using Trimmomatic',
                     'END: Pre-Processing Raw reads using Trimmomatic',
                     logger, 'info')

    def align_reads():
        """Stage 2: map the reads and return the value given by ``align``."""
        keep_logging('START: Mapping Reads using BWA',
                     'START: Mapping Reads using BWA', logger, 'info')
        split_field = prepare_readgroup(
            args.forward_raw,
            ConfigSectionMap("pipeline", Config)['aligner'], logger)
        out_sam = align(args.output_folder, args.index, split_field,
                        args.analysis_name, files_to_delete, logger, Config,
                        args.type)
        keep_logging('END: Mapping Reads using BWA',
                     'END: Mapping Reads using BWA', logger, 'info')
        return out_sam

    def coverage_depth_stats():
        """Run GATK DepthOfCoverage plus alignment stats on ``out_sorted_bam``.

        Returns the value produced by ``gatk_DepthOfCoverage``.
        """
        depth_of_coverage_file = gatk_DepthOfCoverage(
            out_sorted_bam, args.output_folder, args.analysis_name, reference,
            logger, Config)
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        return depth_of_coverage_file

    def post_align(out_sam):
        """Stage 3: prepare the sorted BAM and create the coverage tracks."""
        keep_logging('START: Post-Alignment using SAMTOOLS, PICARD etc...',
                     'START: Post-Alignment using SAMTOOLS, PICARD etc...',
                     logger, 'info')
        prepared_bam = prepare_bam(out_sam, args.output_folder,
                                   args.analysis_name, files_to_delete, logger,
                                   Config)
        keep_logging('END: Post-Alignment using SAMTOOLS, PICARD etc...',
                     'END: Post-Alignment using SAMTOOLS, PICARD etc...',
                     logger, 'info')
        keep_logging('START: Creating BedGraph Coverage',
                     'START: Creating BedGraph Coverage', logger, 'info')
        bedgraph_coverage(prepared_bam, args.output_folder,
                          args.analysis_name, reference, logger, Config)
        bedtools(prepared_bam, args.output_folder, args.analysis_name, logger,
                 Config)
        keep_logging('END: Creating BedGraph Coverage',
                     'END: Creating BedGraph Coverage', logger, 'info')
        return prepared_bam

    def varcall():
        """Stage 4: call variants with the caller named in the config file.

        Returns ``(final_raw_vcf, final_raw_indel_vcf)``. Exits the process
        when the configured caller is neither ``gatkhaplotypecaller`` nor
        ``samtools``.
        """
        keep_logging('START: Variant Calling', 'START: Variant Calling',
                     logger, 'info')
        caller = ConfigSectionMap("pipeline", Config)['variant_caller']
        if caller == "gatkhaplotypecaller":
            keep_logging('START: Variant Calling using GATK haplotyper.',
                         'START: Variant Calling using GATK haplotyper.',
                         logger, 'info')
            raw_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                      args.index, args.analysis_name, logger,
                                      Config)
            called_vcf = remove_5_bp_snp_indel(raw_vcf, args.output_folder,
                                               args.analysis_name, reference,
                                               logger, Config)
            called_indel_vcf = prepare_indel(raw_vcf, args.output_folder,
                                             args.analysis_name, reference,
                                             logger, Config)
            keep_logging('The final raw VCF file: {}'.format(called_vcf),
                         'The final raw VCF file: {}'.format(called_vcf),
                         logger, 'debug')
            keep_logging(
                'The final raw Indel VCF file: {}'.format(called_indel_vcf),
                'The final raw Indel VCF file: {}'.format(called_indel_vcf),
                logger, 'debug')
            # Previously this branch logged the Samtools END message.
            keep_logging('END: Variant Calling using GATK haplotyper.',
                         'END: Variant Calling using GATK haplotyper.',
                         logger, 'info')
        elif caller == "samtools":
            keep_logging(
                'START: Variant Calling using Samtools without post-align bam input files.',
                'START: Variant Calling using Samtools without post-align bam input files.',
                logger, 'info')
            # Indels come from the GATK-based helper in this mode.
            called_indel_vcf = prepare_indel_gatk(
                out_sorted_bam, args.output_folder, args.analysis_name,
                args.index, logger, Config)
            raw_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                      args.index, args.analysis_name, logger,
                                      Config)
            called_vcf = remove_5_bp_snp_indel(raw_vcf, args.output_folder,
                                               args.analysis_name, reference,
                                               logger, Config)
            keep_logging('The final raw VCF file: {}'.format(called_vcf),
                         'The final raw VCF file: {}'.format(called_vcf),
                         logger, 'debug')
            keep_logging(
                'END: Variant Calling using Samtools without post-align bam input files.',
                'END: Variant Calling using Samtools without post-align bam input files.',
                logger, 'info')
        else:
            keep_logging(
                'Please provide Variant Caller name in config file under the section [pipeline]. Options for Variant caller: 1. samtools 2. gatkhaplotypecaller',
                'Please provide Variant Caller name in config file under the section [pipeline]. Options for Variant caller: 1. samtools 2. gatkhaplotypecaller',
                logger, 'info')
            exit()
        # Emitted after the caller-specific work; it used to sit behind the
        # returns of every branch and was never reached.
        keep_logging('END: Variant Calling', 'END: Variant Calling', logger,
                     'info')
        return called_vcf, called_indel_vcf

    def filter_stage(gatk_depth_of_coverage_file):
        """Stage 5: filter SNPs and indels using the average depth.

        The average depth is the third tab-separated field of the ``Total``
        line in ``gatk_depth_of_coverage_file``. Exits the process when that
        file does not exist.
        """
        keep_logging('START: Variant Filteration', 'START: Variant Filteration',
                     logger, 'info')
        if not os.path.isfile(gatk_depth_of_coverage_file):
            file_basename = os.path.basename(gatk_depth_of_coverage_file)
            keep_logging(
                'The input file {} does not exists. Please provide another file with full path or check the files path.\n'
                .format(file_basename),
                'The input file {} does not exists. Please provide another file or check the files path.\n'
                .format(file_basename), logger, 'exception')
            exit()
        Avg_dp_cmd = "grep \'^Total\' %s | awk -F\'\t\' \'{print $3}\'" % gatk_depth_of_coverage_file
        proc = sp.Popen([Avg_dp_cmd], stdout=sp.PIPE, shell=True)
        out, _err = proc.communicate()
        Avg_dp = float(out)
        print("The Average Depth per reference genome base is: %s" % Avg_dp)
        filter_variants(final_raw_vcf, args.output_folder, args.analysis_name,
                        args.index, logger, Config, Avg_dp)
        # The indel VCF is located by naming convention next to the raw VCF.
        final_raw_indel_vcf = raw_vcf_path + "_indel.vcf"
        filter_indels(final_raw_indel_vcf, args.output_folder,
                      args.analysis_name, args.index, logger, Config, Avg_dp)
        keep_logging('END: Variant Filteration', 'END: Variant Filteration',
                     logger, 'info')

    def stats():
        """Stage 6: generate alignment, VCF and Picard statistics reports."""
        keep_logging('START: Generating Statistics Reports',
                     'START: Generating Statistics Reports', logger, 'info')
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        vcf_stats(final_raw_vcf, args.output_folder, args.analysis_name,
                  logger, Config)
        picardstats(out_sorted_bam, args.output_folder, args.analysis_name,
                    args.index, logger, Config)
        keep_logging('END: Generating Statistics Reports',
                     'END: Generating Statistics Reports', logger, 'info')

    def depth_of_coverage_summary():
        """Return the depth-of-coverage summary, generating it when absent."""
        if os.path.exists(depth_summary_path):
            return depth_summary_path
        return coverage_depth_stats()

    def intermediates_exist(depth_file):
        """Tell whether every file a stand-alone filter/stats run needs exists."""
        return all(
            os.path.exists(path)
            for path in (sorted_bam_path, indel_removed_vcf_path, depth_file,
                         raw_vcf_path))

    def abort_missing_intermediates():
        """Log that upstream outputs are missing and stop the process."""
        keep_logging(
            'The required intermediate files does not exists. Please rerun the variant calling pipeline to generate the files\n',
            'The required intermediate files does not exists. Please rerun the variant calling pipeline to generate the files',
            logger, 'exception')
        exit()

    # ------------------------------------------------------------------
    # Optional downsampling replaces the input reads before any stage runs.
    # ------------------------------------------------------------------
    if args.downsample == "yes":
        read1, read2 = downsample(args, logger)
        args.forward_raw = read1
        args.reverse_raw = read2
        print("Using downsampled forward reads %s" % args.forward_raw)
        print("Using downsampled reverse reads %s" % args.reverse_raw)

    first_step = steps_list[0]

    # ------------------------------------------------------------------
    # Single step requested: run that action on existing intermediates.
    # ------------------------------------------------------------------
    if len(steps_list) == 1:
        if first_step == "coverage_depth_stats":
            out_sorted_bam = sorted_bam_path
            coverage_depth_stats()
        elif first_step == "filter":
            out_sorted_bam = sorted_bam_path
            final_raw_vcf = indel_removed_vcf_path
            gatk_depth_of_coverage_file = depth_of_coverage_summary()
            if intermediates_exist(gatk_depth_of_coverage_file):
                filter_stage(gatk_depth_of_coverage_file)
                stats()
            else:
                abort_missing_intermediates()
        elif first_step == "stats":
            out_sorted_bam = sorted_bam_path
            final_raw_vcf = indel_removed_vcf_path
            if not os.path.exists(depth_summary_path):
                print(depth_summary_path)
            gatk_depth_of_coverage_file = depth_of_coverage_summary()
            if intermediates_exist(gatk_depth_of_coverage_file):
                stats()
            else:
                abort_missing_intermediates()
        elif first_step == "All":
            clean()
            out_sam = align_reads()
            post_align(out_sam)
            # Later stages use the conventional file locations rather than
            # the paths returned by post_align() / varcall().
            out_sorted_bam = sorted_bam_path
            gatk_depth_of_coverage_file = depth_of_coverage_summary()
            varcall()
            final_raw_vcf = indel_removed_vcf_path
            filter_stage(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "bedtools":
            bedtools(sorted_bam_path, args.output_folder, args.analysis_name,
                     logger, Config)
        elif first_step == "varcall":
            out_sorted_bam = sorted_bam_path
            depth_of_coverage_summary()
            varcall()
        return

    # ------------------------------------------------------------------
    # Several steps requested: resume at the first one and run every stage
    # that follows it.
    # ------------------------------------------------------------------
    stage_order = ("clean", "align", "post-align", "varcall", "filter",
                   "stats")
    if first_step not in stage_order:
        keep_logging(
            'Seems like the Analysis Steps are not in sequential order. Please recheck the -steps argument and run the pipeline again',
            'Seems like the Analysis Steps are not in sequential order. Please recheck the -steps argument and run the pipeline again',
            logger, 'exception')
        return
    start = stage_order.index(first_step)

    def runs(stage):
        """Tell whether ``stage`` is at or after the requested starting step."""
        return start <= stage_order.index(stage)

    if runs("clean"):
        clean()

    if runs("align"):
        out_sam = align_reads()
    elif runs("post-align"):
        # Resuming at post-align: reuse the SAM written by an earlier run.
        out_sam = "%s/%s_aln.sam" % (args.output_folder, args.analysis_name)

    if runs("post-align"):
        # post_align() needs the SAM file; it used to be called without it
        # when resuming from "clean" or "align".
        out_sorted_bam = post_align(out_sam)
    else:
        out_sorted_bam = sorted_bam_path
        if first_step == "varcall" and \
                not os.path.exists("%s.bai" % out_sorted_bam):
            index_bam(out_sorted_bam, args.output_folder, logger, Config)

    # out_sorted_bam is bound above for every starting step, so the coverage
    # helper can always read it (resuming at "stats" used to call it first).
    gatk_depth_of_coverage_file = depth_of_coverage_summary()

    if runs("varcall"):
        final_raw_vcf, _final_raw_indel_vcf = varcall()
    else:
        final_raw_vcf = indel_removed_vcf_path

    if runs("filter"):
        filter_stage(gatk_depth_of_coverage_file)

    stats()
def pipeline(args, logger):
    """Run every stage of the variant calling pipeline once, in order.

    Stages: dependency checks, Trimmomatic, read mapping, post-alignment,
    then either depth-of-coverage statistics only (when
    ``args.coverage_depth_stats`` is truthy) or coverage tracks, variant
    calling, variant filtering and statistics.

    :param args: parsed command-line options; reads ``index``, ``type``,
        ``forward_raw``, ``reverse_raw``, ``output_folder``, ``croplength``,
        ``bam_input``, ``analysis_name`` and ``coverage_depth_stats``.
    :param logger: logger object forwarded to ``keep_logging`` and the stages.
    :return: None. Calls ``exit()`` when the configured variant caller is not
        recognised.
    """

    def log_info(console_text, file_text=None):
        # Most messages are identical for both keep_logging text arguments.
        if file_text is None:
            file_text = console_text
        keep_logging(console_text, file_text, logger, 'info')

    def log_debug(text):
        keep_logging(text, text, logger, 'debug')

    log_info('START: Pipeline')

    # Dependency checks: input files, reference index, Java.
    log_info('START: Checking Dependencies...', 'Checking Dependencies')

    ref_dir = ConfigSectionMap(args.index, Config)['ref_path']
    ref_file = ConfigSectionMap(args.index, Config)['ref_name']
    reference = ref_dir + "/" + ref_file
    log_info('Getting Reference Genome name from config file: {}'.format(
        reference))

    if args.type == "PE":
        file_exists(args.forward_raw, args.reverse_raw, reference)
    else:
        # Non paired-end: the forward file is passed in both positions.
        file_exists(args.forward_raw, args.forward_raw, reference)

    java_check()
    log_info('END: Checking Dependencies...', 'END: Checking Dependencies')

    # 1. Pre-processing raw reads using Trimmomatic.
    log_info('START: Pre-Processing Raw reads using Trimmomatic')
    # Non paired-end runs pass the literal string "None" as reverse reads.
    mate_reads = args.reverse_raw if args.type == "PE" else "None"
    trimmomatic(args.forward_raw, mate_reads, args.output_folder,
                args.croplength, logger, Config)
    log_info('END: Pre-Processing Raw reads using Trimmomatic')

    # 2. Alignment using BWA.
    log_info('START: Mapping Reads using BWA')
    split_field = prepare_readgroup(args.forward_raw, logger)
    files_to_delete = []
    out_sam = align(args.bam_input, args.output_folder, args.index,
                    split_field, args.analysis_name, files_to_delete, logger,
                    Config, args.type)
    log_info('END: Mapping Reads using BWA')

    # 3. Post-alignment using SAMTOOLS, PICARD etc.
    log_info('START: Post-Alignment using SAMTOOLS, PICARD etc...')
    prepare_bam(out_sam, args.output_folder, args.analysis_name,
                files_to_delete, logger, Config)
    log_info('END: Post-Alignment using SAMTOOLS, PICARD etc...')
    # The value returned by prepare_bam is not used; later stages work from
    # the conventional sorted-BAM location.
    out_sorted_bam = "%s/%s_aln_sort.bam" % (args.output_folder,
                                             args.analysis_name)

    # Depth-of-coverage-only mode: report coverage and stop before variant
    # calling.
    # NOTE(review): the collapsed source does not show where the original
    # ``else:`` ended; it is read here as covering the rest of the function,
    # per its "stop. Dont proceed to variant calling step" comment - confirm.
    if args.coverage_depth_stats:
        gatk_DepthOfCoverage(out_sorted_bam, args.output_folder,
                             args.analysis_name, reference, logger, Config)
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        return

    # 3 (continued). Coverage tracks.
    log_info('START: Creating BedGraph Coverage')
    bedgraph_coverage(out_sorted_bam, args.output_folder, args.analysis_name,
                      reference, logger, Config)
    bedtools(out_sorted_bam, args.output_folder, args.analysis_name, logger,
             Config)
    log_info('END: Creating BedGraph Coverage')

    # 4. Variant calling.
    log_info('START: Variant Calling')
    caller = ConfigSectionMap("pipeline", Config)['variant_caller']
    # Callers that first run post_align_bam; they differ only in log wording.
    post_align_callers = {
        "samtoolswithpostalignbam": "Samtools",
        "gatkhaplotypecaller": "GATK haplotyper",
    }
    if caller in post_align_callers:
        banner = 'Variant Calling using %s and post-align bam input files' % (
            post_align_callers[caller])
        log_info('START: ' + banner)
        out_finalbam = post_align_bam(out_sorted_bam, args.output_folder,
                                      args.index, args.analysis_name)
        # NOTE(review): variant_calling gets four arguments here but six
        # (with logger and Config) in the samtools branch - confirm intended.
        final_raw_vcf = variant_calling(out_finalbam, args.output_folder,
                                        args.index, args.analysis_name)
        log_debug('The final raw VCF file: {}'.format(final_raw_vcf))
        log_info('END: ' + banner)
    elif caller == "samtools":
        log_info('START: Variant Calling using Samtools without post-align bam input files.')
        mpileup_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                      args.index, args.analysis_name, logger,
                                      Config)
        final_raw_vcf = remove_5_bp_snp_indel(mpileup_vcf, args.output_folder,
                                              args.analysis_name, reference,
                                              logger, Config)
        log_debug('The final raw VCF file: {}'.format(final_raw_vcf))
        log_info('END: Variant Calling using Samtools without post-align bam input files.')
    else:
        log_info('Please provide Variant Caller name in config file under the section [pipeline]. Options for Variant caller: 1. samtools 2. samtoolswithpostalignbam 3. gatkhaplotypecaller')
        exit()
    log_info('END: Variant Calling')

    # 5. Variant filtration.
    log_info('START: Variant Filteration')
    filter2_variants(final_raw_vcf, args.output_folder, args.analysis_name,
                     args.index, logger, Config)
    log_info('END: Variant Filteration')

    # 6. Statistics.
    log_info('START: Generating Statistics Reports')
    alignment_stats(out_sorted_bam, args.output_folder, args.analysis_name,
                    logger, Config)
    gatk_DepthOfCoverage(out_sorted_bam, args.output_folder,
                         args.analysis_name, reference, logger, Config)
    vcf_stats(final_raw_vcf, args.output_folder, args.analysis_name, logger,
              Config)
    log_info('END: Generating Statistics Reports')