Exemplo n.º 1
0
 def post_align(out_sam):
     """Convert the aligned SAM into a sorted BAM, then build coverage files.

     Runs ``prepare_bam`` on ``out_sam`` and afterwards calls
     ``bedgraph_coverage`` and ``bedtools`` on the resulting BAM.

     ``args``, ``logger``, ``Config``, ``files_to_delete`` and ``reference``
     are not parameters; they are read from the enclosing scope, which is
     not part of this snippet.

     :param out_sam: value handed to ``prepare_bam`` as the alignment input.
     :return: whatever ``prepare_bam`` returned (the sorted BAM).
     """

     def announce(message):
         # The same text is passed twice to keep_logging, as in every
         # call this function makes.
         keep_logging(message, message, logger, 'info')

     output_dir = args.output_folder
     sample = args.analysis_name

     announce('START: Post-Alignment using SAMTOOLS, PICARD etc...')
     sorted_bam = prepare_bam(out_sam, output_dir, sample, files_to_delete,
                              logger, Config)
     announce('END: Post-Alignment using SAMTOOLS, PICARD etc...')

     announce('START: Creating BedGraph Coverage')
     bedgraph_coverage(sorted_bam, output_dir, sample, reference, logger,
                       Config)
     # The bedtools return value is not used by this stage.
     bedtools(sorted_bam, output_dir, sample, logger, Config)
     announce('END: Creating BedGraph Coverage')

     return sorted_bam
Exemplo n.º 2
0
 def bedgraph(out_sorted_bam):
     """Run ``bedtools`` on the sorted BAM and hand back its result.

     ``args``, ``logger`` and ``Config`` are read from the enclosing scope,
     which is not part of this snippet.

     :param out_sorted_bam: sorted BAM passed straight to ``bedtools``.
     :return: the value returned by ``bedtools`` (the coverage file).
     """
     return bedtools(
         out_sorted_bam,
         args.output_folder,
         args.analysis_name,
         logger,
         Config,
     )
Exemplo n.º 3
0
def pipeline(args, logger):
    """Run the variant-calling stages selected by ``args.steps``.

    ``args.steps`` is a comma-separated string; only its first entry is
    inspected.

    * One entry runs a single entry point: ``coverage_depth_stats``,
      ``filter`` (then statistics), ``stats``, ``bedtools``, ``varcall`` or
      ``All`` (every stage).  Any other single name does nothing.
    * Several entries start at the first named stage (``clean``, ``align``,
      ``post-align``, ``varcall``, ``filter`` or ``stats``) and run every
      later stage through statistics.

    Stages that are skipped are expected to have left their outputs in
    ``args.output_folder`` under the ``args.analysis_name`` prefix; the
    conventional file names are rebuilt here.

    ``Config``, ``files_to_delete``, ``os``, ``sp`` and all stage helpers
    are not defined in this function; they are assumed to be available at
    module level (NOTE(review): confirm ``files_to_delete`` is a module
    global).

    :param args: parsed command-line namespace.  Attributes read here:
        ``type``, ``forward_raw``, ``reverse_raw``, ``output_folder``,
        ``analysis_name``, ``index``, ``steps``, ``croplength`` and
        ``downsample``.
    :param logger: logger forwarded to ``keep_logging`` and the helpers.
    """

    def announce(message, level='info'):
        # Same text for both message slots of keep_logging.
        keep_logging(message, message, logger, level)

    announce('START: Pipeline')

    # ------------------------- Sanitation checks -------------------------
    keep_logging('START: Checking Dependencies...', 'Checking Dependencies',
                 logger, 'info')

    # Reference genome path, assembled from the [args.index] config section.
    reference = ConfigSectionMap(
        args.index, Config)['ref_path'] + "/" + ConfigSectionMap(
            args.index, Config)['ref_name']
    announce(
        'Getting Reference Genome name from config file: {}'.format(reference))

    # Check that the input files exist.  Anything that is neither "PE" nor
    # "BAM" is treated as single-end (the forward file is checked twice).
    if args.type == "PE":
        file_exists(args.forward_raw, args.reverse_raw, reference)
    elif args.type == "BAM":
        print("BAM type... Not Integrated... continue")
    else:
        file_exists(args.forward_raw, args.forward_raw, reference)

    # Check Java version.
    java_check()
    keep_logging('END: Checking Dependencies...', 'END: Checking Dependencies',
                 logger, 'info')

    # The first entry of -steps decides where the pipeline starts.
    steps_list = args.steps.split(',')

    # ----------------------- One closure per stage ------------------------
    # The closures read ``out_sorted_bam`` and ``final_raw_vcf`` from this
    # function's scope, so the dispatch code below must bind those names
    # before calling a closure that needs them.

    ## 1. Pre-Processing Raw reads using Trimmomatic
    def clean():
        announce('START: Pre-Processing Raw reads using Trimmomatic')
        if args.type == "PE":
            trimmomatic(args.forward_raw, args.reverse_raw, args.output_folder,
                        args.croplength, logger, Config)
        else:
            # The literal string "None" stands in for the missing mate file.
            trimmomatic(args.forward_raw, "None", args.output_folder,
                        args.croplength, logger, Config)
        announce('END: Pre-Processing Raw reads using Trimmomatic')

    ## 2. Stages: Alignment using BWA
    def align_reads():
        announce('START: Mapping Reads using BWA')
        split_field = prepare_readgroup(
            args.forward_raw,
            ConfigSectionMap("pipeline", Config)['aligner'], logger)
        out_sam = align(args.output_folder, args.index, split_field,
                        args.analysis_name, files_to_delete, logger, Config,
                        args.type)
        announce('END: Mapping Reads using BWA')
        return out_sam

    # Depth of coverage and alignment statistics for the sorted BAM.
    def coverage_depth_stats():
        depth_file = gatk_DepthOfCoverage(out_sorted_bam, args.output_folder,
                                          args.analysis_name, reference,
                                          logger, Config)
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        return depth_file

    def depth_summary(announce_missing=False):
        """Return the depth-of-coverage summary, generating it if absent.

        ``out_sorted_bam`` must already be bound in the enclosing scope,
        because ``coverage_depth_stats`` reads it.
        """
        summary = "%s/%s_depth_of_coverage.sample_summary" % (
            args.output_folder, args.analysis_name)
        if not os.path.exists(summary):
            if announce_missing:
                print(summary)
            summary = coverage_depth_stats()
        return summary

    ## 3. Stages: Post-Alignment using SAMTOOLS, PICARD etc
    def post_align(out_sam):
        announce('START: Post-Alignment using SAMTOOLS, PICARD etc...')
        sorted_bam = prepare_bam(out_sam, args.output_folder,
                                 args.analysis_name, files_to_delete, logger,
                                 Config)
        announce('END: Post-Alignment using SAMTOOLS, PICARD etc...')
        announce('START: Creating BedGraph Coverage')
        bedgraph_coverage(sorted_bam, args.output_folder, args.analysis_name,
                          reference, logger, Config)
        bedtools(sorted_bam, args.output_folder, args.analysis_name, logger,
                 Config)
        announce('END: Creating BedGraph Coverage')
        return sorted_bam

    ## 4. Stages: Variant Calling
    def varcall():
        """Call variants; return ``(raw_vcf, raw_indel_vcf)``."""
        announce('START: Variant Calling')
        caller = ConfigSectionMap("pipeline", Config)['variant_caller']
        if caller == "gatkhaplotypecaller":
            announce('START: Variant Calling using GATK haplotyper.')
            mpileup_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                          args.index, args.analysis_name,
                                          logger, Config)
            raw_vcf = remove_5_bp_snp_indel(mpileup_vcf, args.output_folder,
                                            args.analysis_name, reference,
                                            logger, Config)
            raw_indel_vcf = prepare_indel(mpileup_vcf, args.output_folder,
                                          args.analysis_name, reference,
                                          logger, Config)
            announce('The final raw VCF file: {}'.format(raw_vcf), 'debug')
            announce(
                'The final raw Indel VCF file: {}'.format(raw_indel_vcf),
                'debug')
            # The original logged the Samtools END text here (copy-paste).
            announce('END: Variant Calling using GATK haplotyper.')
        elif caller == "samtools":
            announce(
                'START: Variant Calling using Samtools without post-align bam input files.'
            )
            # Indels come from the GATK-based helper in this mode.
            raw_indel_vcf = prepare_indel_gatk(out_sorted_bam,
                                               args.output_folder,
                                               args.analysis_name, args.index,
                                               logger, Config)
            mpileup_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                          args.index, args.analysis_name,
                                          logger, Config)
            raw_vcf = remove_5_bp_snp_indel(mpileup_vcf, args.output_folder,
                                            args.analysis_name, reference,
                                            logger, Config)
            announce('The final raw VCF file: {}'.format(raw_vcf), 'debug')
            announce(
                'END: Variant Calling using Samtools without post-align bam input files.'
            )
        else:
            announce(
                'Please provide Variant Caller name in config file under the section [pipeline]. Options for Variant caller: 1. samtools 2. gatkhaplotypecaller'
            )
            exit()
        # Previously unreachable: every branch above returned or exited.
        announce('END: Variant Calling')
        return raw_vcf, raw_indel_vcf

    ## 5. Stages: Variant Filteration
    # Named filter_vcf so the builtin ``filter`` is not shadowed.
    def filter_vcf(gatk_depth_of_coverage_file):
        announce('START: Variant Filteration')
        final_raw_vcf_mpileup = "%s/%s_aln_mpileup_raw.vcf" % (
            args.output_folder, args.analysis_name)
        if not os.path.isfile(gatk_depth_of_coverage_file):
            file_basename = os.path.basename(gatk_depth_of_coverage_file)
            keep_logging(
                'The input file {} does not exists. Please provide another file with full path or check the files path.\n'
                .format(file_basename),
                'The input file {} does not exists. Please provide another file or check the files path.\n'
                .format(file_basename), logger, 'exception')
            exit()
        # Third tab-separated field of the "Total" row in the summary file.
        Avg_dp_cmd = "grep \'^Total\' %s | awk -F\'\t\' \'{print $3}\'" % gatk_depth_of_coverage_file
        proc = sp.Popen([Avg_dp_cmd], stdout=sp.PIPE, shell=True)
        (out, err) = proc.communicate()
        Avg_dp = float(out)
        print("The Average Depth per reference genome base is: %s" % Avg_dp)
        filter_variants(final_raw_vcf, args.output_folder, args.analysis_name,
                        args.index, logger, Config, Avg_dp)
        final_raw_indel_vcf = final_raw_vcf_mpileup + "_indel.vcf"
        filter_indels(final_raw_indel_vcf, args.output_folder,
                      args.analysis_name, args.index, logger, Config, Avg_dp)
        announce('END: Variant Filteration')

    ## 6. Stages: Statistics
    def stats():
        announce('START: Generating Statistics Reports')
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        vcf_stats(final_raw_vcf, args.output_folder, args.analysis_name,
                  logger, Config)
        picardstats(out_sorted_bam, args.output_folder, args.analysis_name,
                    args.index, logger, Config)
        announce('END: Generating Statistics Reports')

    # --------------------------- Optional downsampling --------------------
    if args.downsample == "yes":
        read1, read2 = downsample(args, logger)
        args.forward_raw = read1
        args.reverse_raw = read2
        print("Using downsampled forward reads %s" % args.forward_raw)
        print("Using downsampled reverse reads %s" % args.reverse_raw)

    # Conventional locations of intermediate files from earlier stages.
    sorted_bam_path = "%s/%s_aln_sort.bam" % (args.output_folder,
                                              args.analysis_name)
    filtered_vcf_path = "%s/%s_aln_mpileup_raw.vcf_5bp_indel_removed.vcf" % (
        args.output_folder, args.analysis_name)
    mpileup_vcf_path = "%s/%s_aln_mpileup_raw.vcf" % (args.output_folder,
                                                      args.analysis_name)

    first_step = steps_list[0]

    # ------------------------------ Dispatch -------------------------------
    if len(steps_list) == 1:
        if first_step == "coverage_depth_stats":
            out_sorted_bam = sorted_bam_path
            coverage_depth_stats()

        elif first_step in ("filter", "stats"):
            out_sorted_bam = sorted_bam_path
            final_raw_vcf = filtered_vcf_path
            # Only the "stats" entry point prints the missing summary path.
            gatk_depth_of_coverage_file = depth_summary(
                announce_missing=(first_step == "stats"))
            required = (out_sorted_bam, final_raw_vcf,
                        gatk_depth_of_coverage_file, mpileup_vcf_path)
            if all(os.path.exists(path) for path in required):
                if first_step == "filter":
                    filter_vcf(gatk_depth_of_coverage_file)
                stats()
            else:
                keep_logging(
                    'The required intermediate files does not exists. Please rerun the variant calling pipeline to generate the files\n',
                    'The required intermediate files does not exists. Please rerun the variant calling pipeline to generate the files',
                    logger, 'exception')
                exit()

        elif first_step == "All":
            clean()
            out_sam = align_reads()
            post_align(out_sam)
            # Downstream stages use the conventional paths, not the values
            # returned by post_align() and varcall().
            out_sorted_bam = sorted_bam_path
            gatk_depth_of_coverage_file = depth_summary()
            varcall()
            final_raw_vcf = filtered_vcf_path
            filter_vcf(gatk_depth_of_coverage_file)
            stats()

        elif first_step == "bedtools":
            out_sorted_bam = sorted_bam_path
            bedtools(out_sorted_bam, args.output_folder, args.analysis_name,
                     logger, Config)

        elif first_step == "varcall":
            out_sorted_bam = sorted_bam_path
            depth_summary()
            final_raw_vcf, final_raw_indel_vcf = varcall()

    # Several steps given: start at the first one and run through stats.
    else:
        if first_step == "clean":
            clean()
            out_sam = align_reads()
            # Fixed: post_align() used to be called without out_sam.
            out_sorted_bam = post_align(out_sam)
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf, final_raw_indel_vcf = varcall()
            filter_vcf(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "align":
            out_sam = align_reads()
            # Fixed: a second, argument-less post_align() call was removed.
            out_sorted_bam = post_align(out_sam)
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf, final_raw_indel_vcf = varcall()
            filter_vcf(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "post-align":
            out_sam = "%s/%s_aln.sam" % (args.output_folder,
                                         args.analysis_name)
            out_sorted_bam = post_align(out_sam)
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf, final_raw_indel_vcf = varcall()
            filter_vcf(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "varcall":
            out_sorted_bam = sorted_bam_path
            # Index the BAM when its .bai is missing.
            if not os.path.exists("%s.bai" % out_sorted_bam):
                index_bam(out_sorted_bam, args.output_folder, logger, Config)
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf, final_raw_indel_vcf = varcall()
            filter_vcf(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "filter":
            out_sorted_bam = sorted_bam_path
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf = filtered_vcf_path
            filter_vcf(gatk_depth_of_coverage_file)
            stats()
        elif first_step == "stats":
            # Fixed: bind out_sorted_bam before the depth summary may be
            # regenerated; coverage_depth_stats() reads it.
            out_sorted_bam = sorted_bam_path
            gatk_depth_of_coverage_file = depth_summary()
            final_raw_vcf = filtered_vcf_path
            stats()
        else:
            announce(
                'Seems like the Analysis Steps are not in sequential order. Please recheck the -steps argument and run the pipeline again',
                'exception')
Exemplo n.º 4
0
def pipeline(args, logger):
    """Run the linear variant-calling workflow from raw reads to statistics.

    Order of work: dependency checks, Trimmomatic pre-processing, read
    mapping, BAM preparation, and then either

    * depth-of-coverage plus alignment statistics only, when
      ``args.coverage_depth_stats`` is truthy, or
    * BedGraph coverage, variant calling (the caller is chosen by the
      ``variant_caller`` key of the ``[pipeline]`` config section),
      variant filtering and statistics.

    ``Config`` and every stage helper are not defined here; they are
    expected to come from the enclosing module.

    :param args: parsed command-line namespace.  Attributes read here:
        ``index``, ``type``, ``forward_raw``, ``reverse_raw``,
        ``output_folder``, ``croplength``, ``bam_input``,
        ``analysis_name`` and ``coverage_depth_stats``.
    :param logger: logger forwarded to ``keep_logging`` and the helpers.
    """

    def announce(message, level='info'):
        # Same text for both message slots of keep_logging.
        keep_logging(message, message, logger, level)

    announce('START: Pipeline')

    # Dependency checks: arguments, input files, reference index.
    keep_logging('START: Checking Dependencies...', 'Checking Dependencies',
                 logger, 'info')

    # Reference genome path from the [args.index] config section.
    ref_dir = ConfigSectionMap(args.index, Config)['ref_path']
    ref_file = ConfigSectionMap(args.index, Config)['ref_name']
    reference = ref_dir + "/" + ref_file
    announce(
        'Getting Reference Genome name from config file: {}'.format(reference))

    # Input file check; non-paired runs pass the forward file twice.
    if args.type == "PE":
        file_exists(args.forward_raw, args.reverse_raw, reference)
    else:
        file_exists(args.forward_raw, args.forward_raw, reference)

    # Java version check.
    java_check()
    keep_logging('END: Checking Dependencies...', 'END: Checking Dependencies',
                 logger, 'info')

    ## 1. Pre-Processing Raw reads using Trimmomatic
    announce('START: Pre-Processing Raw reads using Trimmomatic')
    # The literal string "None" stands in for the mate file when not "PE".
    mate_reads = args.reverse_raw if args.type == "PE" else "None"
    trimmomatic(args.forward_raw, mate_reads, args.output_folder,
                args.croplength, logger, Config)
    announce('END: Pre-Processing Raw reads using Trimmomatic')

    ## 2. Stages: Alignment using BWA
    announce('START: Mapping Reads using BWA')
    split_field = prepare_readgroup(args.forward_raw, logger)
    files_to_delete = []
    out_sam = align(args.bam_input, args.output_folder, args.index,
                    split_field, args.analysis_name, files_to_delete, logger,
                    Config, args.type)
    announce('END: Mapping Reads using BWA')

    ## 3. Stages: Post-Alignment using SAMTOOLS, PICARD etc
    announce('START: Post-Alignment using SAMTOOLS, PICARD etc...')
    prepare_bam(out_sam, args.output_folder, args.analysis_name,
                files_to_delete, logger, Config)
    announce('END: Post-Alignment using SAMTOOLS, PICARD etc...')
    # Later stages use the conventional sorted-BAM path, not the value
    # returned by prepare_bam.
    out_sorted_bam = "%s/%s_aln_sort.bam" % (args.output_folder,
                                             args.analysis_name)

    if args.coverage_depth_stats:
        # Depth of coverage only; no variant calling in this mode.
        gatk_DepthOfCoverage(out_sorted_bam, args.output_folder,
                             args.analysis_name, reference, logger, Config)
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
    else:
        ## Continue: 3. Stages: Post-Alignment using SAMTOOLS, PICARD etc
        announce('START: Creating BedGraph Coverage')
        bedgraph_coverage(out_sorted_bam, args.output_folder,
                          args.analysis_name, reference, logger, Config)
        bedtools(out_sorted_bam, args.output_folder, args.analysis_name,
                 logger, Config)
        announce('END: Creating BedGraph Coverage')

        ## 4. Stages: Variant Calling
        announce('START: Variant Calling')
        caller = ConfigSectionMap("pipeline", Config)['variant_caller']
        if caller in ("samtoolswithpostalignbam", "gatkhaplotypecaller"):
            # Both modes do the same work; only the log banner differs.
            if caller == "samtoolswithpostalignbam":
                banner = 'Variant Calling using Samtools and post-align bam input files'
            else:
                banner = 'Variant Calling using GATK haplotyper and post-align bam input files'
            announce('START: ' + banner)
            out_finalbam = post_align_bam(out_sorted_bam, args.output_folder,
                                          args.index, args.analysis_name)
            final_raw_vcf = variant_calling(out_finalbam, args.output_folder,
                                            args.index, args.analysis_name)
            announce('The final raw VCF file: {}'.format(final_raw_vcf),
                     'debug')
            announce('END: ' + banner)
        elif caller == "samtools":
            announce(
                'START: Variant Calling using Samtools without post-align bam input files.'
            )
            mpileup_vcf = variant_calling(out_sorted_bam, args.output_folder,
                                          args.index, args.analysis_name,
                                          logger, Config)
            final_raw_vcf = remove_5_bp_snp_indel(mpileup_vcf,
                                                  args.output_folder,
                                                  args.analysis_name,
                                                  reference, logger, Config)
            announce('The final raw VCF file: {}'.format(final_raw_vcf),
                     'debug')
            announce(
                'END: Variant Calling using Samtools without post-align bam input files.'
            )
        else:
            announce(
                'Please provide Variant Caller name in config file under the section [pipeline]. Options for Variant caller: 1. samtools 2. samtoolswithpostalignbam 3. gatkhaplotypecaller'
            )
            exit()
        announce('END: Variant Calling')

        ## 5. Stages: Variant Filteration
        announce('START: Variant Filteration')
        filter2_variants(final_raw_vcf, args.output_folder,
                         args.analysis_name, args.index, logger, Config)
        announce('END: Variant Filteration')

        ## 6. Stages: Statistics
        announce('START: Generating Statistics Reports')
        alignment_stats(out_sorted_bam, args.output_folder,
                        args.analysis_name, logger, Config)
        gatk_DepthOfCoverage(out_sorted_bam, args.output_folder,
                             args.analysis_name, reference, logger, Config)
        vcf_stats(final_raw_vcf, args.output_folder, args.analysis_name,
                  logger, Config)
        announce('END: Generating Statistics Reports')