import argparse
import logging
import os

import sqlalchemy

# Project-local pipeline step modules (import paths assumed from the
# surrounding repo layout).
import bam_validate
import BaseRecalibrator
import IndelRealigner
import index_util
import muse_call
import muse_sump
import pipe_util
import PrintReads
import RealignerTargetCreator
import verify_util


def main():
    parser = argparse.ArgumentParser(description='Broad cocleaning (indel realignment and BQSR) pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
        action = 'store_const',
        const = logging.DEBUG,
        dest = 'level',
        help = 'Enable debug logging.',
    )
    parser.set_defaults(level = logging.INFO)

    # Required flags.

    parser.add_argument('-r', '--reference_fasta_name',
                        required = True,
                        help = 'Reference fasta path.',
    )
    parser.add_argument('-indel', '--known_1k_genome_indel_sites',
                        required = True,
                        help = 'Reference INDEL path.',
    )
    parser.add_argument('-snp', '--dbsnp_known_snp_sites',
                        required = True,
                        help = 'Reference SNP path.',
    )
    parser.add_argument('-b', '--harmonized_bam_path',
                        required = False,
                        action="append",
                        help = 'Source bam path (may be repeated).',
    )
    parser.add_argument('-list', '--harmonized_bam_list_path',
                        required = False,
                        help = 'Source bam list path.',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required = False,
                        type = is_dir,
                        help = 'Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required = False,
                        type = is_dir,
                        help = 'Log file directory.',
    )
    parser.add_argument('-j', '--thread_count',
                        required = True,
                        type = is_nat,
                        help = 'Maximum number of threads for execution.',
    )
    parser.add_argument('-u', '--uuid',
                        required = True,
                        help = 'analysis_id string',
    )
    parser.add_argument('-m', '--md5',
                        required = False,
                        action = 'store_true',
                        help = 'Calculate final size/MD5.',
    )
    # NOTE: despite its name, setting this flag *keeps* intermediate files
    # (the value is inverted where the args are unpacked below).
    parser.add_argument('-e', '--eliminate_intermediate_files',
                        required = False,
                        action = 'store_true',
                        help = 'Keep intermediate files instead of deleting them to save disk space.',
    )
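
    # Example invocation (script name and paths hypothetical; -b may be
    # repeated, or a pre-built list file passed with -list instead):
    #   python broad_cocleaning.py -u <analysis_id> -j 8 \
    #       -r /ref/GRCh37.fa -indel /ref/1kg_indels.vcf -snp /ref/dbsnp.vcf \
    #       -b /data/sample1.bam -b /data/sample2.bam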

    args = parser.parse_args()
    reference_fasta_name = args.reference_fasta_name
    known_1k_genome_indel_sites = args.known_1k_genome_indel_sites
    dbsnp_known_snp_sites = args.dbsnp_known_snp_sites
    uuid = args.uuid
    harmonized_bam_path = args.harmonized_bam_path
    if not args.harmonized_bam_list_path:
        if not harmonized_bam_path:
            parser.error('supply --harmonized_bam_path and/or --harmonized_bam_list_path')
        # Write the supplied bam paths to a list file beside the first bam.
        list_dir = os.path.dirname(harmonized_bam_path[0])
        harmonized_bam_list_path = os.path.join(list_dir, uuid + '_harmonized_bam_list.list')
        with open(harmonized_bam_list_path, 'w') as handle:
            for bam in harmonized_bam_path:
                handle.write(bam + '\n')
    else:
        harmonized_bam_list_path = args.harmonized_bam_list_path

    scratch_dir = args.scratch_dir or os.path.dirname(harmonized_bam_list_path)
    log_dir = args.log_dir or os.path.dirname(harmonized_bam_list_path)
    thread_count = str(args.thread_count)
    # The -e flag is inverted: intermediate files are eliminated unless it is set.
    eliminate_intermediate_files = not args.eliminate_intermediate_files
    md5 = args.md5

    ## Logging
    logging.basicConfig(
        filename=os.path.join(log_dir, 'Broad_cocleaning_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)
    logger.info('harmonized_bam_list_path=%s' % harmonized_bam_list_path)
    if not args.harmonized_bam_path:
        # Fall back to reading the bam paths from the list file.
        with open(harmonized_bam_list_path) as f:
            harmonized_bam_path = f.read().splitlines()
    for path in harmonized_bam_path:
        logger.info('harmonized_bam_path=%s' % path)

    # Per-UUID SQLite database; the step modules below are assumed to record
    # their status/timing rows here.
    engine_path = 'sqlite:///' + os.path.join(log_dir, uuid + '_Broad_cocleaning.db')
    engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')
    
    ## Pipeline
    # Check for a .bai index for each bam; run samtools index where missing.
    RealignerTargetCreator.index(uuid, harmonized_bam_list_path, engine, logger)
    
    # Call RealignerTargetCreator on the harmonized bam list to produce realignment target intervals.
    harmonized_bam_intervals_path = RealignerTargetCreator.RTC(uuid, harmonized_bam_list_path, thread_count, reference_fasta_name, known_1k_genome_indel_sites, engine, logger)
    
    # Call IndelRealigner on all bams together, but write each read to the
    # output bam corresponding to the input bam it came from.
    harmonized_IR_bam_list_path = IndelRealigner.IR(uuid, harmonized_bam_list_path, reference_fasta_name, known_1k_genome_indel_sites, harmonized_bam_intervals_path, engine, logger)
    
    # Build a BQSR recalibration table per bam and apply it with PrintReads.
    Analysis_ready_bam_list_path = []
    for bam in harmonized_IR_bam_list_path:
        harmonized_IR_bam_BQSR_table_path = BaseRecalibrator.BQSR(uuid, bam, thread_count, reference_fasta_name, dbsnp_known_snp_sites, engine, logger)
        Analysis_ready_bam_path = PrintReads.PR(uuid, bam, thread_count, reference_fasta_name, harmonized_IR_bam_BQSR_table_path, engine, logger)
        bam_validate.bam_validate(uuid, Analysis_ready_bam_path, engine, logger)
        Analysis_ready_bam_list_path.append(Analysis_ready_bam_path)
    
    if md5:
        for bam in Analysis_ready_bam_list_path:
            bam_name = os.path.basename(bam)
            bam_dir = os.path.dirname(bam)
            bam_basename, bam_ext = os.path.splitext(bam_name)
            bai_name = bam_basename + '.bai'
            bai_path = os.path.join(bam_dir, bai_name)
            verify_util.store_md5_size(uuid, bam, engine, logger)
            verify_util.store_md5_size(uuid, bai_path, engine, logger)
    
    if eliminate_intermediate_files:
        pipe_util.remove_file_list(uuid, harmonized_IR_bam_list_path, engine, logger)
    
    # Re-validate the final bams after intermediate-file cleanup.
    for bam in Analysis_ready_bam_list_path:
        bam_validate.bam_validate(uuid, bam, engine, logger)
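

# is_dir and is_nat are used above as argparse `type` validators but are
# defined elsewhere in the repo; a minimal sketch of the assumed behavior:
def is_dir(path):
    """argparse type: require that `path` is an existing directory."""
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError('%s is not a directory' % path)
    return path


def is_nat(value):
    """argparse type: require a positive ("natural") integer."""
    ivalue = int(value)
    if ivalue < 1:
        raise argparse.ArgumentTypeError('%s is not a positive integer' % value)
    return ivalue


if __name__ == '__main__':
    main()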


# ---------------------------------------------------------------------------
# MuSE variant calling pipeline (a separate entry-point script in this repo;
# the imports and validators above are assumed shared).
# ---------------------------------------------------------------------------
def main():
    parser = argparse.ArgumentParser(description='MuSE variant calling pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
        action = 'store_const',
        const = logging.DEBUG,
        dest = 'level',
        help = 'Enable debug logging.',
    )
    parser.set_defaults(level = logging.INFO)

    # Required flags.

    parser.add_argument('-r', '--reference_fasta_name',
                        required = True,
                        help = 'Reference fasta path.',
    )
    parser.add_argument('-snp', '--dbsnp_known_snp_sites',
                        required = True,
                        help = 'Reference SNP path (should be bgzip-compressed and tabix-indexed; if not, it is compressed and indexed automatically).',
    )
    parser.add_argument('-tb', '--analysis_ready_tumor_bam_path',
                        required = True,
                        help = 'Source patient tumor bam path.',
    )
    parser.add_argument('-nb', '--analysis_ready_normal_bam_path',
                        required = True,
                        help = 'Source patient normal bam path.',
    )
    parser.add_argument('-g', '--Whole_genome_sequencing_data',
                        required = False,
                        action = 'store_true',
                        help = 'Input is whole genome sequencing data.',
    )
    parser.add_argument('-bs', '--Parallel_Block_Size',
                        type = is_nat,
                        default = 50000000,
                        help = 'Genomic block size in bp used to parallelize MuSE call (default: 50000000).',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required = False,
                        type = is_dir,
                        help = 'Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required = False,
                        type = is_dir,
                        help = 'Log file directory.',
    )
    parser.add_argument('-j', '--thread_count',
                        required = True,
                        type = is_nat,
                        help = 'Maximum number of threads for execution.',
    )
    parser.add_argument('-u', '--uuid',
                        required = True,
                        help = 'analysis_id string',
    )
    parser.add_argument('-m', '--md5',
                        required = False,
                        action = 'store_true',
                        help = 'Calculate final size/MD5.',
    )
    parser.add_argument('-e', '--eliminate_intermediate_files',
                        required = False,
                        action = 'store_true',
                        help = 'Keep intermediate files instead of deleting them to save disk space.',
    )
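
    # Example invocation (script name and paths hypothetical; add -g for
    # whole genome sequencing data):
    #   python muse_variant_calling.py -u <analysis_id> -j 8 \
    #       -r /ref/GRCh37.fa -snp /ref/dbsnp.vcf.bgz \
    #       -tb /data/tumor.bam -nb /data/normal.bam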

    args = parser.parse_args()
    reference_fasta_name = args.reference_fasta_name
    dbsnp_known_snp_sites = args.dbsnp_known_snp_sites
    uuid = args.uuid
    analysis_ready_tumor_bam_path = args.analysis_ready_tumor_bam_path
    analysis_ready_normal_bam_path = args.analysis_ready_normal_bam_path
    blocksize = args.Parallel_Block_Size
    scratch_dir = args.scratch_dir or os.path.dirname(analysis_ready_tumor_bam_path)
    log_dir = args.log_dir or os.path.dirname(analysis_ready_tumor_bam_path)
    thread_count = str(args.thread_count)
    # The -e flag is inverted: intermediate files are eliminated unless it is set.
    eliminate_intermediate_files = not args.eliminate_intermediate_files
    md5 = args.md5

    ## Logging
    logging.basicConfig(
        filename=os.path.join(log_dir, 'MuSE_variant_calling_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)
    logger.info('analysis_ready_tumor_bam_path=%s' % analysis_ready_tumor_bam_path)
    logger.info('analysis_ready_normal_bam_path=%s' % analysis_ready_normal_bam_path)
    engine_path = 'sqlite:///' + os.path.join(log_dir, uuid + '_MuSE_variant_calling.db')
    engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')
    
    ## Pipeline
    # faidx the reference fasta if the .fai index does not already exist.
    fai_path = reference_fasta_name + '.fai'
    if not os.path.isfile(fai_path):
        fai_path = index_util.samtools_faidx(uuid, reference_fasta_name, engine, logger)
    logger.info('reference_fai_path=%s' % fai_path)
    
    # Index the input bam files if needed.
    bam_path = [analysis_ready_tumor_bam_path, analysis_ready_normal_bam_path]
    for path in bam_path:
        bai_path = path + '.bai'
        if not os.path.isfile(bai_path):
            bai_path = index_util.samtools_bam_index(uuid, path, engine, logger)
        logger.info('analysis_ready_bam_bai_path=%s' % bai_path)
    
    # bgzip-compress and tabix-index the dbsnp file if needed (tabix requires
    # bgzip compression, not plain gzip).
    dbsnp_name, dbsnp_ext = os.path.splitext(dbsnp_known_snp_sites)
    dbsnp_tabix_path = dbsnp_known_snp_sites + '.tbi'
    if dbsnp_ext == '.bgz':
        logger.info('dbsnp file is already bgzip compressed=%s' % dbsnp_known_snp_sites)
        if not os.path.isfile(dbsnp_tabix_path):
            dbsnp_tabix_path = index_util.tabix_index(uuid, dbsnp_known_snp_sites, engine, logger)
        logger.info('tabix index of dbsnp_bgz file=%s' % dbsnp_tabix_path)
    else:
        dbsnp_known_snp_sites = index_util.bgzip_compress(uuid, dbsnp_known_snp_sites, engine, logger)
        logger.info('bgzip compressed dbsnp file=%s' % dbsnp_known_snp_sites)
        dbsnp_tabix_path = index_util.tabix_index(uuid, dbsnp_known_snp_sites, engine, logger)
        logger.info('tabix index of dbsnp_bgz file=%s' % dbsnp_tabix_path)
    
    #MuSE call
    muse_call_output_path = muse_call.call_region(uuid, thread_count, analysis_ready_tumor_bam_path, analysis_ready_normal_bam_path, reference_fasta_name, fai_path, blocksize, engine, logger)
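    # (call_region is assumed to shard the genome into Parallel_Block_Size-bp
    # windows from the .fai and run `MuSE call` on the windows concurrently,
    # up to thread_count at a time.)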

    #MuSE sump
    if not args.Whole_genome_sequencing_data:
        muse_vcf = muse_sump.sump_wxs(uuid, muse_call_output_path, dbsnp_known_snp_sites, engine, logger)
    else:
        muse_vcf = muse_sump.sump_wgs(uuid, muse_call_output_path, dbsnp_known_snp_sites, engine, logger)
        
    #picard sortvcf
    muse_srt_vcf = index_util.picard_sortvcf(uuid, muse_vcf, reference_fasta_name, engine, logger)
    
    if eliminate_intermediate_files:
        pipe_util.remove_file_list(uuid, [muse_call_output_path], engine, logger)
        pipe_util.remove_file_list(uuid, [muse_vcf], engine, logger)
        
    if md5:
        verify_util.store_md5_size(uuid, muse_srt_vcf, engine, logger)
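

if __name__ == '__main__':
    main()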