def vcfanno(job, config, name, samples, input_vcf):
    """Take the specified VCF and use vcfanno to add additional annotations

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['vcfanno']['bin']``
        and ``config['snpeff']['reference']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: samples dictionary (not referenced by the code visible here).
    :type samples: dict.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The output vcf file name.
    """

    output_vcf = "{}.vcfanno.snpEff.{}.vcf".format(name, config['snpeff']['reference'])
    logfile = "{}.vcfanno.log".format(name)

    # NOTE(review): only the executable is present in this command list; the
    # tool arguments (options, input and output files) appear to be missing
    # and need to be restored -- TODO confirm against the intended invocation.
    command = ["{}".format(config['vcfanno']['bin'])]

    job.fileStore.logToMaster("VCFAnno Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_vcf
def snpeff(job, config, name, input_vcf):
    """Annotate the specified VCF using snpEff

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['snpeff']['bin']``
        and ``config['snpeff']['reference']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The output vcf file name.
    """

    output_vcf = "{}.snpEff.{}.vcf".format(name, config['snpeff']['reference'])
    logfile = "{}.snpeff.log".format(name)

    # NOTE(review): only the executable is present in this command list; the
    # tool arguments (options, input and output files) appear to be missing
    # and need to be restored -- TODO confirm against the intended invocation.
    snpeff_command = ["{}".format(config['snpeff']['bin'])]

    job.fileStore.logToMaster("snpEff Command: {}\n".format(snpeff_command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(snpeff_command), logfile)

    return output_vcf
def gemini(job, config, name, input_vcf):
    """Take the specified VCF and use GEMINI to add additional annotations and convert to database format

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gemini']['bin']``
        and ``config['snpeff']['reference']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the database and log file names.
    :type name: str.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The output GEMINI database name.
    """

    db = "{}.snpEff.{}.db".format(name, config['snpeff']['reference'])
    logfile = "{}.gemini.log".format(name)

    # NOTE(review): only the executable is present in this command list; the
    # tool arguments (sub-command, options, input VCF, database name) appear
    # to be missing and need to be restored -- TODO confirm.
    command = ["{}".format(config['gemini']['bin'])]

    job.fileStore.logToMaster("GEMINI Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), logfile)

    return db
def merge_sam(job, config, name, input_bams):
    """Run Picard MergeSamFiles

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['picard-merge']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bams: The list of input_bam files to merge.
    :type input_bams: list of str.
    :returns:  str -- The output bam file name.
    """

    output_sam = "{}.merged.sorted.bam".format(name)
    logfile = "{}.mergesam.log".format(name)

    # Joined with " I=" so that, once prefixed with a leading "I=", every
    # input file gets its own I= argument.
    bam_string = " I=".join(input_bams)

    # NOTE(review): only the executable is present in this command list;
    # ``bam_string`` and ``output_sam`` are not passed to it yet. The tool
    # arguments appear to be missing and need to be restored -- TODO confirm.
    command = ["{}".format(config['picard-merge']['bin'])]

    job.fileStore.logToMaster("Picard MergeSam Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_sam
def realign_target_creator(job, config, name, input_bam):
    """Run GATK TargetCreator on the specified BAM to identify targets for realignment

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gatk-realign']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the targets and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The file name of the targets file.
    """

    targets = "{}.targets.intervals".format(name)
    targets_log = "{}.targetcreation.log".format(name)

    # NOTE(review): only the executable is present in this command list; the
    # tool arguments (tool name, reference, input BAM, output intervals)
    # appear to be missing and need to be restored -- TODO confirm.
    command = ["{}".format(config['gatk-realign']['bin'])]

    job.fileStore.logToMaster("GATK RealignerTargetCreator Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), targets_log)

    return targets
def mark_duplicates(job, config, name, input_bam):
    """Run Picard MarkDuplicates

    :param job: Job handle; its ``fileStore.logToMaster`` is used for progress
        and command logging.
    :param config: The configuration dictionary (reads ``config['picard-dedup']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output, metrics and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output bam file name.
    """

    job.fileStore.logToMaster("Running MarkDuplicates for sample: {}".format(name))

    metrics_file = "{}.dedup.metrics".format(name)
    output_bam = "{}.dedup.sorted.bam".format(name)
    logfile = "{}.markduplicates.log".format(name)

    # NOTE(review): only the executable is present in this command list;
    # ``input_bam``, ``output_bam`` and ``metrics_file`` are not passed to it
    # yet. The tool arguments appear to be missing -- TODO confirm and restore.
    command = ["{}".format(config['picard-dedup']['bin'])]

    job.fileStore.logToMaster("Picard MarkDuplicates Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_bam
def diagnose_pooled_targets(job, config, name, regions, samples, input_bam1, input_bam2):
    """Run GATK's DiagnoseTargets against the supplied region

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gatk']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param regions: regions dictionary key name and tag; also embedded in the
        output and log file names.
    :type regions: str.
    :param samples: samples dictionary (not referenced by the code visible here).
    :type samples: dict.
    :param input_bam1: The first input_bam file name to process.
    :type input_bam1: str.
    :param input_bam2: The second input_bam file name to process.
    :type input_bam2: str.
    :returns:  str -- The DiagnoseTargets output vcf file name.
    """

    diagnose_targets_vcf = "{}_{}.diagnosetargets.vcf".format(name, regions)
    missing_intervals = "{}_{}.missing.intervals".format(name, regions)
    logfile = "{}.{}.diagnose_targets.log".format(name, regions)

    # NOTE(review): only the executable is present in this command list; the
    # input BAMs, ``diagnose_targets_vcf`` and ``missing_intervals`` are not
    # passed to it yet. The tool arguments appear to be missing -- TODO
    # confirm and restore.
    command = ["{}".format(config['gatk']['bin'])]

    job.fileStore.logToMaster("GATK DiagnoseTargets Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), logfile)

    return diagnose_targets_vcf
def filter_variants(job, config, name, input_vcf):
    """Run GATK's VariantFilter on the specified VCF

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gatk-filter']['bin']``
        and the ``mq0_threshold``, ``coverage_threshold``, ``var_qual_threshold``
        and ``map_qual_threshold`` entries).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The output vcf file name.
    """

    output_vcf = "{}.filtered.vcf".format(name)
    filter_log = "{}.variantfiltration.log".format(name)

    # NOTE(review): the filter expressions below are passed as bare arguments;
    # the option flags that would introduce them, plus the input and output
    # file arguments, appear to be missing -- TODO confirm and restore.
    # Each expression is single-quoted so it stays one token after the
    # space-join below.
    filter_command = ["{}".format(config['gatk-filter']['bin']),
                      "'MQ0 > {}'".format(config['mq0_threshold']),
                      "'DP < {}'".format(config['coverage_threshold']),
                      "'QUAL < {}'".format(config['var_qual_threshold']),
                      "'MQ < {}'".format(config['map_qual_threshold'])]

    job.fileStore.logToMaster("GATK VariantFiltration Command: {}\n".format(filter_command))
    pipeline.run_and_log_command(" ".join(filter_command), filter_log)

    return output_vcf
def annotate_vcf(job, config, name, input_vcf, input_bam):
    """Run GATK's VariantAnnotation on the specified VCF

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gatk-annotate']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name.
    """

    output_vcf = "{}.annotated.vcf".format(name)
    annotation_logfile = "{}.variantannotation.log".format(name)

    # NOTE(review): only the executable is present in this command list; the
    # tool arguments (tool name, reference, input VCF/BAM, output VCF) appear
    # to be missing and need to be restored -- TODO confirm.
    annotation_command = ["{}".format(config['gatk-annotate']['bin'])]

    job.fileStore.logToMaster("GATK VariantAnnotator Command: {}\n".format(annotation_command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(annotation_command), annotation_logfile)

    return output_vcf
def realign_indels(job, config, name, input_bam, targets):
    """Run GATK Indel Realignment on the specified BAM

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (reads ``config['gatk-realign']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :param targets: The file name of targets to realign.
    :type targets: str.
    :returns:  str -- The output bam file name.
    """

    output_bam = "{}.realigned.sorted.bam".format(name)
    realign_log = "{}.realignindels.log".format(name)

    # NOTE(review): only the executable is present in this command list;
    # ``input_bam``, ``targets`` and ``output_bam`` are not passed to it yet.
    # The tool arguments appear to be missing -- TODO confirm and restore.
    command = ["{}".format(config['gatk-realign']['bin'])]

    job.fileStore.logToMaster("GATK IndelRealigner Command: {}\n".format(command))
    # The list is flattened into a single space-separated command string.
    pipeline.run_and_log_command(" ".join(command), realign_log)

    return output_bam
def add_or_replace_readgroups(job, config, name, input_bam):
    """Run Picard's AddOrReplaceReadGroups on the specified BAM

    Two commands are run in sequence, each with its own log file: the
    read-group step and then an index-building step.

    :param job: Job handle; its ``fileStore.logToMaster`` is used for progress
        and command logging.
    :param config: The configuration dictionary (reads ``config['picard-add']['bin']``
        for both commands).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output bam file name.
    """

    job.fileStore.logToMaster("Running AddOrReplaceReadGroups in sample: {}".format(name))

    output_bam = "{}.rg.sorted.bam".format(name)
    logfile = "{}.addreadgroups.log".format(name)
    index_log = "{}.buildindex.log".format(name)

    # NOTE(review): both command lists contain only the executable; the tool
    # names and their arguments (input/output BAM, read-group fields) appear
    # to be missing and need to be restored -- TODO confirm.
    command = ["{}".format(config['picard-add']['bin'])]

    command2 = ["{}".format(config['picard-add']['bin'])]

    job.fileStore.logToMaster("GATK AddOrReplaceReadGroupsCommand Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    # The index step runs only after the read-group step has been issued.
    job.fileStore.logToMaster("GATK BuildBamIndex Command: {}\n".format(command2))
    pipeline.run_and_log_command(" ".join(command2), index_log)

    return output_bam
def recalibrator(job, config, name, input_bam):
    """Run GATK Recalibrator on the specified BAM

    Three commands are run in sequence, each with its own log file:
    covariate calculation, printing of the recalibrated BAM, and a copy step.

    :param job: Job handle; its ``fileStore.logToMaster`` is used to log each command.
    :param config: The configuration dictionary (reads ``config['gatk-recal']['bin']``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output bam file name.
    """

    output_bam = "{}.recalibrated.sorted.bam".format(name)
    recal_config = "{}.recal".format(name)
    recal_log = "{}.recalibrate.log".format(name)
    print_log = "{}.printrecalibrated.log".format(name)
    cp_log = "{}.copy.log".format(name)

    # NOTE(review): each command list below contains only its executable;
    # ``input_bam``, ``recal_config`` and ``output_bam`` are not passed to any
    # of them yet. The arguments appear to be missing -- TODO confirm and
    # restore.

    # Calculate covariates
    recal_commands = ["{}".format(config['gatk-recal']['bin'])]

    # Print recalibrated BAM
    print_reads_command = ["{}".format(config['gatk-recal']['bin'])]

    # Copy index to alternative name
    cp_command = ["cp"]

    job.fileStore.logToMaster("GATK BaseRecalibrator Command: {}\n".format(recal_commands))
    pipeline.run_and_log_command(" ".join(recal_commands), recal_log)

    job.fileStore.logToMaster("GATK PrintReads Command: {}\n".format(print_reads_command))
    pipeline.run_and_log_command(" ".join(print_reads_command), print_log)

    job.fileStore.logToMaster("GATK Copy Command: {}\n".format(cp_command))
    pipeline.run_and_log_command(" ".join(cp_command), cp_log)

    return output_bam