Exemplo n.º 1
0
def process_fastq(curinfo, ref_index, config, config_file):
    """Run one sample's fastq through preparation, alignment and summary.

    The input is obtained from _prepare_fastq, optionally trimmed
    (trim_fastq) and k-mer filtered (remove_ns + kmer_filter), collapsed
    with uniquify_reads, aligned with novoalign.align and handed to
    sam_to_sort_bam. When the "realignment" setting equals "gatk" the
    result is passed through gatk_realigner. Finally "picard_index" is run
    through the broad runner and summarize_at_each_pos is called.

    Args:
        curinfo: mapping describing the sample; "description", "name" and
            "ref" are read here and the mapping is forwarded to helpers.
        ref_index: reference index forwarded to novoalign.align.
        config: configuration mapping; config["algorithm"] supplies the
            "realignment", "kmer_correct" and "trim_three" settings and
            config["dir"]["align"] is forwarded to the aligner.
        config_file: not used by this function.

    Returns:
        None.
    """
    algorithm = config["algorithm"]
    realigner = algorithm.get("realignment", "")
    kmer_setting = algorithm.get("kmer_correct", "")
    trim_size = algorithm.get("trim_three", "")
    runner = broad.runner_from_config(config)

    fastq_file = _prepare_fastq(curinfo, config)
    if trim_size:
        fastq_file = trim_fastq(fastq_file, three=int(trim_size))
    if kmer_setting:
        fastq_file = kmer_filter(remove_ns(fastq_file), kmer_setting, config)

    deduped_file, counts_file = uniquify_reads(fastq_file, config)
    # The aligner receives the processed fastq's base name without extension.
    base_name, _ = os.path.splitext(os.path.basename(fastq_file))
    sam_file = novoalign.align(deduped_file, None, ref_index, base_name,
                               config["dir"]["align"], curinfo)

    # Prefer the sample description, then its name, then an empty string.
    sample_name = curinfo.get("description", curinfo.get("name", ""))
    # A per-sample reference overrides the global one.
    ref_file = curinfo.get("ref", config.get("ref", None))
    # Every naming field carries the same sample name.
    read_names = {key: sample_name for key in ("rg", "sample", "pu", "pl")}
    bam_file = sam_to_sort_bam(sam_file, ref_file, deduped_file, None,
                               read_names, config)
    if realigner == "gatk":
        bam_file = gatk_realigner(bam_file, ref_file, config,
                                  deep_coverage=True)
    runner.run_fn("picard_index", bam_file)
    # XXX Finish remainder of processing
    summarize_at_each_pos(bam_file, fastq_file, counts_file, sample_name,
                          config)
Exemplo n.º 2
0
def process_fastq(curinfo, ref_index, config, config_file):
    """Align one sample's fastq reads and summarize the result per position.

    The input is obtained from _prepare_fastq, optionally trimmed
    (trim_fastq) and k-mer filtered (remove_ns + kmer_filter), collapsed
    with uniquify_reads, aligned with novoalign.align and handed to
    sam_to_sort_bam. When the "realignment" setting equals "gatk" the
    result is passed through gatk_realigner. Finally "picard_index" is run
    through the broad runner and summarize_at_each_pos is called.

    Args:
        curinfo: mapping describing the sample; "description", "name" and
            "ref" are read here and the mapping is forwarded to helpers.
        ref_index: reference index forwarded to novoalign.align.
        config: configuration mapping; config["algorithm"] supplies the
            "realignment", "kmer_correct" and "trim_three" settings and
            config["dir"]["align"] is forwarded to the aligner.
        config_file: not used by this function.

    Returns:
        None.
    """
    do_realignment = config["algorithm"].get("realignment", "")
    do_kmercorrect = config["algorithm"].get("kmer_correct", "")
    trim_three = config["algorithm"].get("trim_three", "")
    picard = broad.runner_from_config(config)
    in_file = _prepare_fastq(curinfo, config)
    if trim_three:
        in_file = trim_fastq(in_file, three=int(trim_three))
    if do_kmercorrect:
        in_file = remove_ns(in_file)
        in_file = kmer_filter(in_file, do_kmercorrect, config)
    unique_file, count_file = uniquify_reads(in_file, config)
    align_sam = novoalign.align(unique_file, None, ref_index,
                                os.path.splitext(os.path.basename(in_file))[0],
                                config["dir"]["align"],
                                curinfo)
    # Prefer the sample description, then its name, then an empty string.
    name = curinfo.get("description", curinfo.get("name", ""))
    # A per-sample reference overrides the global one.
    ref = curinfo.get("ref", config.get("ref", None))
    align_bam = sam_to_sort_bam(align_sam, ref, unique_file, None,
                                name, name, name, config)
    if do_realignment == "gatk":
        align_bam = gatk_realigner(align_bam, ref, config, deep_coverage=True)
    picard.run_fn("picard_index", align_bam)
    # XXX Finish remainder of processing
    # TODO: a follow-up step (call_analyze_multiple when
    # config["algorithm"]["range_params"] is set, call_bases_and_analyze
    # otherwise) previously sat after an unconditional return and was never
    # executed. It referenced a `bc` value that this function does not
    # receive, so it needs a replacement for `bc` before it can be restored.
    summarize_at_each_pos(align_bam, in_file, count_file, name, config)
Exemplo n.º 3
0
def run_genotyper(bam_file, ref_file, config):
    """Run the GATK-based SNP genotyping and evaluation steps on a BAM file.

    The dbSNP file is looked up with _get_dbsnp_file and then shared by
    gatk_realigner, gatk_genotyper and _eval_genotyper, which are called
    in that order.

    Args:
        bam_file: input handed to gatk_realigner.
        ref_file: reference forwarded to every step.
        config: configuration forwarded to every step.

    Returns:
        The value returned by gatk_genotyper.
    """
    known_sites = _get_dbsnp_file(config, ref_file)
    realigned = gatk_realigner(bam_file, ref_file, config, known_sites)
    filtered_calls = gatk_genotyper(realigned, ref_file, config, known_sites)
    # Evaluation is run for its side effects only; its result is not used.
    _eval_genotyper(filtered_calls, ref_file, known_sites, config)
    return filtered_calls
Exemplo n.º 4
0
def process_fastq(bc, ref_index, cur_config, config, config_file):
    """Align the fastq referenced by ``bc`` and run the downstream analysis.

    The file at bc["file"] is optionally trimmed (trim_fastq) and k-mer
    filtered (remove_ns + kmer_filter), collapsed with
    uniquify_bioplayground, aligned with novoalign.align and handed to
    sam_to_sort_bam. When the "realignment" setting equals "gatk" the
    result is passed through gatk_realigner. After "picard_index" is run
    through the broad runner, call_analyze_multiple is used if the
    "range_params" setting is truthy and call_bases_and_analyze otherwise.

    Args:
        bc: mapping with "file" and "name" entries; also forwarded to the
            analysis call.
        ref_index: reference index forwarded to novoalign.align.
        cur_config: mapping whose optional "format" entry is passed to the
            aligner as ``qual_format``.
        config: configuration mapping; reads config["algorithm"],
            config["dir"]["align"] and config["ref"].
        config_file: not used by this function.

    Returns:
        None.
    """
    algorithm = config["algorithm"]
    realigner = algorithm.get("realignment", "")
    kmer_setting = algorithm.get("kmer_correct", "")
    trim_size = algorithm.get("trim_three", "")
    runner = broad.runner_from_config(config)

    fastq_file = bc["file"]
    if trim_size:
        fastq_file = trim_fastq(fastq_file, three=int(trim_size))
    if kmer_setting:
        fastq_file = kmer_filter(remove_ns(fastq_file), kmer_setting, config)

    deduped_file = uniquify_bioplayground(fastq_file, config)
    sam_file = novoalign.align(
        config["dir"]["align"],
        ref_index,
        deduped_file,
        qual_format=cur_config.get("format", None),
    )
    bam_file = sam_to_sort_bam(
        sam_file, config["ref"], deduped_file, None, "", bc["name"], config
    )
    if realigner == "gatk":
        bam_file = gatk_realigner(
            bam_file, config["ref"], config, deep_coverage=True
        )
    runner.run_fn("picard_index", bam_file)

    # Pick the analysis entry point based on whether range parameters are set.
    if config["algorithm"].get("range_params", None):
        analyze = call_analyze_multiple
    else:
        analyze = call_bases_and_analyze
    analyze(bam_file, bc, fastq_file, config)