Exemplo n.º 1
0
def _reallocate(in_file):
    """Run the ``reallocate.pl`` script on *in_file* if its output is missing.

    The output path is *in_file* with ``.weighted-5000-1000-b`` appended. The
    script is only invoked when ``utils.file_exists`` reports that path as
    absent; the path is returned in either case.
    """
    script = do.find_cmd("reallocate.pl")
    weighted_file = in_file + ".weighted-5000-1000-b"
    if utils.file_exists(weighted_file):
        return weighted_file
    template = "perl {tool} -i {in_file} 5000 1000 b"
    do.run(template.format(tool=script, in_file=in_file), "reallocate")
    return weighted_file
Exemplo n.º 2
0
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert a BEDPE file into a bgzipped and indexed VCF.

    The ``bedpeToVcf`` script is run with the current Python interpreter to
    write a ``-raw.vcf`` file, which is then passed through
    ``vcfutils.sort_by_ref``, symlinked to the plain ``.vcf`` name and finally
    handed to ``vcfutils.bgzip_and_index``. Steps whose outputs already exist
    are skipped.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` command cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    stem = utils.splitext_plus(bedpe_file)[0]
    gz_vcf = "{0}.vcf.gz".format(stem)
    plain_vcf = gz_vcf.replace(".vcf.gz", ".vcf")
    unsorted_vcf = "{0}-raw.vcf".format(stem)

    if not utils.file_exists(gz_vcf):
        if not utils.file_exists(unsorted_vcf):
            with file_transaction(unsorted_vcf) as tx_raw_file:
                ref_file = tz.get_in(["reference", "fasta", "base"], items[0])
                command = [sys.executable, converter,
                           "-c", sconfig_file,
                           "-f", ref_file,
                           "-b", bedpe_file,
                           "-o", tx_raw_file]
                do.run(command, "Convert lumpy bedpe output to VCF")
        sorted_vcf = vcfutils.sort_by_ref(unsorted_vcf, items[0])
        if not utils.file_exists(plain_vcf):
            utils.symlink_plus(sorted_vcf, plain_vcf)
    return vcfutils.bgzip_and_index(plain_vcf, items[0]["config"])
Exemplo n.º 3
0
def _clean(in_file):
    """Run the ``TBr2_duster.pl`` script on *in_file* if its output is missing.

    The output path is *in_file* with ``.no-dust`` appended; it is returned
    whether or not the script had to be executed.
    """
    script = do.find_cmd("TBr2_duster.pl")
    dusted_file = in_file + ".no-dust"
    if utils.file_exists(dusted_file):
        return dusted_file
    template = "perl {tool} -i {in_file}"
    do.run(template.format(tool=script, in_file=in_file), "duster")
    return dusted_file
Exemplo n.º 4
0
def _call_vcf(in_bam, sample, workdir, reference, config):
    """Run the BisSNP ``BisulfiteGenotyper`` walker on *in_bam*.

    Two VCF paths are derived inside *workdir*: ``<sample>.rawcpg.vcf``
    (passed as ``-vfn1``) and ``<sample>.rawsnp.vcf`` (passed as ``-vfn2``).
    The command is only executed when the first file does not exist yet.

    Thread count and JVM heap size are read from ``config['algorithm']``
    (keys ``cores`` and ``memory``, defaulting to 1 and 4).

    Returns a ``(out_vfn1, out_vfn2)`` tuple of the two VCF paths.
    """
    bissnp = do.find_cmd("bissnp")
    num_cores = config['algorithm'].get('cores', 1)
    memory = config['algorithm'].get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % memory
    cmd = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
           "-T BisulfiteGenotyper "
           "-vfn1 {tx_out} "
           "-vfn2 {out_vfn2} "
           "-stand_call_conf 20 "
           "-stand_emit_conf 0 "
           "-mmq 0 "
           "-mbq 0 "
           "-nt {num_cores}")
    with chdir(workdir):
        out_vfn1 = op.join(workdir, sample + ".rawcpg.vcf")
        out_vfn2 = op.join(workdir, sample + ".rawsnp.vcf")
        if not file_exists(out_vfn1):
            # Only the first VCF goes through the transaction; the second is
            # written by the tool directly to its final location.
            with file_transaction(out_vfn1) as tx_out:
                full_cmd = cmd.format(
                    bissnp=bissnp, jvm_opts=jvm_opts, reference=reference,
                    in_bam=in_bam, tx_out=tx_out, out_vfn2=out_vfn2,
                    num_cores=num_cores)
                log.logger.debug(full_cmd)
                # The description previously said "writerecal", copied from
                # the recalibration step; this command performs genotyping.
                do.run(full_cmd, "BisSNP genotyping in %s" % in_bam)
    return out_vfn1, out_vfn2
Exemplo n.º 5
0
def _protac(in_file, reference):
    """Run ``proTRAC.pl`` once, using an empty ``protac`` file as a marker.

    When no file named ``protac`` exists (as reported by
    ``utils.file_exists``), the script is run against *reference* and
    *in_file*, and an empty ``protac`` file is then created. The marker path
    is returned in either case.
    """
    script = do.find_cmd("proTRAC.pl")
    marker = "protac"
    if not utils.file_exists(marker):
        template = "perl {tool} -genome  {reference} -map {in_file} -nh -nr -rpm -distr 1-90 -pimax 32"
        command = template.format(tool=script, reference=reference,
                                  in_file=in_file)
        do.run(command, "protac")
        # Create (or truncate) the marker file after the run.
        with open(marker, 'w'):
            pass
    return marker
Exemplo n.º 6
0
def _mapper(in_file, reference):
    """Run ``sRNAmapper.pl`` on *in_file* against *reference*.

    Output goes to ``hits.eland`` through a ``file_transaction``. The script
    is skipped when that file already exists. Returns the output path.
    """
    script = do.find_cmd("sRNAmapper.pl")
    hits_file = "hits.eland"
    if utils.file_exists(hits_file):
        return hits_file
    template = "perl {tool} -i {in_file} -g {reference} -a best -o {tx_out}"
    with file_transaction(hits_file) as tx_out:
        command = template.format(tool=script, in_file=in_file,
                                  reference=reference, tx_out=tx_out)
        do.run(command, "mapper")
    return hits_file
Exemplo n.º 7
0
def _collapse(in_file):
    """Run ``TBr2_collapse.pl`` on *in_file* and return the output path.

    The output is named after the base name of *in_file* (extension removed
    with ``splitext_plus``) plus ``_collapse.fastq``, with no directory
    component. It is written through a ``file_transaction`` and the script is
    skipped when the file already exists.
    """
    script = do.find_cmd("TBr2_collapse.pl")
    stem = splitext_plus(op.basename(in_file))[0]
    collapsed_file = stem + "_collapse.fastq"
    if utils.file_exists(collapsed_file):
        return collapsed_file
    template = "perl {tool} -i {in_file} -o {tx_out}"
    with file_transaction(collapsed_file) as tx_out:
        do.run(template.format(tool=script, in_file=in_file, tx_out=tx_out),
               "collapse")
    return collapsed_file
Exemplo n.º 8
0
def _align(in_fastq, sample, workdir, genome_index, is_directional, bowtie2,
           reference, config):
    """Align *in_fastq* with bismark and post-process the BAM with Picard.

    Note carried over from the original: this function is not actually used;
    the alignment lives in ngsalign.bismark.align.

    bismark runs in a temporary directory that is then moved to
    ``<workdir>/<sample>``, unless ``<workdir>/<sample>/<sample>.bam`` already
    exists. The BAM then goes through the ``picard_fix_rgs`` and
    ``picard_reorder`` runner functions and is indexed. With *bowtie2* set,
    the reordered BAM is additionally passed through ``_set_quality``.

    Returns the path of the final reordered BAM.
    """
    bismark = do.find_cmd("bismark")
    resources = config_utils.get_resources("bismark")

    # Prefer an explicit bismark thread setting; otherwise use half the
    # configured cores, but never fewer than one.
    bismark_threads = resources.get("bismark_threads") if resources else None
    if bismark_threads:
        num_cores = bismark_threads
    else:
        num_cores = max(int(config['algorithm'].get('cores', 1) / 2), 1)

    configured_bowtie = resources.get("bowtie_threads") if resources else None
    bowtie_threads = configured_bowtie if configured_bowtie else 1

    direction_flag = "" if is_directional else "--non_directional"

    if bowtie2:
        template = "{bismark} --bowtie2 --parallel {num_cores} -p {bowtie_threads} -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"
    else:
        template = "{bismark} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"

    out_dir = op.join(workdir, sample)
    out_bam = op.join(out_dir, sample + ".bam")

    with chdir(workdir):
        if not file_exists(out_bam):
            with tx_tmpdir() as tx_dir:
                command = template.format(
                    bismark=bismark, num_cores=num_cores,
                    bowtie_threads=bowtie_threads, tx_dir=tx_dir,
                    sample=sample, is_directional=direction_flag,
                    genome_index=genome_index, in_fastq=in_fastq)
                log.logger.debug(command)
                do.run(command, "bismark in %s" % in_fastq)
                shutil.move(tx_dir, out_dir)

        broad_runner = broad.runner_from_config(config)
        read_group = {
            'rg': in_fastq,
            'library': 'BS_LIB',
            'pl': 'Illumina',
            'pu': 'R1',
            'sm': in_fastq,
            'sample': sample,
        }
        fixed_bam = broad_runner.run_fn("picard_fix_rgs", out_bam, read_group)
        order_bam = splitext_plus(fixed_bam)[0] + "_order.bam"
        broad_runner.run_fn("picard_reorder", fixed_bam, reference, order_bam)
        index(order_bam, config)
        if bowtie2:
            order_bam = _set_quality(order_bam)
        # Index again: order_bam may now point at the file from _set_quality.
        index(order_bam, config)
    return order_bam
Exemplo n.º 9
0
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert a BEDPE file into a bgzipped and indexed VCF.

    The ``bedpeToVcf`` script is run with the current Python interpreter
    unless either the ``.vcf`` or the ``.vcf.gz`` output already exists. The
    ``.vcf`` path is then passed to ``vcfutils.bgzip_and_index``.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` command cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    vcf_file = "{0}.vcf".format(utils.splitext_plus(bedpe_file)[0])
    already_done = (utils.file_exists(vcf_file)
                    or utils.file_exists(vcf_file + ".gz"))
    if not already_done:
        with file_transaction(vcf_file) as tx_out_file:
            ref_file = tz.get_in(["reference", "fasta", "base"], items[0])
            command = [sys.executable, converter,
                       "-c", sconfig_file,
                       "-f", ref_file,
                       "-b", bedpe_file,
                       "-o", tx_out_file]
            do.run(command, "Convert lumpy bedpe output to VCF")
    return vcfutils.bgzip_and_index(vcf_file, items[0]["config"])
Exemplo n.º 10
0
def _run_report(bam_in, sample, biasm_file, workdir, config):
    """Run the ``bismark2report`` command for *sample*.

    The alignment report is expected next to *bam_in* as
    ``<sample>_SE_report.txt``. The HTML output goes to
    ``<workdir>/<sample>/<sample>.html`` through a ``file_transaction`` and
    is only generated when that file does not exist yet.

    Returns the sample output directory (not the HTML file path).
    """
    bismark = do.find_cmd("bismark2report")
    bam_report = op.join(op.dirname(bam_in), sample) + '_SE_report.txt'
    template = "{bismark} --alignment_report  {bam_report} -o {tx_out} --mbias_report {biasm_file}"
    out_dir = op.join(workdir, sample)
    html_file = op.join(out_dir, sample + '.html')
    with chdir(out_dir):
        if not file_exists(html_file):
            with file_transaction(html_file) as tx_out:
                command = template.format(
                    bismark=bismark, bam_report=bam_report, tx_out=tx_out,
                    biasm_file=biasm_file)
                do.run(command, "bismarkr2report  in %s" % bam_in)

    return out_dir
Exemplo n.º 11
0
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert a BEDPE file into a bgzipped and indexed VCF.

    Runs the ``bedpeToVcf`` script through the current Python interpreter when
    neither the ``.vcf`` nor the ``.vcf.gz`` output exists, then passes the
    ``.vcf`` path to ``vcfutils.bgzip_and_index``.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` command cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    stem = utils.splitext_plus(bedpe_file)[0]
    vcf_file = "{0}.vcf".format(stem)
    if not (utils.file_exists(vcf_file)
            or utils.file_exists(vcf_file + ".gz")):
        with file_transaction(vcf_file) as tx_out_file:
            ref_file = tz.get_in(["reference", "fasta", "base"], items[0])
            command = [sys.executable, converter]
            command += ["-c", sconfig_file]
            command += ["-f", ref_file]
            command += ["-b", bedpe_file]
            command += ["-o", tx_out_file]
            do.run(command, "Convert lumpy bedpe output to VCF")
    return vcfutils.bgzip_and_index(vcf_file, items[0]["config"])
Exemplo n.º 12
0
def _run_meth_extractor(bam_in, sample, workdir, config):
    """Run the ``bismark_methylation_extractor`` command on *bam_in*.

    The command runs inside a temporary directory which is then moved to
    ``<workdir>/<sample>``. It is skipped when the expected M-bias report
    (``<bam base name>.M-bias.txt`` in that directory) already exists.

    Core count and buffer size are read from ``config['algorithm']`` (keys
    ``cores`` and ``mem``, defaulting to 1 and 5).

    Returns the M-bias report path. Raises ``AssertionError`` when the report
    is missing after the run.
    """
    bismark = do.find_cmd("bismark_methylation_extractor")
    algorithm = config['algorithm']
    cores = algorithm.get('cores', 1)
    memory = algorithm.get('mem', 5)
    template = "{bismark}  --no_overlap --comprehensive --multicore {cores} --buffer_size {memory}G --bedGraph --counts --gzip {bam_in}"
    out_dir = op.join(workdir, sample)
    bam_stem = op.basename(splitext_plus(bam_in)[0])
    mbias_file = op.join(out_dir, bam_stem + '.M-bias.txt')
    if not file_exists(mbias_file):
        with tx_tmpdir() as tx_dir:
            with chdir(tx_dir):
                command = template.format(bismark=bismark, cores=cores,
                                          memory=memory, bam_in=bam_in)
                do.run(command,
                       "bismark_methylation_extractor  in %s" % bam_in)
                # The move happens while still inside the chdir block, as in
                # the original ordering.
                shutil.move(tx_dir, out_dir)
    assert op.exists(mbias_file), "mbias report doesn't exists:%s" % mbias_file
    return mbias_file
Exemplo n.º 13
0
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert a BEDPE file into a bgzipped and indexed VCF.

    The ``bedpeToVcf`` script is run (with ``-t LUMPY``) to write a
    ``-raw.vcf`` file, which is passed through ``vcfutils.sort_by_ref``,
    symlinked to the plain ``.vcf`` name and finally handed to
    ``vcfutils.bgzip_and_index``. Steps whose outputs already exist are
    skipped.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` command cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    stem = utils.splitext_plus(bedpe_file)[0]
    gz_vcf = "{0}.vcf.gz".format(stem)
    plain_vcf = gz_vcf.replace(".vcf.gz", ".vcf")
    unsorted_vcf = "{0}-raw.vcf".format(stem)

    if not utils.file_exists(gz_vcf):
        if not utils.file_exists(unsorted_vcf):
            with file_transaction(items[0], unsorted_vcf) as tx_raw_file:
                command = [sys.executable, converter,
                           "-c", sconfig_file,
                           "-f", dd.get_ref_file(items[0]),
                           "-t", "LUMPY",
                           "-b", bedpe_file,
                           "-o", tx_raw_file]
                do.run(command, "Convert lumpy bedpe output to VCF")
        sorted_vcf = vcfutils.sort_by_ref(unsorted_vcf, items[0])
        if not utils.file_exists(plain_vcf):
            utils.symlink_plus(sorted_vcf, plain_vcf)
    return vcfutils.bgzip_and_index(plain_vcf, items[0]["config"])
Exemplo n.º 14
0
def _recal_BQ_score(in_bam, sample, workdir, counts_file, reference, config):
    """Run the BisSNP ``BisulfiteTableRecalibration`` walker on *in_bam*.

    The recalibrated BAM is written to ``<workdir>/<sample>_recal1.bam``
    through a ``file_transaction``; the command is skipped when that file
    already exists. The BAM is indexed on every call. JVM heap size comes
    from ``config['algorithm']['memory']`` (default 4).

    Returns the recalibrated BAM path.
    """
    bissnp = do.find_cmd("bissnp")
    heap_gb = config['algorithm'].get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % heap_gb
    template = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
                "-T BisulfiteTableRecalibration "
                "-recalFile {counts_file} "
                "-o {tx_out} "
                "-maxQ 60 ")
    with chdir(workdir):
        out_recal = op.join(workdir, sample + "_recal1.bam")
        if not file_exists(out_recal):
            with file_transaction(out_recal) as tx_out:
                command = template.format(
                    bissnp=bissnp, jvm_opts=jvm_opts, reference=reference,
                    in_bam=in_bam, counts_file=counts_file, tx_out=tx_out)
                log.logger.debug(command)
                do.run(command, "BisSNP writerecal in %s" % in_bam)
        index(out_recal, config)
    return out_recal
Exemplo n.º 15
0
def _align(in_fastq, sample, workdir, genome_index, is_directional, bowtie2, reference, config):
    """Align *in_fastq* with bismark and post-process the BAM with Picard.

    bismark runs in a temporary directory that is then moved to
    ``<workdir>/<sample>``, unless ``<workdir>/<sample>/<sample>.bam`` already
    exists. The BAM then goes through the ``picard_fix_rgs`` and
    ``picard_reorder`` runner functions and is indexed. With *bowtie2* set,
    the reordered BAM is additionally passed through ``_set_quality``.

    Returns the path of the final reordered BAM.
    """
    bismark = do.find_cmd("bismark")
    # Half the configured cores, but never fewer than one.
    num_cores = max(int(config['algorithm'].get('cores', 1) / 2), 1)
    direction_flag = "" if is_directional else "--non_directional"

    if bowtie2:
        template = "{bismark} --bowtie2 -p {num_cores} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"
    else:
        template = "{bismark} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"

    out_dir = op.join(workdir, sample)
    out_bam = op.join(out_dir, sample + ".bam")

    with chdir(workdir):
        if not file_exists(out_bam):
            with tx_tmpdir() as tx_dir:
                command = template.format(
                    bismark=bismark, num_cores=num_cores, tx_dir=tx_dir,
                    sample=sample, is_directional=direction_flag,
                    genome_index=genome_index, in_fastq=in_fastq)
                log.logger.debug(command)
                do.run(command, "bismark in %s" % in_fastq)
                shutil.move(tx_dir, out_dir)

        broad_runner = broad.runner_from_config(config)
        read_group = {
            'rg': in_fastq,
            'library': 'RRBS_LIB',
            'pl': 'Illumina',
            'pu': 'R1',
            'sm': in_fastq,
            'sample': sample,
        }
        fixed_bam = broad_runner.run_fn("picard_fix_rgs", out_bam, read_group)
        order_bam = splitext_plus(fixed_bam)[0] + "_order.bam"
        broad_runner.run_fn("picard_reorder", fixed_bam, reference, order_bam)
        index(order_bam, config)
        if bowtie2:
            order_bam = _set_quality(order_bam)
        # Index again: order_bam may now point at the file from _set_quality.
        index(order_bam, config)
    return order_bam
Exemplo n.º 16
0
def _trimming(in_fastq, out_dir, sample, is_rrbs, is_directional):
    """Trim reads with trim_galore and return the trimmed FASTQ path.

    trim_galore runs in a temporary directory that is then moved to
    ``<out_dir>/<sample>``. The run is skipped when the expected output
    ``<out_dir>/<sample>/<sample>_trimmed.fq`` (with ``.gz`` appended when
    *in_fastq* ends in ``gz``) already exists.

    A truthy *is_rrbs* adds ``--rrbs`` to the command; a falsy
    *is_directional* adds ``--non_directional``.

    Raises ``AssertionError`` when the trimmed file is missing after the run.
    """
    trim_galore = find_cmd("trim_galore")
    # A falsy is_rrbs used to be formatted into the command as the literal
    # text "False" or "None"; map it to an empty flag instead.
    rrbs_flag = "--rrbs" if is_rrbs else ""
    direction_flag = "" if is_directional else "--non_directional"

    cmd = "{trim_galore} {rrbs_flag} {direction_flag} --length 30 --quality 30 {in_fastq} -o {tx_dir}"
    trimming = op.join(out_dir, sample, sample + "_trimmed.fq")
    if in_fastq.endswith("gz"):
        trimming += ".gz"
    with chdir(out_dir):
        if not file_exists(trimming):
            with tx_tmpdir() as tx_dir:
                full_cmd = cmd.format(
                    trim_galore=trim_galore, rrbs_flag=rrbs_flag,
                    direction_flag=direction_flag, in_fastq=in_fastq,
                    tx_dir=tx_dir)
                logger.debug(full_cmd)
                run(full_cmd, "trim_galore in %s" % in_fastq)
                shutil.move(tx_dir, op.join(out_dir, sample))
    # Kept as an assert so callers still see the same exception type.
    assert op.exists(trimming), "trimming file doesn't exists:%s" % trimming
    return trimming
Exemplo n.º 17
0
def _count_covars(in_bam, sample, workdir, snp, reference, config):
    """Run the BisSNP ``BisulfiteCountCovariates`` walker on *in_bam*.

    The recalibration table is written to ``<workdir>/<sample>_recal1.csv``
    through a ``file_transaction``; the command is skipped when that file
    already exists. *snp* is passed as ``-knownSites``. Thread count and JVM
    heap size come from ``config['algorithm']`` (keys ``cores`` and
    ``memory``, defaulting to 1 and 4).

    Returns the recalibration table path.
    """
    bissnp = do.find_cmd("bissnp")
    algorithm = config['algorithm']
    num_cores = algorithm.get('cores', 1)
    heap_gb = algorithm.get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % heap_gb
    template = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
                "-T BisulfiteCountCovariates "
                "-knownSites {snp} "
                "-cov ReadGroupCovariate "
                "-cov QualityScoreCovariate "
                "-cov CycleCovariate "
                "-recalFile {tx_out} "
                "-nt {num_cores} ")
    with chdir(workdir):
        out_count = op.join(workdir, sample + "_recal1.csv")
        if not file_exists(out_count):
            with file_transaction(out_count) as tx_out:
                command = template.format(
                    bissnp=bissnp, jvm_opts=jvm_opts, reference=reference,
                    in_bam=in_bam, snp=snp, tx_out=tx_out,
                    num_cores=num_cores)
                log.logger.debug(command)
                do.run(command, "BisSNP countcovarts in %s" % in_bam)
    return out_count