示例#1
0
def run(items, config):
    """Run third party disambiguation script, resolving into single set of calls.
    """
    assert len(items) == 2, "Can only resolve two organism disambiguation"
    # check aligner, handling tophat/tophat2 distinctions
    aligner = config["algorithm"].get("aligner")
    aligner = "tophat" if aligner.startswith("tophat") else aligner
    assert aligner in ["bwa", "tophat", "star"], "Disambiguation only supported for bwa, star and tophat alignments."
    if items[0]["disambiguate"].get("base"):
        data_a, data_b = items
    else:
        data_b, data_a = items
    work_bam_a = bam.sort(data_a["work_bam"], config, "queryname")
    work_bam_b = bam.sort(data_b["work_bam"], config, "queryname")
    out_dir = os.path.normpath(os.path.join(os.path.dirname(work_bam_a),
                                            os.pardir, "disambiguate_%s" % aligner))
    base_name = os.path.join(out_dir, os.path.splitext(os.path.basename(work_bam_a))[0])
    summary_file = "%s_summary.txt" % base_name
    if not utils.file_exists(summary_file):
        with file_transaction(items[0], out_dir) as tx_out_dir:
            Args = collections.namedtuple("Args", "A B output_dir intermediate_dir "
                                          "no_sort prefix aligner")
            args = Args(work_bam_a, work_bam_b, tx_out_dir, tx_out_dir,
                        True, "", aligner)
            disambiguate_main(args)
    data_a["disambiguate"] = \
      {data_b["genome_build"]: "%s.disambiguatedSpeciesB.bam" % base_name,
       "%s-ambiguous" % data_a["genome_build"]: "%s.ambiguousSpeciesA.bam" % base_name,
       "%s-ambiguous" % data_b["genome_build"]: "%s.ambiguousSpeciesB.bam" % base_name,
       "summary": summary_file}
    data_a["work_bam"] = bam.sort("%s.disambiguatedSpeciesA.bam" % base_name, config)
    return [[data_a]]
示例#2
0
def _run_python(work_bam_a, work_bam_b, out_dir, aligner, prefix, items):
    """Run python version of disambiguation
    """
    Args = collections.namedtuple("Args", "A B output_dir intermediate_dir "
                                    "no_sort prefix aligner")
    args = Args(work_bam_a, work_bam_b, out_dir, out_dir, True, "", aligner)
    disambiguate_main(args)
示例#3
0
def _run_python(work_bam_a, work_bam_b, out_dir, aligner, prefix, items):
    """Run python version of disambiguation
    """
    Args = collections.namedtuple("Args", "A B output_dir intermediate_dir "
                                    "no_sort prefix aligner")
    args = Args(work_bam_a, work_bam_b, out_dir, out_dir, True, "", aligner)
    disambiguate_main(args)
示例#4
0
def run(items, config):
    """Run third party disambiguation script, resolving into single set of calls.
    """
    assert len(items) == 2, "Can only resolve two organism disambiguation"
    # check aligner, handling tophat/tophat2 distinctions
    aligner = config["algorithm"].get("aligner")
    aligner = "tophat" if aligner.startswith("tophat") else aligner
    assert aligner in [
        "bwa", "hisat2", "tophat", "star"
    ], "Disambiguation only supported for bwa, hisat2, star and tophat alignments."
    if items[0]["disambiguate"].get("base"):
        data_a, data_b = items
    else:
        data_b, data_a = items
    work_bam_a = bam.sort(data_a["work_bam"], config, "queryname")
    work_bam_b = bam.sort(data_b["work_bam"], config, "queryname")
    if data_a.get("align_split"):
        base_dir = utils.safe_makedir(
            os.path.normpath(
                os.path.join(os.path.dirname(work_bam_a), os.pardir, os.pardir,
                             "disambiguate_%s" % aligner)))
        out_dir = os.path.join(
            base_dir,
            "_".join([str(x) for x in data_a["align_split"].split("-")]))
    else:
        out_dir = os.path.normpath(
            os.path.join(os.path.dirname(work_bam_a), os.pardir,
                         "disambiguate_%s" % aligner))
    base_name = os.path.join(out_dir,
                             os.path.splitext(os.path.basename(work_bam_a))[0])
    summary_file = "%s_summary.txt" % base_name
    if not utils.file_exists(summary_file):
        with file_transaction(items[0], out_dir) as tx_out_dir:
            Args = collections.namedtuple(
                "Args", "A B output_dir intermediate_dir "
                "no_sort prefix aligner")
            args = Args(work_bam_a, work_bam_b, tx_out_dir, tx_out_dir, True,
                        "", aligner)
            disambiguate_main(args)
    data_a["disambiguate"] = \
      {data_b["genome_build"]: bam.sort("%s.disambiguatedSpeciesB.bam" % base_name, config),
       "%s-ambiguous" % data_a["genome_build"]: bam.sort("%s.ambiguousSpeciesA.bam" % base_name, config),
       "%s-ambiguous" % data_b["genome_build"]: bam.sort("%s.ambiguousSpeciesB.bam" % base_name, config),
       "summary": summary_file}
    data_a["work_bam"] = bam.sort("%s.disambiguatedSpeciesA.bam" % base_name,
                                  config)
    return [[data_a]]