Exemplo n.º 1
0
def finalize_sv(samples, config):
    """Merge the per-caller structural variant results of each sample into one 'sv' entry.

    Inputs sharing the same ``align_bam`` and batch assignment are handled as
    one group. The ``sv`` values found in the group are joined with
    ``operator.add``, ordered by the position of each item's ``svcaller``
    within its ``svcaller_orig`` list. The first item of the group is modified
    in place: it receives the merged ``sv`` value and its ``svcaller_orig``
    entry is moved back to ``svcaller``.

    ``config`` is accepted but not used in this function.

    Returns a list of single-item lists. An item assigned to several batches
    is emitted only while processing the first of those batches.
    """
    def _caller_rank(item):
        # Position of the caller that produced this item in the original caller list.
        configured = tz.get_in(["config", "algorithm", "svcaller_orig"], item)
        active = tz.get_in(["config", "algorithm", "svcaller"], item)
        return configured.index(active)

    calls_by_input = collections.OrderedDict()
    for sample in samples:
        batch_names = dd.get_batch(sample) or [dd.get_sample_name(sample)]
        group_key = (sample["align_bam"], tuple(batch_names))
        calls_by_input.setdefault(group_key, []).append(sample)

    finals_by_batch = collections.OrderedDict()
    lead_batches = {}
    for group in calls_by_input.values():
        with_sv = [item for item in group if "sv" in item]
        with_sv.sort(key=_caller_rank)
        final = group[0]
        if with_sv:
            final["sv"] = reduce(operator.add, (item["sv"] for item in with_sv))
        # Restore the full caller list now that ordering no longer needs it.
        svcaller_orig = final["config"]["algorithm"].pop("svcaller_orig")
        final["config"]["algorithm"]["svcaller"] = svcaller_orig
        assigned = dd.get_batch(final) or dd.get_sample_name(final)
        if not isinstance(assigned, (list, tuple)):
            assigned = [assigned]
        if len(assigned) > 1:
            # Remember which batch should report a sample present in several batches.
            lead_key = (dd.get_sample_name(final),
                        dd.get_phenotype(final) == "germline")
            lead_batches[lead_key] = assigned[0]
        for batch_name in assigned:
            finals_by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, finals in finals_by_batch.items():
        if any("svplots" in dd.get_tools_on(d) for d in finals):
            finals = plot.by_regions(finals)
        for data in finals:
            lead_key = (dd.get_sample_name(data),
                        dd.get_phenotype(data) == "germline")
            # No recorded lead (None) means the item belongs to a single batch.
            if lead_batches.get(lead_key) in (batch_name, None):
                out.append([data])
    return out
Exemplo n.º 2
0
def finalize_sv(samples, config, initial_only=False):
    """Collapse the structural variant calls of each BAM into one ordered 'sv' value.

    Items are grouped by ``align_bam``. Within a group, the ``sv`` values are
    joined with ``operator.add``, ordered by where each item's
    ``svcaller_active`` sits in the list returned by ``_get_svcallers``.
    Unless ``initial_only`` is true, the joined calls are then passed through
    every configured caller found in ``_ENSEMBLE_CALLERS``, followed by
    ``ensemble.summarize`` and ``validate.evaluate``.

    The first item of each group is modified in place: it receives the final
    ``sv`` value and loses its ``svcaller_active`` entry. ``config`` is
    accepted but not used in this function.

    Returns a list of single-item lists; each item is emitted only for the
    first batch it is assigned to.
    """
    def caller_position(item):
        callers = _get_svcallers(item)
        return callers.index(item["config"]["algorithm"]["svcaller_active"])

    per_bam = collections.OrderedDict()
    for sample in samples:
        per_bam.setdefault(sample["align_bam"], []).append(sample)

    per_batch = collections.OrderedDict()
    lead_batches = {}
    for group in per_bam.values():
        called = sorted((item for item in group if "sv" in item), key=caller_position)
        final = group[0]
        if called:
            combined = reduce(operator.add, [item["sv"] for item in called])
            if not initial_only:
                for caller in _get_svcallers(final):
                    if caller in _ENSEMBLE_CALLERS:
                        combined = _ENSEMBLE_CALLERS[caller](combined, final)
                combined = ensemble.summarize(combined, final, group)
                combined = validate.evaluate(final, combined)
            final["sv"] = combined
        del final["config"]["algorithm"]["svcaller_active"]
        assigned = dd.get_batch(final) or dd.get_sample_name(final)
        if not isinstance(assigned, (list, tuple)):
            assigned = [assigned]
        # The first batch is the one that reports this sample in the output.
        lead_batches[dd.get_sample_name(final)] = assigned[0]
        for batch_name in assigned:
            per_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, finals in per_batch.items():
        wants_plots = any("svplots" in dd.get_tools_on(d) for d in finals)
        to_emit = plot.by_regions(finals) if wants_plots else finals
        out.extend([data] for data in to_emit
                   if lead_batches[dd.get_sample_name(data)] == batch_name)
    return out
Exemplo n.º 3
0
def finalize_sv(samples, config, initial_only=False):
    """Produce one ordered 'sv' value per aligned BAM from multiple caller outputs.

    Steps, per group of items sharing an ``align_bam``:

    - order the items carrying an ``sv`` key by the index of their
      ``svcaller_active`` in ``_get_svcallers`` and join their ``sv`` values
      with ``operator.add``;
    - when ``initial_only`` is false, run each configured caller registered in
      ``_ENSEMBLE_CALLERS``, then ``ensemble.summarize`` and
      ``validate.evaluate`` on the joined calls;
    - store the result on the first item of the group (modified in place) and
      delete that item's ``svcaller_active`` entry.

    Afterwards items are collected per batch, passed through
    ``plot.by_regions`` when any item of the batch has ``svplots`` in its
    enabled tools, and returned as a list of single-item lists. An item is
    emitted only for the first batch it is assigned to. ``config`` is accepted
    but not used in this function.
    """
    def _svcaller_rank(data):
        available = _get_svcallers(data)
        return available.index(data["config"]["algorithm"]["svcaller_active"])

    bam_groups = collections.OrderedDict()
    for data in samples:
        bam = data["align_bam"]
        if bam not in bam_groups:
            bam_groups[bam] = []
        bam_groups[bam].append(data)

    batch_groups = collections.OrderedDict()
    lead_batches = {}
    for members in bam_groups.values():
        ordered = [m for m in members if "sv" in m]
        ordered.sort(key=_svcaller_rank)
        final = members[0]
        if len(ordered) > 0:
            merged = reduce(operator.add, (m["sv"] for m in ordered))
            if not initial_only:
                for name in _get_svcallers(final):
                    if name not in _ENSEMBLE_CALLERS:
                        continue
                    merged = _ENSEMBLE_CALLERS[name](merged, final)
                merged = ensemble.summarize(merged, final, members)
                merged = validate.evaluate(final, merged)
            final["sv"] = merged
        del final["config"]["algorithm"]["svcaller_active"]
        batch_info = dd.get_batch(final) or dd.get_sample_name(final)
        if isinstance(batch_info, (list, tuple)):
            batch_names = batch_info
        else:
            batch_names = [batch_info]
        lead_batches[dd.get_sample_name(final)] = batch_names[0]
        for batch_name in batch_names:
            if batch_name not in batch_groups:
                batch_groups[batch_name] = []
            batch_groups[batch_name].append(final)

    out = []
    for batch_name, members in batch_groups.items():
        if any("svplots" in dd.get_tools_on(m) for m in members):
            members = plot.by_regions(members)
        for data in members:
            # Skip samples whose reporting batch is a different one.
            if lead_batches[dd.get_sample_name(data)] != batch_name:
                continue
            out.append([data])
    return out
Exemplo n.º 4
0
def finalize_sv(samples, config):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.

    Items are grouped by ``align_bam``. For each group, the ``sv`` values are
    joined with ``operator.add`` in the order given by the index of each
    item's ``svcaller_active`` in ``_get_svcallers``, then passed to
    ``ensemble.summarize`` (together with the distinct non-None
    ``highdepth_regions`` values of all samples) and ``validate.evaluate``.
    The first item of each group is modified in place: it receives the final
    ``sv`` value and its ``svcaller_active`` entry is deleted.

    ``config`` is accepted but not used in this function.

    Returns a list of single-item lists, taken from ``plot.by_regions`` for
    each batch; an item is emitted only for the first batch it is assigned to.
    """
    def _svcaller_order(data):
        return _get_svcallers(data).index(data["config"]["algorithm"]["svcaller_active"])

    by_bam = collections.OrderedDict()
    for data in samples:
        by_bam.setdefault(data["align_bam"], []).append(data)

    # Build a real list rather than a filter() object: this collection is
    # handed to ensemble.summarize once per BAM group, and on Python 3 a lazy
    # filter iterator would be exhausted after its first consumer, leaving
    # later groups with no high depth regions.
    highdepths = [
        region
        for region in set(tz.get_in(["config", "algorithm", "highdepth_regions"], x)
                          for x in samples)
        if region is not None
    ]

    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x), key=_svcaller_order)
        final = grouped_calls[0]
        if sorted_svcalls:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        # The first batch is the one that reports this sample in the output.
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch_name in batches:
            by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, items in by_batch.items():
        for data in plot.by_regions(items):
            if lead_batches[dd.get_sample_name(data)] == batch_name:
                out.append([data])
    return out
Exemplo n.º 5
0
def finalize_sv(samples, config):
    """Join the outputs of several sv callers into one ordered 'sv' value per BAM.

    Items are grouped by ``align_bam``. Within a group, the ``sv`` values are
    joined with ``operator.add``, ordered by the index of each item's
    ``svcaller_active`` in the list returned by ``_get_svcallers``. The first
    item of each group is modified in place: it receives the joined ``sv``
    value and its ``svcaller_active`` entry is deleted.

    Batches in which any item has ``svplots`` among its enabled tools are
    passed through ``plot.by_regions``. ``config`` is accepted but not used in
    this function.

    Returns a list of single-item lists; an item is emitted only for the first
    batch it is assigned to.
    """
    def _active_caller_index(item):
        callers = _get_svcallers(item)
        return callers.index(item["config"]["algorithm"]["svcaller_active"])

    bam_to_items = collections.OrderedDict()
    for item in samples:
        bam_to_items.setdefault(item["align_bam"], []).append(item)

    batch_to_finals = collections.OrderedDict()
    lead_batches = {}
    for group in bam_to_items.values():
        with_calls = [item for item in group if "sv" in item]
        with_calls.sort(key=_active_caller_index)
        final = group[0]
        if with_calls:
            final["sv"] = reduce(operator.add, (item["sv"] for item in with_calls))
        del final["config"]["algorithm"]["svcaller_active"]
        assigned = dd.get_batch(final) or dd.get_sample_name(final)
        if not isinstance(assigned, (list, tuple)):
            assigned = [assigned]
        # The first batch is the one that reports this sample in the output.
        lead_batches[dd.get_sample_name(final)] = assigned[0]
        for batch_name in assigned:
            batch_to_finals.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, finals in batch_to_finals.items():
        needs_plots = any("svplots" in dd.get_tools_on(d) for d in finals)
        emitted = plot.by_regions(finals) if needs_plots else finals
        out.extend([data] for data in emitted
                   if lead_batches[dd.get_sample_name(data)] == batch_name)
    return out
Exemplo n.º 6
0
def finalize_sv(samples, config):
    """Fold the results of several sv callers into one ordered 'sv' value per input.

    Items are grouped by the pair of ``align_bam`` and batch assignment.
    Within a group, the ``sv`` values are joined with ``operator.add``,
    ordered by the position of each item's ``svcaller`` in its
    ``svcaller_orig`` list. The first item of each group is modified in place:
    it receives the joined ``sv`` value and its ``svcaller_orig`` entry is
    moved back to ``svcaller``.

    Batches in which any item has ``svplots`` among its enabled tools are
    passed through ``plot.by_regions``. ``config`` is accepted but not used in
    this function.

    Returns a list of single-item lists; an item is emitted only for the batch
    recorded as the lead batch for its sample name.
    """
    def svcaller_rank(data):
        configured = tz.get_in(["config", "algorithm", "svcaller_orig"], data)
        current = tz.get_in(["config", "algorithm", "svcaller"], data)
        return configured.index(current)

    groups = collections.OrderedDict()
    for data in samples:
        batch_names = dd.get_batch(data) or [dd.get_sample_name(data)]
        group_key = (data["align_bam"], tuple(batch_names))
        groups.setdefault(group_key, []).append(data)

    finals_by_batch = collections.OrderedDict()
    lead_batches = {}
    for members in groups.values():
        callsets = sorted((m for m in members if "sv" in m), key=svcaller_rank)
        final = members[0]
        if callsets:
            final["sv"] = reduce(operator.add, [m["sv"] for m in callsets])
        # Restore the full caller list now that ordering no longer needs it.
        svcaller_orig = final["config"]["algorithm"].pop("svcaller_orig")
        final["config"]["algorithm"]["svcaller"] = svcaller_orig
        assigned = dd.get_batch(final) or dd.get_sample_name(final)
        if not isinstance(assigned, (list, tuple)):
            assigned = [assigned]
        lead_batches[dd.get_sample_name(final)] = assigned[0]
        for batch_name in assigned:
            finals_by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, finals in finals_by_batch.items():
        if any("svplots" in dd.get_tools_on(d) for d in finals):
            to_emit = plot.by_regions(finals)
        else:
            to_emit = finals
        for data in to_emit:
            if lead_batches[dd.get_sample_name(data)] != batch_name:
                continue
            out.append([data])
    return out