Exemplo n.º 1
0
def _combine_multiple_svcallers(samples):
    """Combine structural variant calls from multiple callers per BAM file.

    Args:
        samples: Iterable of one-element sequences. Element 0 of each is a
            sample dictionary providing "align_bam", a
            ["config"]["algorithm"]["svcaller_active"] entry and, when a
            caller produced output, an "sv" value.

    Returns:
        A list of single-item lists, one per distinct "align_bam" in
        first-seen order. Each wraps the first sample of its group, updated
        in place: "sv" holds the combined calls after ``ensemble.summarize``
        and ``validate.evaluate``, and "svcaller_active" is removed from the
        algorithm configuration. Groups where no sample carries "sv" are
        passed through without an "sv" key.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x[0]["align_bam"], []).append(x[0])

    # Build a real list: a lazy ``filter`` object (Python 3) would be consumed
    # by the first group and silently appear empty for every later group.
    # ``OrderedDict.fromkeys`` de-duplicates while keeping first-seen order.
    highdepths = [
        region for region in collections.OrderedDict.fromkeys(
            tz.get_in(["config", "algorithm", "highdepth_regions"], x[0])
            for x in samples)
        if region is not None
    ]

    def orig_svcaller_order(x):
        # Rank a sample by where its active caller sits in its caller list.
        return _get_svcallers(x).index(
            x["config"]["algorithm"]["svcaller_active"])

    out = []
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x),
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        # ``reduce`` without an initial value raises TypeError on an empty
        # sequence, so only combine when at least one caller produced output.
        if sorted_svcalls:
            final_calls = reduce(operator.add,
                                 [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final["sv"] = validate.evaluate(final, final_calls)
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
Exemplo n.º 2
0
def _combine_multiple_svcallers(samples):
    """Combine structural variant calls from multiple callers per BAM file.

    Args:
        samples: Iterable of one-element sequences. Element 0 of each is a
            sample dictionary providing "align_bam", a
            ["config"]["algorithm"]["svcaller_active"] entry and, when a
            caller produced output, an "sv" value.

    Returns:
        A list of single-item lists, one per distinct "align_bam" in
        first-seen order. Each wraps the first sample of its group, updated
        in place: "sv" holds the combined calls after ``ensemble.summarize``
        and "svcaller_active" is removed from the algorithm configuration.
        Groups where no sample carries "sv" are passed through without an
        "sv" key.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x[0]["align_bam"], []).append(x[0])

    def orig_svcaller_order(x):
        # Rank a sample by where its active caller sits in its caller list.
        return _get_svcallers(x).index(
            x["config"]["algorithm"]["svcaller_active"])

    out = []
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x),
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        # ``reduce`` without an initial value raises TypeError on an empty
        # sequence, so only combine when at least one caller produced output.
        if sorted_svcalls:
            final_calls = reduce(operator.add,
                                 [x["sv"] for x in sorted_svcalls])
            final["sv"] = ensemble.summarize(final_calls, final)
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
Exemplo n.º 3
0
def finalize_sv(samples, config, initial_only=False):
    """Merge per-caller structural variant output into one 'sv' key per BAM.

    Samples sharing an "align_bam" are collapsed onto the first sample of the
    group, whose "sv" entry receives the calls of every sample in caller
    order. Unless ``initial_only`` is set, the merged calls are additionally
    passed through the matching ``_ENSEMBLE_CALLERS``, ``ensemble.summarize``
    and ``validate.evaluate``. Batches containing a sample with "svplots"
    among its enabled tools go through ``plot.by_regions``. Every sample is
    returned once, wrapped in a list, from its lead (first) batch.
    """
    def _caller_rank(data):
        # Position of the sample's active caller within its caller list.
        callers = _get_svcallers(data)
        return callers.index(data["config"]["algorithm"]["svcaller_active"])

    bam_groups = collections.OrderedDict()
    for sample in samples:
        bam_groups.setdefault(sample["align_bam"], []).append(sample)

    batch_groups = collections.OrderedDict()
    lead_by_sample = {}
    for group in bam_groups.values():
        with_calls = [s for s in group if "sv" in s]
        with_calls.sort(key=_caller_rank)
        merged_into = group[0]
        if with_calls:
            calls = reduce(operator.add, [s["sv"] for s in with_calls])
            if not initial_only:
                for caller in _get_svcallers(merged_into):
                    if caller in _ENSEMBLE_CALLERS:
                        calls = _ENSEMBLE_CALLERS[caller](calls, merged_into)
                calls = ensemble.summarize(calls, merged_into, group)
                calls = validate.evaluate(merged_into, calls)
            merged_into["sv"] = calls
        del merged_into["config"]["algorithm"]["svcaller_active"]

        # A sample may sit in several batches; the first one is its lead.
        batch_value = dd.get_batch(merged_into) or dd.get_sample_name(merged_into)
        if isinstance(batch_value, (list, tuple)):
            batch_names = batch_value
        else:
            batch_names = [batch_value]
        lead_by_sample[dd.get_sample_name(merged_into)] = batch_names[0]
        for name in batch_names:
            batch_groups.setdefault(name, []).append(merged_into)

    out = []
    for batch_name, members in batch_groups.items():
        wants_plots = any("svplots" in dd.get_tools_on(m) for m in members)
        to_report = plot.by_regions(members) if wants_plots else members
        # Emit each sample only from its lead batch to avoid duplicates.
        out.extend([m] for m in to_report
                   if lead_by_sample[dd.get_sample_name(m)] == batch_name)
    return out
Exemplo n.º 4
0
def finalize_sv(samples, config, initial_only=False):
    """Collapse results from several sv callers into one ordered 'sv' key.

    Works per "align_bam": the first sample of each BAM group receives the
    calls of every sample in that group, ordered by caller position. With
    ``initial_only`` left False the calls also run through the configured
    ``_ENSEMBLE_CALLERS``, ``ensemble.summarize`` and ``validate.evaluate``.
    Plotting via ``plot.by_regions`` happens for batches where any sample
    lists "svplots" among its enabled tools. The result holds one
    single-item list per sample, taken from the sample's first batch.
    """
    def caller_index(item):
        # Sort key: index of the active caller in the sample's caller list.
        return _get_svcallers(item).index(item["config"]["algorithm"]["svcaller_active"])

    def run_ensemble(calls, lead, group):
        # Apply ensemble callers in configured order, then summarize/validate.
        for name in _get_svcallers(lead):
            if name not in _ENSEMBLE_CALLERS:
                continue
            calls = _ENSEMBLE_CALLERS[name](calls, lead)
        calls = ensemble.summarize(calls, lead, group)
        return validate.evaluate(lead, calls)

    per_bam = collections.OrderedDict()
    for entry in samples:
        bam = entry["align_bam"]
        if bam not in per_bam:
            per_bam[bam] = []
        per_bam[bam].append(entry)

    per_batch = collections.OrderedDict()
    lead_batch_of = {}
    for group in per_bam.values():
        ordered = sorted((item for item in group if "sv" in item), key=caller_index)
        lead = group[0]
        if ordered:
            combined = reduce(operator.add, [item["sv"] for item in ordered])
            if not initial_only:
                combined = run_ensemble(combined, lead, group)
            lead["sv"] = combined
        del lead["config"]["algorithm"]["svcaller_active"]

        assigned = dd.get_batch(lead) or dd.get_sample_name(lead)
        if not isinstance(assigned, (list, tuple)):
            assigned = [assigned]
        # The first listed batch is the one the sample gets reported from.
        lead_batch_of[dd.get_sample_name(lead)] = assigned[0]
        for batch_id in assigned:
            if batch_id not in per_batch:
                per_batch[batch_id] = []
            per_batch[batch_id].append(lead)

    out = []
    for batch_id, batch_items in per_batch.items():
        if any("svplots" in dd.get_tools_on(item) for item in batch_items):
            batch_items = plot.by_regions(batch_items)
        out += [[item] for item in batch_items
                if lead_batch_of[dd.get_sample_name(item)] == batch_id]
    return out
Exemplo n.º 5
0
def finalize_sv(samples, config):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.

    Args:
        samples: Iterable of sample dictionaries providing "align_bam", a
            ["config"]["algorithm"]["svcaller_active"] entry and, when a
            caller produced output, an "sv" value.
        config: Not used by this function; kept for the caller's signature.

    Returns:
        A list of single-item lists. Samples sharing an "align_bam" are
        merged onto the first sample of the group, which is updated in place.
        Each merged sample is emitted once, from the first batch it belongs
        to, after its batch has gone through ``plot.by_regions``.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x["align_bam"], []).append(x)

    # Build a real list: a lazy ``filter`` object (Python 3) would be consumed
    # by the first group and silently appear empty for every later group.
    # ``OrderedDict.fromkeys`` de-duplicates while keeping first-seen order.
    highdepths = [
        region for region in collections.OrderedDict.fromkeys(
            tz.get_in(["config", "algorithm", "highdepth_regions"], x) for x in samples)
        if region is not None
    ]

    def orig_svcaller_order(x):
        # Rank a sample by where its active caller sits in its caller list.
        return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x), key=orig_svcaller_order)
        final = grouped_calls[0]
        if sorted_svcalls:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final["sv"] = validate.evaluate(final, final_calls)
        del final["config"]["algorithm"]["svcaller_active"]
        # Fall back to the sample name when no batch is configured.
        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        # The first batch is the one the sample is reported from below.
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch_name in batches:
            by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, items in by_batch.items():
        for data in plot.by_regions(items):
            # Skip repeats of a sample that belongs to several batches.
            if lead_batches[dd.get_sample_name(data)] == batch_name:
                out.append([data])
    return out
Exemplo n.º 6
0
def _combine_multiple_svcallers(samples):
    """Combine structural variant calls from multiple callers per BAM file.

    Args:
        samples: Iterable of one-element sequences. Element 0 of each is a
            sample dictionary providing "align_bam", a
            ["config"]["algorithm"]["svcaller_active"] entry and, when a
            caller produced output, an "sv" value.

    Returns:
        A list of single-item lists, one per distinct "align_bam" in
        first-seen order. Each wraps the first sample of its group, updated
        in place: "sv" holds the combined calls after ``ensemble.summarize``
        and ``validate.evaluate``, and "svcaller_active" is removed from the
        algorithm configuration. Groups where no sample carries "sv" are
        passed through without an "sv" key.
    """
    def orig_svcaller_order(x):
        # Rank a sample by where its active caller sits in its caller list.
        return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

    by_bam = collections.OrderedDict()
    for x in samples:
        data = x[0]
        by_bam.setdefault(data["align_bam"], []).append(data)

    out = []
    for grouped_calls in by_bam.values():
        final = grouped_calls[0]
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x),
                                key=orig_svcaller_order)
        # ``reduce`` without an initial value raises TypeError on an empty
        # sequence, so only combine when at least one caller produced output.
        if sorted_svcalls:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final)
            final["sv"] = validate.evaluate(final, final_calls)
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out