def finalize_sv(samples, config):
    """Merge per-caller structural variant results into one ordered 'sv' key.

    Inputs sharing the same alignment BAM and batch assignment are collapsed
    onto the first item of their group. That item receives the concatenated
    'sv' values of the group, ordered by each item's caller position in the
    `svcaller_orig` configuration list, and gets its `svcaller` setting
    replaced by the `svcaller_orig` list.

    `config` is accepted but not read by this function.

    Returns a list of single-item lists. An item assigned to several batches
    is only emitted under its lead (first) batch.
    """
    def _caller_rank(data):
        # Index of the item's current caller within the original caller list.
        configured = tz.get_in(["config", "algorithm", "svcaller_orig"], data)
        current = tz.get_in(["config", "algorithm", "svcaller"], data)
        return configured.index(current)

    def _lead_key(data):
        # Lead batches are tracked per sample name and germline status.
        return (dd.get_sample_name(data), dd.get_phenotype(data) == "germline")

    # Group inputs by (BAM, batches), preserving first-seen order.
    groups = collections.OrderedDict()
    for sample in samples:
        sample_batch = dd.get_batch(sample) or [dd.get_sample_name(sample)]
        group_key = (sample["align_bam"], tuple(sample_batch))
        groups.setdefault(group_key, []).append(sample)

    per_batch = collections.OrderedDict()
    lead_batches = {}
    for group in groups.values():
        with_calls = [item for item in group if "sv" in item]
        with_calls.sort(key=_caller_rank)
        merged = group[0]
        if with_calls:
            merged["sv"] = reduce(operator.add,
                                  [item["sv"] for item in with_calls])
        # Put the original caller list back in place of the single caller.
        algorithm = merged["config"]["algorithm"]
        algorithm["svcaller"] = algorithm.pop("svcaller_orig")

        batch_names = dd.get_batch(merged) or dd.get_sample_name(merged)
        if not isinstance(batch_names, (list, tuple)):
            batch_names = [batch_names]
        if len(batch_names) > 1:
            lead_batches[_lead_key(merged)] = batch_names[0]
        for name in batch_names:
            per_batch.setdefault(name, []).append(merged)

    out = []
    for batch_name, batch_items in per_batch.items():
        if any("svplots" in dd.get_tools_on(item) for item in batch_items):
            emitted = plot.by_regions(batch_items)
        else:
            emitted = batch_items
        for item in emitted:
            # No recorded lead batch (None) means the item had a single batch.
            if lead_batches.get(_lead_key(item)) in (batch_name, None):
                out.append([item])
    return out
def finalize_sv(samples, config, initial_only=False):
    """Merge per-caller structural variant results into one ordered 'sv' key.

    Handles ensemble calling and plotting of results.

    Inputs sharing an alignment BAM are collapsed onto the first item of
    their group, which receives the concatenated 'sv' values ordered by the
    position of each item's `svcaller_active` within `_get_svcallers`.
    Unless `initial_only` is set, the combined calls are additionally passed
    through every configured caller present in `_ENSEMBLE_CALLERS`, then
    through `ensemble.summarize` and `validate.evaluate`.

    `config` is accepted but not read by this function.

    Returns a list of single-item lists; each item is emitted only under its
    lead (first) batch.
    """
    def _caller_rank(data):
        # Index of the active caller within the item's configured callers.
        active = data["config"]["algorithm"]["svcaller_active"]
        return _get_svcallers(data).index(active)

    # Group inputs by alignment BAM, preserving first-seen order.
    bam_groups = collections.OrderedDict()
    for sample in samples:
        bam_groups.setdefault(sample["align_bam"], []).append(sample)

    per_batch = collections.OrderedDict()
    lead_batches = {}
    for group in bam_groups.values():
        with_calls = sorted((item for item in group if "sv" in item),
                            key=_caller_rank)
        merged = group[0]
        if with_calls:
            calls = reduce(operator.add, [item["sv"] for item in with_calls])
            if not initial_only:
                for caller in _get_svcallers(merged):
                    if caller in _ENSEMBLE_CALLERS:
                        calls = _ENSEMBLE_CALLERS[caller](calls, merged)
                calls = ensemble.summarize(calls, merged, group)
                calls = validate.evaluate(merged, calls)
            merged["sv"] = calls
        # The per-caller marker is dropped once results are combined.
        del merged["config"]["algorithm"]["svcaller_active"]

        batch_names = dd.get_batch(merged) or dd.get_sample_name(merged)
        if not isinstance(batch_names, (list, tuple)):
            batch_names = [batch_names]
        lead_batches[dd.get_sample_name(merged)] = batch_names[0]
        for name in batch_names:
            per_batch.setdefault(name, []).append(merged)

    out = []
    for batch_name, batch_items in per_batch.items():
        wants_plots = any("svplots" in dd.get_tools_on(item)
                          for item in batch_items)
        emitted = plot.by_regions(batch_items) if wants_plots else batch_items
        for item in emitted:
            if lead_batches[dd.get_sample_name(item)] == batch_name:
                out.append([item])
    return out
def finalize_sv(samples, config):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.

    Inputs sharing an alignment BAM are collapsed onto the first item of
    their group. That item receives the concatenated 'sv' values, ordered by
    the position of each item's `svcaller_active` within `_get_svcallers`,
    after they have been passed through `ensemble.summarize` and
    `validate.evaluate`.

    `config` is accepted but not read by this function.

    Returns a list of single-item lists; each item is emitted only under its
    lead (first) batch.
    """
    def _caller_rank(data):
        # Index of the active caller within the item's configured callers.
        active = data["config"]["algorithm"]["svcaller_active"]
        return _get_svcallers(data).index(active)

    # Group inputs by alignment BAM, preserving first-seen order.
    by_bam = collections.OrderedDict()
    for sample in samples:
        by_bam.setdefault(sample["align_bam"], []).append(sample)

    # Unique, non-None highdepth_regions settings across all samples.
    # Built as a real list: a lazy `filter` object (Python 3) would be
    # exhausted after the first group and appear empty for every later
    # `ensemble.summarize` call.
    highdepths = [
        regions
        for regions in set(
            tz.get_in(["config", "algorithm", "highdepth_regions"], sample)
            for sample in samples)
        if regions is not None
    ]

    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted((x for x in grouped_calls if "sv" in x),
                                key=_caller_rank)
        final = grouped_calls[0]
        if sorted_svcalls:
            final_calls = reduce(operator.add,
                                 [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        # The per-caller marker is dropped once results are combined.
        del final["config"]["algorithm"]["svcaller_active"]

        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch_name in batches:
            by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, items in by_batch.items():
        for data in plot.by_regions(items):
            if lead_batches[dd.get_sample_name(data)] == batch_name:
                out.append([data])
    return out
def finalize_sv(samples, config):
    """Merge per-caller structural variant results into one ordered 'sv' key.

    Inputs sharing an alignment BAM are collapsed onto the first item of
    their group, which receives the concatenated 'sv' values ordered by the
    position of each item's `svcaller_active` within `_get_svcallers`. The
    `svcaller_active` setting is removed from the collapsed item.

    `config` is accepted but not read by this function.

    Returns a list of single-item lists; each item is emitted only under its
    lead (first) batch.
    """
    def _caller_rank(data):
        # Index of the active caller within the item's configured callers.
        active = data["config"]["algorithm"]["svcaller_active"]
        return _get_svcallers(data).index(active)

    # Group inputs by alignment BAM, preserving first-seen order.
    bam_groups = collections.OrderedDict()
    for sample in samples:
        bam_groups.setdefault(sample["align_bam"], []).append(sample)

    per_batch = collections.OrderedDict()
    lead_batches = {}
    for group in bam_groups.values():
        with_calls = [item for item in group if "sv" in item]
        with_calls.sort(key=_caller_rank)
        merged = group[0]
        if with_calls:
            merged["sv"] = reduce(operator.add,
                                  [item["sv"] for item in with_calls])
        del merged["config"]["algorithm"]["svcaller_active"]

        batch_names = dd.get_batch(merged) or dd.get_sample_name(merged)
        if not isinstance(batch_names, (list, tuple)):
            batch_names = [batch_names]
        lead_batches[dd.get_sample_name(merged)] = batch_names[0]
        for name in batch_names:
            per_batch.setdefault(name, []).append(merged)

    out = []
    for batch_name, batch_items in per_batch.items():
        wants_plots = any("svplots" in dd.get_tools_on(item)
                          for item in batch_items)
        emitted = plot.by_regions(batch_items) if wants_plots else batch_items
        out.extend([item] for item in emitted
                   if lead_batches[dd.get_sample_name(item)] == batch_name)
    return out
def finalize_sv(samples, config):
    """Merge per-caller structural variant results into one ordered 'sv' key.

    Inputs sharing the same alignment BAM and batch assignment are collapsed
    onto the first item of their group. That item receives the concatenated
    'sv' values of the group, ordered by each item's caller position in the
    `svcaller_orig` configuration list, and gets its `svcaller` setting
    replaced by the `svcaller_orig` list.

    `config` is accepted but not read by this function.

    Returns a list of single-item lists; each item is emitted only under its
    lead (first) batch.
    """
    def _caller_rank(data):
        # Index of the item's current caller within the original caller list.
        configured = tz.get_in(["config", "algorithm", "svcaller_orig"], data)
        current = tz.get_in(["config", "algorithm", "svcaller"], data)
        return configured.index(current)

    # Group inputs by (BAM, batches), preserving first-seen order.
    groups = collections.OrderedDict()
    for sample in samples:
        sample_batch = dd.get_batch(sample) or [dd.get_sample_name(sample)]
        group_key = (sample["align_bam"], tuple(sample_batch))
        groups.setdefault(group_key, []).append(sample)

    per_batch = collections.OrderedDict()
    lead_batches = {}
    for group in groups.values():
        with_calls = sorted((item for item in group if "sv" in item),
                            key=_caller_rank)
        merged = group[0]
        if with_calls:
            merged["sv"] = reduce(operator.add,
                                  [item["sv"] for item in with_calls])
        # Put the original caller list back in place of the single caller.
        algorithm = merged["config"]["algorithm"]
        algorithm["svcaller"] = algorithm.pop("svcaller_orig")

        batch_names = dd.get_batch(merged) or dd.get_sample_name(merged)
        if not isinstance(batch_names, (list, tuple)):
            batch_names = [batch_names]
        lead_batches[dd.get_sample_name(merged)] = batch_names[0]
        for name in batch_names:
            per_batch.setdefault(name, []).append(merged)

    out = []
    for batch_name, batch_items in per_batch.items():
        if any("svplots" in dd.get_tools_on(item) for item in batch_items):
            emitted = plot.by_regions(batch_items)
        else:
            emitted = batch_items
        out.extend([item] for item in emitted
                   if lead_batches[dd.get_sample_name(item)] == batch_name)
    return out