Exemplo n.º 1
0
def read_to_tr_obj(read: TRRead, max_slope_dev: float) -> Tuple[List, List]:
    """Create objects for tandem repeats and self alignments.

    Args:
        read: Read whose `trs` (tandem repeats) and `self_alns` (self
            alignments) are drawn.
        max_slope_dev: Forwarded unchanged to `find_inner_alns`.

    Returns:
        A `(traces, shapes)` pair of lists. Both are empty when the read has
        no tandem repeats. When the read has tandem repeats but no self
        alignments, only the tandem-repeat lines are returned.
    """
    traces, shapes = [], []
    if read.trs is None or len(read.trs) == 0:
        return traces, shapes
    # TRs on diagonal
    shapes += [
        make_line(tr.start, tr.start, tr.end, tr.end, width=3, col="black")
        for tr in read.trs
    ]
    self_alns = read.self_alns
    if self_alns is None or len(self_alns) == 0:
        # `zip(*[])` yields nothing, so the five-way unpacking below would
        # raise ValueError; there is nothing more to draw anyway.
        return traces, shapes
    # Start/end positions of each self alignment
    ab, ae, bb, be, texts = zip(*[(aln.ab, aln.ae, aln.bb, aln.be,
                                   f"({aln.ab}, {aln.bb})<br>"
                                   f"distance={aln.distance}")
                                  for aln in self_alns])
    traces += [
        make_scatter(ab, bb, text=texts, name="start"),
        make_scatter(ae, be, name="end")
    ]
    # Self alignments as lines
    inner_alns = find_inner_alns(read, max_slope_dev)
    for aln in self_alns:
        # Look the alignment up once; it decides both width and colour.
        is_inner = aln in inner_alns
        if is_inner:
            col = "purple"
        elif 0.95 <= aln.slope <= 1.05:
            # Slope within 5% of 1
            col = "black"
        else:
            col = "yellow"
        shapes.append(
            make_line(aln.ab,
                      aln.bb,
                      aln.ae,
                      aln.be,
                      width=3 if is_inner else 1,
                      col=col,
                      layer="below"))
    return traces, shapes
Exemplo n.º 2
0
def reads_to_axis_obj(a_read: TRRead, b_read: TRRead) -> Tuple[List, List]:
    """Create objects on each axis (= read).

    `a_read` is laid out along the first coordinate and `b_read` along the
    second. Each axis is drawn slightly outside the plot origin, offset by 1%
    of the other read's length. It consists of a grey line spanning the whole
    read, one coloured segment per unit, and one hover marker at the midpoint
    of each unit.

    Args:
        a_read: Read drawn along the first coordinate.
        b_read: Read drawn along the second coordinate.

    Returns:
        A `(traces, shapes)` pair of lists. `traces` always holds two scatter
        traces (for `a_read`, then `b_read`). A read whose `units` is None or
        empty contributes only its grey axis line and an empty scatter trace.
    """
    def units_of(read: TRRead) -> List:
        # `units` may be unset on a read; treat that like "no units".
        return [] if read.units is None else read.units

    def read_to_texts_cols(read: TRRead) -> Tuple[Tuple, Tuple]:
        units = units_of(read)
        # Alignment details are shown only if *every* unit has a
        # representative alignment; otherwise all units show '-'.
        has_aln = all(unit.repr_aln is not None for unit in units)
        hov_texts, cols = [], []
        # Built with a loop rather than `zip(*[...])`, which cannot be
        # unpacked into two names when there are no units.
        for i, unit in enumerate(units):
            hov_texts.append(
                f"Unit {i} (repr={unit.repr_id}; strand={unit.repr_aln.strand if has_aln else '-'})<br>"
                f"[{unit.start}:{unit.end}] ({unit.length} bp)<br>"
                f"{unit.repr_aln.diff * 100 if has_aln else '-':{'.2f' if has_aln else ''}}% diff from repr unit")
            cols.append(ID_TO_COL[unit.repr_id])
        return tuple(hov_texts), tuple(cols)

    a_units, b_units = units_of(a_read), units_of(b_read)
    # Fixed coordinate of each axis: just outside the origin.
    a_axis_pos = -b_read.length * 0.01   # where a_read's axis sits
    b_axis_pos = -a_read.length * 0.01   # where b_read's axis sits

    traces, shapes = [], []
    # Whole-read axis lines
    shapes += [
        make_line(0, a_axis_pos, a_read.length, a_axis_pos,
                  width=3, col="grey"),
        make_line(b_axis_pos, 0, b_axis_pos, b_read.length,
                  width=3, col="grey")
    ]
    # Units of each read, coloured by representative unit ID
    shapes += [
        make_line(unit.start, a_axis_pos, unit.end, a_axis_pos,
                  width=5, col=ID_TO_COL[unit.repr_id])
        for unit in a_units
    ]
    shapes += [
        make_line(b_axis_pos, unit.start, b_axis_pos, unit.end,
                  width=5, col=ID_TO_COL[unit.repr_id])
        for unit in b_units
    ]
    # Hover markers at unit midpoints
    a_hov_texts, a_cols = read_to_texts_cols(a_read)
    b_hov_texts, b_cols = read_to_texts_cols(b_read)
    traces += [
        make_scatter([(unit.start + unit.end) / 2 for unit in a_units],
                     [a_axis_pos for _ in a_units],
                     text=a_hov_texts,
                     col=a_cols),
        make_scatter([b_axis_pos for _ in b_units],
                     [(unit.start + unit.end) / 2 for unit in b_units],
                     text=b_hov_texts,
                     col=b_cols)
    ]
    return traces, shapes
Exemplo n.º 3
0
def plot_overlaps_for_read(read_id: int,
                           overlaps: List[Overlap],
                           min_ovlp_len: int = 10000):
    """Plot overlap length against percent diff for one read's overlaps.

    Overlaps are oriented so that `read_id` is always the "a" read: an
    overlap whose `b_read_id` matches is replaced by `o.swap()`. Overlaps
    that do not involve the read are ignored. Each remaining overlap becomes
    one marker (x = `length`, y = `diff * 100`, coloured by `b_read_id`),
    and guide shapes are added for the thresholds noted inline.

    Args:
        read_id: ID of the read of interest.
        overlaps: Candidate overlaps; may involve other reads.
        min_ovlp_len: Overlap length drawn as the minimum-length threshold.

    Raises:
        AssertionError: If no overlap involves `read_id`.
    """
    oriented = []
    for o in overlaps:
        if o.a_read_id == read_id:
            oriented.append(o)
        elif o.b_read_id == read_id:
            oriented.append(o.swap())
    # Same falsy-dropping filter as before, in case a swap yields nothing.
    _overlaps = list(filter(None, oriented))
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # otherwise the `_overlaps[0]` below fails with an opaque IndexError.
    if len(_overlaps) == 0:
        raise AssertionError("No overlaps for the read")
    read_len = _overlaps[0].a_len
    lens = [o.length for o in _overlaps]
    diffs = [o.diff * 100 for o in _overlaps]
    # Extremes are reused by every guide shape below; compute them once.
    min_diff, max_diff = min(diffs), max(diffs)
    len_at_min_diff = lens[np.argmin(diffs)]
    show_plot(make_scatter(x=lens,
                           y=diffs,
                           col=[o.b_read_id for o in _overlaps],
                           marker_size=8),
              make_layout(
                  shapes=[make_line(min_ovlp_len,
                                    min_diff,
                                    min_ovlp_len,
                                    max_diff,
                                    col="red"),   # min ovlp len threshold
                          make_line(read_len,
                                    min_diff,
                                    read_len,
                                    max_diff,
                                    col="green"),   # read length (contained)
                          make_rect(min_ovlp_len,
                                    min_diff,
                                    read_len,
                                    max_diff,
                                    opacity=0.1),   # accepted ovlps
                          make_line(len_at_min_diff,
                                    min_diff,
                                    len_at_min_diff,
                                    max_diff),   # ovlp len of min diff
                          make_line(min(lens),
                                    min_diff,
                                    max(lens),
                                    min_diff)]))   # min ovlp diff
Exemplo n.º 4
0
def plot_self(read: TRRead, unit_dist_by: str, max_dist: Optional[float],
              max_slope_dev: float, plot_size: int):
    """Show a square self-vs-self plot of a read.

    The plot combines a grey diagonal spanning the read, the tandem-repeat and
    self-alignment objects from `read_to_tr_obj`, and the unit objects from
    `read_to_unit_obj`.

    Args:
        read: Read to plot against itself.
        unit_dist_by: Forwarded to `read_to_unit_obj`; the value "repr"
            additionally requires `read.repr_units` to be set.
        max_dist: Forwarded to `read_to_unit_obj`.
        max_slope_dev: Forwarded to `read_to_tr_obj`.
        plot_size: Used for both size arguments of `make_layout`.

    Raises:
        AssertionError: If `unit_dist_by` is "repr" but the read has no
            representative units.
    """
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if unit_dist_by == "repr" and read.repr_units is None:
        raise AssertionError("No representative units")
    read_shapes = [
        make_line(0, 0, read.length, read.length, width=2, col="grey")
    ]
    tr_traces, tr_shapes = read_to_tr_obj(read, max_slope_dev)
    unit_traces, unit_shapes = read_to_unit_obj(read, unit_dist_by, max_dist)
    traces = tr_traces + unit_traces
    shapes = read_shapes + tr_shapes + unit_shapes
    layout = make_layout(plot_size,
                         plot_size,
                         title=f"Read {read.id} (strand={read.strand})",
                         x_range=(0, read.length),
                         y_range=(0, read.length),
                         x_grid=False,
                         y_grid=False,
                         y_reversed=True,
                         margin=dict(l=10, r=10, t=50, b=10),
                         shapes=shapes)
    # Tie the y-axis scale to the x axis (presumably Plotly's `scaleanchor`,
    # which keeps the two axes at the same scale — confirm against
    # make_layout's return type).
    layout["yaxis"]["scaleanchor"] = "x"
    show_plot(traces, layout)
Exemplo n.º 5
0
def read_to_unit_obj(read: TRRead, unit_dist_by: str,
                     max_dist: Optional[float]) -> Tuple[List, List]:
    """Create objects for a heatmap of the distance matrix among units.

    Two groups of objects are produced:

    * On the diagonal: one line per unit plus two scatter traces anchored at
      the unit starts (a text label with the unit index, and a hover marker).
    * Off the diagonal: for every unit pair `i < j`, a filled rectangle
      coloured by their distance and a small hover marker at the cell centre.

    Args:
        read: Read whose units are drawn.
        unit_dist_by: "raw" computes distances among `read.unit_seqs`; any
            other value uses each unit's entry in `read.repr_units`.
        max_dist: Upper bound (in percent) used to clip and normalise the
            distances for colouring. None means the largest observed
            distance.

    Returns:
        A `(traces, shapes)` pair of lists. Both are empty when the read has
        no units. Only the diagonal objects are returned when the read has a
        single unit, since there are no unit pairs to compare.
    """
    traces, shapes = [], []
    units = read.units
    if units is None or len(units) == 0:
        return traces, shapes
    # On diagonal
    # Alignment details are shown only if *every* unit has a representative
    # alignment; otherwise all units show '-'.
    has_aln = all(unit.repr_aln is not None for unit in units)
    starts, lab_texts, hov_texts, diag_cols = zip(*[(
        unit.start, f" {i}",
        f"Unit {i} (repr={unit.repr_id}; strand={unit.repr_aln.strand if has_aln else '-'})<br>"
        f"[{unit.start}:{unit.end}] ({unit.length} bp)<br>"
        f"{unit.repr_aln.diff * 100 if has_aln else '-':{'.2f' if has_aln else ''}}% diff from repr unit",
        ID_TO_COL[unit.repr_id] if read.synchronized else "black"
    ) for i, unit in enumerate(units)])
    shapes += [
        make_line(unit.start,
                  unit.start,
                  unit.end,
                  unit.end,
                  width=5,
                  col=diag_cols[i]) for i, unit in enumerate(units)
    ]
    traces += [
        make_scatter(starts,
                     starts,
                     text=lab_texts,
                     mode="text",
                     text_pos="top right",
                     text_size=10,
                     text_col="black"),
        make_scatter(starts, starts, text=hov_texts, col=diag_cols)
    ]
    if len(units) < 2:
        # A single unit has no `i < j` pairs: the pair list below would be
        # empty and its five-way `zip(*...)` unpacking would raise
        # ValueError. There is no heatmap to draw, so stop here.
        return traces, shapes
    # Distance matrix as a heatmap
    c = ClusteringSeq(read.unit_seqs if unit_dist_by == "raw" else
                      [read.repr_units[unit.repr_id] for unit in units],
                      revcomp=False,
                      cyclic=not read.synchronized)
    c.calc_dist_mat()
    dist_mat = c.s_dist_mat * 100   # scaled by 100 to express as percent
    max_dist = np.max(dist_mat) if max_dist is None else max_dist
    # NOTE(review): a `max_dist` of 0 (e.g. all units identical) makes this
    # normalisation divide by zero — confirm how DIST_TO_COL should treat it.
    cell_cols = DIST_TO_COL(np.clip(dist_mat, 0, max_dist) / max_dist)
    x, y, texts, dists, cells = zip(*[(
        (unit_i.start + unit_i.end) / 2, (unit_j.start + unit_j.end) / 2,
        f"Unit {i} (repr={unit_i.repr_id}) vs Unit {j} (repr={unit_j.repr_id})<br>"
        f"{dist_mat[i][j]:.2f}% diff ({unit_dist_by})", dist_mat[i][j],
        make_rect(unit_i.start,
                  unit_j.start,
                  unit_i.end,
                  unit_j.end,
                  fill_col=cell_cols[i][j],
                  layer="below")) for i, unit_i in enumerate(units)
                                     for j, unit_j in enumerate(units)
                                     if i < j])
    traces += [
        make_scatter(x,
                     y,
                     text=texts,
                     col=dists,
                     marker_size=3,
                     col_range=(0, max_dist),
                     col_scale="Blues",
                     reverse_scale=True,
                     show_scale=True)
    ]
    shapes += cells
    return traces, shapes