Exemplo n.º 1
0
def plot_profiles(
        seqlets_by_pattern,
        x,
        tracks,
        importance_scores=None,
        figsize=(20, 2),
        start_vec=None,
        width=20,
        legend=True,
        rotate_y=90,
        seq_height=1,
        ymax=None,  # determine y-max
        n_limit=35,
        n_bootstrap=None,
        flip_neg=False,
        patterns=None,
        fpath_template=None,
        only_idx=None,
        mkdir=False,
        rc_fn=lambda x: x[::-1, ::-1]):
    """Plot aggregated profile tracks, importance scores and the sequence
    logo for each pattern, one figure per pattern.

    Each figure stacks (top to bottom): one axis per entry of `tracks`,
    one seqlogo axis per entry of `importance_scores`, and a final seqlogo
    axis labelled "Inf. content".

    Args:
      seqlets_by_pattern: mapping pattern -> seqlets; each value is passed
        to `extract_signal` together with `x`, the tracks and the
        importance scores.
      x: one-hot-encoded sequence
      tracks: dictionary of profile tracks
      importance_scores: optional dictionary of importance scores
        (None is treated as an empty dictionary).
      figsize: figure size forwarded to `plt.subplots`.
      start_vec: start offset of the plotted window along axis 1 of the
        extracted arrays. Either a single value (used for all patterns) or
        a list with one value per pattern. If None, the offset is 0 and
        `width` is overridden (see note in the code).
      width: window width along axis 1; only used when `start_vec` is given.
      legend: if True, call `legend()` on every track axis.
      rotate_y: rotation of the y-axis labels.
      seq_height: height ratio of the seqlogo axes relative to the track
        axes (which have ratio 1).
      ymax: y-axis maximum for the track axes: a list with one value per
        track or a single value used for all tracks. If None, it is
        inferred per track as the maximum over all patterns of
        `mean + 2 * std` (or of `mean` when no std is available).
      n_limit: patterns with fewer than `n_limit` extracted seqlets are
        skipped.
      n_bootstrap: forwarded to `aggregate_profiles`.
      flip_neg: forwarded to `plot_stranded_profile`.
      patterns: patterns to plot, in order. Defaults to all keys of
        `seqlets_by_pattern`.
      fpath_template: if not None, a format string with optional
        `{pname}` (pattern with "/" replaced by ".") and `{pattern}`
        fields; each figure is saved to `<formatted>.png` and
        `<formatted>.pdf`.
      only_idx: if not None, plot the single seqlet at this index instead
        of the average; also forwarded to `aggregate_profiles`.
      mkdir: if True, create the output directory before saving.
      rc_fn: unused; kept for interface compatibility.

    Returns:
      list of the created matplotlib figures (skipped patterns excluded).
    """
    import matplotlib.pyplot as plt
    from concise.utils.plot import seqlogo

    if importance_scores is None:
        importance_scores = {}

    # Resolve the pattern list first: the start_vec setup below needs its
    # length (previously len(None) was evaluated when patterns was omitted).
    if patterns is None:
        patterns = list(seqlets_by_pattern)

    # Setup start-vec
    if start_vec is not None:
        if not isinstance(start_vec, list):
            start_vec = [start_vec] * len(patterns)
    else:
        start_vec = [0] * len(patterns)
        # NOTE(review): len(x) is the size of x's first axis and is used as
        # the upper bound of the axis-1 slice - presumably meant as
        # "no trimming"; confirm this is the intended width.
        width = len(x)

    # One axis-1 window per pattern, shared by every extraction below.
    windows = [slice(start, start + width) for start in start_vec]

    # aggregated profiles
    d_signal_patterns = {
        pattern: {
            k: aggregate_profiles(
                extract_signal(y, seqlets_by_pattern[pattern])[:, windows[ip]],
                n_bootstrap=n_bootstrap,
                only_idx=only_idx)
            for k, y in tracks.items()
        }
        for ip, pattern in enumerate(patterns)
    }

    # infer ymax (nothing to infer from when there are no patterns)
    if ymax is None and patterns:
        def take_max(profile_mean, profile_std):
            if profile_std is None:
                return profile_mean.max()
            # HACK - hard-coded 2
            return (profile_mean + 2 * profile_std).max()

        ymax = [
            max(take_max(*d_signal_patterns[pattern][k])
                for pattern in patterns)
            for k in tracks
        ]  # loop through all the tracks
    if not isinstance(ymax, list):
        ymax = [ymax] * len(tracks)

    n_tracks = len(tracks)
    n_rows = 1 + len(importance_scores) + n_tracks

    figs = []
    for ip, pattern in enumerate(tqdm(patterns)):
        seqlets = seqlets_by_pattern[pattern]

        # --------------
        # extract signal
        seqs = extract_signal(x, seqlets)[:, windows[ip]]

        # Skip small patterns before doing any further work on them
        # (in particular before indexing seqs with only_idx).
        n = len(seqs)
        if n < n_limit:
            continue

        ext_importance_scores = {
            s: extract_signal(imp, seqlets)[:, windows[ip]]
            for s, imp in importance_scores.items()
        }
        d_signal = d_signal_patterns[pattern]
        # --------------
        if only_idx is None:
            sequence = ic_scale(seqs.mean(axis=0))
        else:
            sequence = seqs[only_idx]

        # squeeze=False keeps `ax` indexable even for a single-row figure
        fig, ax = plt.subplots(n_rows,
                               1,
                               sharex=True,
                               figsize=figsize,
                               squeeze=False,
                               gridspec_kw={
                                   'height_ratios': [1] * n_tracks +
                                   [seq_height] * (1 + len(importance_scores))
                               })
        ax = ax[:, 0]

        # signal
        ax[0].set_title(f"{pattern} ({n})")
        # dedicated index so the outer pattern index is not clobbered
        for it, (k, (signal_mean, signal_std)) in enumerate(d_signal.items()):
            plot_stranded_profile(signal_mean,
                                  ax=ax[it],
                                  ymax=ymax[it],
                                  profile_std=signal_std,
                                  flip_neg=flip_neg)
            simple_yaxis_format(ax[it])
            strip_axis(ax[it])
            ax[it].set_ylabel(f"{k}", rotation=rotate_y, ha='right', labelpad=5)

            if legend:
                ax[it].legend()

        # -----------
        # importance scores (seqlogo)
        # -----------
        # average the importance scores (or pick the requested seqlet)
        if only_idx is None:
            norm_importance_scores = {
                k: v.mean(axis=0)
                for k, v in ext_importance_scores.items()
            }
        else:
            norm_importance_scores = {
                k: v[only_idx]
                for k, v in ext_importance_scores.items()
            }

        # max()/min() of an empty sequence raises, so only compute the
        # shared y-range when there is at least one importance score.
        if norm_importance_scores:
            max_scale = max(
                np.maximum(v, 0).sum(axis=-1).max()
                for v in norm_importance_scores.values())
            min_scale = min(
                np.minimum(v, 0).sum(axis=-1).min()
                for v in norm_importance_scores.values())

            for k, (imp_score_name,
                    logo) in enumerate(norm_importance_scores.items()):
                imp_ax = ax[n_tracks + k]

                # plot
                imp_ax.set_ylim([min_scale, max_scale])
                imp_ax.axhline(y=0, linewidth=1, linestyle='--', color='grey')
                seqlogo(logo, ax=imp_ax)

                # style
                simple_yaxis_format(imp_ax)
                strip_axis(imp_ax)
                imp_ax.set_ylabel(imp_score_name,
                                  rotation=rotate_y,
                                  ha='right',
                                  labelpad=5)

        # -----------
        # information content (seqlogo)
        # -----------
        # plot
        seqlogo(sequence, ax=ax[-1])

        # style
        simple_yaxis_format(ax[-1])
        strip_axis(ax[-1])
        ax[-1].set_ylabel("Inf. content",
                          rotation=rotate_y,
                          ha='right',
                          labelpad=5)
        ax[-1].set_xticks(list(range(0, len(sequence) + 1, 5)))

        figs.append(fig)
        # save to file
        if fpath_template is not None:
            pname = pattern.replace("/", ".")
            basepath = fpath_template.format(pname=pname, pattern=pattern)
            if mkdir:
                out_dir = os.path.dirname(basepath)
                # dirname is '' for a bare file name; makedirs('') would raise
                if out_dir:
                    os.makedirs(out_dir, exist_ok=True)
            # save this figure explicitly instead of "the current figure"
            fig.savefig(basepath + '.png', dpi=600)
            fig.savefig(basepath + '.pdf', dpi=600)
            plt.close(fig)  # close the figure
            # NOTE(review): show_figure is defined elsewhere; presumably it
            # makes the just-closed figure displayable again - confirm.
            show_figure(fig)
            plt.show()
    return figs
Exemplo n.º 2
0
def plot_profiles_single(seqlet,
                         x,
                         tracks,
                         importance_scores=None,
                         figsize=(20, 2),
                         legend=True,
                         rotate_y=90,
                         seq_height=1,
                         flip_neg=False,
                         rc_fn=lambda x: x[::-1, ::-1]):
    """Plot the profile tracks, importance scores and sequence of one seqlet.

    The figure stacks (top to bottom): one axis per entry of `tracks`,
    one seqlogo axis per entry of `importance_scores`, and a final seqlogo
    axis labelled "Inf. content" showing `seqlet.extract(x)`.

    Args:
      seqlet: object providing `extract(array)`; used to cut the seqlet
        region out of `x`, every track and every importance score.
      x: one-hot-encoded sequence
      tracks: dictionary of profile tracks
      importance_scores: optional dictionary of importance scores
        (None is treated as an empty dictionary).
      figsize: figure size forwarded to `plt.subplots`.
      legend: if True, call `legend()` on every track axis.
      rotate_y: rotation of the y-axis labels.
      seq_height: height ratio of the seqlogo axes relative to the track
        axes (which have ratio 1).
      flip_neg: forwarded to `plot_stranded_profile`.
      rc_fn: unused; kept for interface compatibility.

    Returns:
      the created matplotlib figure.
    """
    import matplotlib.pyplot as plt
    from concise.utils.plot import seqlogo

    if importance_scores is None:
        importance_scores = {}

    # --------------
    # extract signal
    seq = seqlet.extract(x)
    ext_importance_scores = {
        s: seqlet.extract(imp)
        for s, imp in importance_scores.items()
    }

    n_tracks = len(tracks)
    # squeeze=False keeps `ax` indexable even for a single-row figure
    fig, ax = plt.subplots(1 + len(importance_scores) + n_tracks,
                           1,
                           sharex=True,
                           figsize=figsize,
                           squeeze=False,
                           gridspec_kw={
                               'height_ratios': [1] * n_tracks +
                               [seq_height] * (1 + len(importance_scores))
                           })
    ax = ax[:, 0]

    # signal
    for i, (k, signal) in enumerate(tracks.items()):
        plot_stranded_profile(seqlet.extract(signal),
                              ax=ax[i],
                              flip_neg=flip_neg)
        simple_yaxis_format(ax[i])
        strip_axis(ax[i])
        ax[i].set_ylabel(f"{k}", rotation=rotate_y, ha='right', labelpad=5)

        if legend:
            ax[i].legend()

    # -----------
    # importance scores (seqlogo)
    # -----------
    # max()/min() of an empty sequence raises, so only compute the shared
    # y-range when there is at least one importance score.
    if ext_importance_scores:
        max_scale = max(
            np.maximum(v, 0).sum(axis=-1).max()
            for v in ext_importance_scores.values())
        min_scale = min(
            np.minimum(v, 0).sum(axis=-1).min()
            for v in ext_importance_scores.values())

        for k, (imp_score_name,
                logo) in enumerate(ext_importance_scores.items()):
            imp_ax = ax[n_tracks + k]
            # plot
            imp_ax.set_ylim([min_scale, max_scale])
            imp_ax.axhline(y=0, linewidth=1, linestyle='--', color='grey')
            seqlogo(logo, ax=imp_ax)

            # style
            simple_yaxis_format(imp_ax)
            strip_axis(imp_ax)
            imp_ax.set_ylabel(imp_score_name,
                              rotation=rotate_y,
                              ha='right',
                              labelpad=5)

    # -----------
    # information content (seqlogo)
    # -----------
    # plot
    seqlogo(seq, ax=ax[-1])

    # style
    simple_yaxis_format(ax[-1])
    strip_axis(ax[-1])
    ax[-1].set_ylabel("Inf. content",
                      rotation=rotate_y,
                      ha='right',
                      labelpad=5)
    ax[-1].set_xticks(list(range(0, len(seq) + 1, 5)))
    return fig
Exemplo n.º 3
0
def plot_tracks(tracks, seqlets=None,
                title=None,
                rotate_y=90,
                legend=False,
                fig_width=20,
                fig_height_per_track=2,
                ylim=None,
                same_ylim=False,
                use_spine_subset=False,
                seqlet_plot_fn=plot_seqlet_box,
                ylab=True,
                color=None,
                height_ratios=None,
                plot_track_fn=plot_track):
    """Plot multiple tracks stacked vertically with a shared x-axis.

    One-hot-encoded sequence as a logo,
    and 1 or 2 dim tracks as normal line-plots.

    Args:
      tracks: dictionary of numpy arrays with the same axis0 length
      seqlets: optional iterable of seqlets; a seqlet is drawn (via
        `seqlet_plot_fn`) on the axis whose track name equals
        `seqlet.seqname`. None is treated as no seqlets.
      title: if not None, title placed on the first axis.
      rotate_y: rotation of the y-axis labels.
      legend: forwarded to `plot_track_fn`.
      fig_width: figure width
      fig_height_per_track: figure height per track.
      ylim: if not None, a single tuple or a list of tuples representing the ylim to use
      same_ylim: if True, `ylim` is replaced by `(0, max over all tracks)`.
      use_spine_subset: if True, call `spine_subset` on each axis with the
        y-limits (lower bound clipped at 0).
      seqlet_plot_fn: callable `(seqlet, ax, add_label=True)`.
      ylab: if True, label each y-axis with the track name.
      color: forwarded per track (via `get_list_value`) to `plot_track_fn`.
      height_ratios: optional height ratios of the track axes.
      plot_track_fn: callable `(arr, ax, legend, ylim, color=..., track=...)`
        drawing a single track.

    Returns:
      matplotlib.figure.Figure

    Raises:
      ValueError: if no track is left to plot.
    """
    if seqlets is None:
        seqlets = []

    gridspec_kw = ({"height_ratios": height_ratios}
                   if height_ratios is not None else {})

    tracks = skip_nan_tracks(tracks)  # ignore None values
    n_tracks = len(tracks)
    if n_tracks == 0:
        # matplotlib rejects a 0-row grid with a ValueError as well;
        # fail early with a message that names the actual problem.
        raise ValueError("plot_tracks: no tracks to plot")

    # squeeze=False gives a uniform 2D array of axes, so the single-track
    # case needs no special handling.
    fig, axes = plt.subplots(n_tracks, 1,
                             figsize=(fig_width, fig_height_per_track * n_tracks),
                             gridspec_kw=gridspec_kw,
                             sharex=True,
                             squeeze=False)
    axes = axes[:, 0]

    if same_ylim:
        ylim = (0, max(v.max() for _, v in get_items(tracks)))

    for i, (ax, (track, arr)) in enumerate(zip(axes, get_items(tracks))):
        yl = get_list_value(ylim, i)
        plot_track_fn(arr, ax, legend, yl,
                      color=get_list_value(color, i),
                      track=track)
        if use_spine_subset:
            # NOTE(review): assumes get_list_value passes None through when
            # no ylim was requested; in that case fall back to the limits
            # the axis ended up with instead of indexing into None.
            bounds = yl if yl is not None else ax.get_ylim()
            spine_subset(ax, max(bounds[0], 0), bounds[1])

        # TODO allow to specify separate seqlets for different regions (e.g. via dicts)
        for seqlet in seqlets:
            if seqlet.seqname == track:
                seqlet_plot_fn(seqlet, ax, add_label=True)

        if ylab:
            if rotate_y == 90:
                ax.set_ylabel(track)
            else:
                ax.set_ylabel(track, rotation=rotate_y,
                              multialignment='center',
                              ha='right', labelpad=5)
        simple_yaxis_format(ax)
        if i != n_tracks - 1:
            ax.xaxis.set_ticks_position('none')
        if i == 0 and title is not None:
            ax.set_title(title)

    # add ticks to the final axis (referenced explicitly rather than through
    # the leaked loop variable)
    axes[-1].xaxis.set_major_locator(ticker.AutoLocator())
    fig.subplots_adjust(hspace=0)
    return fig