Пример #1
0
def plot_seq_importance(grads,
                        x,
                        xlim=None,
                        ylim=None,
                        layer_idx=-2,
                        figsize=(25, 3),
                        title=""):
    """Draw a sequence logo of importance scores masked by the input.

    The plotted values are the element-wise product ``grads * x`` (both
    arrays are squeezed first).

    Args:
      grads: either deeplift or gradientxinput score matrix
      x: one-hot encoded DNA sequence
      xlim: (start, stop) x-range to show; defaults to (0, x.shape[0])
        measured after squeezing
      ylim: (low, high) y-range; defaults to the min and max of the
        plotted values
      layer_idx: accepted for signature compatibility; not used here
      figsize: matplotlib figure size, forwarded to seqlogo_fig
      title: text set as the plot title
    """
    scores = grads.squeeze()
    one_hot = x.squeeze()

    n_positions = one_hot.shape[0]
    weighted = scores * one_hot

    x_range = (0, n_positions) if xlim is None else xlim
    y_range = (np.amin(weighted), np.amax(weighted)) if ylim is None else ylim

    seqlogo_fig(weighted, figsize=figsize)

    # One tick every 5 positions across the visible range.
    tick_positions = list(range(x_range[0], x_range[1], 5))
    plt.xticks(tick_positions)
    plt.xlim(x_range)
    plt.ylim(y_range)
    plt.title(title)
Пример #2
0
def manual_test_layer_plots_AA():
    """Manual smoke check for the amino-acid layer plotting helpers.

    Nothing is asserted. The function encodes one amino-acid string, shows
    its sequence logo, assembles and compiles a small model, and then calls
    ``plot_weights`` on ``model.layers[1]`` with three plot types.
    """
    aa_motifs = ["ACDEFGGIKNY"]
    encoded = encodeAA(aa_motifs)

    input_len = 100
    filter_width = 7

    # Logo of the first encoded sequence, displayed right away.
    seqlogo_fig(encoded[0], vocab="AA")
    plt.show()

    # Model input of length `input_len`.
    aa_input = cl.InputAA(input_len)

    # Single-filter convolution whose kernel spans `filter_width` positions.
    conv = ConvAA(filters=1, kernel_size=filter_width, activation="relu")
    hidden = conv(aa_input)

    # Smoothing layer - positional-dependent effect
    smoother = cl.GAMSmooth(n_bases=10, l2_smooth=1e-3, l2=0)
    hidden = smoother(hidden)
    hidden = cl.GlobalSumPooling1D()(hidden)
    output = kl.Dense(units=1, activation="linear")(hidden)

    model = Model(inputs=aa_input, outputs=output)
    model.compile("adam", "mse")

    # TODO - test
    # NOTE(review): layers[1] is presumably the ConvAA layer (layers[0]
    # being the input) - confirm.
    plotted_layer = model.layers[1]
    plotted_layer.plot_weights(plot_type="heatmap")

    for motif_plot in ("motif_raw", "motif_pwm_info"):
        plotted_layer.plot_weights(0, plot_type=motif_plot)
Пример #3
0
    def _plot_weights_motif(self, index, plot_type="motif_raw",
                            background_probs=DEFAULT_BASE_BACKGROUND,
                            ncol=1,
                            figsize=None):
        """Plot the layer's first weight array as sequence logos.

        Args:
          index: a single int or None. It is normalised to a list below but
            is not used afterwards (see the NOTE in the body).
          plot_type: one of "motif_pwm", "motif_raw" or "motif_pwm_info";
            it must also be listed in ``self.AVAILABLE_PLOTS``.
          background_probs: forwarded to ``pssm_array2pwm_array`` for the
            two PWM-based plot types.
          ncol: forwarded to ``seqlogo_fig``.
          figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.

        Returns:
          The value returned by ``seqlogo_fig``.

        Raises:
          Exception: if ``self.get_weights()`` is empty.
          ValueError: if ``plot_type`` is not a supported motif plot.
        """
        w_all = self.get_weights()

        if len(w_all) == 0:
            raise Exception("Layer needs to be initialized first")
        W = w_all[0]

        # NOTE(review): `index` is normalised here but never used to select
        # filters, so every filter in W is passed on - confirm whether
        # the array should be restricted to `index` along axis 2.
        if index is None:
            index = np.arange(W.shape[2])
        if isinstance(index, int):
            index = [index]

        # Validate before touching matplotlib so that an unsupported
        # plot_type does not leave an empty figure open.
        motif_plot_types = ("motif_pwm", "motif_raw", "motif_pwm_info")
        if plot_type not in motif_plot_types or plot_type not in self.AVAILABLE_PLOTS:
            raise ValueError("plot_type needs to be from {0}".format(self.AVAILABLE_PLOTS))

        if plot_type == "motif_raw":
            arr = W
        else:
            arr = pssm_array2pwm_array(W, background_probs)
            if plot_type == "motif_pwm_info":
                arr = _pwm2pwm_info(arr)

        # NOTE(review): this figure handle is immediately replaced by the
        # seqlogo_fig return value; the call is kept to preserve the
        # existing success-path behaviour - confirm whether it is needed.
        fig = plt.figure(figsize=figsize)

        fig = seqlogo_fig(arr, vocab=self.VOCAB_name, figsize=figsize, ncol=ncol, plot_name="filter: ")
        return fig
Пример #4
0
def plot_seq_importance(model, x, xlim=None, layer_idx=-2, figsize=(25, 3)):
    """Plot input x gradient sequence importance, one logo per sequence.

    Args:
      model: DNA-sequence based Sequential keras model
      x: one-hot encoded DNA sequence; iterated along its first axis
      xlim: restrict the plotted xrange; defaults to (0, x.shape[1])
      layer_idx: forwarded to input_grad
      figsize: matplotlib figure size
    """
    n_positions = x.shape[1]
    x_range = (0, n_positions) if xlim is None else xlim

    importance = input_grad(model, x, layer_idx=layer_idx)

    for row, one_hot in enumerate(x):
        seqlogo_fig(importance[row] * one_hot, figsize=figsize)
        # One tick every 5 positions across the visible range.
        tick_positions = list(range(x_range[0], x_range[1], 5))
        plt.xticks(tick_positions)
        plt.xlim(x_range)
Пример #5
0
    def plotPWMInfo(self, figsize=(10, 2)):
        """Plot ``_pwm2pwm_info(self.pwm)`` as a DNA sequence logo.

        Args:
          figsize: matplotlib figure size, forwarded to seqlogo_fig

        Returns:
          The value returned by seqlogo_fig; the y-axis is labelled "Bits".
        """
        logo = seqlogo_fig(_pwm2pwm_info(self.pwm), vocab="DNA", figsize=figsize)
        plt.ylabel("Bits")
        return logo
Пример #6
0
def plot_seq_importance(scores,
                        data,
                        ylim=None,
                        figsize=(25, 3),
                        outf=None,
                        tick_interval=5):
    """Plot ``scores`` broadcast against ``data`` as a sequence logo.

    ``scores`` gets an extra axis at position 1 and is multiplied with
    ``data``; NaNs in the product are replaced via ``np.nan_to_num``.
    (Assumes scores holds one value per position and data is the matching
    one-hot matrix - TODO confirm against callers.)

    Args:
      scores: importance scores
      data: array the scores are multiplied with
      ylim: optional (low, high) y-range; left to matplotlib when None
      figsize: matplotlib figure size, forwarded to seqlogo_fig
      outf: output path; when None the figure is shown instead of saved
      tick_interval: spacing between x ticks
    """
    product = np.expand_dims(scores, axis=1) * data
    product = np.nan_to_num(product, copy=False)
    # Diagnostic output retained so stdout stays the same for callers.
    print(product.shape)

    seqlogo_fig(product, figsize=figsize)
    plt.xticks(list(range(0, product.shape[0], tick_interval)))

    # Identity checks: `ylim != None` is evaluated element-wise for array
    # inputs and then fails when used as a condition.
    if ylim is not None:
        plt.ylim(ylim)

    if outf is None:
        plt.show()
    else:
        # Saved output: hide the axes and let the plot fill the whole canvas.
        plt.axis("off")
        plt.gca().set_position([0, 0, 1, 1])
        plt.savefig(outf)
Пример #7
0
def plot_seq_importance(grads, x, xlim=None, ylim=None, layer_idx=-2,
                        figsize=(25, 3), title="", snp_pos=0):
    """Plot sequence importance scores with a vertical marker line.

    The plotted values are the element-wise product ``grads * x`` (both
    arrays are squeezed first).

    Args:
      grads: either deeplift or gradientxinput score matrix
      x: one-hot encoded DNA sequence
      xlim: restrict the plotted xrange; defaults to (0, x.shape[0])
        measured after squeezing
      ylim: (low, high) y-range; defaults to the min and max of the
        plotted values
      layer_idx: accepted for signature compatibility; not used here
      figsize: matplotlib figure size
      title: text set as the plot title
      snp_pos: x position of the dashed black vertical line
        (presumably the SNP location - confirm with callers)
    """
    score_matrix = grads.squeeze()
    sequence = x.squeeze()

    length = sequence.shape[0]
    masked_scores = score_matrix * sequence

    shown_x = xlim if xlim is not None else (0, length)
    if ylim is not None:
        shown_y = ylim
    else:
        shown_y = (np.amin(masked_scores), np.amax(masked_scores))

    seqlogo_fig(masked_scores, figsize=figsize)

    # One tick every 5 positions across the visible range.
    plt.xticks(list(range(shown_x[0], shown_x[1], 5)))
    plt.xlim(shown_x)
    plt.ylim(shown_y)
    plt.title(title)
    plt.axvline(x=snp_pos, color='k', linestyle='--')
Пример #8
0
 def plotPSSM(self, background_probs=DEFAULT_BASE_BACKGROUND, figsize=(10, 2)):
     """Plot ``self.get_pssm()`` as a DNA sequence logo.

     Args:
       background_probs: accepted but not used by this method
         (NOTE(review): it is not forwarded to get_pssm - confirm intent)
       figsize: matplotlib figure size, forwarded to seqlogo_fig

     Returns:
       The value returned by seqlogo_fig.
     """
     return seqlogo_fig(self.get_pssm(), vocab="DNA", figsize=figsize)
Пример #9
0
 def plotPWM(self, figsize=(10, 2)):
     """Plot ``self.pwm`` as a DNA sequence logo.

     Args:
       figsize: matplotlib figure size, forwarded to seqlogo_fig

     Returns:
       The value returned by seqlogo_fig; the y-axis is labelled
       "Probability".
     """
     logo = seqlogo_fig(self.pwm, vocab="DNA", figsize=figsize)
     plt.ylabel("Probability")
     return logo