def plot_seq_importance(grads, x, xlim=None, ylim=None, layer_idx=-2, figsize=(25, 3), title=""):
    """Draw a sequence logo of importance scores masked by the input sequence.

    Both inputs are squeezed, multiplied elementwise and handed to
    ``seqlogo_fig``; axis ticks, limits and the title are then set through
    ``pyplot`` on the current axes.

    Args:
        grads: either deeplift or gradient*input score matrix.
        x: one-hot encoded DNA sequence.
        xlim: ``(start, stop)`` x-range to show. Defaults to
            ``(0, x.squeeze().shape[0])``.
        ylim: y-range to show. Defaults to the min/max of the plotted values.
        layer_idx: not used by this function.
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.
        title: text passed to ``plt.title``.
    """
    scores = grads.squeeze()
    onehot = x.squeeze()
    n_positions = onehot.shape[0]
    masked_scores = scores * onehot

    x_range = (0, n_positions) if xlim is None else xlim
    y_range = ylim
    if y_range is None:
        y_range = (np.amin(masked_scores), np.amax(masked_scores))

    seqlogo_fig(masked_scores, figsize=figsize)

    # One tick every five positions inside the visible window.
    tick_positions = list(range(x_range[0], x_range[1], 5))
    plt.xticks(tick_positions)
    plt.xlim(x_range)
    plt.ylim(y_range)
    plt.title(title)
def manual_test_layer_plots_AA():
    """Manual smoke test for the amino-acid layer plotting helpers.

    Encodes one amino-acid string and shows it as a logo, then builds a small
    model (InputAA -> ConvAA -> GAMSmooth -> GlobalSumPooling1D -> Dense),
    compiles it and calls ``plot_weights`` on the layer at ``model.layers[1]``
    with each plot type. Nothing is asserted; the plots are meant to be
    inspected by eye.
    """
    peptides = ["ACDEFGGIKNY"]
    encoded = encodeAA(peptides)
    seq_length = 100
    motif_width = 7

    seqlogo_fig(encoded[0], vocab="AA")
    plt.show()

    # Model input sized to seq_length.
    input_dna = cl.InputAA(seq_length)

    # Single-filter convolution whose kernel spans motif_width positions.
    conv_out = ConvAA(
        filters=1,
        kernel_size=motif_width,
        activation="relu",
    )(input_dna)

    # Smoothing layer - positional-dependent effect.
    smoothed = cl.GAMSmooth(n_bases=10, l2_smooth=1e-3, l2=0)(conv_out)
    pooled = cl.GlobalSumPooling1D()(smoothed)
    prediction = kl.Dense(units=1, activation="linear")(pooled)

    model = Model(inputs=input_dna, outputs=prediction)
    model.compile("adam", "mse")

    # TODO - test
    target_layer = model.layers[1]
    target_layer.plot_weights(plot_type="heatmap")
    target_layer.plot_weights(0, plot_type="motif_raw")
    target_layer.plot_weights(0, plot_type="motif_pwm_info")
def _plot_weights_motif(self, index, plot_type="motif_raw", background_probs=DEFAULT_BASE_BACKGROUND, ncol=1, figsize=None):
    """Plot the layer's filters as sequence-logo motifs.

    Args:
        index: which filter(s) to plot, addressed along axis 2 of the first
            weight array. An int, a sequence of ints, or ``None`` for all
            filters.
        plot_type: one of ``"motif_raw"`` (weights as-is), ``"motif_pwm"``
            (weights passed through ``pssm_array2pwm_array``) or
            ``"motif_pwm_info"`` (that result passed through
            ``_pwm2pwm_info``). Must also be listed in
            ``self.AVAILABLE_PLOTS``.
        background_probs: forwarded to ``pssm_array2pwm_array``.
        ncol: forwarded to ``seqlogo_fig``.
        figsize: forwarded to ``seqlogo_fig``.

    Returns:
        Whatever ``seqlogo_fig`` returns for the selected filters.

    Raises:
        Exception: if the layer has no weights yet.
        ValueError: if ``plot_type`` is not a supported, available plot type.
    """
    w_all = self.get_weights()
    if len(w_all) == 0:
        raise Exception("Layer needs to be initialized first")
    W = w_all[0]

    if index is None:
        index = np.arange(W.shape[2])
    elif isinstance(index, (int, np.integer)):
        # Wrap a scalar so the selection below keeps the filter axis.
        index = [index]

    if plot_type not in self.AVAILABLE_PLOTS:
        raise ValueError("plot_type needs to be from {0}".format(self.AVAILABLE_PLOTS))

    if plot_type == "motif_raw":
        arr = W
    elif plot_type == "motif_pwm":
        arr = pssm_array2pwm_array(W, background_probs)
    elif plot_type == "motif_pwm_info":
        quasi_pwm = pssm_array2pwm_array(W, background_probs)
        arr = _pwm2pwm_info(quasi_pwm)
    else:
        raise ValueError("plot_type needs to be from {0}".format(self.AVAILABLE_PLOTS))

    # Apply the requested filter selection. It is done after the transform so
    # each plotted filter is exactly what the full-array transform produced.
    arr = arr[:, :, index]

    # No plt.figure() call here: the handle would be discarded in favour of
    # the one returned below, leaving an empty figure registered with pyplot.
    # NOTE(review): titles are built by seqlogo_fig from plot_name; confirm
    # they still identify the right filter when only a subset is plotted.
    fig = seqlogo_fig(arr, vocab=self.VOCAB_name, figsize=figsize, ncol=ncol, plot_name="filter: ")
    return fig
def plot_seq_importance(model, x, xlim=None, layer_idx=-2, figsize=(25, 3)):
    """Plot input x gradient sequence importance scores, one logo per sequence.

    Gradients are obtained from ``input_grad(model, x, layer_idx=layer_idx)``;
    for every element of ``x`` the matching gradient is multiplied by the
    sequence and drawn with ``seqlogo_fig``.

    Args:
        model: DNA-sequence based Sequential keras model.
        x: one-hot encoded DNA sequences; iterated along the first axis, with
            ``x.shape[1]`` used as the default plotted width.
        xlim: ``(start, stop)`` x-range to show. Defaults to
            ``(0, x.shape[1])``.
        layer_idx: forwarded to ``input_grad``.
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.
    """
    n_positions = x.shape[1]
    x_range = (0, n_positions) if xlim is None else xlim

    grads = input_grad(model, x, layer_idx=layer_idx)

    for i, onehot in enumerate(x):
        seqlogo_fig(grads[i] * onehot, figsize=figsize)
        # One tick every five positions inside the visible window.
        plt.xticks(list(range(x_range[0], x_range[1], 5)))
        plt.xlim(x_range)
def plotPWMInfo(self, figsize=(10, 2)):
    """Plot ``self.pwm`` after conversion with ``_pwm2pwm_info``.

    Args:
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.

    Returns:
        The value returned by ``seqlogo_fig``; the y-axis is labelled "Bits".
    """
    info_content = _pwm2pwm_info(self.pwm)
    logo = seqlogo_fig(info_content, vocab="DNA", figsize=figsize)
    plt.ylabel("Bits")
    return logo
def plot_seq_importance(scores, data, ylim=None, figsize=(25, 3), outf=None, tick_interval=5):
    """Plot importance scores masked by the sequence, then show or save them.

    ``scores`` gets a new axis at position 1 and is multiplied with ``data``;
    NaNs in the product are replaced via ``np.nan_to_num`` before the logo is
    drawn with ``seqlogo_fig``.

    Args:
        scores: importance scores, broadcast against ``data`` after
            ``np.expand_dims(scores, axis=1)``.
        data: array the scores are multiplied with.
            (presumably a one-hot encoded sequence — confirm with callers)
        ylim: optional y-range passed to ``plt.ylim``; left untouched when
            ``None``.
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.
        outf: output path passed to ``plt.savefig``. When ``None`` the figure
            is shown with ``plt.show()`` instead.
        tick_interval: spacing between x ticks.
    """
    product = np.expand_dims(scores, axis=1) * data
    product = np.nan_to_num(product, copy=False)

    seqlogo_fig(product, figsize=figsize)
    plt.xticks(list(range(0, product.shape[0], tick_interval)))

    # Identity checks: `!= None` / `== None` evaluate elementwise on
    # array-like arguments and would make the `if` raise.
    if ylim is not None:
        plt.ylim(ylim)

    if outf is None:
        plt.show()
    else:
        # Saved images drop the axes and let the plot fill the whole canvas.
        plt.axis("off")
        plt.gca().set_position([0, 0, 1, 1])
        plt.savefig(outf)
def plot_seq_importance(grads, x, xlim=None, ylim=None, layer_idx=-2, figsize=(25, 3), title="", snp_pos=0):
    """Draw a sequence logo of importance scores and mark one position.

    Both inputs are squeezed, multiplied elementwise and handed to
    ``seqlogo_fig``. Ticks, limits and the title are then set through
    ``pyplot``, and a dashed black vertical line is drawn at ``snp_pos``.

    Args:
        grads: either deeplift or gradient*input score matrix.
        x: one-hot encoded DNA sequence.
        xlim: ``(start, stop)`` x-range to show. Defaults to
            ``(0, x.squeeze().shape[0])``.
        ylim: y-range to show. Defaults to the min/max of the plotted values.
        layer_idx: not used by this function.
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.
        title: text passed to ``plt.title``.
        snp_pos: x coordinate of the vertical marker line.
    """
    scores = grads.squeeze()
    onehot = x.squeeze()
    n_positions = onehot.shape[0]
    masked_scores = scores * onehot

    x_range = xlim if xlim is not None else (0, n_positions)
    if ylim is not None:
        y_range = ylim
    else:
        y_range = (np.amin(masked_scores), np.amax(masked_scores))

    seqlogo_fig(masked_scores, figsize=figsize)

    # One tick every five positions inside the visible window.
    tick_positions = list(range(x_range[0], x_range[1], 5))
    plt.xticks(tick_positions)
    plt.xlim(x_range)
    plt.ylim(y_range)
    plt.title(title)
    plt.axvline(x=snp_pos, color='k', linestyle='--')
def plotPSSM(self, background_probs=DEFAULT_BASE_BACKGROUND, figsize=(10, 2)):
    """Plot the PSSM obtained from ``self.get_pssm`` as a sequence logo.

    Args:
        background_probs: background probabilities used to derive the PSSM.
            Previously accepted but never used; now forwarded to
            ``get_pssm``.
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.

    Returns:
        The value returned by ``seqlogo_fig``.
    """
    # NOTE(review): assumes get_pssm accepts the background probabilities as
    # its first positional argument — confirm against its definition.
    pssm = self.get_pssm(background_probs)
    return seqlogo_fig(pssm, vocab="DNA", figsize=figsize)
def plotPWM(self, figsize=(10, 2)):
    """Plot ``self.pwm`` as a sequence logo.

    Args:
        figsize: matplotlib figure size, forwarded to ``seqlogo_fig``.

    Returns:
        The value returned by ``seqlogo_fig``; the y-axis is labelled
        "Probability".
    """
    logo = seqlogo_fig(self.pwm, vocab="DNA", figsize=figsize)
    plt.ylabel("Probability")
    return logo