def hist_snr(self, bins=50, alpha=0.5, hold=False, fontsize=12,
             grid=True, xlabel="SNR", ylabel="#", title="",
             clip_upper_SNR=30):
    """Plot histogram of the ACGT SNRs for all reads

    :param int bins: binning for the histogram. Note that the range starts
        at 0 and ends at clip_upper_SNR (or at the largest SNR found when
        it is smaller than clip_upper_SNR)
    :param float alpha: transparency of the histograms
    :param bool hold: if False (default), the current figure is cleared
        before plotting
    :param int fontsize: font size of the axis labels and title
    :param bool grid: if True, add a grid to the plot
    :param str xlabel: label of the x-axis
    :param str ylabel: label of the y-axis
    :param str title: title of the plot
    :param clip_upper_SNR: upper bound of the histogram range. SNR values
        above the bound are clipped to it before being histogrammed.

    .. plot::
        :include-source:

        from sequana.pacbio import PacbioSubreads
        from sequana import sequana_data
        b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
        b.hist_snr()

    """
    if self._df is None:
        self._get_df()

    # old pacbio format has no SNR stored
    if self._df['snr_A'].dropna().empty:
        # nothing to plot
        from sequana import sequana_data
        pylab.clf()
        pylab.imshow(pylab.imread(sequana_data("no_data.jpg")))
        pylab.gca().axis('off')
        return

    if hold is False:
        pylab.clf()

    columns = ["snr_{}".format(letter) for letter in "ACGT"]

    # Largest SNR over the four channels, never below 0 and never above the
    # requested clipping value. The explicit comparison (rather than the
    # builtin max) ignores a NaN column maximum.
    max_snr = 0
    for column in columns:
        column_max = self._df[column].max()
        if column_max > max_snr:
            max_snr = column_max
    if max_snr > clip_upper_SNR:
        max_snr = clip_upper_SNR

    # NOTE: linspace returns `bins` edges, i.e. bins - 1 intervals.
    edges = pylab.linspace(0, max_snr, bins)

    # Series.clip(upper=...) replaces Series.clip_upper(), which was
    # deprecated in pandas 0.24 and removed in pandas 1.0.
    for letter, column in zip("ACGT", columns):
        pylab.hist(self._df[column].clip(upper=max_snr), alpha=alpha,
                   label=letter, bins=edges)

    pylab.legend()
    pylab.xlabel(xlabel, fontsize=fontsize)
    pylab.ylabel(ylabel, fontsize=fontsize)
    pylab.title(title, fontsize=fontsize)
    if grid is True:
        pylab.grid(True)
def plot_corr(self):
    """Show the correlation matrix of the spikes table as an image

    The table returned by :meth:`spikes_found` gets an extra ``lengths``
    column (looked up from the SIRV lengths dictionary using the table
    index) before the correlation is computed. The colour scale is
    restricted to [0, 1] and a colorbar is added.
    """
    length_by_name = self.SIRV_data.SIRV.get_lengths_as_dict()

    # presumably a pandas DataFrame indexed by spike name -- TODO confirm
    found = self.spikes_found()
    found["lengths"] = [length_by_name[name] for name in found.index]

    pylab.imshow(found.corr())

    # one tick per column, labelled with the column names
    labels = found.columns
    positions = range(len(labels))
    pylab.xticks(positions, labels, rotation=90)
    pylab.yticks(positions, labels)

    pylab.clim(0, 1)
    pylab.colorbar()
def plot_corr(self):
    """Plot the correlation between the columns of the spikes table

    A ``lengths`` column, filled from the SIRV lengths dictionary keyed by
    the table index, is appended to the table returned by
    :meth:`spikes_found`. The resulting correlation matrix is drawn with
    imshow, colour limits fixed to [0, 1], together with a colorbar.
    """
    sirv_lengths = self.SIRV_data.SIRV.get_lengths_as_dict()

    # presumably a pandas DataFrame indexed by spike name -- TODO confirm
    table = self.spikes_found()
    table["lengths"] = [sirv_lengths[key] for key in table.index]

    correlation = table.corr()
    pylab.imshow(correlation)

    # tick positions 0..N-1, labelled with the table's column names
    column_names = table.columns
    ticks = range(len(column_names))
    pylab.xticks(ticks, column_names, rotation=90)
    pylab.yticks(ticks, column_names)

    pylab.clim(0, 1)
    pylab.colorbar()
def hist_snr(self, bins=50, alpha=0.5, hold=False, fontsize=12,
             grid=True, xlabel="SNR", ylabel="#", title=""):
    """Plot histogram of the ACGT SNRs for all reads

    :param int bins: binning for the histogram
    :param float alpha: transparency of the histograms
    :param bool hold: if False (default), the current figure is cleared
        before plotting
    :param int fontsize: font size of the axis labels and title
    :param bool grid: if True, add a grid to the plot
    :param str xlabel: label of the x-axis
    :param str ylabel: label of the y-axis
    :param str title: title of the plot

    .. plot::
        :include-source:

        from sequana.pacbio import BAMPacbio
        from sequana import sequana_data
        b = BAMPacbio(sequana_data("test_pacbio_subreads.bam"))
        b.hist_snr()

    """
    if self._df is None:
        self._get_df()

    # old pacbio format has no SNR stored: show a placeholder image instead
    if len(self._df['snr_A'].dropna()) == 0:
        from sequana import sequana_data
        pylab.clf()
        placeholder = pylab.imread(sequana_data("no_data.jpg"))
        pylab.imshow(placeholder)
        pylab.gca().axis('off')
        return

    if hold is False:
        pylab.clf()

    # one overlaid histogram per nucleotide channel
    channels = (
        ("A", 'snr_A'),
        ("C", 'snr_C'),
        ("G", 'snr_G'),
        ("T", 'snr_T'),
    )
    for label, column in channels:
        pylab.hist(self._df.loc[:, column], alpha=alpha, label=label,
                   bins=bins)

    pylab.legend()
    pylab.xlabel(xlabel, fontsize=fontsize)
    pylab.ylabel(ylabel, fontsize=fontsize)
    pylab.title(title, fontsize=fontsize)
    if grid is True:
        pylab.grid(True)
def plot(self, interpolation='None', aspect='auto', cmap='hot',
         tight_layout=True, colorbar=True, fontsize_x=None, fontsize_y=None,
         rotation_x=90, xticks_on=True, yticks_on=True, **kargs):
    """wrapper around imshow to plot a dataframe

    :param interpolation: interpolation forwarded to imshow (defaults to
        the string 'None')
    :param aspect: aspect forwarded to imshow (defaults to 'auto')
    :param cmap: colormap to be used.
    :param tight_layout: if true, call pylab.tight_layout() at the end
    :param colorbar: add a colorbar (default to True)
    :param fontsize_x: fontsize on xlabels (16 if not provided)
    :param fontsize_y: fontsize on ylabels (16 if not provided)
    :param rotation_x: rotate labels on xaxis
    :param xticks_on: switch off the xticks and labels
    :param yticks_on: switch off the yticks and labels
    :param kargs: any other keyword argument is forwarded to imshow

    The current figure is cleared before plotting.
    """
    data = self.df

    pylab.clf()
    pylab.imshow(data, interpolation=interpolation, aspect=aspect,
                 cmap=cmap, **kargs)

    # FIXME use default values
    if fontsize_x is None:
        fontsize_x = 16
    if fontsize_y is None:
        fontsize_y = 16

    # rows are labelled with the dataframe index
    if yticks_on is True:
        pylab.yticks(range(len(data.index)), data.index,
                     fontsize=fontsize_y)
    else:
        pylab.yticks([])

    # columns are labelled with the dataframe column names
    if xticks_on is True:
        pylab.xticks(range(len(data.columns)), data.columns,
                     fontsize=fontsize_x, rotation=rotation_x)
    else:
        pylab.xticks([])

    if colorbar is True:
        pylab.colorbar()

    if tight_layout:
        pylab.tight_layout()
def hist_snr(self, bins=50, alpha=0.5, hold=False, fontsize=12,
             grid=True, xlabel="SNR", ylabel="#", title="",
             clip_upper_SNR=30):
    """Plot histogram of the ACGT SNRs for all reads

    :param int bins: binning for the histogram. Note that the range starts
        at 0 and ends at clip_upper_SNR (or at the largest SNR found when
        it is smaller than clip_upper_SNR)
    :param float alpha: transparency of the histograms
    :param bool hold: if False (default), the current figure is cleared
        before plotting
    :param int fontsize: font size of the axis labels and title
    :param bool grid: if True, add a grid to the plot
    :param str xlabel: label of the x-axis
    :param str ylabel: label of the y-axis
    :param str title: title of the plot
    :param clip_upper_SNR: upper bound of the histogram range. SNR values
        above the bound are clipped to it before being histogrammed.

    .. plot::
        :include-source:

        from sequana.pacbio import PacbioSubreads
        from sequana import sequana_data
        b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
        b.hist_snr()

    """
    if self._df is None:
        self._get_df()

    # old pacbio format has no SNR stored
    if self._df['snr_A'].dropna().empty:
        # nothing to plot
        from sequana import sequana_data
        pylab.clf()
        pylab.imshow(pylab.imread(sequana_data("no_data.jpg")))
        pylab.gca().axis('off')
        return

    if hold is False:
        pylab.clf()

    snr_columns = ["snr_{}".format(base) for base in "ACGT"]

    # Upper edge of the histogram: the largest SNR across the four
    # channels, floored at 0 and capped at clip_upper_SNR. The explicit
    # comparison (rather than the builtin max) ignores a NaN column maximum.
    upper = 0
    for name in snr_columns:
        current = self._df[name].max()
        if current > upper:
            upper = current
    if upper > clip_upper_SNR:
        upper = clip_upper_SNR

    # NOTE: linspace returns `bins` edges, i.e. bins - 1 intervals.
    bin_edges = pylab.linspace(0, upper, bins)

    # Series.clip(upper=...) replaces Series.clip_upper(), which was
    # deprecated in pandas 0.24 and removed in pandas 1.0.
    for base, name in zip("ACGT", snr_columns):
        pylab.hist(self._df[name].clip(upper=upper), alpha=alpha,
                   label=base, bins=bin_edges)

    pylab.legend()
    pylab.xlabel(xlabel, fontsize=fontsize)
    pylab.ylabel(ylabel, fontsize=fontsize)
    pylab.title(title, fontsize=fontsize)
    if grid is True:
        pylab.grid(True)