def plot_bar_flags(self, logy=True, fontsize=16, filename=None):
    """Draw a bar chart of the summed flag counts of the BAM file.

    .. plot::
        :include-source:

        from sequana import BAM, sequana_data
        b = BAM(sequana_data('test.bam', "testing"))
        b.plot_bar_flags()

    :param logy: the y axis is log-scaled only when this is exactly ``True``.
    :param int fontsize: font size of the x and y labels.
    :param filename: if set, the figure is saved to this file.
    :return: whatever the pandas ``plot`` call returns.

    .. seealso:: :class:`SAMFlags` for meaning of each flag
    """
    flag_counts = self.get_flags_as_df().sum()

    pylab.clf()
    # Only the literal True switches the log scale on; any other value
    # (even a truthy one) gives a linear axis.
    axes = flag_counts.plot(kind="bar", logy=logy is True, grid=True)

    for set_label, text in ((pylab.xlabel, "flags"), (pylab.ylabel, "count")):
        set_label(text, fontsize=fontsize)
    pylab.tight_layout()

    if filename:
        pylab.savefig(filename)
    return axes
def plot_unknown_barcodes(self, N=20):
    """Plot the most frequent unknown barcodes as horizontal bars, per lane.

    ``self.data['UnknownBarcodes']`` is read as a sequence of records, each
    with a ``'Lane'`` and a ``'Barcodes'`` entry. Barcodes are ranked by
    their total count over all lanes and the first *N* are drawn.

    :param int N: maximum number of barcodes to show.
    :return: the dataframe that was plotted (one column per lane, rows in
        increasing order of total count).
    """
    from matplotlib import rcParams

    ub = self.data['UnknownBarcodes']
    df = pd.DataFrame({x['Lane']: x['Barcodes'] for x in ub})
    if "unknown" in df.index and len(df) == 1:
        # if data is made of undetermined only, the dataframe is just made
        # of N lanes with one entry : unknown
        df.loc['known'] = [0] * len(df.columns)

    top = df.sum(axis=1).sort_values(ascending=False).index[0:N]
    data = df.loc[top][::-1]
    data.columns = ["Lane {}".format(x) for x in data.columns]

    # Draw the grid below the bars, then put the global setting back to
    # what it was (it used to be forced to False, clobbering user config).
    previous_axisbelow = rcParams['axes.axisbelow']
    rcParams['axes.axisbelow'] = True
    try:
        pylab.figure(figsize=(10, 8))
        ax = pylab.gca()
        data.plot(kind="barh", width=1, ec="k", ax=ax)
    finally:
        rcParams['axes.axisbelow'] = previous_axisbelow

    pylab.xlabel("Number of reads", fontsize=12)
    pylab.ylabel("")
    pylab.grid(True)
    # Label with the real lane identifiers; numbering 1..n was only right
    # when lanes happened to be exactly 1..n in that order.
    pylab.legend(list(data.columns), loc="lower right")
    try:
        pylab.tight_layout()
    except Exception as err:
        print(err)
    return data
def plot_bar(self, spikes_filename=None, ratio=100):
    """Bar plot of the spikes found, overlaid with the scaled SIRV lengths.

    :param spikes_filename: forwarded to :meth:`spikes_found`.
    :param ratio: divisor applied to the SIRV lengths before plotting them
        as a line on the same axes.
    :return: the data returned by :meth:`spikes_found`.
    """
    found = self.spikes_found(spikes_filename)
    # one length per entry of the index, in index order
    scaled = np.array([self.SIRV_lengths[name] for name in found.index])
    found.plot(kind="bar")
    pylab.plot(scaled / ratio)
    pylab.tight_layout()
    return found
def plot_bar(self, spikes_filename=None, ratio=100):
    """Plot the spikes found as bars together with the SIRV lengths.

    :param spikes_filename: passed as is to :meth:`spikes_found`.
    :param ratio: the SIRV lengths are divided by this value before being
        drawn as a line over the bars.
    :return: the result of :meth:`spikes_found`.
    """
    spikes = self.spikes_found(spikes_filename)
    sirv_lengths = []
    for identifier in spikes.index:
        sirv_lengths.append(self.SIRV_lengths[identifier])

    spikes.plot(kind="bar")
    line_values = np.array(sirv_lengths) / ratio
    pylab.plot(line_values)
    pylab.tight_layout()
    return spikes
def plot_polymerase_per_barcode(self, fontsize=12, unbarcoded=True):
    """Number Of Polymerase Reads Per Barcode

    Plots the "Polymerase Reads" column of ``self.df_barcoded`` sorted in
    decreasing order against its rank, with a horizontal line at the mean.

    :param int fontsize: font size of the x and y labels.
    :param bool unbarcoded: when exactly ``True``, also draw a dashed line at
        the first "Polymerase Reads" value of ``self.df_not_barcoded``.
    """
    reads = self.df_barcoded["Polymerase Reads"]
    PR = reads.sum()
    data = reads.sort_values(ascending=False).values

    pylab.plot(list(range(1, len(data) + 1)), data, label="barcodes")
    pylab.axhline(data.mean(), color="r", label="average")

    if unbarcoded is True:
        try:
            unbar = self.df_not_barcoded['Polymerase Reads'].iloc[0]
            pylab.axhline(unbar, color="k", ls="--", label="not barcoded")
        except Exception:
            # Best effort: the not-barcoded line is optional. Unlike a bare
            # except, KeyboardInterrupt/SystemExit are not swallowed.
            pass

    pylab.xlabel("Barcode Rank Order", fontsize=fontsize)
    pylab.ylabel("Counts of Reads", fontsize=fontsize)
    pylab.title("Total Polymerase count: {}".format(PR))
    pylab.legend()
    pylab.ylim(ymin=0)
    try:
        pylab.tight_layout()
    except Exception:
        # layout adjustment is cosmetic; never fail the plot because of it
        pass
def plot_hist_normalized_coverage(self, filename=None, binwidth=0.1, max_z=4):
    """Barplot of the normalized coverage with gaussian fitting

    :param filename: if set, the figure is saved to this file.
    :param binwidth: forwarded to ``self._set_bins`` to build the bins.
    :param max_z: upper x limit of the plot; also passed as ``Xmax`` to the
        mixture fitting plot.
    """
    pylab.clf()
    # if there are a NaN -> can't set up binning
    d = self.df["scale"][self.range[0]:self.range[1]].dropna()
    # remove outlier -> plot crash if range between min and max is too high
    d = d[np.abs(d - d.mean()) <= (4 * d.std())]
    bins = self._set_bins(d, binwidth)
    self.mixture_fitting.data = d
    try:
        self.mixture_fitting.plot(self.gaussians_params, bins=bins,
                                  Xmin=0, Xmax=max_z)
    except ZeroDivisionError:
        # the fit could not be drawn; keep going with the rest of the figure
        pass
    pylab.grid(True)
    pylab.xlim([0, max_z])
    pylab.xlabel("Normalised per-base coverage")
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; do not swallow KeyboardInterrupt/SystemExit
        pass
    if filename:
        pylab.savefig(filename)
def plot_gc_vs_coverage(self, filename=None, bins=None, Nlevels=6,
                        fontsize=20, norm="log", ymin=0, ymax=100,
                        contour=True, **kwargs):
    """2D histogram of per-base coverage (x) versus GC content in % (y).

    The ``cov`` and ``gc`` columns of ``self.df`` are used; ``gc`` is
    multiplied by 100 and rows with missing values are dropped.

    :param filename: if set, the figure is saved to this file.
    :param bins: passed to ``Hist2D.plot``. When ``None``, a two-item list
        is derived from the GC range, ``self.bed.gc_window_size`` and the
        maximum coverage.
    :param Nlevels: number of contour levels; ``None`` or 0 disables the
        contours.
    :param fontsize: forwarded to ``Hist2D.plot``.
    :param norm: forwarded to ``Hist2D.plot``.
    :param ymin: lower y limit.
    :param ymax: upper y limit.
    :param contour: draw contours (a failed plot is retried without them).
    :param kwargs: forwarded to ``Hist2D.plot``.
    """
    from biokit import Hist2D

    if Nlevels is None or Nlevels == 0:
        contour = False

    data = self.df[['cov', 'gc']].copy()
    data['gc'] *= 100
    data = data.dropna()

    if bins is None:
        gc_span = int(data['gc'].max() - data['gc'].min() + 1)
        bins = [100, min(gc_span, max(5, self.bed.gc_window_size - 4))]
        # A bin count must be an integer; the coverage maximum may be a float.
        bins[0] = int(max(10, min(bins[0], self.df['cov'].max())))

    h2 = Hist2D(data)
    plot_options = dict(bins=bins, xlabel="Per-base coverage",
                        ylabel=r'GC content (%)', Nlevels=Nlevels,
                        norm=norm, fontsize=fontsize)
    try:
        h2.plot(contour=contour, **plot_options, **kwargs)
    except Exception:
        # Retry without contours; a second failure propagates to the caller.
        h2.plot(contour=False, **plot_options, **kwargs)

    pylab.ylim([ymin, ymax])
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only
        pass
    if filename:
        pylab.savefig(filename)
def scatterplot(self, enrich, cutoff=0.05, nmax=10, gene_set_size=None):
    """Scatter plot of the enriched gene sets.

    x is ``-log10`` of the adjusted p-value, y is the rank of the gene set
    in the dataframe returned by ``self._get_final_df``; marker size and
    colour both follow the ``size`` column.

    :param enrich: object whose ``results`` attribute is passed to
        ``self._get_final_df``.
    :param cutoff: forwarded to ``self._get_final_df``.
    :param nmax: forwarded to ``self._get_final_df``.
    :param gene_set_size: unused; kept for backward compatibility (its
        default is now ``None`` rather than a shared mutable list).
    :return: the dataframe used for the plot.
    """
    df = self._get_final_df(enrich.results, cutoff=cutoff, nmax=nmax)
    ranks = range(len(df))

    pylab.clf()
    pylab.scatter(-pylab.log10(df['Adjusted P-value']), ranks,
                  s=10 * df['size'], c=df['size'])
    # The x axis shows -log10(adjusted p-value), not an odds ratio.
    pylab.xlabel("Adjusted p-value (-log10)")
    pylab.ylabel("Gene sets")
    pylab.yticks(ranks, df.name)
    _, xmax = pylab.xlim()
    pylab.xlim([0, xmax])
    pylab.grid(True)
    ax = pylab.gca()

    M = max(df['size'])
    if M > 100:
        l1, l2, l3 = "10", "100", str(M)
    else:
        l1, l2, l3 = str(round(M / 3)), str(round(M * 2 / 3)), str(M)

    handles = [
        pylab.Line2D([0], [0], marker="o", markersize=size, label=label, ls="")
        for size, label in ((5, l1), (10, l2), (15, l3))
    ]
    ax.legend(handles=handles, loc="upper left", title="gene-set size")

    # 1.3 is about -log10(0.05)
    pylab.axvline(1.3, lw=2, ls="--", color="r")
    pylab.tight_layout()
    pylab.colorbar(pylab.gci())
    return df
def plot_bar_mapq(self, fontsize=16, filename=None):
    """Plots bar plots of the MAPQ (quality) of alignments

    .. plot::
        :include-source:

        from sequana import BAM, sequana_data
        b = BAM(sequana_data('test.bam', "testing"))
        b.plot_bar_mapq()

    :param int fontsize: font size of the x and y labels.
    :param filename: if set, the figure is saved to this file.
    """
    df = self.get_mapq_as_df()
    # one bin per integer value from 0 up to the maximum of the first column
    df.plot(kind='hist', bins=range(0, df.max().values[0] + 1),
            legend=False, grid=True, logy=True)
    pylab.xlabel("MAPQ", fontsize=fontsize)
    pylab.ylabel("Count", fontsize=fontsize)
    try:
        # This may raise issue on MAC platforms
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
    if filename:
        pylab.savefig(filename)
def plot_subreads_histogram(self, bins=10, fontsize=12):
    """Histogram of the "Subreads" column of ``self.df_barcoded``.

    :param bins: binning passed to the pandas ``hist`` call.
    :param int fontsize: font size of the x and y labels.
    """
    self.df_barcoded['Subreads'].hist(bins=bins, ec="k", rwidth=0.8)
    pylab.xlabel("Number of subreads", fontsize=fontsize)
    pylab.ylabel("Number of Barcoded Samples", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_padj_hist(self, bins=60, fontsize=16):
    """Histogram of the non-missing values of ``self.df.padj``.

    :param bins: binning passed to ``pylab.hist``.
    :param int fontsize: font size of the x and y labels.
    """
    pylab.hist(self.df.padj.dropna(), bins=bins, ec="k")
    pylab.grid(True)
    pylab.xlabel("Adjusted p-value", fontsize=fontsize)
    pylab.ylabel("Occurences", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_pvalue_hist(self, bins=60, fontsize=16, rotation=0):
    """Histogram of the non-missing values of ``self.df.pvalue``.

    :param bins: binning passed to ``pylab.hist``.
    :param int fontsize: font size of the x and y labels.
    :param rotation: currently unused; kept so existing callers keep working.
    """
    pylab.hist(self.df.pvalue.dropna(), bins=bins, ec="k")
    pylab.grid(True)
    pylab.xlabel("raw p-value", fontsize=fontsize)
    pylab.ylabel("Occurences", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def barplot_per_sample(self, alpha=0.5, width=0.8, filename=None):
    """Horizontal bar plot of the read count per sample.

    Rows of ``self.get_data_reads()`` are summed per ``name``. The entry
    named 'Undetermined' is drawn in red on the same axes as the other
    samples. Counts are shown in millions when the total over the samples
    exceeds 1e6.

    :param float alpha: transparency of the bars.
    :param float width: width of the bars.
    :param filename: if set, the figure is saved to this file (dpi=200).
    """
    df = self.get_data_reads()

    # this is ugly but will do the job for now
    under = df.query("name=='Undetermined'")
    others = df.query("name!='Undetermined'")

    under = under.groupby("name").sum().reset_index()
    others = others.groupby("name").sum().reset_index()

    under = under[["name", "count"]].set_index("name")
    others = others[["name", "count"]].set_index("name")

    all_data = others.sort_index(ascending=False)
    all_data.columns = ["samples"]

    # appended at the end
    all_data.loc['undetermined'] = 0

    # revert back
    all_data = all_data.loc[::-1]

    # just for legend
    under.columns = ['undetermined']

    M = all_data.sum().min() > 1e6
    if M:
        all_data /= 1e6
        under /= 1e6

    all_data.plot(kind="barh", alpha=alpha, zorder=1, width=width, ec='k')
    under.plot(kind="barh", alpha=alpha, color="red", ax=pylab.gca(),
               zorder=1, width=width, ec='k')

    pylab.ylim([-0.5, len(all_data) + 0.5])
    if len(all_data) < 100:
        pylab.yticks(range(len(all_data)), all_data.index)

    pylab.legend()
    pylab.grid(True, zorder=-1)
    pylab.xlabel("Number of reads (M)" if M else "Number of reads")
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
    if filename:
        pylab.savefig(filename, dpi=200)
def hist_quality_per_barcode(self, bins=10, fontsize=12):
    """Histogram of the "Mean Barcode Quality" column of ``self.df_barcoded``.

    :param bins: binning passed to the pandas ``hist`` call.
    :param int fontsize: font size of the x and y labels.
    """
    self.df_barcoded['Mean Barcode Quality'].hist(bins=bins, ec="k",
                                                  rwidth=0.8)
    pylab.xlabel("Mean Barcode Quality", fontsize=fontsize)
    pylab.ylabel("Number of Barcoded Samples", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def hist_mean_polymerase_read_length(self, bins=10, fontsize=12):
    """Histogram of the "Mean Read Length" column of ``self.df_barcoded``.

    :param bins: binning passed to the pandas ``hist`` call.
    :param int fontsize: font size of the x and y labels.
    """
    self.df_barcoded['Mean Read Length'].hist(bins=bins, ec="k", rwidth=0.8)
    pylab.xlabel("Mean Polymerase Read Length", fontsize=fontsize)
    pylab.ylabel("Number of Barcoded Samples", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def hist_GC(self, bins=50, alpha=0.5, hold=False, fontsize=12,
            grid=True, xlabel="GC %", ylabel="#", label="", title=None):
    """Plot histogram GC content

    :param int bins: binning for the histogram
    :param float alpha: transparency of the histograms
    :param bool hold: when ``False`` the current figure is cleared first
    :param int fontsize: fontsize of the x and y labels and title.
    :param bool grid: add grid or not
    :param str xlabel:
    :param str ylabel:
    :param str label: label of the histogram (for the legend); the mean GC
        and ``len(self)`` are appended to it
    :param str title: defaults to a title showing the mean GC

    .. plot::
        :include-source:

        from sequana.pacbio import PacbioSubreads
        from sequana import sequana_data
        b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
        b.hist_GC()

    """
    gc_content = self.df.loc[:, 'GC_content']
    mean_GC = np.mean(gc_content)

    # set title if needed
    if title is None:
        title = "GC %% \n Mean GC : %.2f" % (mean_GC)

    # histogram GC percent
    if hold is False:
        pylab.clf()
    pylab.hist(gc_content, bins=bins, alpha=alpha,
               label=label + ", mean : " + str(round(mean_GC, 2))
               + ", N : " + str(len(self)))
    pylab.xlabel(xlabel, fontsize=fontsize)
    pylab.ylabel(ylabel, fontsize=fontsize)
    pylab.title(title, fontsize=fontsize)
    if grid is True:
        pylab.grid(True)
    pylab.xlim([0, 100])
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_piechart(self, df):
    """Pie chart of the entries of *df* whose ``number_in_list`` is not 0.

    Slice sizes come from the ``number_in_list`` column and labels from the
    ``label`` column. Labels longer than 50 characters are cut to 50
    characters followed by "...".

    :param df: dataframe with at least ``number_in_list`` and ``label``
        columns.
    """
    # Here we show the GO terms that have number in list > 0
    # Note, that this is dangerous to look only at this picture without
    # the reference plot, whose data is not available through the API
    selected = df.query("number_in_list!=0")

    max_length = 50
    # The test used to be inverted: long labels were kept whole and short
    # ones got the ellipsis.
    labels = [
        text[0:max_length] + "..." if len(text) > max_length else text
        for text in selected.label.values
    ]
    pylab.pie(selected.number_in_list, labels=labels)
    pylab.tight_layout()
def plot_feature_most_present(self):
    """Bar plot of the share of each sample's counts held by its top row.

    For every column of ``self.counts_raw`` the row with the highest count
    is found, and that count is expressed as a percentage of the column
    total. The bar of each sample is annotated with the identifier of that
    row and coloured with ``group_color`` from ``self.design_df``.
    """
    counts = self.counts_raw
    totals = counts.sum()  # computed once instead of once per sample

    records = []
    # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
    for sample, gene_id in counts.idxmax().items():
        most_exp_gene_count = counts.loc[gene_id, sample]
        total_sample_count = totals.loc[sample]
        records.append({
            "label": sample,
            "gene_id": gene_id,
            "count": most_exp_gene_count,
            "total_sample_count": total_sample_count,
            "most_exp_percent": most_exp_gene_count / total_sample_count * 100,
        })

    df = pd.DataFrame(records).set_index("label")
    df = pd.concat([self.design_df, df], axis=1)

    pylab.clf()
    bars = pylab.barh(
        df.index,
        df.most_exp_percent,
        color=df.group_color,
        zorder=10,
        lw=1,
        ec="k",
        height=0.9,
    )

    # one annotation per bar, written at x=2 on the bar's row
    for idx, _ in enumerate(bars):
        pylab.text(
            2,
            idx,
            df.gene_id.iloc[idx],
            ha="center",
            va="center",
            rotation=0,
            zorder=20,
        )

    self._format_plot(
        xlabel="Percent of total reads",
    )
    pylab.tight_layout()
def barplot(self, enrich, cutoff=0.05, nmax=10):
    """Horizontal bar plot of ``-log10`` of the adjusted p-values.

    :param enrich: object whose ``results`` attribute is passed to
        ``self._get_final_df``.
    :param cutoff: forwarded to ``self._get_final_df``.
    :param nmax: forwarded to ``self._get_final_df``.
    :return: the dataframe used for the plot.
    """
    table = self._get_final_df(enrich.results, cutoff=cutoff, nmax=nmax)
    positions = range(len(table))
    scores = -pylab.log10(table['Adjusted P-value'])

    pylab.clf()
    pylab.barh(positions, scores)
    pylab.yticks(positions, table.name)
    # dashed red line at 1.3, i.e. about -log10(0.05)
    pylab.axvline(1.3, lw=2, ls="--", color="r")
    pylab.grid(True)
    pylab.xlabel("Adjusted p-value (log10)")
    pylab.ylabel("Gene sets")
    # keep the automatic upper bound but start the axis at 0
    upper = pylab.xlim()[1]
    pylab.xlim([0, upper])
    pylab.tight_layout()
    return table
def plot_sirv_by_group(self, title, shift=5, plot=False, mapq_min=-1):
    """Count the reads of ``self.df`` per reference-name prefix.

    Rows whose ``reference_name`` is -1 (integer or string) are ignored.
    The remaining rows are grouped by the first *shift* characters of
    ``reference_name`` and, within each group, rows with ``mapq`` strictly
    greater than *mapq_min* are counted.

    :param str title: title of the bar plot (only used when *plot* is set).
    :param int shift: number of leading characters defining a group.
    :param bool plot: draw the counts as a bar plot.
    :param mapq_min: exclusive lower bound on ``mapq``.
    :return: a tuple ``(counts, self.df)``; ``counts`` is an unnamed Series
        indexed by group, or an empty Series when no row is kept.
    """
    known = self.df.query("reference_name not in [-1, '-1']").copy()
    if not len(known):
        return pd.Series(), self.df

    known['group'] = [name[0:shift] for name in known.reference_name]
    above_threshold = known.query("mapq>@mapq_min")
    mapped = above_threshold.groupby("group").count()["mapq"]
    mapped.name = None

    if plot:
        mapped.plot(kind="bar")
        pylab.title(title)
        pylab.tight_layout()
    return mapped, self.df
def hist_polymerase_per_barcode(self, bins=10, fontsize=12):
    """histogram of number of polymerase per barcode

    Cumulative histogram gives total number of polymerase reads

    :param bins: binning passed to the pandas ``hist`` call.
    :param int fontsize: font size of the x and y labels.
    """
    reads = self.df_barcoded["Polymerase Reads"]
    PR = reads.sum()
    reads.hist(bins=bins, ec="k", rwidth=0.8)
    pylab.title("Total Polymerase count: {}".format(PR))
    pylab.xlabel("Number of Polymerase Reads", fontsize=fontsize)
    pylab.ylabel("Number of Barcoded Samples", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_hist_zscore(self, fontsize=16, filename=None, max_z=6,
                     binwidth=0.5, **hist_kargs):
    """Barplot of the zscore values

    The ``zscore`` column of ``self.df`` is restricted to the slice
    ``self.range[0]:self.range[1]`` before plotting.

    :param int fontsize: font size of the x label.
    :param filename: if set, the figure is saved to this file.
    :param max_z: currently unused; kept so existing callers keep working.
    :param binwidth: forwarded to ``self._set_bins`` to build the bins.
    :param hist_kargs: forwarded to the pandas ``hist`` call.
    """
    pylab.clf()
    bins = self._set_bins(self.df["zscore"][self.range[0]:self.range[1]],
                          binwidth)
    self.df["zscore"][self.range[0]:self.range[1]].hist(
        grid=True, bins=bins, **hist_kargs)
    pylab.xlabel("Z-Score", fontsize=fontsize)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
    if filename:
        pylab.savefig(filename)
def hist_transcript(self, hide_unmapped=True):
    """Bar plot of the number of rows of ``self.df`` per ``reference_name``.

    Only rows with ``reference_length>0`` are counted. The query in use is
    printed, and a message is printed when nothing is left to plot.

    :param bool hide_unmapped: when exactly ``True``, rows whose
        ``reference_name`` equals -1 are also excluded.
    :return: the Series of counts indexed by ``reference_name`` (possibly
        empty, in which case nothing is drawn).
    """
    pylab.clf()
    if hide_unmapped is True:
        query = "reference_length>0 and reference_name!=-1"
    else:
        query = "reference_length>0"
    print(query)
    ts = self.df.query(query).groupby("reference_name").count().reference_length
    if len(ts) == 0:
        print("nothing to plot")
        return ts

    ts.plot(kind="bar", color="r")
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
    return ts
def plot(self, interpolation='None', aspect='auto', cmap='hot',
         tight_layout=True, colorbar=True, fontsize_x=None,
         fontsize_y=None, rotation_x=90, xticks_on=True, yticks_on=True,
         **kargs):
    """wrapper around imshow to plot a dataframe

    :param interpolation: set to None
    :param aspect: set to 'auto'
    :param cmap: colormap to be used.
    :param tight_layout: call ``pylab.tight_layout`` at the end
    :param colorbar: add a colobar (default to True)
    :param fontsize_x: fontsize on xlabels (16 when ``None``)
    :param fontsize_y: fontsize on ylabels (16 when ``None``)
    :param rotation_x: rotate labels on xaxis
    :param xticks_on: switch off the xticks and labels
    :param yticks_on: switch off the yticks and labels
    :param kargs: forwarded to ``pylab.imshow``
    """
    data = self.df

    pylab.clf()
    pylab.imshow(data, interpolation=interpolation, aspect=aspect,
                 cmap=cmap, **kargs)

    # identity test: `== None` would call a custom __eq__ on odd inputs
    if fontsize_x is None:
        fontsize_x = 16  # FIXME use default values
    if fontsize_y is None:
        fontsize_y = 16  # FIXME use default values

    if yticks_on is True:
        pylab.yticks(range(0, len(data.index)), data.index,
                     fontsize=fontsize_y)
    else:
        pylab.yticks([])

    if xticks_on is True:
        pylab.xticks(range(0, len(data.columns[:])), data.columns,
                     fontsize=fontsize_x, rotation=rotation_x)
    else:
        pylab.xticks([])

    if colorbar is True:
        pylab.colorbar()

    if tight_layout:
        pylab.tight_layout()
def plot_boxplot_normeddata(self, fliersize=2, linewidth=2, rotation=0, **kwargs):
    """Box plot of ``self.counts_norm`` on a log-scaled y axis.

    Values are clipped to a minimum of 1 before plotting, and boxes take
    their colours from ``self.design_df.group_color``.

    :param fliersize: forwarded to seaborn ``boxplot``.
    :param linewidth: forwarded to seaborn ``boxplot``.
    :param rotation: rotation of the x tick labels.
    :param kwargs: forwarded to seaborn ``boxplot``.
    """
    import seaborn as sbn

    clipped = self.counts_norm.clip(1)
    colours = self.design_df.group_color
    axes = sbn.boxplot(data=clipped, linewidth=linewidth,
                       fliersize=fliersize, palette=colours, **kwargs)

    # re-apply the current ticks and labels, only changing their rotation
    pylab.xticks(*pylab.xticks(), rotation=rotation)
    axes.set(yscale="log")
    self._format_plot(ylabel="Normalised count distribution")
    pylab.tight_layout()
def hist_GC(self, bins=50, alpha=0.5, hold=False, fontsize=12,
            grid=True, xlabel="GC %", ylabel="#", label="", title=None):
    """Plot histogram GC content

    :param int bins: binning for the histogram
    :param float alpha: transparency of the histograms
    :param bool hold: when ``False`` the current figure is cleared first
    :param int fontsize: fontsize of the x and y labels and title.
    :param bool grid: add grid or not
    :param str xlabel:
    :param str ylabel:
    :param str label: label of the histogram (for the legend); the mean GC
        and ``len(self)`` are appended to it
    :param str title: defaults to a title showing the mean GC

    .. plot::
        :include-source:

        from sequana.pacbio import PacbioSubreads
        from sequana import sequana_data
        b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
        b.hist_GC()

    """
    gc_values = self.df.loc[:, 'GC_content']
    mean_GC = np.mean(gc_values)

    # set title if needed
    if title is None:
        title = "GC %% \n Mean GC : %.2f" % (mean_GC)

    # histogram GC percent
    if hold is False:
        pylab.clf()
    legend_label = (label + ", mean : " + str(round(mean_GC, 2))
                    + ", N : " + str(len(self)))
    pylab.hist(gc_values, bins=bins, alpha=alpha, label=legend_label)
    pylab.xlabel(xlabel, fontsize=fontsize)
    pylab.ylabel(ylabel, fontsize=fontsize)
    pylab.title(title, fontsize=fontsize)
    if grid is True:
        pylab.grid(True)
    pylab.xlim([0, 100])
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_bar_mapq(self, fontsize=16, filename=None):
    """Histogram of the MAPQ (mapping quality) of the alignments.

    .. plot::
        :include-source:

        from sequana import BAM, sequana_data
        b = BAM(sequana_data('test.bam', "testing"))
        b.plot_bar_mapq()

    :param int fontsize: font size of the x and y labels.
    :param filename: if set, the figure is saved to this file.
    """
    mapq = self.get_mapq_as_df()
    # one bin per integer value, up to the maximum of the first column
    upper = mapq.max().values[0] + 1
    mapq.plot(kind="hist", bins=range(0, upper), legend=False, grid=True,
              logy=True)

    for set_label, text in ((pylab.xlabel, "MAPQ"), (pylab.ylabel, "Count")):
        set_label(text, fontsize=fontsize)
    pylab.tight_layout()

    if filename:
        pylab.savefig(filename)
def run_enrichment_kegg(self, organism, annot_col="Name", out_dir="enrichment"):  # pragma: no cover
    """Run the KEGG pathway enrichment for every comparison and export it.

    For each entry of ``self.comparisons`` and each direction ("up", "down",
    "all"), a scatter plot is saved as PDF and PNG under ``<out_dir>/figures``.
    The concatenated tables are stored in ``self.enrichment_kegg`` and
    written to ``<out_dir>/enrichment_kegg.xlsx`` (sheet "kegg").

    :param organism: passed to ``KeggPathwayEnrichment``.
    :param str annot_col: passed to ``self.get_gene_lists``.
    :param out_dir: output directory; created if needed.
    """
    out_dir = Path(out_dir) / "figures"
    out_dir.mkdir(exist_ok=True, parents=True)

    gene_lists_dict = self.get_gene_lists(annot_col=annot_col, dropna=True)
    enrichment = {}

    for compa in self.comparisons:
        gene_lists = gene_lists_dict[compa]
        ke = KeggPathwayEnrichment(gene_lists, organism, progress=False)
        ke.compute_enrichment()

        for direction in ["up", "down", "all"]:
            enrichment[(compa, direction)] = ke._get_final_df(
                ke.enrichment[direction].results, nmax=10000)
            pylab.figure()
            ke.scatterplot(direction)
            pylab.tight_layout()
            pylab.savefig(out_dir / f"kegg_{compa}_{direction}.pdf")
            pylab.savefig(out_dir / f"kegg_{compa}_{direction}.png")

        logger.info(f"KEGG enrichment for {compa} DONE.")

    df = pd.concat(enrichment).sort_index()
    df.index.rename(["comparison", "direction", "index"], inplace=True)
    self.enrichment_kegg = df

    # Export results (should be moved to enrichment.py at some point I think)
    with pd.ExcelWriter(out_dir.parent / "enrichment_kegg.xlsx") as writer:
        df = self.enrichment_kegg.reset_index()
        # sheet_name by keyword: passing it positionally is deprecated
        df.to_excel(writer, sheet_name="kegg", index=False)
        ws = writer.sheets["kegg"]
        try:
            ws.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
        except Exception as err:
            # The filter is a convenience only; the worksheet object may
            # not offer autofilter, depending on the Excel engine in use.
            logger.warning(f"Could not set autofilter on sheet 'kegg': {err}")
def plot_boxplot_rawdata(self, fliersize=2, linewidth=2, rotation=0, **kwargs):
    """Box plot of ``self.counts_raw`` on a log-scaled y axis.

    Values are clipped to a minimum of 1 before plotting, and boxes take
    their colours from ``self.design_df.group_color``.

    :param fliersize: forwarded to seaborn ``boxplot``.
    :param linewidth: forwarded to seaborn ``boxplot``.
    :param rotation: rotation of the x tick labels.
    :param kwargs: forwarded to seaborn ``boxplot``.
    """
    import seaborn as sbn

    raw = self.counts_raw.clip(1)
    axes = sbn.boxplot(data=raw, linewidth=linewidth, fliersize=fliersize,
                       palette=self.design_df.group_color, **kwargs)

    # re-apply the current ticks and labels, only changing their rotation
    tick_positions, tick_labels = pylab.xticks()
    pylab.xticks(tick_positions, tick_labels, rotation=rotation)

    axes.set_ylabel("Counts (raw) in log10 scale")
    axes.set_yscale("log")
    self._format_plot(ylabel="Raw count distribution")
    pylab.tight_layout()
def plot_contig_length_vs_nreads(self, fontsize=16):
    """Log-log plot of the number of reads versus the contig length.

    A straight line fitted by least squares on the contigs with
    ``nread>10 and length>100000`` is overlaid in red, drawn between the
    smallest and largest contig lengths.

    :param int fontsize: font size of the x and y labels.
    """
    # same as plot_scatter_contig_length_nread_cov
    if self._df is None:
        _ = self.get_df()
    pylab.clf()
    df = self._df

    m1 = df.length.min()
    M1 = df.length.max()
    pylab.loglog(df.length, df.nread, "o")
    pylab.xlabel("Contig length", fontsize=fontsize)
    pylab.ylabel("Contig N reads", fontsize=fontsize)
    pylab.grid()

    selected = df.query("nread>10 and length>100000")
    X = selected['length']
    Y = selected['nread']
    # fit nread = m * length + c
    A = np.vstack([X, np.ones(len(X))]).T
    # Series.as_matrix() was removed in pandas 1.0; .values is equivalent
    m, c = np.linalg.lstsq(A, Y.values)[0]

    x = np.array([m1, M1])
    pylab.plot(x, m * x + c, "o-r")
    pylab.tight_layout()
def hist_transcript(self, hide_unmapped=True):
    """Bar plot of the number of rows of ``self.df`` per ``reference_name``.

    Only rows with ``reference_length>0`` are counted. The query in use is
    printed, and a message is printed when nothing is left to plot.

    :param bool hide_unmapped: when exactly ``True``, rows whose
        ``reference_name`` equals -1 are also excluded.
    :return: the Series of counts indexed by ``reference_name`` (possibly
        empty, in which case nothing is drawn).
    """
    pylab.clf()
    if hide_unmapped is True:
        query = "reference_length>0 and reference_name!=-1"
    else:
        query = "reference_length>0"
    print(query)
    ts = self.df.query(query).groupby(
        "reference_name").count().reference_length
    if len(ts) == 0:
        print("nothing to plot")
        return ts

    ts.plot(kind="bar", color="r")
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
    return ts
def plot_bar_grouped(self, normalise=False, ncol=2, N=None):
    """Grouped bar plot of ``self.sirv``, one series per label.

    :param normalise: divide each series by ``N / max(N)``.
    :param ncol: columns in the legend
    :param N: one value per series; defaults to the length of each item of
        ``self.rawdata``.
    :return: the dataframe that was plotted.
    """
    if N is None:
        N = [len(item) for item in self.rawdata]
    N = np.array(N)

    table = pd.DataFrame(self.sirv).T
    if normalise:
        weights = N / max(N)
        table = table / weights
    table.columns = self.labels

    table.plot(kind="bar")
    pylab.xlabel("")
    pylab.legend(self.labels, ncol=ncol)
    pylab.tight_layout()
    return table
def plot(self, bins=80, rwidth=0.8, **kwargs):
    """Histogram of ``self.data`` using the labels, grid and title of self.

    :param bins: binning passed to ``pylab.hist``.
    :param rwidth: relative bar width passed to ``pylab.hist``.
    :param kwargs: forwarded to ``pylab.hist``.
    """
    pylab.clf()
    pylab.hist(self.data, bins=bins, rwidth=rwidth, **kwargs)
    pylab.xlabel(self.xlabel, fontsize=self.fontsize)
    pylab.ylabel(self.ylabel, fontsize=self.fontsize)
    pylab.grid(self.grid)
    pylab.title(self.title)
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass
def plot_bar_grouped(self, normalise=False, ncol=2, N=None):
    """Plot ``self.sirv`` as grouped bars labelled with ``self.labels``.

    :param normalise: scale each series by dividing it by ``N / max(N)``.
    :param ncol: columns in the legend
    :param N: one value per series; when ``None`` the length of each item
        of ``self.rawdata`` is used.
    :return: the dataframe that was plotted.
    """
    sizes = N if N is not None else [len(x) for x in self.rawdata]
    N = np.array(sizes)

    frame = pd.DataFrame(self.sirv).T
    if normalise:
        frame = frame / (N / max(N))
    frame.columns = self.labels

    frame.plot(kind="bar")
    pylab.xlabel("")
    pylab.legend(self.labels, ncol=ncol)
    pylab.tight_layout()
    return frame
def plot_scatter_contig_length_nread_cov(self, fontsize=16, vmin=0, vmax=50,
                                         min_nreads=20, min_length=50000):
    """Log-log scatter plot of reads versus contig length, coloured by covStat.

    A straight line fitted by least squares on the contigs with
    ``nread>min_nreads and length>min_length`` is overlaid in red, drawn
    between the smallest and largest contig lengths.

    :param int fontsize: font size of the x and y labels.
    :param vmin: lower bound of the colour scale.
    :param vmax: upper bound of the colour scale.
    :param min_nreads: exclusive lower bound on ``nread`` for the fit.
    :param min_length: exclusive lower bound on ``length`` for the fit.
    """
    if self._df is None:
        _ = self.get_df()
    pylab.clf()
    df = self._df

    m1 = df.length.min()
    M1 = df.length.max()

    # least square, on the selected contigs only (nread = m * length + c)
    selected = df.query("nread>@min_nreads and length>@min_length")
    X = selected['length']
    Y = selected['nread']
    A = np.vstack([X, np.ones(len(X))]).T
    # Series.as_matrix() was removed in pandas 1.0; .values is equivalent
    m, c = np.linalg.lstsq(A, Y.values)[0]
    x = np.array([m1, M1])

    # the scatter itself uses every contig
    pylab.scatter(df['length'], df['nread'], c=df['covStat'],
                  vmin=vmin, vmax=vmax)
    pylab.colorbar()
    pylab.xlabel("Contig length", fontsize=fontsize)
    pylab.ylabel("Contig reads", fontsize=fontsize)
    pylab.title("coverage function of contig length and reads used")
    pylab.grid()
    pylab.plot(x, m * x + c, "o-r")
    pylab.loglog()
    pylab.tight_layout()
def plot_count_per_sample(self, fontsize=12, rotation=45):
    """Number of mapped and annotated reads (i.e. counts) per sample.

    Each color for each replicate

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_count_per_sample()

    :param int fontsize: font size of the labels and title.
    :param rotation: rotation of the x tick labels.
    """
    pylab.clf()
    df = self.counts_raw.sum().rename("total_counts")
    df = pd.concat([self.design_df, df], axis=1)

    pylab.bar(
        df.index,
        df.total_counts / 1000000,  # shown in millions of reads
        color=df.group_color,
        lw=1,
        zorder=10,
        ec="k",
        width=0.9,
    )

    pylab.xlabel("Samples", fontsize=fontsize)
    pylab.ylabel("reads (M)", fontsize=fontsize)
    pylab.grid(True, zorder=0)
    pylab.title("Total read count per sample", fontsize=fontsize)
    pylab.xticks(rotation=rotation, ha="right")
    try:
        pylab.tight_layout()
    except Exception:
        # cosmetic step only; KeyboardInterrupt/SystemExit still propagate
        pass