class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""

    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the percentage table, draw it as stacked bars, save figure and CSV.

        Reads ``self.parent.pools`` and, for every pool and each of its
        'low', 'med' and 'big' fractions, the ``rdp.phyla`` attribute.
        The resulting table is kept in ``self.frame`` and also written to
        ``self.csv_path``. Returns nothing.
        """
        # One column per "<pool> - <fraction>" label.
        # NOTE(review): rdp.phyla is presumably a mapping/Series of
        # taxon -> count; confirm against the fraction classes.
        self.frame = OrderedDict((('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
                                  for f in ('low', 'med', 'big')
                                  for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        # Taxa missing from a given sample show up as NaN; treat them as zero
        self.frame = self.frame.fillna(0)
        # Rows become samples, columns become taxa
        self.frame = self.frame.transpose()
        # Scale every row (sample) so that it adds up to 100 percent
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() no longer exist in
        # pandas; sort_values() and reindex(columns=...) replace them.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot() opens its own figure when no `ax` is given, so no
        # figure is created beforehand (it would be left open and unused).
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.figure
        try:
            # Other #
            axes.set_title('Species relative abundances per fraction per pool')
            axes.set_ylabel('Relative abundances in percent')
            axes.xaxis.grid(False)
            axes.yaxis.grid(False)
            axes.set_ylim([0, 100])
            # Put a legend below current axis
            axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.20),
                        fancybox=True, shadow=True, ncol=5)
            # Save it #
            self.save_plot(fig, axes, width=24.0, height=14.0,
                           bottom=0.30, top=0.97, left=0.04, right=0.98)
            self.frame.to_csv(self.csv_path)
        finally:
            # Release the figure even when saving fails
            pyplot.close(fig)
def duplications_stats(self):
    """Tabulate per-gene hit statistics for every gene in ``self.genes``.

    Returns:
        A pandas DataFrame with one row per gene name and one column per
        statistic: the genome name, its taxon, the number of raw hits, the
        number of hits of type 'fresh', 'marine' and 'other', and the number
        of 'fresh' hits whose genome is not the gene's own genome object.
    """
    def count_hits(gene, kind):
        # How many hits of this gene carry the given 'type' value
        return sum(1 for hit in gene.hits if hit['type'] == kind)

    table = OrderedDict()
    for gene in self.genes:
        # Basic stats #
        row = OrderedDict()
        row['genome'] = gene.genome.name
        row['taxon'] = gene.genome.info['taxon']
        row['# of hits'] = len(gene.raw_hits)
        row['# of fresh hits'] = count_hits(gene, 'fresh')
        # Despite their labels, the next two entries hold counts
        row['Is there a marine hit'] = count_hits(gene, 'marine')
        row['Is there a refseq hit'] = count_hits(gene, 'other')
        # The hits not in this genome (compared by object identity) #
        row['# of fresh hits not in genome'] = sum(
            1 for hit in gene.hits
            if hit['type'] == 'fresh' and hit['genome'] is not gene.genome)
        table[gene.name] = row
    # Make a dataframe: genes start out as columns, so flip to rows #
    return pandas.DataFrame.from_dict(table).transpose()
def duplications_stats(self):
    """Collect hit statistics for each gene of ``self.genes`` into a table.

    Returns:
        A pandas DataFrame indexed by gene name. Its columns are the genome
        name, the taxon, the raw hit count, the counts of hits typed
        'fresh', 'marine' and 'other', and the count of 'fresh' hits that
        belong to a genome object other than the gene's own.
    """
    per_gene = OrderedDict()
    for gene in self.genes:
        home = gene.genome
        # Basic stats #
        entry = OrderedDict()
        entry['genome'] = home.name
        entry['taxon'] = home.info['taxon']
        entry['# of hits'] = len(gene.raw_hits)
        # Tally the hit types once, then look each category up
        kinds = [hit['type'] for hit in gene.hits]
        entry['# of fresh hits'] = kinds.count('fresh')
        # These two are stored as counts, not booleans
        entry['Is there a marine hit'] = kinds.count('marine')
        entry['Is there a refseq hit'] = kinds.count('other')
        # The hits not in this genome #
        outsiders = [hit for hit in gene.hits
                     if hit['type'] == 'fresh' and hit['genome'] is not home]
        entry['# of fresh hits not in genome'] = len(outsiders)
        per_gene[gene.name] = entry
    # Make a dataframe #
    as_columns = pandas.DataFrame.from_dict(per_gene)
    # Genes are columns at this point; callers get them as rows
    return as_columns.transpose()
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""

    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Compute per-sample percentages, plot them stacked, save plot and CSV.

        For each pool in ``self.parent.pools`` and each of its 'low', 'med'
        and 'big' fractions, the ``rdp.phyla`` attribute becomes one sample.
        The percentage table is stored in ``self.frame`` and written to
        ``self.csv_path``. Returns nothing.
        """
        # Columns are labelled "<pool> - <fraction>".
        # NOTE(review): rdp.phyla presumably maps taxon -> count; verify.
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big')
             for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        # A taxon absent from a sample is NaN after the merge; count it as 0
        self.frame = self.frame.fillna(0)
        # Samples as rows, taxa as columns
        self.frame = self.frame.transpose()
        # Convert each row to percentages of that row's total
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # Uses sort_values()/reindex(columns=...) because Series.sort() and
        # DataFrame.reindex_axis() have been removed from pandas.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # No pyplot.figure() call here: DataFrame.plot() makes its own figure
        # when `ax` is not passed, and an extra one would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.figure
        try:
            # Other #
            axes.set_title('Species relative abundances per fraction per pool')
            axes.set_ylabel('Relative abundances in percent')
            axes.xaxis.grid(False)
            axes.yaxis.grid(False)
            axes.set_ylim([0, 100])
            # Put a legend below current axis
            axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.20),
                        fancybox=True, shadow=True, ncol=5)
            # Save it #
            self.save_plot(fig, axes, width=24.0, height=14.0,
                           bottom=0.30, top=0.97, left=0.04, right=0.98)
            self.frame.to_csv(self.csv_path)
        finally:
            # Always close the figure, including when saving raises
            pyplot.close(fig)
class FractionTaxaBarStack(Graph):
    """Stacked bar chart of relative taxon abundances per fraction and pool."""

    short_name = 'fraction_taxa_barstack'
    # Layout margins and output formats.
    # NOTE(review): presumably read by Graph.save_plot; confirm in the base class.
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the percentage table, draw it as stacked bars, save figure and CSV.

        Reads ``self.parent.pools`` and, for every pool and each of its
        'low', 'med' and 'big' fractions, the ``rdp.phyla`` attribute.
        Samples are relabelled with display names, taxa with a small total
        are merged into an 'Others' column, and each sample is scaled to
        100 percent. The table is kept in ``self.frame`` and written to
        ``self.csv_path``. Returns nothing.
        """
        # Make Frame #
        # NOTE(review): rdp.phyla presumably maps taxon -> count; verify.
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big')
             for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        # A taxon absent from a sample is NaN after the merge; count it as 0
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several samples deliberately map to the same display name, so the
        # row labels are not unique after this step.
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        # Samples as rows, taxa as columns
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # A taxon is "low abundant" when its total over all samples is < 30000
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # Uses sort_values()/reindex(columns=...) because Series.sort() and
        # DataFrame.reindex_axis() have been removed from pandas.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # No pyplot.figure() call here: DataFrame.plot() makes its own figure
        # when `ax` is not passed, and an extra one would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.figure
        try:
            # Other #
            axes.set_ylabel('Relative abundances in percent')
            axes.xaxis.grid(False)
            axes.yaxis.grid(False)
            axes.set_ylim([0, 100])
            # Put a legend below current axis
            axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.40),
                        fancybox=True, shadow=True, ncol=5, prop={'size': 10})
            # Font size #
            axes.tick_params(axis='x', which='major', labelsize=11)
            # Save it #
            self.save_plot(fig, axes)
            self.frame.to_csv(self.csv_path)
        finally:
            # Always close the figure, including when saving raises
            pyplot.close(fig)
class FractionTaxaBarStack(Graph):
    """This is figure 3 of the paper"""

    short_name = 'fraction_taxa_barstack'
    # Layout margins and output formats.
    # NOTE(review): presumably read by Graph.save_plot; confirm in the base class.
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Compute per-sample percentages, plot them stacked, save plot and CSV.

        Every pool in ``self.parent.pools`` contributes its 'low', 'med' and
        'big' fractions (via ``rdp.phyla``) as samples. Samples get display
        names, taxa whose total is below 30000 are summed into 'Others', and
        each sample is rescaled to percentages. The table is stored in
        ``self.frame`` and written to ``self.csv_path``. Returns nothing.
        """
        # Make Frame #
        # NOTE(review): rdp.phyla presumably maps taxon -> count; verify.
        self.frame = OrderedDict((('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
                                  for f in ('low', 'med', 'big')
                                  for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        # Taxa missing from a given sample show up as NaN; treat them as zero
        self.frame = self.frame.fillna(0)
        # Rename #
        # Different pools intentionally share a display name, which leaves
        # duplicate row labels once the table is transposed.
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        # Rows become samples, columns become taxa
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # Threshold applies to each taxon's total across all samples
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() no longer exist in
        # pandas; sort_values() and reindex(columns=...) replace them.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot() opens its own figure when no `ax` is given, so no
        # figure is created beforehand (it would be left open and unused).
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.figure
        try:
            # Other #
            axes.set_ylabel('Relative abundances in percent')
            axes.xaxis.grid(False)
            axes.yaxis.grid(False)
            axes.set_ylim([0, 100])
            # Put a legend below current axis
            axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.40),
                        fancybox=True, shadow=True, ncol=5, prop={'size': 10})
            # Font size #
            axes.tick_params(axis='x', which='major', labelsize=11)
            # Save it #
            self.save_plot(fig, axes)
            self.frame.to_csv(self.csv_path)
        finally:
            # Release the figure even when saving fails
            pyplot.close(fig)