def diversity_plot(metagenome,
                   x=None,
                   y=None,
                   hue=None,
                   col=None,
                   row=None,
                   output='boxplot.html',
                   points=True,
                   plot_kw={},
                   box_kw={},
                   scatter_kw={}):
    '''Draw a faceted boxplot of an alpha-diversity metric and save it.

    Args:
        metagenome: Object exposing ``compute_alpha_diversity``,
            ``alpha_diversity``, ``metadata`` and ``figdir``.
        x: Column used on the x axis of every layer.
        y: Metric passed to ``compute_alpha_diversity``; also the y column.
        hue, col, row: Forwarded to ``BokehFacetGrid``.
        output: Name handed to the grid's ``save`` method.
        points: When true, a swarmplot layer is mapped after the boxplot.
        plot_kw: Extra keyword arguments for ``BokehFacetGrid``.
        box_kw: Extra keyword arguments for the boxplot layer.
        scatter_kw: Extra keyword arguments for the swarmplot layer.

    Returns:
        None
    '''
    metagenome.compute_alpha_diversity(y)

    # Diversity values and metadata factors are joined column-wise.
    diversity_values = metagenome.alpha_diversity
    factor_table = metagenome.metadata.factor_data()
    plot_data = pd.concat([diversity_values, factor_table], axis=1)

    grid = BokehFacetGrid(data=plot_data,
                          hue=hue,
                          row=row,
                          col=col,
                          outdir=metagenome.figdir,
                          **plot_kw)
    grid.map(boxplot, x=x, y=y, **box_kw)

    if points:
        hover_columns = metagenome.metadata.qual_vars
        grid.map(swarmplot, x=x, y=y, tooltips=hover_columns, **scatter_kw)

    grid.save(output)
def distr_cmd(mg, factors, diversity=None, otu_list=None):
    '''Plot diversity boxplots and per-feature distribution plots.

    Args:
        mg: Object exposing ``columns``, ``figdir``, ``taxonomy`` and
            ``get_column_format``; also forwarded to ``diversity_plot``.
        factors: Sequence of exactly four entries ``(hue, x, row, col)``.
        diversity: Iterable of diversity metric names; one boxplot file
            ``boxplot_<metric>.html`` is requested per metric.
        otu_list: Feature identifiers to plot. When it holds a single entry
            that is an existing file, the identifiers are read from that
            file, one per line, skipping the first line.

    Returns:
        None. Returns early when no requested feature is present in
        ``mg.columns``.
    '''
    (hue, x, row, col) = factors

    for div in diversity or ():
        print(f'Diversity boxplot: {div}')
        diversity_plot(mg,
                       x=x,
                       y=div,
                       hue=hue,
                       points=False,
                       col=col,
                       row=row,
                       plot_kw={'height': 400},
                       output=f'boxplot_{div}.html')

    if not otu_list:
        return

    if len(otu_list) == 1 and Path(otu_list[0]).is_file():
        # read_text() closes the file; the first line is skipped as a header.
        otu_list = Path(otu_list[0]).read_text().splitlines()[1:]

    # Keep the caller's order (deduplicated) so that the 50-feature cap
    # always selects the same features for the same input.
    available = set(mg.columns)
    otu_list = [otu for otu in dict.fromkeys(otu_list)
                if otu in available][:50]
    if not otu_list:
        return

    print(f'Feature boxplot ({len(otu_list)} otus (max=50))')
    data = (mg.get_column_format(tax=False)
            .loc[(slice(None), otu_list), :]
            .reset_index())

    # Replace each identifier with a "<rank initial>: <taxon>" lineage label.
    rank_initials = [rank[0].lower() for rank in mg.taxonomy.ranks]
    repl = {}
    for otu in otu_list:
        lineage = mg.taxonomy.data.loc[otu].tolist()[:6]
        # zip() stops at the shorter of the two sequences.
        repl[otu] = ', '.join(
            '{}: {}'.format(*pair) for pair in zip(rank_initials, lineage))
    data = data.replace(repl)

    g = BokehFacetGrid(data=data,
                       hue=hue,
                       col='OTU',
                       col_wrap=5,
                       outdir=mg.figdir,
                       width='auto',
                       height=400)
    g.map(boxplot, x=x, y='value', tooltips=['group_size', 'not_null'])
    g.map(swarmplot, x=x, y='value', tooltips=data.columns)
    g.save('specific_otus_distribution.html')
def otu_topics_barplot(metagenome,
                       topics,
                       output='otu_topics_clustermap.html',
                       col=None,
                       row=None,
                       **kwargs):
    '''Bar plot of feature weights above 0.01, one facet column per community.

    Args:
        metagenome: Object whose ``taxonomy.data`` is left-merged onto the
            weights and whose ``taxonomy.columns`` is used for tooltips.
        topics: Table that is stacked into long format; the result is
            expected to carry ``OTU`` and ``community`` columns.
        output: Target path; its parent is the grid's output directory and
            its file name is passed to ``save``.
        col: Accepted but not used; the facet column is always 'community'.
        row: Facet row forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.

    Returns:
        None
    '''
    # An earlier variant selected the top features per topic instead; the
    # current behaviour keeps every weight strictly above 0.01.
    weights = topics.stack()
    weights = weights.where(lambda values: values > 0.01).dropna()
    weights = weights.rename('weight').reset_index()

    annotated = weights.merge(metagenome.taxonomy.data,
                              left_on='OTU',
                              right_index=True,
                              how='left')
    annotated = annotated.reset_index(drop=True)
    annotated = annotated.sort_values(by='weight', ascending=False)

    target = Path(output)
    grid = BokehFacetGrid(data=annotated,
                          outdir=target.parent,
                          row=row,
                          col='community',
                          sort=False)
    grid.map(barplot,
             x='OTU',
             y='weight',
             tooltips=metagenome.taxonomy.columns)
    grid.save(target.name)
def pairplot(data=None,
             cols=None,
             hue=None,
             tooltips=None,
             width=400,
             height=400,
             output=None):
    '''Scatter plot of every pair of columns, laid out as a facet grid.

    Args:
        data (pd.DataFrame): Source table.
        cols: Columns to pair up. Defaults to all numeric columns of `data`.
        hue: Optional column copied into the plotted table and used as hue.
        tooltips: Optional column label(s) of `data` appended to each pair
            table and forwarded to ``BokehFacetGrid``.
        width: Currently not used by this function.
        height: Currently not used by this function.
        output: Name passed to the grid's ``save`` method. Defaults to
            'pairplot.html' when None.

    Raises:
        ValueError: If fewer than two columns are available to pair.

    Returns:
        None
    '''
    if cols is None:
        cols = data.select_dtypes('number').columns

    pair_tables = []
    for col1, col2 in combinations(cols, 2):
        pair = data[[col1, col2]].assign(var1=col1, var2=col2)
        pair.columns = ['value1', 'value2', 'var1', 'var2']
        if tooltips is not None:
            pair = pd.concat([pair, data[tooltips]], axis=1)
        if hue is not None:
            pair[hue] = data[hue]
        pair_tables.append(pair)

    if not pair_tables:
        # pd.concat([]) would raise a ValueError with a less helpful message.
        raise ValueError('pairplot needs at least two columns to compare')

    comb_data = pd.concat(pair_tables)

    g = BokehFacetGrid(data=comb_data,
                       hue=hue,
                       row='var1',
                       col='var2',
                       tooltips=tooltips)
    g.map(scatter, x='value1', y='value2')

    # Previously the figure was only written when no output was given, so a
    # caller-supplied name silently produced nothing.
    g.save('pairplot.html' if output is None else output)
def tax_cmd(mg, factors=None, ranks=None, plot_bars=True, plot_heatmap=True):
    '''Plot the taxonomic composition for each requested rank.

    Args:
        mg: Object exposing ``copy``, ``figdir`` and, on its copies,
            ``group_taxa`` and ``get_column_format``.
        factors: Sequence of exactly four entries ``(hue, x, row, col)``.
        ranks: Iterable of rank names to process.
        plot_bars: When true, write ``barplot-<rank>.html`` via
            ``taxa_stackplot`` on a copy grouped at that rank.
        plot_heatmap: When true, write ``clustermap-<rank>.html``.

    Returns:
        None
    '''
    hue, x, row, col = factors

    for rank in ranks:
        print(f'Plotting taxonomic composition for: {rank}')

        if plot_bars:
            # Grouping is done on a copy so `mg` itself is left as it was.
            grouped = mg.copy()
            grouped.group_taxa(rank)
            figure_size = {'width': 1400, 'height': 800}
            taxa_stackplot(metagenome=grouped,
                           x=x,
                           hue=hue,
                           col=col,
                           row=row,
                           norm=True,
                           output=f'{mg.figdir}/barplot-{rank}.html',
                           plot_kw=figure_size)

        if plot_heatmap:
            snapshot = mg.copy()
            long_table = snapshot.get_column_format()
            grid = BokehFacetGrid(data=long_table,
                                  row=x,
                                  col=col,
                                  outdir=snapshot.figdir)
            grid.map(clustermap,
                     y=hue,
                     x=rank,
                     z='value',
                     cluster_samples=False)
            grid.save(f'clustermap-{rank}.html')
def lda_boxplot(data,
                metadata=None,
                taxonomy=None,
                x=None,
                row=None,
                col=None,
                rank='Genus',
                output='lda_plot.html',
                width=1400,
                top=10):
    '''Box and swarm plot of per-sample topic values with top-feature tooltips.

    Args:
        data: Mapping with a 'features' table (stacked into topic/feature
            weights) and a 'samples' table (joined with `metadata`).
        metadata (pd.DataFrame): Table concatenated column-wise with
            ``data['samples']``; its columns are the melt id variables.
        taxonomy (pd.DataFrame): Table indexed by feature, holding `rank`.
        x: Column used as hue and to size the figure width.
        row: Passed as the x variable of the boxplot layer.
        col: Facet column forwarded to ``BokehFacetGrid``.
        rank: Taxonomy column shown in the top-feature labels.
        output: Target path; parent is the output directory, name the file.
        width: Minimum figure width.
        top: Number of highest-weight features kept per topic.

    Returns:
        None
    '''
    # Long table of (topic, feature, weight), then the `top` rows per topic.
    weights = data['features'].stack()
    weights = weights.rename_axis(index=['topic', 'feature'])
    weights = weights.rename('weight').reset_index()
    top_otu = weights.groupby('topic').apply(
        lambda grp: grp.nlargest(top, 'weight').reset_index(drop=True))

    # Label each kept feature as "<weight as percent> <taxon at rank>".
    top_otu[rank] = taxonomy.loc[top_otu.feature, rank].to_numpy()
    top_otu['top_otus'] = top_otu[[rank, 'weight']].apply(
        lambda rec: f'{rec.weight:.0%} {rec[rank]}', axis=1)

    # One row per topic, one column per position within the top list.
    top_otu = top_otu['top_otus'].unstack()
    top_otu.columns = [f'OTU_{pos + 1}' for pos in top_otu.columns]

    samples = pd.concat([data['samples'], metadata], axis=1)
    samples = samples.melt(id_vars=metadata.columns)
    samples = samples.merge(top_otu, left_on='variable', right_index=True)

    # Widen the figure when there are many variable/level combinations.
    n_variables = len(samples.variable.unique())
    n_levels = len(samples[x].unique())
    width = max(width, n_variables * n_levels * 15)

    target = Path(output)
    grid = BokehFacetGrid(data=samples,
                          hue=x,
                          row='variable',
                          col=col,
                          width=width,
                          outdir=target.parent)
    grid.map(boxplot, x=row, y='value', tooltips=top_otu.columns)
    grid.map(swarmplot, x='variable', y='value', tooltips=metadata.columns)
    grid.save(target.name)
def sample_topics_clustermap(metagenome,
                             topics,
                             output='sample_topics_clustermap.html',
                             row=None,
                             col=None,
                             **kwargs):
    '''Clustermap of topic weights (x) against sample groups (y).

    Args:
        metagenome: Object whose ``metadata`` provides ``factor_data()``
            and ``qual_vars``.
        topics: Table concatenated column-wise with the metadata factors.
        output: Target path; parent is the output directory, name the file.
        row, col: Facet variables forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.

    Returns:
        None
    '''
    factor_table = metagenome.metadata.factor_data()

    wide = pd.concat([topics, factor_table], axis=1)
    wide = wide.rename_axis(index='groups').reset_index()

    # Everything that is not a qualitative variable or the index column is
    # melted into (topics, weight) pairs.
    keep_columns = list(metagenome.metadata.qual_vars) + ['groups']
    long_table = wide.melt(id_vars=keep_columns,
                           var_name='topics',
                           value_name='weight')

    target = Path(output)
    grid = BokehFacetGrid(data=long_table,
                          outdir=target.parent,
                          row=row,
                          col=col)
    grid.map(clustermap, x='topics', y='groups', z='weight', standardize=False)
    grid.save(target.name)
def otu_topics_clustermap(metagenome,
                          topics,
                          output='otu_topics_clustermap.html',
                          col=None,
                          row=None,
                          **kwargs):
    '''Clustermap of the highest-weight features of every topic.

    For each row of `topics` the 10 largest entries are selected; the union
    of those columns is plotted.

    Args:
        metagenome: Object whose ``taxonomy.data`` is left-merged onto the
            stacked weights.
        topics (pd.DataFrame): Table with one row per topic; after stacking
            it is expected to carry ``OTU`` and ``community`` columns.
        output: Target path; parent is the output directory, name the file.
        col, row: Facet variables forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.

    Returns:
        None
    '''
    # A dict is used as an ordered set: pandas does not accept a `set` as a
    # column indexer, and first-seen order keeps the selection reproducible.
    top_otus = {}
    for topic in topics.index:
        top_otus.update(dict.fromkeys(topics.loc[topic].nlargest(10).index))

    data = (topics[list(top_otus)]
            .stack()
            .rename('weight')
            .reset_index()
            .merge(metagenome.taxonomy.data,
                   left_on='OTU',
                   right_index=True,
                   how='left')
            .reset_index(drop=True))

    g = BokehFacetGrid(data=data,
                       outdir=Path(output).parent,
                       row=row,
                       col=col)
    g.map(clustermap, x='community', y='OTU', z='weight', standardize=True)
    g.save(Path(output).name)
def stats_barplot(data,
                  x=None,
                  variables=['log10_p-adj', 'R2'],
                  hue=None,
                  threshold=0.05,
                  outdir='./',
                  output='stats_barplot.html',
                  plot_kw={},
                  bar_kw={}):
    '''Bar plot of selected statistics, one facet row per statistic.

    Args:
        data (pd.DataFrame): Wide table; every column not listed in
            `variables` is kept as an identifier when melting.
        x: Column used on the x axis.
        variables: Columns melted into 'variable'/'value'; also the row
            order of the grid.
        hue: Forwarded to ``BokehFacetGrid``.
        threshold: Accepted but not used.
        outdir: Output directory forwarded to ``BokehFacetGrid``.
        output: Name passed to the grid's ``save`` method.
        plot_kw: Extra keyword arguments for ``BokehFacetGrid``.
        bar_kw: Extra keyword arguments for the barplot layer.

    Returns:
        None
    '''
    identifier_columns = [name for name in data.columns
                          if name not in variables]
    long_table = data.melt(id_vars=identifier_columns)

    grid = BokehFacetGrid(data=long_table,
                          hue=hue,
                          row='variable',
                          row_order=variables,
                          outdir=outdir,
                          **plot_kw)
    grid.map(barplot, x=x, y='value', tooltips=long_table.columns, **bar_kw)
    grid.save(output)
def test_scatter_ellipse():
    '''Render a scatter plot with ellipses on rotated random 2-D clusters.

    500 points are drawn from one bivariate normal, labelled at random with
    'a', 'b' or 'c', and rotated so each label ends up with a different
    orientation. The figure is saved as 'ellipse_test.html'.
    '''
    n_points = 500
    coords = ['x', 'y']

    samples = np.random.multivariate_normal([3, 5], [[10, 5], [5, 4]],
                                            n_points)
    df = pd.DataFrame(samples)
    df.columns = coords
    df['fact'] = np.random.choice(['a', 'b', 'c'], n_points, replace=True)

    angle = 0.5
    rotation = np.array([[cos(angle), -sin(angle)],
                         [sin(angle), cos(angle)]])

    # Every point is rotated once; 'a' gets one more turn and 'b' two more.
    df[coords] = df[coords].dot(rotation).to_numpy()
    for level, extra_turns in (('a', 1), ('b', 2)):
        rotated = df.loc[df.fact == level, coords]
        for _ in range(extra_turns):
            rotated = rotated.dot(rotation)
        df.loc[df.fact == level, coords] = rotated.to_numpy()

    grid = BokehFacetGrid(data=df, scale=1.5, hue='fact')
    grid.map(scatter, x='x', y='y', ellipse=True, s=5)
    grid.save('ellipse_test.html')
def test_stackplot_2():
    '''Map a stackplot with two x variables onto the table from ``sim()``.'''
    grid = BokehFacetGrid(data=sim(), width=800, hue='fact')
    x_columns = ['x', 'col']
    grid.map(stackplot, x=x_columns, y='y')
def taxa_stackplot(feature_table=None, feature_info=None, metagenome=None, x='variable', hue=None, row=None, col=None, output='stackplot.html', norm=True, abd_thresh=0.01, plot_kw={}, bar_kw={}):
    '''Stacked barplot by sample groups

    Taxa whose mean value inside a group is below `abd_thresh` times the
    sum of the taxa means of that group are relabelled as
    "Others (< <abd_thresh as percent>)" before the values are summed per
    sample and plotted.

    Args:
        feature_table (pd.DataFrame): Count table (sample x OTU). Only read
            when `metagenome` is falsy; it is transposed and merged with
            `feature_info` on the index.
        feature_info (pd.DataFrame): Per-feature annotation table. Only read
            when `metagenome` is falsy; its columns are treated as taxonomy
            columns.
        metagenome (MetagenomeDS): If given, the table comes from
            ``metagenome.get_column_format()`` and the other two tables are
            ignored.
        x (str): Grouping column for the x axis.
        hue (str): Optional second grouping column; it is appended to `x`.
            The stacking (hue) variable itself is always the first taxonomy
            column.
        row (str): Facet row, also used as a grouping column.
        col (str): Facet column, also used as a grouping column.
        output (str): Target path; its parent is the output directory and
            its name the saved file.
        norm (bool): Normalize sample group into ratios.
            NOTE(review): not referenced in this function body - confirm
            whether normalization happens elsewhere.
        abd_thresh (float): Abundance threshold to group taxa into "others"
            Must be in ]0, 1[
        plot_kw (dict): Extra keyword arguments for ``BokehFacetGrid``.
        bar_kw (dict): Extra keyword arguments for the stackplot layer.

    Returns:
        None
    '''
    # x becomes the list of x-axis grouping columns (x, then hue if given).
    x = [xi for xi in [x, hue] if xi is not None]
    if metagenome:
        table = metagenome.get_column_format().reset_index()
        # After reset_index the first column is taken as the sample
        # identifier and the second as the feature identifier.
        tax_cols = [table.columns[1]] + list(metagenome.taxonomy.columns)
        sample_var = table.columns[0]
    else:
        sample_var = 'variable'
        tax_cols = ['feature'] + list(feature_info.columns)
        table = (feature_table.T.merge(
            feature_info, left_index=True, right_index=True).rename_axis(
                index='feature').reset_index().melt(id_vars=tax_cols))
    # From here on `hue` is the feature column, not the caller's argument.
    hue = tax_cols[0]
    groups = x + [xi for xi in [row, col] if xi is not None]
    # Set threshold for assigning low abundance OTUs to others
    taxa_means = table.groupby(groups + [hue])['value'].agg('mean')
    # NOTE(review): Series.sum(level=...) is, as far as I know, no longer
    # available in pandas >= 2.0 - confirm the supported pandas version.
    sample_lims = taxa_means.sum(level=groups) * abd_thresh
    # Re-index both series to the row order of `table` so they can be
    # compared element-wise below.
    taxa_means = taxa_means.loc[list(zip(*[table[x] for x in groups + [hue]]))]
    if len(groups) > 1:
        sample_lims = sample_lims.reindex(
            index=table[groups].apply(tuple, axis=1))
    else:
        sample_lims = sample_lims.reindex(index=table[groups[0]])
    # True for rows whose taxon mean is below its group's threshold.
    in_others_cond = sample_lims.to_numpy() > taxa_means.to_numpy()
    filler = 'Others (< {:.0%})'.format(abd_thresh)
    # Every taxonomy column of a low-abundance row gets the filler label.
    table.loc[in_others_cond, tax_cols] = filler
    agg_values = table.groupby([sample_var, hue]).value.sum()
    # Keep one row per (sample, feature) and overwrite its value with the
    # summed value of that pair.
    # NOTE(review): relies on nth() returning one row per group indexed by
    # the group keys - verify against the installed pandas version.
    table = (
        table.groupby([sample_var, hue]).nth(0, dropna='all')  # nth much faster than first()
        .assign(value=agg_values).reset_index())
    # Rank by total abundance
    hue_order = table.groupby(hue).value.sum().sort_values(
        ascending=False).index
    # The "Others" label, when present, is moved to the end of the order.
    if filler in hue_order:
        hue_order = hue_order.drop(filler).append(pd.Index([filler]))
    g = BokehFacetGrid(data=table, row=row, col=col, hue=hue, hue_order=hue_order, outdir=Path(output).parent, **plot_kw)
    g.map(stackplot, x=x, y='value', **bar_kw)
    g.save(Path(output).name)
def test_barplot_2():
    '''Map a barplot without hue onto the table returned by ``sim()``.'''
    grid = BokehFacetGrid(data=sim(), width=800)
    axes = {'x': 'x', 'y': 'y'}
    grid.map(barplot, **axes)
def test_barplot_1():
    '''Map a hue-coloured barplot with tooltips onto the ``sim()`` table.'''
    grid = BokehFacetGrid(data=sim(), hue='fact', width=800)
    hover_columns = ['fact', 'group']
    grid.map(barplot, x='x', y='y', tooltips=hover_columns)
def test_box_swarm():
    '''Layer a swarmplot over a boxplot on the table from ``sim()``.'''
    grid = BokehFacetGrid(data=sim(), hue='fact', width=800)
    hover_columns = ['fact', 'group']
    # Boxplot first, then the points on top of it.
    grid.map(boxplot, x='x', y='y', tooltips=hover_columns)
    grid.map(swarmplot, x='x', y='y')
def test_swarm_2():
    '''Map a swarmplot without hue onto the table returned by ``sim()``.'''
    grid = BokehFacetGrid(data=sim(), width=800)
    axes = {'x': 'x', 'y': 'y'}
    grid.map(swarmplot, **axes)
def ordination_cmd(mg, factors=None, strata=None, method='pcoa',
                   distance='bray'):
    '''Ordinate the samples and save a scatter plot and a component boxplot.

    Args:
        mg: Object exposing ``metadata`` and ``figdir``; forwarded to
            ``ordinate``.
        factors: Sequence of factor names. Only the first one is used (as
            hue); any others are reported in a warning and ignored.
        strata: Sequence of up to three stratification variables. The first
            two become the facet column and row; a third one is reported in
            a warning and not used for faceting. The whole sequence is
            passed to ``ordinate``.
        method: Used only to build the output file names.
        distance: Used only to build the output file names.

    Raises:
        ValueError: If more than three strata are given.

    Returns:
        None
    '''
    factors = [] if factors is None else list(factors)
    strata = [] if strata is None else list(strata)

    if len(strata) > 3:
        raise ValueError(
            f'At most 3 strata are supported, got {len(strata)}')

    # An empty `factors` used to break the tuple unpacking; hue is now
    # simply None in that case.
    hue = factors[0] if factors else None
    col, row, extra = strata + [None] * (3 - len(strata))

    # Only name what is really ignored, so the message never shows "None".
    ignored = ([] if extra is None else [extra]) + factors[1:]
    if ignored:
        extra = ','.join(str(name) for name in ignored)
        warnings.warn(
            f'Could not render {extra} too many levels to render. Ignoring.',
            UserWarning)

    result = ordinate(mg, strata=strata, subsample=True)

    # With at least one real stratum the result holds one entry per
    # stratum; their sample tables are stacked into a single one.
    if not all(x is None for x in strata):
        components = pd.concat(val['sample'] for val in result.values())
    else:
        components = result['sample']

    compo_names = components.columns[:2]
    # Add the qualitative variables that are not already in the table.
    other_meta = np.setdiff1d(mg.metadata.qual_vars, components.columns)
    components = pd.concat(
        [components, mg.metadata.factor_data(other_meta)],
        axis=1).dropna(subset=compo_names, how='any')

    # Hulls are only requested for fewer than five hue levels.
    hull = hue is not None and len(components[hue].unique()) < 5

    g = BokehFacetGrid(data=components.reset_index(),
                       hue=hue,
                       col=col,
                       row=row,
                       outdir=mg.figdir,
                       scale=1.5)
    g.map(scatter,
          x=components.columns[0],
          y=components.columns[1],
          hull=hull,
          s=10,
          tooltips=mg.metadata.qual_vars)
    g.save(f'{method}-{distance}.html')

    g = BokehFacetGrid(data=components.melt(
        id_vars=components.columns.drop(compo_names),
        value_name='score',
        var_name='component'),
                       col=col,
                       row=row,
                       outdir=mg.figdir,
                       hue=hue)
    g.map(boxplot, x='component', y='score')
    g.map(swarmplot,
          x='component',
          y='score',
          tooltips=components.columns[2:])
    g.save(f'{method}_{distance}_boxplot.html')