def gradient_clustering(table: pd.DataFrame,
                        gradient: MetadataCategory,
                        weighted=True) -> skbio.TreeNode:
    """ Builds a tree for features based on a gradient.

    Parameters
    ----------
    table : pd.DataFrame
       Contingency table where rows are samples and columns are features.
    gradient : qiime2.MetadataCategory
       Continuous vector of measurements corresponding to samples.
    weighted : bool
       Specifies if abundance or presence/absence information
       should be used to perform the clustering.

    Returns
    -------
    skbio.TreeNode
       Represents the partitioning of features with respect to the gradient.
    """
    c = gradient.to_series()
    # `np.float` was merely an alias of the builtin `float` and has been
    # removed from NumPy (1.24+); the builtin yields the same float64 dtype.
    c = c.astype(float)
    if not weighted:
        # Reduce abundances to presence/absence.
        table = table > 0
    t = gradient_linkage(table, c, method='average')

    # Order the tree by each feature's mean position along the gradient.
    mean_g = mean_niche_estimator(table, c)
    mean_g = pd.Series(mean_g, index=table.columns)
    mean_g = mean_g.sort_values()
    t = gradient_sort(t, mean_g)
    return t
def dendrogram_heatmap(output_dir: str, table: pd.DataFrame,
                       tree: TreeNode, metadata: MetadataCategory,
                       ndim=10, method='clr', color_map='viridis'):
    """Plot a dendrogram heatmap of `table` and write an HTML report.

    Writes ``heatmap.svg``, ``heatmap.pdf`` and ``index.html`` into
    `output_dir`.  The HTML page embeds the SVG, links to the PDF and shows
    a legend for the numerator / denominator highlight colors.

    Parameters
    ----------
    output_dir : str
        Directory that receives the output files.
    table : pd.DataFrame
        Table that is transformed according to `method` and then plotted;
        its index and columns are carried over to the transformed matrix.
    tree : TreeNode
        Tree passed to `heatmap`.  The names of its non-tip nodes, taken in
        level order, select which nodes are highlighted.
    metadata : MetadataCategory
        Converted with ``to_series()`` and passed to `heatmap`.
    ndim : int
        Maximum number of non-tip nodes to highlight.
    method : str
        ``'clr'`` applies ``clr(centralize(table))``; ``'log'`` applies
        ``np.log(table)``.
    color_map : str
        Passed to `heatmap` as ``cmap``.

    Raises
    ------
    ValueError
        If `method` is neither ``'clr'`` nor ``'log'``.
    """
    # NOTE(review): another `dendrogram_heatmap` is defined further down in
    # this file; being later, it shadows this definition at import time.
    # Confirm which of the two is meant to be kept.

    # Validate `method` up front: previously an unknown value left `mat`
    # unbound and surfaced later as an UnboundLocalError.
    if method == 'clr':
        transformed = clr(centralize(table))
    elif method == 'log':
        transformed = np.log(table)
    else:
        raise ValueError(
            "Unknown method %r; expected 'clr' or 'log'." % (method,))
    mat = pd.DataFrame(transformed,
                       index=table.index, columns=table.columns)

    # Highlight the first `ndim` internal nodes (level order).
    nodes = [n.name for n in tree.levelorder() if not n.is_tip()]
    nlen = min(ndim, len(nodes))
    numerator_color, denominator_color = '#fb9a99', '#e31a1c'
    highlights = pd.DataFrame([[numerator_color, denominator_color]] * nlen,
                              index=nodes[:nlen])

    # TODO: There are a few hard-coded constants here
    # will need to have some adaptive defaults set in the future
    fig = heatmap(mat, tree, metadata.to_series(), highlights,
                  cmap=color_map, highlight_width=0.01, figsize=(12, 8))
    fig.savefig(os.path.join(output_dir, 'heatmap.svg'))
    fig.savefig(os.path.join(output_dir, 'heatmap.pdf'))

    # Legend styling; the two %s slots take the highlight colors.
    css = (
        " .square { float: left; width: 100px; height: 20px; margin: 5px;"
        " border: 1px solid rgba(0, 0, 0, .2); }"
        " .numerator { background: %s; }"
        " .denominator { background: %s; } "
    ) % (numerator_color, denominator_color)

    index_fp = os.path.join(output_dir, 'index.html')
    with open(index_fp, 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Dendrogram heatmap</h1>\n')
        index_f.write('<img src="heatmap.svg" alt="heatmap">')
        index_f.write('<a href="heatmap.pdf">')
        index_f.write('Download as PDF</a><br>\n')
        index_f.write('<style>%s</style>' % css)
        index_f.write('<div class="square numerator">'
                      'Numerator<br/></div>')
        index_f.write('<div class="square denominator">'
                      'Denominator<br/></div>')
        index_f.write('</body></html>\n')
def dendrogram_heatmap(output_dir: str, table: pd.DataFrame,
                       tree: TreeNode, metadata: MetadataCategory,
                       ndim=10, method='clr', color_map='viridis'):
    """Plot a dendrogram heatmap of `table` and write an HTML report.

    Writes ``heatmap.svg`` and an ``index.html`` page embedding it into
    `output_dir`.

    Parameters
    ----------
    output_dir : str
        Directory that receives the output files.
    table : pd.DataFrame
        Table that is transformed according to `method` and then plotted;
        its index and columns are carried over to the transformed matrix.
    tree : TreeNode
        Tree passed to `heatmap`.  The names of its non-tip nodes, taken in
        level order, select which nodes are highlighted.
    metadata : MetadataCategory
        Converted with ``to_series()`` and passed to `heatmap`.
    ndim : int
        Maximum number of non-tip nodes to highlight.
    method : str
        ``'clr'`` applies ``clr(centralize(table))``; ``'log'`` applies
        ``np.log(table)``.
    color_map : str
        Passed to `heatmap` as ``cmap``.

    Raises
    ------
    ValueError
        If `method` is neither ``'clr'`` nor ``'log'``.
    """
    # NOTE(review): this file defines `dendrogram_heatmap` twice; this later
    # definition is the one bound at import time.  Confirm which is intended.

    # Validate `method` up front: previously an unknown value left `mat`
    # unbound and surfaced later as an UnboundLocalError.
    if method == 'clr':
        transformed = clr(centralize(table))
    elif method == 'log':
        transformed = np.log(table)
    else:
        raise ValueError(
            "Unknown method %r; expected 'clr' or 'log'." % (method,))
    mat = pd.DataFrame(transformed,
                       index=table.index, columns=table.columns)

    # Each highlight row carries a pair of colors, so only nodes that
    # actually split into children are eligible.  Tips are skipped, matching
    # the other definition of this function in the file.
    nodes = [n.name for n in tree.levelorder() if not n.is_tip()]
    nlen = min(ndim, len(nodes))
    highlights = pd.DataFrame([['#00FF00', '#FF0000']] * nlen,
                              index=nodes[:nlen])

    # TODO: There are a few hard-coded constants here
    # will need to have some adaptive defaults set in the future
    fig = heatmap(mat, tree, metadata.to_series(), highlights,
                  cmap=color_map, highlight_width=0.01, figsize=(12, 8))
    fig.savefig(os.path.join(output_dir, 'heatmap.svg'))

    index_fp = os.path.join(output_dir, 'index.html')
    with open(index_fp, 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Dendrogram heatmap</h1>\n')
        index_f.write('<img src="heatmap.svg" alt="heatmap">')
        index_f.write('</body></html>\n')
def balance_taxonomy(output_dir: str, balances: pd.DataFrame, tree: TreeNode,
                     taxonomy: pd.DataFrame, balance_name: Str,
                     taxa_level: Int = 0,
                     metadata: MetadataCategory = None) -> None:
    """Summarize the taxa on both sides of a balance and write a report.

    Files written into `output_dir`: ``barplots.svg`` / ``barplots.pdf``,
    ``numerator.csv``, ``denominator.csv`` and ``index.html``.  When
    `metadata` is given, ``balance_metadata.svg`` / ``balance_metadata.pdf``
    are written as well and referenced from the HTML page.

    Parameters
    ----------
    output_dir : str
        Directory that receives the output files.
    balances : pd.DataFrame
        Balance values; the column named `balance_name` is plotted against
        `metadata`.  Only used when `metadata` is provided.
    tree : TreeNode
        Tree containing a node named `balance_name`.  That node's children
        at positions ``NUMERATOR`` and ``DENOMINATOR`` define the two clades.
    taxonomy : pd.DataFrame
        Must contain a ``'Taxon'`` column of ``';'``-separated strings.  Its
        index is looked up with the tip names of the two clades.
    balance_name : Str
        Name of the tree node (and `balances` column) to summarize.
    taxa_level : Int
        Passed through to `balance_barplots`.
    metadata : MetadataCategory, optional
        If its values can be cast to float the balance is drawn as a scatter
        plot against it; otherwise `balance_boxplot` is used.
    """
    # parse out headers for taxonomy
    taxa_data = list(taxonomy['Taxon'].apply(lambda x: x.split(';')).values)
    taxa_df = pd.DataFrame(taxa_data, index=taxonomy.index)

    # fill in NAs
    def _fill_missing_ranks(row):
        # Rows shorter than the widest lineage are padded with None by the
        # DataFrame constructor; overwrite those cells with the last entry
        # that is present.
        present = np.array([k is not None for k in row])
        missing = np.logical_not(present)
        last = max(0, np.where(present)[0][-1])
        row[missing] = [row[last]] * np.sum(missing)
        return row
    taxa_df = taxa_df.apply(_fill_missing_ranks, axis=1)

    def _clade_features(clade):
        # Taxonomy rows for every tip under `clade`, always as a DataFrame.
        if clade.is_tip():
            # A single label would give a Series; wrap it back into one row.
            return pd.DataFrame({clade.name: taxa_df.loc[clade.name]}).T
        # `subset()` is set-like; hand `.loc` a list, since set-like
        # indexers are not supported by recent pandas versions.
        return taxa_df.loc[list(clade.subset())]

    balance_node = tree.find(balance_name)
    num_clade = balance_node.children[NUMERATOR]
    denom_clade = balance_node.children[DENOMINATOR]
    num_features = _clade_features(num_clade)
    denom_features = _clade_features(denom_clade)

    num_color, denom_color = '#4c72b0', '#4c72b0'

    fig, (ax_num, ax_denom) = plt.subplots(2)
    balance_barplots(tree, balance_name, taxa_level, taxa_df,
                     denom_color=denom_color, num_color=num_color,
                     axes=(ax_num, ax_denom))

    ax_num.set_title(
        r'$%s_{numerator} \; taxa \; (%d \; taxa)$' % (balance_name,
                                                       len(num_features)))
    ax_denom.set_title(
        r'$%s_{denominator} \; taxa \; (%d \; taxa)$' % (balance_name,
                                                         len(denom_features)))
    ax_denom.set_xlabel('Number of unique taxa')
    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, 'barplots.svg'))
    fig.savefig(os.path.join(output_dir, 'barplots.pdf'))

    if metadata is not None:
        fig2, ax = plt.subplots()
        c = metadata.to_series()
        data, c = match(balances, c)
        data[c.name] = c
        y = data[balance_name]

        # check if continuous
        # Only the cast decides between the two plot types, so only the cast
        # is guarded.  A bare `except:` here used to swallow plotting errors
        # and KeyboardInterrupt as well.
        try:
            c = c.astype(np.float64)
        except (ValueError, TypeError):
            balance_boxplot(balance_name, data, y=c.name, ax=ax)
        else:
            ax.scatter(c.values, y)
            ax.set_xlabel(c.name)

        ylabel = (r"$%s = \ln \frac{%s_{numerator}}"
                  "{%s_{denominator}}$") % (balance_name,
                                            balance_name,
                                            balance_name)
        ax.set_title(ylabel, rotation=0)
        ax.set_ylabel('log ratio')
        fig2.savefig(os.path.join(output_dir, 'balance_metadata.svg'))
        fig2.savefig(os.path.join(output_dir, 'balance_metadata.pdf'))

    # The CSV downloads linked from the page below.
    num_features.to_csv(os.path.join(output_dir, 'numerator.csv'),
                        header=True, index=True)
    denom_features.to_csv(os.path.join(output_dir, 'denominator.csv'),
                          header=True, index=True)

    index_fp = os.path.join(output_dir, 'index.html')
    with open(index_fp, 'w') as index_f:
        index_f.write('<html><body>\n')
        if metadata is not None:
            index_f.write('<h1>Balance vs %s </h1>\n' % c.name)
            index_f.write(('<img src="balance_metadata.svg" '
                           'alt="barplots">\n\n'
                           '<a href="balance_metadata.pdf">'
                           'Download as PDF</a><br>\n'))

        index_f.write(('<h1>Balance Taxonomy</h1>\n'
                       '<img src="barplots.svg" alt="barplots">\n\n'
                       '<a href="barplots.pdf">'
                       'Download as PDF</a><br>\n'
                       '<h3>Numerator taxa</h3>\n'
                       '<a href="numerator.csv">\n'
                       'Download as CSV</a><br>\n'
                       '<h3>Denominator taxa</h3>\n'
                       '<a href="denominator.csv">\n'
                       'Download as CSV</a><br>\n'))
        index_f.write('</body></html>\n')