def __init__(self, table: biom.Table, features: pd.DataFrame,
             variances: biom.Table = None,
             formatter: Optional['Formatter'] = None):
    """Establish the taxonomy data

    Parameters
    ----------
    table : biom.Table
        Relative abundance data per sample or collapsed into higher order
        entities (e.g., abx in the past year). A normalized copy
        (``table.norm(inplace=False)``) is stored, while the ranks are
        taken from the table as given (``table.rankdata(inplace=False)``).
    features : pd.DataFrame
        DataFrame relating an observation to a Taxon. Its index must hold
        exactly the observation ids of ``table``, and it must provide a
        ``'Taxon'`` column.
    variances : biom.Table, optional
        Variation information about a taxon within a label. When omitted,
        an all-zero table with the same shape and ids as the normalized
        table is used instead.
    formatter : Formatter, optional
        Object whose ``dict_format`` is applied to every lineage found in
        ``features['Taxon']``. Defaults to ``GreengenesFormatter()``.

    Raises
    ------
    DisjointError
        If the sample ids or the observation ids of ``variances`` differ
        from those of ``table``, or if the index of ``features`` differs
        from the observation ids of ``table``.
    """
    normalized = table.norm(inplace=False)
    sample_ids = normalized.ids()
    observation_ids = normalized.ids(axis='observation')

    self._table = normalized
    self._group_id_lookup = set(sample_ids)
    self._feature_id_lookup = set(observation_ids)
    # The table's observation order is the reference order that features
    # and variances are aligned to further down.
    self._feature_order = observation_ids
    self._features = features
    self._ranks = table.rankdata(inplace=False)

    if variances is None:
        # Nothing supplied: use a zero-filled table laid out like the
        # normalized table.
        variances = biom.Table(np.zeros(normalized.shape),
                               observation_ids,
                               sample_ids)
    self._variances = variances

    # Both axes of the variances must carry exactly the table's ids; the
    # comparison is on sets, so ordering is irrelevant at this point.
    if (set(variances.ids()) != self._group_id_lookup
            or set(variances.ids(axis='observation'))
            != self._feature_id_lookup):
        raise DisjointError("Table and variances are disjoint")

    if self._feature_id_lookup != set(features.index):
        raise DisjointError("Table and features are disjoint")

    # Align rows of the features and observations of the variances with
    # the table's observation order.
    self._features = features.loc[observation_ids]
    self._variances = variances.sort_order(observation_ids,
                                           axis='observation')

    self._formatter = (
        GreengenesFormatter() if formatter is None else formatter
    )

    # Pre-compute the formatted name of every feature, keyed by feature id.
    taxon_column = self._features['Taxon']
    self._formatted_taxa_names = {
        feature_id: self._formatter.dict_format(lineage)
        for feature_id, lineage in taxon_column.items()
    }