def test_compress_sample_metadata_missing_sample_from_sid2idx(self):
    # A sid2idx mapping that lacks samples present in the metadata must be
    # rejected. Here the mapping (not the metadata) is the thing being
    # subset: it only describes Sample1 and Sample2, leaving out Sample3.
    partial_sid2idx = {"Sample1": 0, "Sample2": 1}
    expected_error = (
        "The sample IDs in the metadata's index and s_ids_to_indices are "
        "not identical."
    )
    with self.assertRaisesRegex(ValueError, expected_error):
        compress_sample_metadata(partial_sid2idx, self.sm_ef)
def test_compress_sample_metadata_missing_sample_from_metadata(self):
    # The opposite mismatch: sid2idx describes a sample that the metadata
    # does not contain. This should not happen unless the calling code is
    # broken, but it must still be reported as an error.
    # Work on a copy of the fixture with the Sample1 row dropped.
    trimmed_sm = self.sm_ef.copy().drop(labels="Sample1", axis="index")
    expected_error = (
        "The sample IDs in the metadata's index and s_ids_to_indices are "
        "not identical."
    )
    with self.assertRaisesRegex(ValueError, expected_error):
        compress_sample_metadata(self.sid2idx, trimmed_sm)
def test_compress_sample_metadata_nonstr_vals(self):
    # The "basic" case: run the default fixture data through
    # compress_sample_metadata() using private copies of both inputs.
    sm_input = self.sm_ef.copy()
    sid2idx_input = deepcopy(self.sid2idx)
    columns, rows = compress_sample_metadata(sid2idx_input, sm_input)

    # Neither input may have been modified by the call (same guarantee
    # that is checked for compress_table()).
    assert_frame_equal(sm_input, self.sm_ef)
    self.assertEqual(sid2idx_input, self.sid2idx)

    expected_columns = ["Metadata1", "Metadata2", "Metadata3", "Metadata4"]
    self.assertEqual(columns, expected_columns)

    # Reminder of what the fixture metadata contains, column by column:
    # "Metadata1": [0, 0, 0],
    # "Metadata2": [0, 0, 0],
    # "Metadata3": [1, 2, 3],
    # "Metadata4": ["abc", "def", "ghi"]
    # Every value should come back as a string, one row per sample.
    expected_rows = [
        ["0", "0", "1", "abc"],  # Sample1's metadata
        ["0", "0", "2", "def"],  # Sample2's metadata
        ["0", "0", "3", "ghi"],  # Sample3's metadata
    ]
    self.assertEqual(rows, expected_rows)
def test_compress_sample_metadata_nonstr_columns(self):
    # Replace the fixture's column labels with a mix of ints, a str and a
    # float, then compress.
    relabeled_sm = self.sm_ef.copy()
    relabeled_sm.columns = [100, 200, 'asdf', 2.5]
    columns, rows = compress_sample_metadata(self.sid2idx, relabeled_sm)

    # Primary check: every column label was converted to a string.
    expected_columns = ["100", "200", "asdf", "2.5"]
    self.assertEqual(columns, expected_columns)

    # Secondary check: relabeling the columns did not disturb the values.
    expected_rows = [
        ["0", "0", "1", "abc"],  # Sample1's metadata
        ["0", "0", "2", "def"],  # Sample2's metadata
        ["0", "0", "3", "ghi"],  # Sample3's metadata
    ]
    self.assertEqual(rows, expected_rows)
def verify_fails_due_to_sid2idx(sid2idx):
    # Helper: assert that compressing the fixture metadata with the given
    # mapping raises the "invalid indices" ValueError. `self` is taken from
    # the enclosing test method's scope.
    expected_error = r"Indices \(values\) of s_ids_to_indices are invalid."
    with self.assertRaisesRegex(ValueError, expected_error):
        compress_sample_metadata(sid2idx, self.sm_ef)
def to_dict(self):
    """Convert processed data into a dictionary

    Warning: the object returned by to_dict will contain references to
    internal variables. Exercise caution if modifying the value of objects
    returned by to_dict.

    For plots that are not community plots (``self.is_community_plot`` is
    falsy), the feature table and sample metadata entries are ``None`` and
    the two ID-to-index mappings are separate, empty dicts.

    Returns
    -------
    dict
        A dictionary describing the plots contained in the ordination
        object and the sample + feature metadata.
    """
    s_ids = f_ids = cmp_table = sm_cols = compressed_sm = None
    # These must be two *distinct* dicts. A chained assignment
    # (``a = b = {}``) would bind both names to the same object, so the
    # 's_ids_to_indices' and 'f_ids_to_indices' entries of the result would
    # alias each other whenever the table is not compressed below.
    sid2idxs = {}
    fid2idxs = {}
    if self.is_community_plot:
        # The fid2idxs dict we get from compress_table() is temporary --
        # later, we'll restructure it so that the keys (feature IDs) are
        # nodes' postorder positions in the tree rather than arbitrary
        # unique integers. (TODO: it should be possible to speed this up by
        # passing the tree to compress_table() so postorder positions can
        # immediately be used as keys / feature IDs without an intermediate
        # step.)
        s_ids, f_ids, sid2idxs, fid2idxs_t, cmp_table = compress_table(
            self.table)
        sm_cols, compressed_sm = compress_sample_metadata(
            sid2idxs, self.samples)
    fm_cols, compressed_tm_tmp, compressed_im_tmp = \
        compress_feature_metadata(self.tip_md, self.int_md)

    # Use nodes' postorder positions as their "IDs" for the BIOM table and
    # feature metadata
    compressed_tm = {}
    compressed_im = {}
    # bptree indices start at one, hence we pad the arrays
    names = [-1]
    lengths = [-1]
    for i in range(1, len(self.tree) + 1):
        node = self.tree.postorderselect(i)
        name = self.tree.name(node)

        names.append(name)
        lengths.append(self.tree.length(node))

        # fid2idxs_t only exists for community plots; the first operand
        # short-circuits so it is never referenced otherwise.
        if self.is_community_plot and name in fid2idxs_t:
            fid2idxs[i] = fid2idxs_t[name]
            f_ids[fid2idxs[i]] = i

        if name in compressed_tm_tmp:
            compressed_tm[i] = compressed_tm_tmp[name]

        # Note: for internal metadata, node names may not be unique. Thus,
        # we duplicate the internal node metadata for each node in the
        # metadata with the same name.
        if name in compressed_im_tmp:
            compressed_im[i] = compressed_im_tmp[name]

    data_to_render = {
        'base_url': self.base_url,
        # tree info
        'tree': shifting(self.tree.B),
        'lengths': lengths,
        'names': names,
        # Should we show sample metadata coloring / animation panels?
        'is_community_plot': self.is_community_plot,
        # Are we working with an EMPire plot?
        'is_empire_plot': self.is_empire_plot,
        # feature table
        's_ids': s_ids,
        'f_ids': f_ids,
        's_ids_to_indices': sid2idxs,
        'f_ids_to_indices': fid2idxs,
        'compressed_table': cmp_table,
        # sample metadata
        'sample_metadata_columns': sm_cols,
        'compressed_sample_metadata': compressed_sm,
        # feature metadata
        'feature_metadata_columns': fm_cols,
        'split_taxonomy_columns': self.tax_cols,
        'compressed_tip_metadata': compressed_tm,
        'compressed_int_metadata': compressed_im,
        # Emperor integration
        'emperor_div': '',
        'emperor_require_logic': '',
        'emperor_style': '',
        'emperor_base_dependencies': '',
        'emperor_classes': ''
    }

    # When an Emperor plot is attached, its scavenged values are merged
    # over the empty Emperor placeholders above.
    if self._emperor is not None:
        data_to_render.update(self._scavenge_emperor())

    return data_to_render