Python compress_sample_metadata示例，empress.compression_utils.compress_sample_metadata Python示例

示例#1

0

显示文件

    def test_compress_sample_metadata_missing_sample_from_sid2idx(self):
        # A sid2idx mapping that lacks a sample which is present in the
        # sample metadata must be rejected.

        # Only Sample1 and Sample2 get an index here; Sample3 is left out of
        # the mapping while the metadata itself is passed in unchanged.
        incomplete_sid2idx = {"Sample1": 0, "Sample2": 1}
        expected_error = (
            "The sample IDs in the metadata's index and s_ids_to_indices are "
            "not identical."
        )
        with self.assertRaisesRegex(ValueError, expected_error):
            compress_sample_metadata(incomplete_sid2idx, self.sm_ef)

示例#2

0

显示文件

    def test_compress_sample_metadata_missing_sample_from_metadata(self):
        # The reverse mismatch: sid2idx describes a sample that the metadata
        # does not contain. This should only happen if the calling code is
        # broken, but it still has to be rejected.

        # Drop Sample1 from a copy of the metadata; sid2idx is passed as-is.
        sm_without_sample1 = self.sm_ef.copy().drop(
            labels="Sample1", axis="index")
        expected_error = (
            "The sample IDs in the metadata's index and s_ids_to_indices are "
            "not identical."
        )
        with self.assertRaisesRegex(ValueError, expected_error):
            compress_sample_metadata(self.sid2idx, sm_without_sample1)

示例#3

0

显示文件

    def test_compress_sample_metadata_nonstr_vals(self):
        # Compress copies of the default fixtures, so that afterwards we can
        # confirm the inputs were not modified by the call.
        sid2idx_in = deepcopy(self.sid2idx)
        sm_in = self.sm_ef.copy()
        columns, values = compress_sample_metadata(sid2idx_in, sm_in)

        # The inputs must come back exactly as they went in.
        assert_frame_equal(sm_in, self.sm_ef)
        self.assertEqual(sid2idx_in, self.sid2idx)

        expected_columns = ["Metadata1", "Metadata2", "Metadata3", "Metadata4"]
        self.assertEqual(columns, expected_columns)

        # Reminder of the fixture's contents, column by column:
        #   "Metadata1": [0, 0, 0],
        #   "Metadata2": [0, 0, 0],
        #   "Metadata3": [1, 2, 3],
        #   "Metadata4": ["abc", "def", "ghi"]
        # The compressed output is organized per sample (one inner list per
        # sample), and every value -- numeric or not -- should be a string.
        expected_values = [
            ["0", "0", "1", "abc"],  # Sample1's metadata
            ["0", "0", "2", "def"],  # Sample2's metadata
            ["0", "0", "3", "ghi"],  # Sample3's metadata
        ]
        self.assertEqual(values, expected_values)

示例#4

0

显示文件

 def test_compress_sample_metadata_nonstr_columns(self):
     # Relabel the columns of a metadata copy with a mix of ints, a str and
     # a float, then compress it.
     relabeled_sm = self.sm_ef.copy()
     relabeled_sm.columns = [100, 200, 'asdf', 2.5]
     columns, values = compress_sample_metadata(self.sid2idx, relabeled_sm)

     # The point of this test: column names come back as strings.
     expected_columns = ["100", "200", "asdf", "2.5"]
     self.assertEqual(columns, expected_columns)

     # Renaming the columns should not have affected the values themselves.
     expected_values = [
         ["0", "0", "1", "abc"],  # Sample1's metadata
         ["0", "0", "2", "def"],  # Sample2's metadata
         ["0", "0", "3", "ghi"],  # Sample3's metadata
     ]
     self.assertEqual(values, expected_values)

示例#5

0

显示文件

 def verify_fails_due_to_sid2idx(sid2idx):
     # Helper (closes over the enclosing test's ``self``): asserts that
     # compressing self.sm_ef with the given sid2idx raises a ValueError
     # complaining about the mapping's index values.
     expected_error = r"Indices \(values\) of s_ids_to_indices are invalid."
     with self.assertRaisesRegex(ValueError, expected_error):
         compress_sample_metadata(sid2idx, self.sm_ef)

示例#6

0

显示文件

文件： core.py 项目： kwcantrell/empress

    def to_dict(self):
        """Convert processed data into a dictionary

        Warning: the object returned by to_dict will contain references to
        internal variables. Exercise caution if modifying the value of objects
        returned by to_dict.

        Returns
        -------
        dict
            A dictionary describing the plots contained in the ordination
            object and the sample + feature metadata.
        """

        s_ids = f_ids = cmp_table = sm_cols = compressed_sm = None
        # These must be distinct dict objects: a chained assignment
        # (``a = b = {}``) would bind both names to the same dict, so that
        # for non-community plots the returned 's_ids_to_indices' and
        # 'f_ids_to_indices' would alias each other.
        sid2idxs = {}
        fid2idxs = {}
        # Always bound, so the tree loop below never touches an undefined
        # name; it stays empty unless a feature table gets compressed.
        fid2idxs_t = {}
        if self.is_community_plot:
            # The fid2idxs dict we get from compress_table() is temporary --
            # later, we'll restructure it so that the keys (feature IDs) are
            # nodes' postorder positions in the tree rather than arbitrary
            # unique integers. (TODO: it should be possible to speed this up by
            # passing the tree to compress_table() so postorder positions can
            # immediately be used as keys / feature IDs without an intermediate
            # step.)
            s_ids, f_ids, sid2idxs, fid2idxs_t, cmp_table = compress_table(
                self.table)
            sm_cols, compressed_sm = compress_sample_metadata(
                sid2idxs, self.samples)
        fm_cols, compressed_tm_tmp, compressed_im_tmp = \
            compress_feature_metadata(self.tip_md, self.int_md)

        # Use nodes' postorder positions as their "IDs" for the BIOM table and
        # feature metadata
        compressed_tm = {}
        compressed_im = {}
        # bptree indices start at one, hence we pad the arrays
        names = [-1]
        lengths = [-1]
        for i in range(1, len(self.tree) + 1):
            node = self.tree.postorderselect(i)
            name = self.tree.name(node)

            names.append(name)
            lengths.append(self.tree.length(node))

            if self.is_community_plot and name in fid2idxs_t:
                # Re-key the feature's table index by postorder position, and
                # store that position in f_ids at the feature's index.
                fid2idxs[i] = fid2idxs_t[name]
                f_ids[fid2idxs[i]] = i

            if name in compressed_tm_tmp:
                compressed_tm[i] = compressed_tm_tmp[name]

            # Note: for internal metadata, node names may not be unique. Thus,
            # we duplicate the internal node metadata for each node in the
            # metadata with the same name.
            if name in compressed_im_tmp:
                compressed_im[i] = compressed_im_tmp[name]

        data_to_render = {
            'base_url': self.base_url,
            # tree info
            'tree': shifting(self.tree.B),
            'lengths': lengths,
            'names': names,
            # Should we show sample metadata coloring / animation panels?
            'is_community_plot': self.is_community_plot,
            # Are we working with an EMPire plot?
            'is_empire_plot': self.is_empire_plot,
            # feature table
            's_ids': s_ids,
            'f_ids': f_ids,
            's_ids_to_indices': sid2idxs,
            'f_ids_to_indices': fid2idxs,
            'compressed_table': cmp_table,
            # sample metadata
            'sample_metadata_columns': sm_cols,
            'compressed_sample_metadata': compressed_sm,
            # feature metadata
            'feature_metadata_columns': fm_cols,
            'split_taxonomy_columns': self.tax_cols,
            'compressed_tip_metadata': compressed_tm,
            'compressed_int_metadata': compressed_im,
            # Emperor integration
            'emperor_div': '',
            'emperor_require_logic': '',
            'emperor_style': '',
            'emperor_base_dependencies': '',
            'emperor_classes': ''
        }

        # When an Emperor plot is attached, overwrite the placeholder values
        # above with whatever _scavenge_emperor() returns.
        if self._emperor is not None:
            data_to_render.update(self._scavenge_emperor())

        return data_to_render