Пример #1
0
    def test_filter_table_underscores_escaped(self):
        # Underscores in query terms must be matched literally rather than
        # acting as a single-character wildcard.
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])

        def build_taxonomy(feat1_taxon, feat2_taxon):
            # Wrap one annotation per feature in a qiime2.Metadata object.
            annotations = pd.DataFrame(
                data=[feat1_taxon, feat2_taxon],
                index=pd.Index(['feat1', 'feat2'], name='id'),
                columns=['Taxon'])
            return qiime2.Metadata(annotations)

        # Both scenarios expect the same result: only feat1 is kept, and the
        # expected frame has no sample D (feat1 is 0.0 there in the input).
        feat1_only = pd.DataFrame(data=[[2.0], [1.0], [9.0]],
                                  index=['A', 'B', 'C'],
                                  columns=['feat1'])

        # keep feat1 only - underscore not treated as a wild card, so 'd_'
        # does not pick up the 'dd' annotation on feat2
        result = filter_table(feature_table,
                              build_taxonomy('aa; bb; cc', 'aa; bb; dd ee'),
                              include='cc,d_')
        pdt.assert_frame_equal(result, feat1_only, check_like=True)

        # keep feat1 only - underscore in query matches underscore in
        # taxonomy annotation
        result = filter_table(feature_table,
                              build_taxonomy('aa; bb; c_', 'aa; bb; dd ee'),
                              include='c_')
        pdt.assert_frame_equal(result, feat1_only, check_like=True)
Пример #2
0
    def test_alt_delimiter(self):
        # Query terms are split on the string passed as `query_delimiter`;
        # both include and exclude are exercised with '@peanut@'.
        delimiter = '@peanut@'
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee'],
            index=pd.Index(['feat1', 'feat2'], name='id'),
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        # include with delimiter: 'cc' and 'ee' together cover both
        # features, so the whole table is expected back
        included = filter_table(feature_table, taxonomy,
                                include='cc@peanut@ee',
                                query_delimiter=delimiter)
        pdt.assert_frame_equal(included, feature_table, check_like=True)

        # exclude with delimiter: only feat1 (samples A-C) is expected
        feat1_only = pd.DataFrame(data=[[2.0], [1.0], [9.0]],
                                  index=['A', 'B', 'C'],
                                  columns=['feat1'])
        excluded = filter_table(feature_table, taxonomy,
                                exclude='ww@peanut@ee',
                                query_delimiter=delimiter)
        pdt.assert_frame_equal(excluded, feat1_only, check_like=True)
Пример #3
0
    def test_missing_taxon_errors(self):
        # The table contains feat2 but the taxonomy only annotates feat1;
        # a ValueError whose message matches 'All.*feat2' is expected.
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(data=['aa; bb; cc'],
                                   index=pd.Index(['feat1'], name='id'),
                                   columns=['Taxon'])
        incomplete_taxonomy = qiime2.Metadata(annotations)

        with self.assertRaisesRegex(ValueError, 'All.*feat2'):
            filter_table(feature_table, incomplete_taxonomy, include='bb')
Пример #4
0
    def test_filter_table_unknown_mode(self):
        # Passing a `mode` value of 'not-a-mode' is expected to raise a
        # ValueError whose message matches 'Unknown mode'.
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee'],
            index=pd.Index(['feat1', 'feat2'], name='id'),
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        bad_mode = 'not-a-mode'
        with self.assertRaisesRegex(ValueError, 'Unknown mode'):
            filter_table(feature_table, taxonomy, include='bb', mode=bad_mode)
Пример #5
0
    def test_filter_no_filters(self):
        # Calling filter_table with neither include nor exclude is expected
        # to raise a ValueError whose message matches 'At least one'.
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee'],
            index=pd.Index(['feat1', 'feat2'], name='id'),
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        # Callable form of assertRaisesRegex: invokes
        # filter_table(feature_table, taxonomy) and checks the error.
        self.assertRaisesRegex(ValueError, 'At least one',
                               filter_table, feature_table, taxonomy)
Пример #6
0
    def test_filter_table_include(self):
        # Exercise `include` with queries expected to keep both features,
        # only feat1, only feat2, and finally nothing at all.
        table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee'],
            index=pd.Index(['feat1', 'feat2'], name='id'),
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        # Expected outputs, built once and shared by the cases below.
        # The feat1-only frame has no sample D (feat1 is 0.0 there).
        feat1_only = pd.DataFrame(data=[[2.0], [1.0], [9.0]],
                                  index=['A', 'B', 'C'],
                                  columns=['feat1'])
        feat2_only = pd.DataFrame(data=[[2.0], [1.0], [8.0], [4.0]],
                                  index=['A', 'B', 'C', 'D'],
                                  columns=['feat2'])

        # (include query, expected table) pairs, checked in order.
        cases = [
            # keep both features
            ('bb', table),
            ('cc,ee', table),
            # keep feat1 only
            ('cc', feat1_only),
            ('aa; bb; cc', feat1_only),
            # keep feat2 only
            ('dd', feat2_only),
            ('ee', feat2_only),
            ('dd ee', feat2_only),
            ('aa; bb; dd ee', feat2_only),
        ]
        for query, expected in cases:
            observed = filter_table(table, taxonomy, include=query)
            pdt.assert_frame_equal(observed, expected, check_like=True)

        # keep no features
        with self.assertRaisesRegex(ValueError, 'empty table'):
            filter_table(table, taxonomy, include='peanut!')
Пример #7
0
    def test_all_features_with_frequency_greater_than_zero_get_filtered(self):
        # feat2 is the feature matched by the include term 'dd', but its
        # frequency is 0.0 in every sample, so all samples end up dropped
        # from the table and a ValueError matching 'greater than zero' is
        # expected.
        feature_table = pd.DataFrame(
            data=[[2.0, 0.0], [1.0, 0.0], [9.0, 0.0], [1.0, 0.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee'],
            index=pd.Index(['feat1', 'feat2'], name='id'),
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        with self.assertRaisesRegex(ValueError, 'greater than zero'):
            filter_table(feature_table, taxonomy, include='dd')
Пример #8
0
    def test_extra_taxon_ignored(self):
        # The taxonomy annotates feat3, which is not a column of the table;
        # filtering is still expected to succeed and return the full table.
        feature_table = pd.DataFrame(
            data=[[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
            index=['A', 'B', 'C', 'D'],
            columns=['feat1', 'feat2'])
        annotated_ids = pd.Index(['feat1', 'feat2', 'feat3'], name='id')
        annotations = pd.DataFrame(
            data=['aa; bb; cc', 'aa; bb; dd ee', 'aa; bb; cc'],
            index=annotated_ids,
            columns=['Taxon'])
        taxonomy = qiime2.Metadata(annotations)

        # keep both features
        result = filter_table(feature_table, taxonomy, include='bb')
        pdt.assert_frame_equal(result, feature_table, check_like=True)