Пример #1
0
 def test_average(self):
     """Averaging three copies of an all-ones table gives all ones."""
     table = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
     # The very same table object is supplied three times.
     observed = merge([table, table, table], 'average')
     expected = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                      ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
     self.assertEqual(observed, expected)
Пример #2
0
 def test_invalid_overlap_method(self):
     """An unknown overlap method name must raise ``ValueError``."""
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S1', 'S5', 'S6'])
     tables = [left, right]
     # 'peanut' is passed as the overlap method and is expected to be
     # rejected with a message matching 'overlap method'.
     with self.assertRaisesRegex(ValueError, 'overlap method'):
         merge(tables, 'peanut')
Пример #3
0
 def test_invalid_overlapping_feature_ids(self):
     """A shared feature id is an error in 'error_on_overlapping_feature'.

     Both tables contain feature 'O1'; their sample ids are disjoint.
     """
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S4', 'S5', 'S6'])
     tables = [left, right]
     with self.assertRaisesRegex(ValueError, 'features are present'):
         merge(tables, 'error_on_overlapping_feature')
Пример #4
0
 def test_invalid_overlapping_sample_ids(self):
     """``merge`` called without an overlap method rejects shared samples.

     Both tables contain sample 'S1'; the error message is expected to
     mention samples and that id.
     """
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S1', 'S5', 'S6'])
     tables = [left, right]
     with self.assertRaisesRegex(ValueError, 'samples.*S1'):
         merge(tables)
    def test_table_based_filtering_exclude_ids(self):
        """Check ``filter_seqs`` given a table and ``exclude_ids=True``.

        ``self.seqs`` is prepared outside this method (not visible here).
        """
        # filter all
        exclude_all = Table(
            np.array([[0, 1, 3], [1, 1, 2], [42, 3, 0], [0, 0, 0]]),
            ['O1', 'O2', 'O3', 'O4'],
            ['S1', 'S2', 'S3'])
        with self.assertRaisesRegex(ValueError, 'All.*filtered'):
            filter_seqs(self.seqs, table=exclude_all, exclude_ids=True)

        # filter all (extra ids in table is ok)
        exclude_all_plus_extra = Table(
            np.array([[0, 1, 3], [1, 1, 2],
                      [42, 3, 0], [0, 0, 0], [1, 0, 0]]),
            ['O1', 'O2', 'O3', 'O4', 'O5'],
            ['S1', 'S2', 'S3'])
        with self.assertRaisesRegex(ValueError, 'All.*filtered'):
            filter_seqs(self.seqs, table=exclude_all_plus_extra,
                        exclude_ids=True)

        # filter three
        exclude_three = Table(
            np.array([[1, 1, 2], [42, 3, 0], [0, 0, 0]]),
            ['O2', 'O3', 'O4'],
            ['S1', 'S2', 'S3'])
        observed = filter_seqs(self.seqs, table=exclude_three,
                               exclude_ids=True)
        expected = pd.Series(['ACGT'], index=['O1'])
        assert_series_equal(observed, expected)

        # filter none
        exclude_none = Table(
            np.array([[0, 1, 3], [1, 1, 2], [42, 3, 0], [0, 0, 0]]),
            ['O1-alt', 'O2-alt', 'O3-alt', 'O4-alt'],
            ['S1', 'S2', 'S3'])
        observed = filter_seqs(self.seqs, table=exclude_none,
                               exclude_ids=True)
        assert_series_equal(observed, self.seqs)
Пример #6
0
    def setUp(self):
        """Build the full and filtered OTU table fixtures for the tests."""
        self.output_dir = '/tmp/'

        # Two OTUs by two samples; each OTU carries a taxonomy entry and
        # the per-sample metadata slots are None.
        self.otu_table = Table(
            array([[0, 0], [1, 5]]),
            ['OTU1', 'OTU2'],
            ['Sample1', 'Sample2'],
            [{"taxonomy": ["Bacteria"]}, {"taxonomy": ["Archaea"]}],
            [None, None],
        )

        # Same samples, but only the OTU2 row and its metadata.
        self.filt_otu_table = Table(
            array([[1, 5]]),
            ['OTU2'],
            ['Sample1', 'Sample2'],
            [{"taxonomy": ["Archaea"]}],
            [None, None],
        )

        self.num_otu_hits = 5
        self._folders_to_cleanup = []
    def test_combine_id_and_frequency_filters(self):
        """Use a metadata ``where`` clause together with ``min_frequency``."""

        def build_metadata():
            # A fresh Metadata object per case, as in a hand-written test.
            frame = pd.DataFrame(
                {'Subject': ['subject-1', 'subject-1', 'subject-2'],
                 'SampleType': ['gut', 'tongue', 'gut']},
                index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
            return qiime2.Metadata(frame)

        def build_table():
            # A fresh table per case: the filter may modify its input.
            return Table(np.array([[0, 1, 3], [1, 1, 2]]),
                         ['O1', 'O2'],
                         ['S1', 'S2', 'S3'])

        # no filtering
        metadata = build_metadata()
        table = build_table()
        observed = filter_samples(
            table, metadata=metadata,
            where="Subject='subject-1' OR Subject='subject-2'",
            min_frequency=1)
        self.assertEqual(observed, build_table())

        # id and frequency filters active
        metadata = build_metadata()
        table = build_table()
        observed = filter_samples(
            table, metadata=metadata,
            where="Subject='subject-1'",
            min_frequency=2)
        only_s2 = Table(np.array([[1], [1]]),
                        ['O1', 'O2'],
                        ['S2'])
        self.assertEqual(observed, only_s2)
Пример #8
0
 def test_sum_triple_overlap(self):
     """Summing three copies of an all-ones table gives all threes."""
     table = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
     # The very same table object is supplied three times.
     observed = merge([table, table, table], 'sum')
     expected = Table(np.array([[3, 3, 3], [3, 3, 3]]),
                      ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
     self.assertEqual(observed, expected)
Пример #9
0
    def setUp(self):
        """Create the 19-sample fixtures: data, BIOM files and a tree.

        Two tables are written to temporary files inside the QIIME temp
        directory: one with the plain taxon names and one whose first
        taxon name contains an underscore.  The file paths are recorded in
        ``self.files_to_remove``.
        """
        self.tmp_dir = get_qiime_temp_dir()

        # 19 rows (samples) by 9 columns (taxa); transposed below so that
        # taxa become the table's observations.
        self.l19_data = np.array([[7, 1, 0, 0, 0, 0, 0, 0, 0],
                                  [4, 2, 0, 0, 0, 1, 0, 0, 0],
                                  [2, 4, 0, 0, 0, 1, 0, 0, 0],
                                  [1, 7, 0, 0, 0, 0, 0, 0, 0],
                                  [0, 8, 0, 0, 0, 0, 0, 0, 0],
                                  [0, 7, 1, 0, 0, 0, 0, 0, 0],
                                  [0, 4, 2, 0, 0, 0, 2, 0, 0],
                                  [0, 2, 4, 0, 0, 0, 1, 0, 0],
                                  [0, 1, 7, 0, 0, 0, 0, 0, 0],
                                  [0, 0, 8, 0, 0, 0, 0, 0, 0],
                                  [0, 0, 7, 1, 0, 0, 0, 0, 0],
                                  [0, 0, 4, 2, 0, 0, 0, 3, 0],
                                  [0, 0, 2, 4, 0, 0, 0, 1, 0],
                                  [0, 0, 1, 7, 0, 0, 0, 0, 0],
                                  [0, 0, 0, 8, 0, 0, 0, 0, 0],
                                  [0, 0, 0, 7, 1, 0, 0, 0, 0],
                                  [0, 0, 0, 4, 2, 0, 0, 0, 4],
                                  [0, 0, 0, 2, 4, 0, 0, 0, 1],
                                  [0, 0, 0, 1, 7, 0, 0, 0, 0]])
        self.l19_sample_names = [
            'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6', 'sam7', 'sam8',
            'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13', 'sam14', 'sam15',
            'sam16', 'sam17', 'sam18', 'sam19'
        ]
        self.l19_taxon_names = [
            'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
            'tax8', 'tax9'
        ]
        self.l19_taxon_names_w_underscore = [
            'ta_x1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
            'tax8', 'tax9'
        ]

        def write_to_temp(table):
            # Reserve a temp file, release the OS-level handle right away
            # and let write_biom_table write to the path itself.
            # The suffix was previously misspelled '.blom'.
            handle, path = mkstemp(dir=self.tmp_dir,
                                   prefix='test_bdiv_otu_table',
                                   suffix='.biom')
            os.close(handle)
            write_biom_table(table, path)
            return path

        l19 = Table(self.l19_data.T, self.l19_taxon_names,
                    self.l19_sample_names)
        self.l19_fp = write_to_temp(l19)

        l19_w_underscore = Table(self.l19_data.T,
                                 self.l19_taxon_names_w_underscore,
                                 self.l19_sample_names)
        self.l19_w_underscore_fp = write_to_temp(l19_w_underscore)

        # Adjacent literals are concatenated; the resulting text is the same
        # as the former backslash-continued literal, including the single
        # space that starts the second piece.
        self.l19_tree_str = (
            '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,'
            ' ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2,'
            ' (tax9:0.3, endbigtaxon:.08));')

        self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

        self.files_to_remove = [self.l19_fp, self.l19_w_underscore_fp]
        self.folders_to_remove = []
Пример #10
0
 def setUp(self):
     """Prepare per-sample count dictionaries and small BIOM tables."""
     # The three sample identifiers used throughout the fixtures.
     sample_a = "10317.000001778.57016"
     sample_b = "10317.000002860.57016"
     sample_c = "10317.000002860.58862"
     features = ['sp1', 'sp2']

     self.ambiguities_json = '%s/ambiguities/json' % ROOT

     self.read_counts_diff = {sample_a: 1, sample_b: 2, sample_c: 1}
     self.read_counts_equal = {sample_a: 1, sample_b: 1, sample_c: 1}
     self.feat_counts = {sample_a: 1, sample_b: 1, sample_c: 2}

     # All three samples.
     self.biom = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                       list(features),
                       [sample_a, sample_b, sample_c])
     # First and second sample only.
     self.biom_diff = Table(np.array([[1, 1], [1, 1]]),
                            list(features),
                            [sample_a, sample_b])
     # First and third sample only.
     self.biom_equal = Table(np.array([[1, 1], [1, 1]]),
                             list(features),
                             [sample_a, sample_c])
Пример #11
0
 def setUp(self):
     """Build sample-metadata dictionaries and BIOM tables for the tests.

     Three scenarios pair an input table / mapping (``*_in``) with the
     expected output (``*_out``).  Some ``*_out`` attributes deliberately
     refer to the same object as the matching ``*_in`` attribute.
     """
     self.sample_metadata_1 = \
         {'s1': {'source_sink': 'source1', 'cat2': 'random_nonsense'},
          's2': {'source_sink': 'sink', 'cat2': 'sink'},
          's5': {'source_sink': 'source1', 'cat2': 'random_nonsense'},
          's0': {'source_sink': 'source2', 'cat2': 'random_nonsense'},
          's100': {'source_sink': 'sink', 'cat2': 'sink'}}
     # Data for testing sinks_and_sources
     self.sample_metadata_2 = \
         {'s1': {'SourceSink': 'source', 'Env': 'source1'},
          's2': {'SourceSink': 'sink', 'Env': 'e1'},
          's5': {'SourceSink': 'source', 'Env': 'source1'},
          's0': {'SourceSink': 'source', 'Env': 'source2'},
          's100': {'SourceSink': 'sink', 'Env': 'e2'}}
     self.sample_metadata_3 = \
         {'s1': {'SourceSink': 'source', 'Env': 'source1'},
          's2': {'SourceSink': 'source', 'Env': 'e1'},
          's5': {'SourceSink': 'source', 'Env': 'source1'},
          's0': {'SourceSink': 'source', 'Env': 'source2'},
          's100': {'SourceSink': 'source', 'Env': 'e2'}}
     # Data for testing _cli_sync_biom_and_sample_metadata
     oids = ['o1', 'o2', 'o3']
     # Data for an example where samples are removed from biom table only.
     sids = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
     bt_1_data = np.arange(12).reshape(3, 4)
     self.bt_1_in = Table(bt_1_data, oids, sids)
     # Expected output drops the last sample column.
     self.bt_1_out = Table(bt_1_data[:, :-1], oids, sids[:-1])
     self.mf_1_in = \
         {'Sample1': {'cat1': 'X', 'cat2': 'Y'},
          'Sample2': {'cat1': 'X', 'cat2': 'Y'},
          'Sample3': {'cat1': 'X', 'cat2': 'Y'}}
     self.mf_1_out = self.mf_1_in
     # Data for an example where samples are removed from mapping file only.
     self.bt_2_in = self.bt_1_in
     self.bt_2_out = self.bt_1_in
     self.mf_2_in = \
         {'Sample1': {'cat1': 'X', 'cat2': 'Y'},
          'Sample6': {'cat1': 'X', 'cat2': 'Y'},
          'Sample3': {'cat1': 'X', 'cat2': 'Y'},
          'Sample4': {'cat1': 'X', 'cat2': 'Y'},
          'Sample2': {'cat1': 'X', 'cat2': 'Y'}}
     self.mf_2_out = \
         {'Sample1': {'cat1': 'X', 'cat2': 'Y'},
          'Sample3': {'cat1': 'X', 'cat2': 'Y'},
          'Sample4': {'cat1': 'X', 'cat2': 'Y'},
          'Sample2': {'cat1': 'X', 'cat2': 'Y'}}
     # Data for an example where samples are removed from mapping file and
     # biom file.
     sids = ['Sample1', 'sampleA', 'Sample3', 'Sample4']
     bt_3_data = np.arange(12).reshape(3, 4)
     self.bt_3_in = Table(bt_3_data, oids, sids)
     # Slice this scenario's own data (previously bt_1_data was used by
     # mistake; the two arrays hold the same values, so the expected
     # table itself is unchanged).
     self.bt_3_out = Table(bt_3_data[:, [0, 2, 3]], oids,
                           [sids[0], sids[2], sids[3]])
     self.mf_3_in = self.mf_2_out
     self.mf_3_out = \
         {'Sample1': {'cat1': 'X', 'cat2': 'Y'},
          'Sample3': {'cat1': 'X', 'cat2': 'Y'},
          'Sample4': {'cat1': 'X', 'cat2': 'Y'}}
Пример #12
0
 def test_sum_full_overlap(self):
     """'sum' adds counts cell by cell when both tables share all ids."""
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
     observed = merge([left, right], 'sum')
     expected = Table(np.array([[0, 3, 9], [3, 3, 6]]),
                      ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
     self.assertEqual(observed, expected)
Пример #13
0
 def test_average_relative_frequency(self):
     """'average' of an all-0.75 and an all-0.25 table is all 0.5."""
     high = Table(np.array([[0.75, 0.75, 0.75], [0.75, 0.75, 0.75]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     low = Table(np.array([[0.25, 0.25, 0.25], [0.25, 0.25, 0.25]]),
                 ['O1', 'O2'],
                 ['S1', 'S2', 'S3'])
     observed = merge([high, low], 'average')
     expected = Table(np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]),
                      ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
     self.assertEqual(observed, expected)
Пример #14
0
 def test_sum_some_overlap(self):
     """'sum' with partly shared ids: 'O1' and 'S2' occur in both tables."""
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S4', 'S2', 'S5'])
     observed = merge([left, right], 'sum')
     # Rows are O1, O2, O3; columns are S1..S5.
     expected_counts = np.array([[0, 3, 3, 0, 6],
                                 [1, 1, 2, 0, 0],
                                 [0, 2, 0, 2, 4]])
     expected = Table(expected_counts,
                      ['O1', 'O2', 'O3'],
                      ['S1', 'S2', 'S3', 'S4', 'S5'])
     self.assertEqual(observed, expected)
Пример #15
0
 def test_valid_overlapping_sample_ids(self):
     """A shared sample is accepted by 'error_on_overlapping_feature'.

     Sample 'S1' occurs in both tables; the feature ids are disjoint.
     """
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O3', 'O4'],
                   ['S1', 'S5', 'S6'])
     observed = merge([left, right], 'error_on_overlapping_feature')
     # Rows are O1..O4; columns are S1, S2, S3, S5, S6.
     expected_counts = np.array([[0, 1, 3, 0, 0],
                                 [1, 1, 2, 0, 0],
                                 [0, 0, 0, 2, 6],
                                 [2, 0, 0, 2, 4]])
     expected = Table(expected_counts,
                      ['O1', 'O2', 'O3', 'O4'],
                      ['S1', 'S2', 'S3', 'S5', 'S6'])
     self.assertEqual(observed, expected)
Пример #16
0
    def setUp(self):
        """Define the OTU tables, lineages, metadata and temp path fixtures.

        ``otu_table`` and ``otu_table_f`` are built from the same count
        matrix and ids; they differ only in the taxonomy attached to each
        OTU (a single name versus a four-level lineage).
        """
        # Three OTUs (rows) by six samples (columns).  A bare dict literal
        # that used to follow this assignment was an expression statement
        # with no effect and has been removed.
        self.otu_table_values = array([[0, 0, 9, 5, 3, 1], [1, 5, 4, 0, 3, 2],
                                       [2, 3, 1, 1, 2, 5]])
        self.otu_table = Table(
            self.otu_table_values, ['OTU1', 'OTU2', 'OTU3'],
            ['Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6'],
            [{
                "taxonomy": ['Bacteria']
            }, {
                "taxonomy": ['Archaea']
            }, {
                "taxonomy": ['Streptococcus']
            }], [None, None, None, None, None, None])
        self.otu_table_f = Table(
            self.otu_table_values, ['OTU1', 'OTU2', 'OTU3'],
            ['Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6'],
            [{
                "taxonomy": ['1A', '1B', '1C', 'Bacteria']
            }, {
                "taxonomy": ['2A', '2B', '2C', 'Archaea']
            }, {
                "taxonomy": ['3A', '3B', '3C', 'Streptococcus']
            }], [None, None, None, None, None, None])

        self.full_lineages = [['1A', '1B', '1C', 'Bacteria'],
                              ['2A', '2B', '2C', 'Archaea'],
                              ['3A', '3B', '3C', 'Streptococcus']]
        # Three-element list: data rows, header names, and an empty list.
        self.metadata = [[['Sample1', 'NA', 'A'], ['Sample2', 'NA', 'B'],
                          ['Sample3', 'NA', 'A'], ['Sample4', 'NA', 'B'],
                          ['Sample5', 'NA', 'A'], ['Sample6', 'NA', 'B']],
                         ['SampleID', 'CAT1', 'CAT2'], []]
        self.tree_text = ["('OTU3',('OTU1','OTU2'))"]
        # Reserve a temp path for the heatmap and release the OS-level
        # handle immediately; only the path is kept.
        fh, self.tmp_heatmap_fpath = mkstemp(prefix='test_heatmap_',
                                             suffix='.pdf')
        close(fh)
Пример #17
0
 def setUp(self):
     """Set up empty/mismatched tables, fixture paths and a reference tree."""
     here = os.path.dirname(os.path.abspath(__file__))

     # Tables built from an empty dict: one with no ids at all, one with
     # three observation ids and no samples.
     self.emptyfeatures = Table({}, [], [])
     self.wrongtips = Table({}, ['a', 'b', 'c'], [])

     self.goodtable = os.path.join(here, 'data/features_formated.biom')
     self.goodcsi = os.path.join(here, 'data/goodcsi')
     self.goodthresh = 0.5

     # The tree is derived from the fingerprints collated from goodcsi.
     fingerprints = collate_fingerprint(self.goodcsi)
     self.goodtree = make_hierarchy(fingerprints,
                                    prob_threshold=self.goodthresh)
Пример #18
0
 def test_sum_overlapping_feature_ids(self):
     """'sum' with a shared feature ('O1') and disjoint sample ids."""
     # Per the original author's note, this should produce the same
     # result as `error_on_overlapping_sample`.
     left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
     right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S4', 'S5', 'S6'])
     observed = merge([left, right], 'sum')
     # Rows are O1, O2, O3; columns are S1..S6.
     expected_counts = np.array([[0, 1, 3, 0, 2, 6],
                                 [1, 1, 2, 0, 0, 0],
                                 [0, 0, 0, 2, 2, 4]])
     expected = Table(expected_counts,
                      ['O1', 'O2', 'O3'],
                      ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
     self.assertEqual(observed, expected)
Пример #19
0
    def test_get_overlapping_no_overlap(self):
        """Tables sharing no ids give an empty overlap set on both axes."""
        left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3'])
        right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                      ['O3', 'O4'],
                      ['S4', 'S5', 'S6'])
        tables = [left, right]

        # samples
        shared_samples = _get_overlapping(tables, 'sample')
        self.assertEqual(set(), shared_samples)

        # features
        shared_features = _get_overlapping(tables, 'observation')
        self.assertEqual(set(), shared_features)
Пример #20
0
 def test_sample_metadata_extra_ids(self):
     """Metadata may list an id that the table does not contain.

     The metadata index holds 'S-not-in-table', 'S2' and 'S3'; the
     expected result keeps only samples 'S2' and 'S3'.
     """
     frame = pd.DataFrame(
         {'Subject': ['subject-1', 'subject-1', 'subject-2'],
          'SampleType': ['gut', 'tongue', 'gut']},
         index=['S-not-in-table', 'S2', 'S3'])
     metadata = qiime2.Metadata(frame)
     table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
     observed = filter_samples(table, sample_metadata=metadata)
     expected = Table(np.array([[1, 3], [1, 2]]),
                      ['O1', 'O2'],
                      ['S2', 'S3'])
     self.assertEqual(observed, expected)
Пример #21
0
    def test_feature_metadata(self):
        """Filter features by metadata ids, both keeping and excluding."""

        def build_table():
            # A fresh table per case: the filter may modify its input.
            return Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'],
                         ['S1', 'S2', 'S3'])

        def build_metadata(columns, ids):
            frame = pd.DataFrame(columns, index=pd.Index(ids, name='id'))
            return qiime2.Metadata(frame)

        # no filtering
        metadata = build_metadata({'SequencedGenome': ['yes', 'yes']},
                                  ['O1', 'O2'])
        observed = filter_features(build_table(), metadata=metadata)
        self.assertEqual(observed, build_table())

        # filter one
        metadata = build_metadata({'SequencedGenome': ['yes']}, ['O1'])
        observed = filter_features(build_table(), metadata=metadata)
        wanted = Table(np.array([[1, 3]]), ['O1'], ['S2', 'S3'])
        self.assertEqual(observed, wanted)

        # filter all
        metadata = build_metadata({}, ['foo'])
        observed = filter_features(build_table(), metadata=metadata)
        wanted = Table(np.array([]), [], [])
        self.assertEqual(observed, wanted)

        # exclude one
        metadata = build_metadata({'SequencedGenome': ['yes']}, ['O1'])
        observed = filter_features(build_table(), metadata=metadata,
                                   exclude_ids=True)
        wanted = Table(np.array([[1, 1, 2]]), ['O2'], ['S1', 'S2', 'S3'])
        self.assertEqual(observed, wanted)

        # exclude all
        metadata = build_metadata({'SequencedGenome': ['yes', 'yes']},
                                  ['O1', 'O2'])
        observed = filter_features(build_table(), metadata=metadata,
                                   exclude_ids=True)
        wanted = Table(np.array([]), [], [])
        self.assertEqual(observed, wanted)
    def test_max_frequency(self):
        """Filter samples with progressively smaller ``max_frequency``."""
        # Each case: (max_frequency, expected data, expected feature ids,
        # expected sample ids).
        cases = [
            # no filtering
            (42, [[0, 1, 3], [1, 1, 2]], ['O1', 'O2'], ['S1', 'S2', 'S3']),
            # filter one
            (4, [[0, 1], [1, 1]], ['O1', 'O2'], ['S1', 'S2']),
            # filter two
            (1, [[1]], ['O2'], ['S1']),
            # filter all
            (0, [], [], []),
        ]
        for limit, counts, feature_ids, sample_ids in cases:
            # A fresh input table per case: the filter may modify its input.
            table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'],
                          ['S1', 'S2', 'S3'])
            observed = filter_samples(table, max_frequency=limit)
            wanted = Table(np.array(counts), feature_ids, sample_ids)
            self.assertEqual(observed, wanted)
Пример #23
0
    def test_get_overlapping_multiple(self):
        """Overlap across three tables is reported for both axes."""
        first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                      ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
        second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                       ['O1', 'O3'],
                       ['S1', 'S5', 'S6'])
        third = Table(np.array([[3, 3, 1], [0, 2, 1]]),
                      ['O1', 'O2'],
                      ['S1', 'S3', 'S6'])
        tables = [first, second, third]

        # samples
        shared_samples = _get_overlapping(tables, 'sample')
        self.assertEqual({'S1', 'S3', 'S6'}, shared_samples)

        # features
        shared_features = _get_overlapping(tables, 'observation')
        self.assertEqual({'O1', 'O2'}, shared_features)
Пример #24
0
    def test_where(self):
        """Filter samples with metadata ``where`` clauses of varying reach."""
        # Each case: (where clause, expected data, expected feature ids,
        # expected sample ids).
        cases = [
            # no filtering
            ("Subject='subject-1' OR Subject='subject-2'",
             [[0, 1, 3], [1, 1, 2]], ['O1', 'O2'], ['S1', 'S2', 'S3']),
            # filter one
            ("Subject='subject-1'",
             [[0, 1], [1, 1]], ['O1', 'O2'], ['S1', 'S2']),
            # filter two
            ("Subject='subject-1' AND SampleType='gut'",
             [[1]], ['O2'], ['S1']),
            # filter all
            ("Subject='subject-1' AND Subject='subject-2'",
             [], [], []),
        ]
        for where, counts, feature_ids, sample_ids in cases:
            # Fresh metadata and table per case: the filter may modify
            # its input.
            frame = pd.DataFrame(
                {'Subject': ['subject-1', 'subject-1', 'subject-2'],
                 'SampleType': ['gut', 'tongue', 'gut']},
                index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
            metadata = qiime2.Metadata(frame)
            table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                          ['O1', 'O2'],
                          ['S1', 'S2', 'S3'])
            observed = filter_samples(table, sample_metadata=metadata,
                                      where=where)
            wanted = Table(np.array(counts), feature_ids, sample_ids)
            self.assertEqual(observed, wanted)
Пример #25
0
    def setUp(self):
        """Create OTU tables, write them to temp files and make a temp dir."""
        self.qiime_config = load_qiime_config()
        # Fall back to '/tmp/' when the configured temp_dir is falsy.
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        # Four taxa (rows) by three samples (columns).
        self.otu_table_data = np.array([[2, 1, 0],
                                        [0, 5, 0],
                                        [0, 3, 0],
                                        [1, 2, 0]])
        self.sample_names = ['Y', 'X', 'Z']
        self.taxon_names = ['b', 'a', 'c', 'd']
        self.otu_metadata = [{'domain': 'Archaea'}] + [
            {'domain': 'Bacteria'} for _ in range(3)]

        # One table with empty metadata dicts, one with domain metadata.
        self.otu_table = Table(
            self.otu_table_data,
            self.taxon_names,
            self.sample_names,
            observation_metadata=[{} for _ in range(4)],
            sample_metadata=[{} for _ in range(3)])
        self.otu_table_meta = Table(
            self.otu_table_data,
            self.taxon_names,
            self.sample_names,
            observation_metadata=self.otu_metadata)

        # Reserve the two file paths; only the paths are kept, so each
        # OS-level handle is closed straight away.
        handle, self.otu_table_fp = mkstemp(
            dir=self.tmp_dir, prefix='test_rarefaction', suffix='.biom')
        close(handle)
        handle, self.otu_table_meta_fp = mkstemp(
            dir=self.tmp_dir, prefix='test_rarefaction', suffix='.biom')
        close(handle)

        self.rare_dir = mkdtemp(
            dir=self.tmp_dir, prefix='test_rarefaction_dir', suffix='')

        write_biom_table(self.otu_table, self.otu_table_fp)
        write_biom_table(self.otu_table_meta, self.otu_table_meta_fp)

        self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
        self._dirs_to_clean_up = [self.rare_dir]
Пример #26
0
def table_from_template(new_data, sample_ids, observation_ids,
                        sample_metadata_source=None,
                        observation_metadata_source=None,
                        verbose=False):
    """Build a new BIOM table from new_data, copying metadata from donors.

    The table is created with type 'Gene table'.  For each metadata source
    that is truthy, ``transfer_metadata`` is called with that source as the
    donor and the new table as the recipient, using the same axis on both
    sides: 'sample' first, then 'observation'.

    Parameters
    ----------
    new_data
        Data handed to ``Table`` as-is.
    sample_ids, observation_ids
        Ids for the new table.  Note that ``Table`` receives the
        observation ids before the sample ids.
    sample_metadata_source : optional
        Donor of sample metadata; skipped when falsy.
    observation_metadata_source : optional
        Donor of observation metadata; skipped when falsy.
    verbose : bool
        Forwarded to ``transfer_metadata``.

    Returns
    -------
    The table returned by the last ``transfer_metadata`` call, or the
    freshly built table when no donor was supplied.
    """
    result_table = Table(new_data,
                         observation_ids,
                         sample_ids,
                         type='Gene table')

    # (donor, axis) pairs, handled in this order.
    donors = (
        (sample_metadata_source, 'sample'),
        (observation_metadata_source, 'observation'),
    )
    for donor, axis in donors:
        if not donor:
            continue
        result_table = transfer_metadata(donor,
                                         result_table,
                                         donor_metadata_type=axis,
                                         recipient_metadata_type=axis,
                                         verbose=verbose)

    return result_table
Пример #27
0
 def test_non_phylogenetic_invalid_input(self):
     """Passing a phylogeny with 'bray_curtis' must raise ``TypeError``."""
     table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
     newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
     phylogeny = skbio.TreeNode.read(newick)
     with self.assertRaises(TypeError):
         beta_diversity('bray_curtis', table, phylogeny=phylogeny)
Пример #28
0
    def test_generate_biom_table(self):
        """Test generating a BIOM table from a FASTA file and a .uc file."""
        records = [("s1_80;size=3;", "AGTCGTACGTGCATGCA"),
                   ("s1_0;size=3;", "TGTGTAGCTGTGCTGAT"),
                   ("s1_10;size=3;", "CGGGTGCATGTCGTGAC")]
        # Three clusters (0, 1, 2): one 'S' line followed by two 'H' lines
        # each.  Every line, including the last, ends with a newline.
        uc_text = (
            "S\t0\t100\t*\t*\t*\t*\t*\ts1_80\t*\n"
            "H\t0\t100\t100.0\t*\t0\t0\t*\ts1_81\ts1_80\n"
            "H\t0\t100\t100.0\t*\t0\t0\t*\ts1_82\ts1_80\n"
            "S\t1\t100\t*\t*\t*\t*\t*\ts1_0\t*\n"
            "H\t1\t100\t100.0\t*\t0\t0\t*\ts1_1\ts1_0\n"
            "H\t1\t100\t100.0\t*\t0\t0\t*\ts1_60\ts1_0\n"
            "S\t2\t100\t*\t*\t*\t*\t*\ts1_10\t*\n"
            "H\t2\t100\t100.0\t*\t0\t0\t*\ts1_12\ts1_10\n"
            "H\t2\t100\t100.0\t*\t0\t0\t*\ts1_13\ts1_10\n"
        )

        # Input FASTA file.
        seqs_fp = join(self.working_dir, "seqs.fasta")
        with open(seqs_fp, 'w') as fasta:
            fasta.writelines(">%s\n%s\n" % record for record in records)

        # temporary file for .uc output
        uc_output_fp = join(self.working_dir, "derep.uc")
        with open(uc_output_fp, 'w') as uc_file:
            uc_file.write(uc_text)

        # Expected table: a count of 3 for each of the three OTUs in the
        # single sample 's1'.
        counts = {(2, 0): 3, (1, 0): 3, (0, 0): 3}
        otu_ids = ['CGGGTGCATGTCGTGAC',
                   'TGTGTAGCTGTGCTGAT',
                   'AGTCGTACGTGCATGCA']
        table_exp = Table(counts, otu_ids, ['s1'], sample_metadata=None)

        clusters, table = generate_biom_table(seqs_fp, uc_output_fp)
        self.assertEqual(table, table_exp)
Пример #29
0
    def setUp(self):
        """Look up the beta pipelines and import table/tree artifacts."""
        super().setUp()
        pipelines = self.plugin.pipelines
        self.beta = pipelines['beta']
        self.beta_phylogenetic = pipelines['beta_phylogenetic']

        def import_fixture(semantic_type, filename):
            # Resolve the test-data path, then import it as an Artifact.
            path = self.get_data_path(filename)
            return Artifact.import_data(semantic_type, path)

        self.two_feature_table = import_fixture(
            'FeatureTable[Frequency]', 'two_feature_table.biom')
        self.three_feature_tree = import_fixture(
            'Phylogeny[Rooted]', 'three_feature.tree')
        self.crawford_table = import_fixture(
            'FeatureTable[Frequency]', 'crawford.biom')
        self.crawford_tree = import_fixture(
            'Phylogeny[Rooted]', 'crawford.nwk')

        # In-memory table and tree; the tree has a tip 'O3' that is not a
        # feature of the table.
        small_table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                            ['O1', 'O2'],
                            ['S1', 'S2', 'S3'])
        self.t = Artifact.import_data('FeatureTable[Frequency]', small_table)
        newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
        small_tree = skbio.TreeNode.read(newick)
        self.tree = Artifact.import_data('Phylogeny[Rooted]', small_tree)
Пример #30
0
def create_biom_table(sample_counts, taxa):
    """
    Create a BIOM table from sample counts and taxonomy metadata.

    Rows follow the iteration order of ``taxa`` and columns follow the
    iteration order of ``sample_counts``; a taxon missing from a sample
    is given a count of 0.

    :type sample_counts: dict
    :param sample_counts: A dictionary of dictionaries with the first level
                          keyed on sample ID, and the second level keyed on
                          taxon ID with counts as values.
    :type taxa: dict
    :param taxa: A mapping between the taxon IDs from sample_counts to the
                 full representation of the taxonomy string. The values in
                 this dict will be used as metadata in the BIOM table.
    :rtype: biom.Table
    :return: A BIOM table containing the per-sample taxon counts and full
             taxonomy identifiers as metadata for each taxon.
    """
    taxon_ids = list(taxa)
    sample_ids = list(sample_counts)

    # Dense taxon-by-sample count matrix.
    counts = np.array(
        [[sample_counts[sid].get(taxid, 0) for sid in sample_ids]
         for taxid in taxon_ids],
        dtype=int)
    tax_meta = [{'taxonomy': taxa[taxid]} for taxid in taxon_ids]

    gen_str = "kraken-biom v{} ({})".format(__version__, __url__)
    created = str(dt.now().isoformat())

    return Table(counts,
                 taxon_ids,
                 sample_ids,
                 tax_meta,
                 type="OTU table",
                 create_date=created,
                 generated_by=gen_str,
                 input_is_dense=True)