def test_average(self):
    # Averaging three copies of the same all-ones table is expected to give
    # back an identical table.
    feature_ids = ['O1', 'O2']
    sample_ids = ['S1', 'S2', 'S3']
    table = Table(np.array([[1, 1, 1], [1, 1, 1]]), feature_ids, sample_ids)

    observed = merge([table, table, table], 'average')

    expected = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                     feature_ids, sample_ids)
    self.assertEqual(observed, expected)
def test_invalid_overlap_method(self):
    # 'peanut' is passed as the overlap method; merge is expected to raise a
    # ValueError whose message matches 'overlap method'.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S1', 'S5', 'S6'])

    with self.assertRaisesRegex(ValueError, 'overlap method'):
        merge([first, second], 'peanut')
def test_invalid_overlapping_feature_ids(self):
    # Feature 'O1' appears in both tables (the sample IDs are disjoint), so
    # merging with 'error_on_overlapping_feature' is expected to raise.
    left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                 ['O1', 'O2'],
                 ['S1', 'S2', 'S3'])
    right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                  ['O1', 'O3'],
                  ['S4', 'S5', 'S6'])
    inputs = [left, right]

    with self.assertRaisesRegex(ValueError, 'features are present'):
        merge(inputs, 'error_on_overlapping_feature')
def test_invalid_overlapping_sample_ids(self):
    # Sample 'S1' appears in both tables; merge is called without an explicit
    # overlap method and is expected to raise a ValueError naming 'S1'.
    left = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                 ['O1', 'O2'],
                 ['S1', 'S2', 'S3'])
    right = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                  ['O1', 'O3'],
                  ['S1', 'S5', 'S6'])
    inputs = [left, right]

    with self.assertRaisesRegex(ValueError, 'samples.*S1'):
        merge(inputs)
def test_table_based_filtering_exclude_ids(self):
    # Exercises filter_seqs(..., exclude_ids=True) with tables whose feature
    # IDs are used as the set of sequence IDs to drop.
    # NOTE(review): self.seqs is defined outside this block; presumably it
    # holds sequences O1-O4 -- confirm against the fixture.
    sample_ids = ['S1', 'S2', 'S3']

    # filter all
    table = Table(np.array([[0, 1, 3], [1, 1, 2], [42, 3, 0], [0, 0, 0]]),
                  ['O1', 'O2', 'O3', 'O4'],
                  sample_ids)
    with self.assertRaisesRegex(ValueError, 'All.*filtered'):
        # The call is expected to raise, so its result is not bound.
        filter_seqs(self.seqs, table=table, exclude_ids=True)

    # filter all (extra ids in table is ok)
    table = Table(np.array([[0, 1, 3], [1, 1, 2], [42, 3, 0], [0, 0, 0],
                            [1, 0, 0]]),
                  ['O1', 'O2', 'O3', 'O4', 'O5'],
                  sample_ids)
    with self.assertRaisesRegex(ValueError, 'All.*filtered'):
        filter_seqs(self.seqs, table=table, exclude_ids=True)

    # filter three
    table = Table(np.array([[1, 1, 2], [42, 3, 0], [0, 0, 0]]),
                  ['O2', 'O3', 'O4'],
                  sample_ids)
    obs = filter_seqs(self.seqs, table=table, exclude_ids=True)
    exp = pd.Series(['ACGT'], index=['O1'])
    assert_series_equal(obs, exp)

    # filter none
    table = Table(np.array([[0, 1, 3], [1, 1, 2], [42, 3, 0], [0, 0, 0]]),
                  ['O1-alt', 'O2-alt', 'O3-alt', 'O4-alt'],
                  sample_ids)
    obs = filter_seqs(self.seqs, table=table, exclude_ids=True)
    assert_series_equal(obs, self.seqs)
def setUp(self):
    """Create the OTU table fixtures used by the tests."""
    self.output_dir = '/tmp/'
    self.num_otu_hits = 5
    self._folders_to_cleanup = []

    sample_ids = ['Sample1', 'Sample2']

    # Full table: two OTUs, each carrying a taxonomy annotation.
    full_counts = array([[0, 0], [1, 5]])
    full_taxonomy = [{"taxonomy": ["Bacteria"]}, {"taxonomy": ["Archaea"]}]
    self.otu_table = Table(full_counts,
                           ['OTU1', 'OTU2'],
                           sample_ids,
                           full_taxonomy,
                           [None, None])

    # Filtered table: only the OTU2 row of the full table.
    filtered_counts = array([[1, 5]])
    filtered_taxonomy = [{"taxonomy": ["Archaea"]}]
    self.filt_otu_table = Table(filtered_counts,
                                ['OTU2'],
                                sample_ids,
                                filtered_taxonomy,
                                [None, None])
def test_combine_id_and_frequency_filters(self):
    # Applies a metadata `where` clause together with min_frequency.
    def build_metadata():
        frame = pd.DataFrame(
            {'Subject': ['subject-1', 'subject-1', 'subject-2'],
             'SampleType': ['gut', 'tongue', 'gut']},
            index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
        return qiime2.Metadata(frame)

    def fresh_table():
        return Table(np.array([[0, 1, 3], [1, 1, 2]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3'])

    # no filtering
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1' OR Subject='subject-2'"
    actual = filter_samples(table, metadata=metadata, where=where,
                            min_frequency=1)
    expected = fresh_table()
    self.assertEqual(actual, expected)

    # id and frequency filters active
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1'"
    actual = filter_samples(table, metadata=metadata, where=where,
                            min_frequency=2)
    expected = Table(np.array([[1], [1]]), ['O1', 'O2'], ['S2'])
    self.assertEqual(actual, expected)
def test_sum_triple_overlap(self):
    # Summing three copies of an all-ones table is expected to give a table
    # of threes with the same IDs.
    feature_ids = ['O1', 'O2']
    sample_ids = ['S1', 'S2', 'S3']
    table = Table(np.array([[1, 1, 1], [1, 1, 1]]), feature_ids, sample_ids)

    observed = merge([table, table, table], 'sum')

    expected = Table(np.array([[3, 3, 3], [3, 3, 3]]),
                     feature_ids, sample_ids)
    self.assertEqual(observed, expected)
def setUp(self):
    # Builds the "l19" fixture: a 19-sample x 9-taxon count matrix, two BIOM
    # files written from it (one with an underscore in a taxon name), and a
    # matching newick tree.
    self.tmp_dir = get_qiime_temp_dir()

    # Rows are samples, columns are taxa; transposed below because Table
    # takes observations (taxa) as rows.
    self.l19_data = np.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6', 'sam7', 'sam8',
        'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13', 'sam14', 'sam15',
        'sam16', 'sam17', 'sam18', 'sam19',
    ]
    self.l19_taxon_names = [
        'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
        'tax8', 'tax9',
    ]
    self.l19_taxon_names_w_underscore = [
        'ta_x1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
        'tax8', 'tax9',
    ]

    # The temp files hold BIOM tables, so they get a '.biom' suffix
    # (previously misspelled '.blom').
    l19 = Table(self.l19_data.T, self.l19_taxon_names,
                self.l19_sample_names)
    fd, self.l19_fp = mkstemp(dir=self.tmp_dir,
                              prefix='test_bdiv_otu_table',
                              suffix='.biom')
    os.close(fd)
    write_biom_table(l19, self.l19_fp)

    l19_w_underscore = Table(self.l19_data.T,
                             self.l19_taxon_names_w_underscore,
                             self.l19_sample_names)
    fd, self.l19_w_underscore_fp = mkstemp(dir=self.tmp_dir,
                                           prefix='test_bdiv_otu_table',
                                           suffix='.biom')
    os.close(fd)
    write_biom_table(l19_w_underscore, self.l19_w_underscore_fp)

    # Adjacent literals replace the original backslash continuation inside
    # the string; the second piece keeps a leading space.
    self.l19_tree_str = (
        '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,'
        ' ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2,'
        ' (tax9:0.3, endbigtaxon:.08));'
    )
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    self.files_to_remove = [self.l19_fp, self.l19_w_underscore_fp]
    self.folders_to_remove = []
def setUp(self):
    # Fixture data: per-sample count dictionaries and small BIOM tables that
    # use the same sample identifiers.
    sample_a = "10317.000001778.57016"
    sample_b = "10317.000002860.57016"
    sample_c = "10317.000002860.58862"
    species = ['sp1', 'sp2']

    self.ambiguities_json = '%s/ambiguities/json' % ROOT

    self.read_counts_diff = {sample_a: 1, sample_b: 2, sample_c: 1}
    self.read_counts_equal = {sample_a: 1, sample_b: 1, sample_c: 1}
    self.feat_counts = {sample_a: 1, sample_b: 1, sample_c: 2}

    self.biom = Table(np.array([[1, 1, 1], [1, 1, 1]]),
                      species,
                      [sample_a, sample_b, sample_c])
    self.biom_diff = Table(np.array([[1, 1], [1, 1]]),
                           species,
                           [sample_a, sample_b])
    self.biom_equal = Table(np.array([[1, 1], [1, 1]]),
                            species,
                            [sample_a, sample_c])
def setUp(self):
    # Fixture data: sample-metadata dictionaries plus paired BIOM tables and
    # mapping-file dicts ("in" = input, "out" = expected result).
    self.sample_metadata_1 = {
        's1': {'source_sink': 'source1', 'cat2': 'random_nonsense'},
        's2': {'source_sink': 'sink', 'cat2': 'sink'},
        's5': {'source_sink': 'source1', 'cat2': 'random_nonsense'},
        's0': {'source_sink': 'source2', 'cat2': 'random_nonsense'},
        's100': {'source_sink': 'sink', 'cat2': 'sink'},
    }

    # Data for testing sinks_and_sources
    self.sample_metadata_2 = {
        's1': {'SourceSink': 'source', 'Env': 'source1'},
        's2': {'SourceSink': 'sink', 'Env': 'e1'},
        's5': {'SourceSink': 'source', 'Env': 'source1'},
        's0': {'SourceSink': 'source', 'Env': 'source2'},
        's100': {'SourceSink': 'sink', 'Env': 'e2'},
    }
    self.sample_metadata_3 = {
        's1': {'SourceSink': 'source', 'Env': 'source1'},
        's2': {'SourceSink': 'source', 'Env': 'e1'},
        's5': {'SourceSink': 'source', 'Env': 'source1'},
        's0': {'SourceSink': 'source', 'Env': 'source2'},
        's100': {'SourceSink': 'source', 'Env': 'e2'},
    }

    # Data for testing _cli_sync_biom_and_sample_metadata
    oids = ['o1', 'o2', 'o3']

    # Data for an example where samples are removed from biom table only.
    sids = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
    bt_1_data = np.arange(12).reshape(3, 4)
    self.bt_1_in = Table(bt_1_data, oids, sids)
    self.bt_1_out = Table(bt_1_data[:, :-1], oids, sids[:-1])
    self.mf_1_in = {
        'Sample1': {'cat1': 'X', 'cat2': 'Y'},
        'Sample2': {'cat1': 'X', 'cat2': 'Y'},
        'Sample3': {'cat1': 'X', 'cat2': 'Y'},
    }
    # Same object as the input: the mapping file is expected unchanged.
    self.mf_1_out = self.mf_1_in

    # Data for an example where samples are removed from mapping file only.
    self.bt_2_in = self.bt_1_in
    self.bt_2_out = self.bt_1_in
    self.mf_2_in = {
        'Sample1': {'cat1': 'X', 'cat2': 'Y'},
        'Sample6': {'cat1': 'X', 'cat2': 'Y'},
        'Sample3': {'cat1': 'X', 'cat2': 'Y'},
        'Sample4': {'cat1': 'X', 'cat2': 'Y'},
        'Sample2': {'cat1': 'X', 'cat2': 'Y'},
    }
    self.mf_2_out = {
        'Sample1': {'cat1': 'X', 'cat2': 'Y'},
        'Sample3': {'cat1': 'X', 'cat2': 'Y'},
        'Sample4': {'cat1': 'X', 'cat2': 'Y'},
        'Sample2': {'cat1': 'X', 'cat2': 'Y'},
    }

    # Data for an example where samples are removed from mapping file and
    # biom file.
    sids = ['Sample1', 'sampleA', 'Sample3', 'Sample4']
    bt_3_data = np.arange(12).reshape(3, 4)
    self.bt_3_in = Table(bt_3_data, oids, sids)
    # Slice this example's own matrix (bt_3_data) so the expected table
    # stays tied to bt_3_in; it previously sliced bt_1_data, which only
    # worked because both matrices hold the same values.
    self.bt_3_out = Table(bt_3_data[:, [0, 2, 3]], oids,
                          [sids[0], sids[2], sids[3]])
    self.mf_3_in = self.mf_2_out
    self.mf_3_out = {
        'Sample1': {'cat1': 'X', 'cat2': 'Y'},
        'Sample3': {'cat1': 'X', 'cat2': 'Y'},
        'Sample4': {'cat1': 'X', 'cat2': 'Y'},
    }
def test_sum_full_overlap(self):
    # Both tables share every feature and sample ID, so the 'sum' merge is
    # expected to add the matrices element-wise.
    feature_ids = ['O1', 'O2']
    sample_ids = ['S1', 'S2', 'S3']
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]), feature_ids, sample_ids)
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]), feature_ids, sample_ids)

    observed = merge([first, second], 'sum')

    expected = Table(np.array([[0, 3, 9], [3, 3, 6]]),
                     feature_ids, sample_ids)
    self.assertEqual(observed, expected)
def test_average_relative_frequency(self):
    # Averaging a table of 0.75s with a table of 0.25s over identical IDs is
    # expected to give a table of 0.5s.
    feature_ids = ['O1', 'O2']
    sample_ids = ['S1', 'S2', 'S3']
    high = Table(np.array([[0.75, 0.75, 0.75], [0.75, 0.75, 0.75]]),
                 feature_ids, sample_ids)
    low = Table(np.array([[0.25, 0.25, 0.25], [0.25, 0.25, 0.25]]),
                feature_ids, sample_ids)

    observed = merge([high, low], 'average')

    expected = Table(np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]),
                     feature_ids, sample_ids)
    self.assertEqual(observed, expected)
def test_sum_some_overlap(self):
    # The tables share feature 'O1' and sample 'S2' only; the expected table
    # sums the shared (O1, S2) cell and zero-fills the non-shared cells.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S4', 'S2', 'S5'])

    observed = merge([first, second], 'sum')

    expected_counts = np.array([[0, 3, 3, 0, 6],
                                [1, 1, 2, 0, 0],
                                [0, 2, 0, 2, 4]])
    expected = Table(expected_counts,
                     ['O1', 'O2', 'O3'],
                     ['S1', 'S2', 'S3', 'S4', 'S5'])
    self.assertEqual(observed, expected)
def test_valid_overlapping_sample_ids(self):
    # Sample 'S1' is shared but the feature IDs are disjoint, so
    # 'error_on_overlapping_feature' is expected to merge without raising.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O3', 'O4'],
                   ['S1', 'S5', 'S6'])

    observed = merge([first, second], 'error_on_overlapping_feature')

    expected_counts = np.array([[0, 1, 3, 0, 0],
                                [1, 1, 2, 0, 0],
                                [0, 0, 0, 2, 6],
                                [2, 0, 0, 2, 4]])
    expected = Table(expected_counts,
                     ['O1', 'O2', 'O3', 'O4'],
                     ['S1', 'S2', 'S3', 'S5', 'S6'])
    self.assertEqual(observed, expected)
def setUp(self):
    """Define the OTU tables, lineages, metadata, tree text and temp file.

    A bare dict literal that restated otu_table_values in sparse
    {(row, col): value} form used to sit after the array; it was evaluated
    and discarded, so it has been removed.
    """
    self.otu_table_values = array([[0, 0, 9, 5, 3, 1],
                                   [1, 5, 4, 0, 3, 2],
                                   [2, 3, 1, 1, 2, 5]])

    otu_ids = ['OTU1', 'OTU2', 'OTU3']
    sample_ids = ['Sample1', 'Sample2', 'Sample3',
                  'Sample4', 'Sample5', 'Sample6']

    # Table whose taxonomy metadata has a single level per OTU.
    self.otu_table = Table(
        self.otu_table_values,
        otu_ids,
        sample_ids,
        [{"taxonomy": ['Bacteria']},
         {"taxonomy": ['Archaea']},
         {"taxonomy": ['Streptococcus']}],
        [None, None, None, None, None, None])

    # Same counts, but with four-level lineages as taxonomy metadata.
    self.otu_table_f = Table(
        self.otu_table_values,
        otu_ids,
        sample_ids,
        [{"taxonomy": ['1A', '1B', '1C', 'Bacteria']},
         {"taxonomy": ['2A', '2B', '2C', 'Archaea']},
         {"taxonomy": ['3A', '3B', '3C', 'Streptococcus']}],
        [None, None, None, None, None, None])

    self.full_lineages = [['1A', '1B', '1C', 'Bacteria'],
                          ['2A', '2B', '2C', 'Archaea'],
                          ['3A', '3B', '3C', 'Streptococcus']]

    # Three-element structure: data rows, header names, and an empty list.
    self.metadata = [[['Sample1', 'NA', 'A'],
                      ['Sample2', 'NA', 'B'],
                      ['Sample3', 'NA', 'A'],
                      ['Sample4', 'NA', 'B'],
                      ['Sample5', 'NA', 'A'],
                      ['Sample6', 'NA', 'B']],
                     ['SampleID', 'CAT1', 'CAT2'],
                     []]

    self.tree_text = ["('OTU3',('OTU1','OTU2'))"]

    # Only the path is kept; the OS-level handle is closed immediately.
    fh, self.tmp_heatmap_fpath = mkstemp(prefix='test_heatmap_',
                                         suffix='.pdf')
    close(fh)
def setUp(self):
    # Fixtures: two empty-data tables (no IDs at all, and feature IDs only),
    # paths to the bundled data, and a tree built from the fingerprint data.
    here = os.path.dirname(os.path.abspath(__file__))

    self.emptyfeatures = Table({}, [], [])
    self.wrongtips = Table({}, ['a', 'b', 'c'], [])

    self.goodtable = os.path.join(here, 'data/features_formated.biom')
    self.goodcsi = os.path.join(here, 'data/goodcsi')
    self.goodthresh = 0.5

    fingerprints = collate_fingerprint(self.goodcsi)
    self.goodtree = make_hierarchy(fingerprints,
                                   prob_threshold=self.goodthresh)
def test_sum_overlapping_feature_ids(self):
    # This should produce the same result as `error_on_overlapping_sample`:
    # the sample IDs are disjoint, so no cells are actually added together.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S4', 'S5', 'S6'])

    observed = merge([first, second], 'sum')

    expected_counts = np.array([[0, 1, 3, 0, 2, 6],
                                [1, 1, 2, 0, 0, 0],
                                [0, 0, 0, 2, 2, 4]])
    expected = Table(expected_counts,
                     ['O1', 'O2', 'O3'],
                     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
    self.assertEqual(observed, expected)
def test_get_overlapping_no_overlap(self):
    # The two tables share no IDs on either axis, so both lookups are
    # expected to return an empty set.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O3', 'O4'],
                   ['S4', 'S5', 'S6'])
    tables = [first, second]

    # samples
    shared = _get_overlapping(tables, 'sample')
    self.assertEqual(set(), shared)

    # features
    shared = _get_overlapping(tables, 'observation')
    self.assertEqual(set(), shared)
def test_sample_metadata_extra_ids(self):
    # The metadata lists an ID ('S-not-in-table') that the table lacks; the
    # expected result keeps only the samples named in the metadata (S2, S3).
    columns = {'Subject': ['subject-1', 'subject-1', 'subject-2'],
               'SampleType': ['gut', 'tongue', 'gut']}
    frame = pd.DataFrame(columns, index=['S-not-in-table', 'S2', 'S3'])
    metadata = qiime2.Metadata(frame)

    table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    actual = filter_samples(table, sample_metadata=metadata)

    expected = Table(np.array([[1, 3], [1, 2]]),
                     ['O1', 'O2'],
                     ['S2', 'S3'])
    self.assertEqual(actual, expected)
def test_feature_metadata(self):
    # Every scenario filters a fresh copy of the same 2x3 table using the
    # feature IDs present in a metadata object.
    def metadata_for(columns, ids):
        frame = pd.DataFrame(columns, index=pd.Index(ids, name='id'))
        return qiime2.Metadata(frame)

    def fresh_table():
        return Table(np.array([[0, 1, 3], [1, 1, 2]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3'])

    # no filtering
    metadata = metadata_for({'SequencedGenome': ['yes', 'yes']},
                            ['O1', 'O2'])
    table = fresh_table()
    actual = filter_features(table, metadata=metadata)
    expected = fresh_table()
    self.assertEqual(actual, expected)

    # filter one
    metadata = metadata_for({'SequencedGenome': ['yes']}, ['O1'])
    table = fresh_table()
    actual = filter_features(table, metadata=metadata)
    expected = Table(np.array([[1, 3]]), ['O1'], ['S2', 'S3'])
    self.assertEqual(actual, expected)

    # filter all
    metadata = metadata_for({}, ['foo'])
    table = fresh_table()
    actual = filter_features(table, metadata=metadata)
    expected = Table(np.array([]), [], [])
    self.assertEqual(actual, expected)

    # exclude one
    metadata = metadata_for({'SequencedGenome': ['yes']}, ['O1'])
    table = fresh_table()
    actual = filter_features(table, metadata=metadata, exclude_ids=True)
    expected = Table(np.array([[1, 1, 2]]), ['O2'], ['S1', 'S2', 'S3'])
    self.assertEqual(actual, expected)

    # exclude all
    metadata = metadata_for({'SequencedGenome': ['yes', 'yes']},
                            ['O1', 'O2'])
    table = fresh_table()
    actual = filter_features(table, metadata=metadata, exclude_ids=True)
    expected = Table(np.array([]), [], [])
    self.assertEqual(actual, expected)
def test_max_frequency(self):
    # Each scenario runs filter_samples on a fresh copy of the same table
    # with a progressively smaller max_frequency.
    def fresh_table():
        return Table(np.array([[0, 1, 3], [1, 1, 2]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3'])

    # no filtering
    actual = filter_samples(fresh_table(), max_frequency=42)
    expected = fresh_table()
    self.assertEqual(actual, expected)

    # filter one
    actual = filter_samples(fresh_table(), max_frequency=4)
    expected = Table(np.array([[0, 1], [1, 1]]),
                     ['O1', 'O2'],
                     ['S1', 'S2'])
    self.assertEqual(actual, expected)

    # filter two
    actual = filter_samples(fresh_table(), max_frequency=1)
    expected = Table(np.array([[1]]), ['O2'], ['S1'])
    self.assertEqual(actual, expected)

    # filter all
    actual = filter_samples(fresh_table(), max_frequency=0)
    expected = Table(np.array([]), [], [])
    self.assertEqual(actual, expected)
def test_get_overlapping_multiple(self):
    # With three tables, the expected sets contain every ID that occurs in
    # at least two of them.
    first = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    second = Table(np.array([[0, 2, 6], [2, 2, 4]]),
                   ['O1', 'O3'],
                   ['S1', 'S5', 'S6'])
    third = Table(np.array([[3, 3, 1], [0, 2, 1]]),
                  ['O1', 'O2'],
                  ['S1', 'S3', 'S6'])
    tables = [first, second, third]

    # samples
    shared = _get_overlapping(tables, 'sample')
    self.assertEqual({'S1', 'S3', 'S6'}, shared)

    # features
    shared = _get_overlapping(tables, 'observation')
    self.assertEqual({'O1', 'O2'}, shared)
def test_where(self):
    # Every scenario rebuilds the same metadata and table, then filters the
    # samples with a different `where` expression.
    def build_metadata():
        frame = pd.DataFrame(
            {'Subject': ['subject-1', 'subject-1', 'subject-2'],
             'SampleType': ['gut', 'tongue', 'gut']},
            index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
        return qiime2.Metadata(frame)

    def fresh_table():
        return Table(np.array([[0, 1, 3], [1, 1, 2]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3'])

    # no filtering
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1' OR Subject='subject-2'"
    actual = filter_samples(table, sample_metadata=metadata, where=where)
    expected = fresh_table()
    self.assertEqual(actual, expected)

    # filter one
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1'"
    actual = filter_samples(table, sample_metadata=metadata, where=where)
    expected = Table(np.array([[0, 1], [1, 1]]),
                     ['O1', 'O2'],
                     ['S1', 'S2'])
    self.assertEqual(actual, expected)

    # filter two
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1' AND SampleType='gut'"
    actual = filter_samples(table, sample_metadata=metadata, where=where)
    expected = Table(np.array([[1]]), ['O2'], ['S1'])
    self.assertEqual(actual, expected)

    # filter all
    metadata = build_metadata()
    table = fresh_table()
    where = "Subject='subject-1' AND Subject='subject-2'"
    actual = filter_samples(table, sample_metadata=metadata, where=where)
    expected = Table(np.array([]), [], [])
    self.assertEqual(actual, expected)
def setUp(self):
    # Fixtures: a 4-taxon x 3-sample OTU table (with and without observation
    # metadata), written to temp .biom files, plus a temp output directory.
    self.qiime_config = load_qiime_config()
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

    self.otu_table_data = np.array([[2, 1, 0],
                                    [0, 5, 0],
                                    [0, 3, 0],
                                    [1, 2, 0]])
    self.sample_names = list('YXZ')
    self.taxon_names = list('bacd')
    self.otu_metadata = [{'domain': 'Archaea'},
                         {'domain': 'Bacteria'},
                         {'domain': 'Bacteria'},
                         {'domain': 'Bacteria'}]

    # One table with empty metadata dicts, one with the domain annotations.
    self.otu_table = Table(self.otu_table_data,
                           self.taxon_names,
                           self.sample_names,
                           observation_metadata=[{}, {}, {}, {}],
                           sample_metadata=[{}, {}, {}])
    self.otu_table_meta = Table(self.otu_table_data,
                                self.taxon_names,
                                self.sample_names,
                                observation_metadata=self.otu_metadata)

    # mkstemp returns an open descriptor; only the path is needed.
    handle, self.otu_table_fp = mkstemp(dir=self.tmp_dir,
                                        prefix='test_rarefaction',
                                        suffix='.biom')
    close(handle)
    handle, self.otu_table_meta_fp = mkstemp(dir=self.tmp_dir,
                                             prefix='test_rarefaction',
                                             suffix='.biom')
    close(handle)

    self.rare_dir = mkdtemp(dir=self.tmp_dir,
                            prefix='test_rarefaction_dir',
                            suffix='')

    write_biom_table(self.otu_table, self.otu_table_fp)
    write_biom_table(self.otu_table_meta, self.otu_table_meta_fp)

    self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
    self._dirs_to_clean_up = [self.rare_dir]
def table_from_template(new_data, sample_ids, observation_ids,
                        sample_metadata_source=None,
                        observation_metadata_source=None,
                        verbose=False):
    """Create a 'Gene table' BIOM table and copy metadata from donor tables.

    new_data -- data passed to Table as the table contents
    sample_ids, observation_ids -- IDs for the two axes of the new table
    sample_metadata_source -- optional donor whose sample metadata is
        transferred onto the new table's samples
    observation_metadata_source -- optional donor whose observation metadata
        is transferred onto the new table's observations
    verbose -- forwarded unchanged to transfer_metadata

    Returns the new table after any requested metadata transfers.
    """
    result_table = Table(new_data, observation_ids, sample_ids,
                         type='Gene table')

    # Sample metadata is transferred first, then observation metadata; a
    # falsy donor means that axis is skipped.
    donors_by_axis = (
        ('sample', sample_metadata_source),
        ('observation', observation_metadata_source),
    )
    for axis, donor in donors_by_axis:
        if not donor:
            continue
        result_table = transfer_metadata(donor, result_table,
                                         donor_metadata_type=axis,
                                         recipient_metadata_type=axis,
                                         verbose=verbose)

    return result_table
def test_non_phylogenetic_invalid_input(self):
    # Calling beta_diversity with 'bray_curtis' and a phylogeny keyword is
    # expected to raise TypeError.
    table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                  ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
    tree = skbio.TreeNode.read(newick)

    with self.assertRaises(TypeError):
        beta_diversity('bray_curtis', table, phylogeny=tree)
def test_generate_biom_table(self):
    """ Check the table generate_biom_table builds from FASTA + .uc input
    """
    fasta_records = [("s1_80;size=3;", "AGTCGTACGTGCATGCA"),
                     ("s1_0;size=3;", "TGTGTAGCTGTGCTGAT"),
                     ("s1_10;size=3;", "CGGGTGCATGTCGTGAC")]
    # .uc text: three 'S' records, each followed by two 'H' records.
    uc_output = """S\t0\t100\t*\t*\t*\t*\t*\ts1_80\t*
H\t0\t100\t100.0\t*\t0\t0\t*\ts1_81\ts1_80
H\t0\t100\t100.0\t*\t0\t0\t*\ts1_82\ts1_80
S\t1\t100\t*\t*\t*\t*\t*\ts1_0\t*
H\t1\t100\t100.0\t*\t0\t0\t*\ts1_1\ts1_0
H\t1\t100\t100.0\t*\t0\t0\t*\ts1_60\ts1_0
S\t2\t100\t*\t*\t*\t*\t*\ts1_10\t*
H\t2\t100\t100.0\t*\t0\t0\t*\ts1_12\ts1_10
H\t2\t100\t100.0\t*\t0\t0\t*\ts1_13\ts1_10
"""

    # Write both input files into the working directory.
    seqs_fp = join(self.working_dir, "seqs.fasta")
    with open(seqs_fp, 'w') as fasta_out:
        for record in fasta_records:
            fasta_out.write(">%s\n%s\n" % record)

    # temporary file for .uc output
    uc_output_fp = join(self.working_dir, "derep.uc")
    with open(uc_output_fp, 'w') as uc_out:
        uc_out.write(uc_output)

    # Expected table: one sample, three sequence-named observations, each
    # with a count of 3 (sparse {(row, col): value} form).
    expected_counts = {(2, 0): 3, (1, 0): 3, (0, 0): 3}
    expected_otu_ids = ['CGGGTGCATGTCGTGAC',
                        'TGTGTAGCTGTGCTGAT',
                        'AGTCGTACGTGCATGCA']
    expected_sample_ids = ['s1']
    table_exp = Table(expected_counts, expected_otu_ids,
                      expected_sample_ids, sample_metadata=None)

    clusters, table = generate_biom_table(seqs_fp, uc_output_fp)
    self.assertEqual(table, table_exp)
def setUp(self):
    # Looks up the two beta pipelines and imports the tables and trees the
    # tests use as QIIME 2 artifacts.
    super().setUp()

    self.beta = self.plugin.pipelines['beta']
    self.beta_phylogenetic = self.plugin.pipelines['beta_phylogenetic']

    table_type = 'FeatureTable[Frequency]'
    tree_type = 'Phylogeny[Rooted]'

    # Artifacts imported from data files.
    two_feature_path = self.get_data_path('two_feature_table.biom')
    self.two_feature_table = Artifact.import_data(table_type,
                                                  two_feature_path)

    three_feature_path = self.get_data_path('three_feature.tree')
    self.three_feature_tree = Artifact.import_data(tree_type,
                                                   three_feature_path)

    crawford_table_path = self.get_data_path('crawford.biom')
    self.crawford_table = Artifact.import_data(table_type,
                                               crawford_table_path)

    crawford_tree_path = self.get_data_path('crawford.nwk')
    self.crawford_tree = Artifact.import_data(tree_type,
                                              crawford_tree_path)

    # Artifacts imported from in-memory objects.
    small_table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                        ['O1', 'O2'],
                        ['S1', 'S2', 'S3'])
    self.t = Artifact.import_data(table_type, small_table)

    small_tree = skbio.TreeNode.read(
        io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;'))
    self.tree = Artifact.import_data(tree_type, small_tree)
def create_biom_table(sample_counts, taxa):
    """
    Assemble a dense BIOM "OTU table" of per-sample taxon counts.

    :type sample_counts: dict
    :param sample_counts: Two-level mapping: sample ID -> taxon ID -> count.
        A taxon missing from a sample's inner mapping is counted as 0.
    :type taxa: dict
    :param taxa: Mapping from the taxon IDs used in sample_counts to the
        full taxonomy representation, stored as the 'taxonomy' metadata of
        each observation.
    :rtype: biom.Table
    :return: A table with one row per taxon in ``taxa`` and one column per
        sample in ``sample_counts``, carrying the taxonomy metadata.
    """
    sample_ids = list(sample_counts)
    taxon_ids = list(taxa)

    # One row per taxon, one column per sample, in mapping iteration order.
    rows = []
    for taxid in taxon_ids:
        rows.append([sample_counts[sid].get(taxid, 0) for sid in sample_ids])
    counts = np.array(rows, dtype=int)

    tax_meta = [{'taxonomy': taxa[taxid]} for taxid in taxon_ids]
    gen_str = "kraken-biom v{} ({})".format(__version__, __url__)
    created = dt.now().isoformat()

    return Table(counts, taxon_ids, sample_ids, tax_meta,
                 type="OTU table",
                 create_date=created,
                 generated_by=gen_str,
                 input_is_dense=True)