def BIOM_return_clipped_taxonomy(taxlevel, BIOM):
    """
    Return a new BIOM table whose taxonomy is clipped at a given level.

    Every observation's 'taxonomy' metadata is truncated so that it holds
    at most the ranks down to (and including) `taxlevel`. Observations
    whose deepest remaining rank contains the substring 'unknown' are
    reported on stdout and removed from the returned table.

    Parameters
    ----------
    taxlevel : str
        One of 'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species' or 'unassigned'.
    BIOM : table object
        Must provide ids(axis=...), metadata(axis=...) and
        data(id, axis=...) (presumably a biom.table.Table - confirm
        against callers).

    Returns
    -------
    biom.table.Table
        A newly constructed table; the metadata dictionaries of the input
        table are copied, not modified.

    Raises
    ------
    KeyError
        If `taxlevel` is not a recognised level, or if the first
        observation carries no 'taxonomy' metadata.
    """
    levels = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus',
              'species', 'unassigned']

    # Validate the cheap argument first so a bad level fails fast, before
    # any third-party import or table access.
    if taxlevel not in levels:
        raise KeyError("The taxonomic level you are trying to search: '%s', "
                       "is not valid" % taxlevel)

    from biom.table import Table
    import numpy as np

    # Number of leading taxonomy ranks to keep.
    clip_level = levels.index(taxlevel) + 1

    source_metadata = BIOM.metadata(axis='observation')

    # Only the first OTU is inspected; if it has 'taxonomy' metadata all
    # others are assumed to have it too. Missing/empty metadata is reported
    # with the same KeyError instead of an unrelated TypeError/IndexError.
    if not source_metadata or 'taxonomy' not in source_metadata[0]:
        raise KeyError('The BIOM table you are trying to screen does not '
                       'have taxonomy metadata attached to it')
    print("Found taxonomy metadata with OTUs - ok!")

    sample_ids = BIOM.ids(axis='sample')
    observation_ids = BIOM.ids(axis='observation')
    sample_metadata = BIOM.metadata(axis='sample')

    data = np.asarray([BIOM.data(otu_id, axis='observation')
                       for otu_id in observation_ids])

    clipped_metadata = []
    to_drop = []
    for otu_id, entry in zip(observation_ids, source_metadata):
        # Work on a copy so the caller's table keeps its full taxonomy.
        entry = dict(entry)
        taxonomy = entry['taxonomy']
        if len(taxonomy) > clip_level:
            taxonomy = taxonomy[:clip_level]
            entry['taxonomy'] = taxonomy

        # An 'unknown' label at the deepest retained rank marks the OTU
        # for removal; an empty taxonomy has no rank to inspect.
        if taxonomy and 'unknown' in taxonomy[-1]:
            # Wrap in a 1-tuple so a tuple-valued taxonomy formats as one item.
            print("fishy: %s" % (taxonomy,))
            to_drop.append(otu_id)

        clipped_metadata.append(entry)

    # Construct the adjusted table.
    outtable = Table(data, observation_ids, sample_ids,
                     table_id='OTU table',
                     sample_metadata=sample_metadata,
                     observation_metadata=clipped_metadata)

    if to_drop:
        outtable.filter(to_drop, invert=True, axis='observation',
                        inplace=True)

    return outtable
def test_tree_filter_table_none(self):
    """
    filter_table on a table whose observations are all tips of the tree.

    The tree has tips O1, O2, a and b; the table holds only O1 and O2.
    The result must equal the table filtered to keep O1 and O2.
    """
    newick_source = io.StringIO("(O1:4.5,(O2:4,(a:1,b:1):2):0.5);")
    phylogeny = skbio.TreeNode.read(newick_source)

    counts = np.array([[0, 1, 3],
                       [1, 1, 2]])
    feature_table = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])

    # The function under test runs before the reference filter is applied.
    observed = filter_table(feature_table, phylogeny)
    reference = feature_table.filter(['O1', 'O2'], axis='observation')

    self.assertEqual(observed, reference)
def filter_BIOM_by_per_sample_read_prop(BIOM, min_prop=0.01):
    """
    Filter an OTU table by minimum proportion of reads per sample.

    For every sample, any non-zero count that is smaller than
    `min_prop` times that sample's total is set to zero. Observations
    (OTUs) whose total count is zero afterwards are removed.

    Counts are truncated toward zero before being tested, so a fractional
    value below 1 is treated as absent and left untouched.

    Parameters
    ----------
    BIOM : table object
        Must provide ids(axis=...), metadata(axis=...), sum(axis=...) and
        data(id, axis=...) (presumably a biom.table.Table - confirm
        against callers).
    min_prop : float, optional
        Minimum fraction of a sample's total reads that an OTU must reach
        in that sample to be kept (default 0.01, reported as 1 %).

    Returns
    -------
    biom.table.Table
        A newly constructed, filtered table.
    """
    import numpy as np
    from biom.table import Table

    print("\nFiltering at level: %s %%\n" % (min_prop * 100))

    sample_ids = BIOM.ids(axis='sample')
    observation_ids = BIOM.ids(axis='observation')
    sample_metadata = BIOM.metadata(axis='sample')
    observation_metadata = BIOM.metadata(axis='observation')

    # Per-sample cut-off: the smallest count that still counts as supported.
    thresholds = np.asarray(BIOM.sum(axis='sample')) * min_prop

    rows = []
    for otu_id in observation_ids:
        # Assumes data() yields a writable vector with one value per
        # sample, in the same order as the per-sample sums - TODO confirm.
        row = BIOM.data(otu_id, axis='observation')
        whole_reads = np.trunc(row)
        unsupported = (whole_reads != 0) & (whole_reads < thresholds)
        # Assign a numeric zero; a string such as '0.0' cannot be stored
        # in an integer-typed array.
        row[unsupported] = 0
        rows.append(row)
    data = np.asarray(rows)

    # Construct the adjusted table.
    table = Table(data, observation_ids, sample_ids,
                  table_id='OTU table',
                  sample_metadata=sample_metadata,
                  observation_metadata=observation_metadata)

    # Drop OTUs that no longer have any reads in any sample.
    observation_sums = table.sum(axis='observation')
    to_exclude = [otu_id
                  for otu_id, total in zip(observation_ids, observation_sums)
                  if int(total) == 0]

    print("Removing %i OTUs for lack of support\n" % len(to_exclude))
    table.filter(to_exclude, invert=True, axis='observation', inplace=True)

    return table
def test_tree_filter_table_none(self):
    """
    filter_table on a table whose observations are all tips of the tree.

    The tree has tips O1, O2, a and b; the table holds only O1 and O2.
    The result must equal the table filtered to keep O1 and O2.
    """
    newick_source = io.StringIO("(O1:4.5,(O2:4,(a:1,b:1):2):0.5);")
    phylogeny = skbio.TreeNode.read(newick_source)

    counts = np.array([[0, 1, 3],
                       [1, 1, 2]])
    feature_table = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])

    # The function under test runs before the reference filter is applied.
    observed = filter_table(feature_table, phylogeny)
    reference = feature_table.filter(['O1', 'O2'], axis='observation')

    self.assertEqual(observed, reference)