Пример #1
0
def BIOM_return_clipped_taxonomy(taxlevel, BIOM):
    """
    Return a new BIOM table whose taxonomy is clipped at a given level.

    Every observation's 'taxonomy' metadata is truncated to the ranks from
    'kingdom' down to (and including) `taxlevel`. Observations whose deepest
    remaining rank contains the text 'unknown' are reported on stdout and
    removed from the returned table.

    The input table is left untouched: the clipped lineages are written to
    copies of the metadata entries, not to the entries of `BIOM` itself.

    Parameters
    ----------
    taxlevel : str
        One of 'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species' or 'unassigned'.
    BIOM : table object
        Must provide ids(axis=...), metadata(axis=...) and
        data(id, axis=...) -- presumably a biom.table.Table.

    Returns
    -------
    biom.table.Table
        A freshly built table with clipped taxonomy metadata.

    Raises
    ------
    KeyError
        If `taxlevel` is not a known level, or if the observations carry no
        'taxonomy' metadata.
    """

    levels = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'unassigned']

    # Validate before the third-party imports so that a bad level fails fast
    # with the intended KeyError.
    if taxlevel not in levels:
        raise KeyError("The taxonomic level you are trying to search: '%s', is not valid" % taxlevel)

    from biom.table import Table
    import numpy as np

    # Number of leading ranks to keep.
    clip_level = levels.index(taxlevel) + 1

    # Check if the first OTU has 'taxonomy' metadata attached; if yes, assume
    # all others have it too. Absent or empty metadata is reported through the
    # same KeyError.
    observation_metadata = BIOM.metadata(axis='observation')
    if not observation_metadata or 'taxonomy' not in observation_metadata[0]:
        raise KeyError('The BIOM table you are trying to screen does not have taxonomy metadata attached to it')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Found taxonomy metadata with OTUs - ok!")

    sample_ids = BIOM.ids(axis='sample')
    observation_ids = BIOM.ids(axis='observation')
    sample_metadata = BIOM.metadata(axis='sample')

    # One row of counts per observation, in the order of observation_ids.
    data = np.asarray([BIOM.data(otu, axis='observation') for otu in observation_ids])

    clipped_metadata = []
    to_drop = []
    for otu, metadata in zip(observation_ids, observation_metadata):
        # Work on a copy so the caller's table keeps its full lineages.
        entry = dict(metadata)
        taxonomy = entry['taxonomy'][:clip_level]
        entry['taxonomy'] = taxonomy
        clipped_metadata.append(entry)
        # An empty lineage has no deepest rank to inspect.
        if taxonomy and 'unknown' in taxonomy[-1]:
            print("fishy: %s" % (taxonomy,))
            to_drop.append(otu)

    # Construct adjusted table.
    outtable = Table(data, observation_ids, sample_ids, table_id='OTU table',
                     sample_metadata=sample_metadata,
                     observation_metadata=clipped_metadata)

    if to_drop:
        outtable.filter(to_drop, invert=True, axis='observation', inplace=True)

    return outtable
Пример #2
0
 def test_tree_filter_table_none(self):
     # The tree's tips are O1, O2, a and b, so both observation IDs of the
     # table below occur in the tree.
     newick_handle = io.StringIO("(O1:4.5,(O2:4,(a:1,b:1):2):0.5);")
     phylogeny = skbio.TreeNode.read(newick_handle)

     counts = np.array([[0, 1, 3], [1, 1, 2]])
     feature_table = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])

     # Compute the result under test first; the reference value is derived
     # from the same table object afterwards, as in the original ordering.
     observed = filter_table(feature_table, phylogeny)
     wanted = feature_table.filter(['O1', 'O2'], axis='observation')

     self.assertEqual(observed, wanted)
Пример #3
0
def filter_BIOM_by_per_sample_read_prop(BIOM, min_prop=0.01):
    """
    Filter an OTU table by a minimum per-sample read proportion.

    For every sample, a count is reset to zero when its integer part is
    non-zero but smaller than `min_prop` times that sample's total. OTUs
    whose summed counts then truncate to zero are removed from the result.
    Progress messages are printed to stdout.

    Parameters
    ----------
    BIOM : table object
        Must provide ids(axis=...), metadata(axis=...), sum(axis=...) and
        data(id, axis=...) -- presumably a biom.table.Table.
    min_prop : float, optional
        Minimum fraction of a sample's total that a count must reach to be
        kept (default 0.01, reported as 1 %).

    Returns
    -------
    biom.table.Table
        A newly constructed table holding the filtered counts.
    """

    import numpy as np
    from biom.table import Table

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\nFiltering at level: %s %%\n" % (min_prop * 100))

    sample_ids = BIOM.ids(axis='sample')
    observation_ids = BIOM.ids(axis='observation')
    sample_metadata = BIOM.metadata(axis='sample')
    observation_metadata = BIOM.metadata(axis='observation')

    # Per-sample cutoff: the smallest count that still counts as supported.
    thresholds = np.asarray(BIOM.sum(axis='sample')) * min_prop

    data_to_biom = []
    for otu in observation_ids:
        counts = BIOM.data(otu, axis='observation')
        # Compare on the truncated integer value, as int() did per element.
        whole = counts.astype(int)
        counts[(whole != 0) & (whole < thresholds)] = 0.0
        data_to_biom.append(counts)

    data = np.asarray(data_to_biom)

    # Construct adjusted table.
    table = Table(data, observation_ids, sample_ids, table_id='OTU table',
                  sample_metadata=sample_metadata,
                  observation_metadata=observation_metadata)

    # Collect OTUs left without any reads after the per-sample filtering.
    observation_sums = table.sum(axis='observation')
    to_exclude = [otu for otu, total in zip(observation_ids, observation_sums)
                  if int(total) == 0]

    print("Removing %i OTUs for lack of support\n" % len(to_exclude))
    if to_exclude:
        table.filter(to_exclude, invert=True, axis='observation', inplace=True)

    return table
Пример #4
0
 def test_tree_filter_table_none(self):
     # Every observation ID of the table (O1, O2) is a tip of this tree.
     tree = skbio.TreeNode.read(
         io.StringIO("(O1:4.5,(O2:4,(a:1,b:1):2):0.5);"))

     observation_ids = ['O1', 'O2']
     sample_ids = ['S1', 'S2', 'S3']
     matrix = np.array([[0, 1, 3],
                        [1, 1, 2]])
     table = Table(matrix, observation_ids, sample_ids)

     # filter_table runs before the reference filter call, matching the
     # original statement order.
     result = filter_table(table, tree)
     reference = table.filter(['O1', 'O2'], axis='observation')
     self.assertEqual(result, reference)