def parse_biom(biom):
    """Load a BIOM table and render a one-row HTML summary of it.

    Args:
        biom: Path of the BIOM file; it is opened in text mode and handed to
            ``parse_table``.

    Returns:
        tuple: ``(df, summary_table)``. ``df`` is whatever the parsed table's
        ``to_dataframe()`` returns. ``summary_table`` is an HTML ``<table>``
        string (id ``otuTable``, classes ``table table-bordered``) holding
        the sample count, observation count, total count and table density,
        with every line after the first indented by ten spaces.
    """
    with open(biom) as handle:
        table = parse_table(handle)

    df = table.to_dataframe()

    header = ["Samples", "OTUs", "OTU Total Count", "OTU Table Density"]
    values = [
        table.length("sample"),
        table.length("observation"),
        table.sum(),
        "{:f}".format(table.get_table_density()),
    ]
    stats = pd.DataFrame([header, values])

    # Promote the first row to column labels, then discard that row so only
    # the values remain in the body of the frame.
    stats = stats.rename(columns=stats.iloc[0])
    stats = stats.reindex(stats.index.drop(0))

    html = stats.to_html(
        index=False,
        bold_rows=False,
        classes=["table", "table-bordered"],
        table_id="otuTable",
    )

    # Indent continuation lines so the markup can be embedded in rst.
    padding = 10 * " "
    summary_table = html.replace("\n", "\n" + padding)
    return df, summary_table
def read_biom_file(self):
    """Parse the BIOM file named by ``self.args.biom_file``.

    The file is opened in text mode and closed again before returning.

    Returns:
        The table object produced by ``parse_table``. Callers that ignore
        the return value are unaffected.
    """
    # No interactive debugger breakpoint here: a stray ``pdb.set_trace()``
    # would block any non-interactive run of this method.
    with open(self.args.biom_file) as f:
        return parse_table(f)
def mothur_counts_to_biom(mothur_f):
    """Parse a count table and remove the entry whose id is ``total``.

    Args:
        mothur_f: Input accepted by ``parse_table`` (presumably an open
            handle on a mothur count table -- confirm against callers).

    Returns:
        The value returned by the table's ``filter`` call, with the table
        type set to ``'OTU table'`` and the ``total`` id filtered out on
        ``filter``'s default axis.
    """
    mothur_biom = parse_table(mothur_f)
    mothur_biom.type = u'OTU table'
    # Compare for equality: ``id_ in 'total'`` is a substring test and would
    # also discard ids such as 't', 'a', 'tot' or the empty string.
    filter_biom = mothur_biom.filter(
        lambda val, id_, metadata: id_ == 'total', invert=True)
    return filter_biom
def get_biom_table_from_file(path):
    """Parse the BIOM file at ``path`` with all warnings silenced.

    ``biom`` is imported inside the ``catch_warnings`` block, so warnings
    raised while importing it or while parsing are ignored; the previous
    warning filters are restored when the block exits.

    Args:
        path: Path of the file to open (text mode) and parse.

    Returns:
        The table object produced by ``biom.parse_table``. Errors from
        opening or parsing the file propagate to the caller.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from biom import parse_table

        with open(path) as f:
            # Returning here still closes the file and restores the warning
            # filters; no fall-through ``return None`` is needed because this
            # line either returns or raises.
            return parse_table(f)
def mothur_counts_to_biom(mothur_f):
    """Parse a count table and remove the entry whose id is ``total``.

    Args:
        mothur_f: Input accepted by ``parse_table`` (presumably an open
            handle on a mothur count table -- confirm against callers).

    Returns:
        The value returned by the table's ``filter`` call, with the table
        type set to ``'OTU table'`` and the ``total`` id filtered out on
        ``filter``'s default axis.
    """
    mothur_biom = parse_table(mothur_f)
    mothur_biom.type = u'OTU table'

    def _is_total(val, id_, metadata):
        # Exact match only: the substring test ``id_ in 'total'`` would also
        # match ids like 't', 'o', 'tal' or '' and silently drop them.
        return id_ == 'total'

    # invert=True keeps everything the predicate does NOT select.
    filter_biom = mothur_biom.filter(_is_total, invert=True)
    return filter_biom
def parse_table_to_biom(table_lines, table_format="tab-delimited",
                        biom_format='otu table'):
    """Turn the lines of an open trait table file into a .biom table object.

    The trait table must be either a biom file or a picrust tab-delimited
    file; the input is passed unchanged to ``parse_table``.

    table_lines -- the lines of the open trait table file
    table_format -- must be either 'tab-delimited' or 'biom'
                    (accepted but not consulted by this function)
    biom_format -- accepted but not consulted by this function
    """
    biom_table = parse_table(table_lines)
    return biom_table
def test_filter_contaminated_libraries(self):
    """Check the filtering of heavily contaminated libraries.

    Runs ``filter_contaminated_libraries`` on the fixture table with four
    contaminant OTU ids and a 0.8 threshold, then compares the result with
    the table parsed from ``test_biom_file_filt``.
    """
    expected = parse_table(test_biom_file_filt)
    contaminants = {"otu3", "contam1", "contam2", "contam4"}
    observed = filter_contaminated_libraries(self.test_biom,
                                             contaminants,
                                             0.8)
    self.assertEqual(expected, observed)
def _read_biom(fp, transpose=True):
    '''Load a biom table and unpack its ids, matrix and feature metadata.

    Parameters
    ----------
    fp : str or file object
        The biom table to read. Anything with a ``read`` attribute is
        parsed with ``biom.parse_table``; everything else is treated as a
        file path and loaded with ``biom.load_table``.
    transpose : bool (True by default)
        Transpose the data matrix or not. The biom table has samples in
        columns while sklearn and other packages require samples in rows,
        so the data usually should be transposed.

    Returns
    -------
    sid : list of str
        the sample ids
    fid : list of str
        the feature ids
    data : numpy array (2d) of float
        the table
    feature_md : pandas.DataFrame
        the feature metadata (if available in table)
    '''
    logger.debug('loading biom table %s', fp)

    # File-like objects are parsed directly; paths go through load_table.
    loader = biom.parse_table if hasattr(fp, 'read') else biom.load_table
    table = loader(fp)

    sample_ids = table.ids(axis='sample')
    feature_ids = table.ids(axis='observation')
    logger.info('loaded %d samples, %d features',
                len(sample_ids), len(feature_ids))

    matrix = table.matrix_data
    feature_md = _get_md_from_biom(table)

    if transpose:
        logger.debug('transposing table')
        matrix = matrix.transpose()

    return sample_ids, feature_ids, matrix, feature_md
#!/usr/bin/env python
"""Rename tree tips after the samples in which each OTU is observed.

Usage: <script> TREE_FP BIOM_FP

Every tip named after an OTU is renamed to ``<otu>_<sample>[_<sample>...]``,
listing each sample that the table's ``nonzero()`` reports for that OTU.
Names of non-tip nodes are cleared and the resulting tree is printed.
"""
import sys

from skbio import TreeNode
from biom import parse_table

tree_fp = sys.argv[1]
biom_fp = sys.argv[2]

tree = TreeNode.read(tree_fp)

# Use a context manager so the table file handle is closed after parsing
# instead of being left open for the lifetime of the process.
with open(biom_fp, 'r') as biom_fh:
    biom = parse_table(biom_fh)

# Grow each OTU's new name by one "_<sample>" suffix per nonzero entry,
# starting from the bare OTU id the first time the OTU is seen.
new_names = {}
for otu, sample in biom.nonzero():
    new_names[otu] = '{0}_{1}'.format(new_names.get(otu, otu), sample)

for tip in tree.tips():
    # NOTE: a tip whose name has no nonzero entry in the table raises
    # KeyError here.
    tip.name = new_names[tip.name]

for node in tree.non_tips():
    node.name = None

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare ``print tree`` statement is a syntax error on Python 3.
print(tree)
"""Parse an example metaphlan table with biom and print its basic attributes."""
import numpy.core._methods
import numpy.lib.format
import numpy as np
from io import StringIO

# https://github.com/biocore/biom-format
# http://biom-format.org/documentation/quick_usage_examples.html#examples
from biom import parse_table, Table

# NOTE(review): ``dir`` shadows the builtin of the same name; the name is kept
# so that any code relying on this module-level variable keeps working.
dir = "C:/Projekti/PycharmProjects/UMCGMicrobiomeWeb/example_data"
path_tax = dir + '/example1_metaphlan.txt'
# The leading '/' is required; without it the file name is glued onto the
# directory name ("...example_dataexample_metaphlan_merged.tsv").
path_tax_marged = dir + '/example_metaphlan_merged.tsv'

with open(path_tax) as f:
    table = parse_table(f)

# Dump the parsed table's attributes (some are intentionally printed under
# two labels, exactly as before).
print("version ", table.format_version)
print("id ", table.table_id)
print("dtype ", table.dtype)
print("type ", table.type)
print("version ", table.format_version)
print("generated_by ", table.generated_by)
print("table_id ", table.table_id)
print("ids ", table.ids())  # ['Metaphlan2_Analysis']

# print("ids ", table.to_json())

# with open('c:/tmp/metha.json', 'w') as file:
#     file.write(table.to_json("janko"))

# Normalise along the sample axis; inplace=False asks for the result to be
# returned, and it is rebound to ``table`` here.
table = table.norm(axis='sample', inplace=False)
def setUp(self):
    """Init variables for the tests.

    Parses ``test_biom_file`` with ``parse_table`` and stores the result on
    ``self.test_biom``.
    """
    parsed_fixture = parse_table(test_biom_file)
    self.test_biom = parsed_fixture
#!/usr/bin/env python
"""Rename tree tips after the samples in which each OTU is observed.

Usage: <script> TREE_FP BIOM_FP

Every tip named after an OTU is renamed to ``<otu>_<sample>[_<sample>...]``,
listing each sample that the table's ``nonzero()`` reports for that OTU.
Names of non-tip nodes are cleared and the resulting tree is printed.
"""
import sys

from skbio import TreeNode
from biom import parse_table

tree_fp = sys.argv[1]
biom_fp = sys.argv[2]

tree = TreeNode.read(tree_fp)

# Close the table file as soon as it has been parsed rather than leaking the
# handle returned by a bare open() call.
with open(biom_fp, 'r') as biom_fh:
    biom = parse_table(biom_fh)

# Build "<otu>_<sample>_<sample>..." by appending one sample id per nonzero
# entry; the first entry for an OTU starts from the OTU id itself.
new_names = {}
for otu, sample in biom.nonzero():
    previous = new_names.get(otu, otu)
    new_names[otu] = '{0}_{1}'.format(previous, sample)

for tip in tree.tips():
    # NOTE: a tip whose name has no nonzero entry in the table raises
    # KeyError here.
    otu = tip.name
    tip.name = new_names[otu]

for node in tree.non_tips():
    node.name = None

# print(...) with one argument gives the same output on Python 2 and 3; the
# statement form ``print tree`` does not compile on Python 3.
print(tree)
def load_biom(biom_filepath):
    """Open ``biom_filepath`` and return the table parsed from it.

    The file is opened in text mode, passed to ``parse_table``, and closed
    before the parsed table is handed back to the caller.
    """
    with open(biom_filepath) as handle:
        return parse_table(handle)
#!/usr/bin/env python
"""Expand a biom table into one FASTA-style record per counted sequence.

Usage: <script> BIOM_FP

For every nonzero (sequence, sample) entry, ``int(count)`` records are
printed. Each record is ``><sample>_<n>`` followed by the sequence, where
``n`` is a running per-sample counter.
"""
import sys

from biom import parse_table

# Context manager so the input handle is closed once the table is parsed.
with open(sys.argv[1], 'r') as biom_fh:
    deblur_biom = parse_table(biom_fh)

# Running number of records emitted so far for each sample id.
sample_counts = dict.fromkeys(deblur_biom.ids(axis='sample'), 0)

for seq, sample in deblur_biom.nonzero():
    # The stored value is truncated to an int and used as a repeat count.
    for _ in range(int(deblur_biom.get_value_by_ids(seq, sample))):
        sample_counts[sample] += 1
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(">{0}_{1}\n{2}\n".format(sample, sample_counts[sample], seq))