def load_BIOM(table, informat='json', v=1):
    """
    Load a BIOM table from a file in BIOM format. Default format is 'json'.

    Parameters
    ----------
    table : str
        Path of the file to read.
    informat : {'json', 'tsv'}, optional
        Format of the file at `table`.
    v : int or bool, optional
        If truthy, print a confirmation once `informat` has been accepted.

    Returns
    -------
    biom.table.Table or None
        The parsed table, or None (after printing a hint) when `informat`
        is not one of the supported formats.
    """
    import json

    informats = ['json', 'tsv']
    if informat not in informats:
        # Single-argument print(...) is valid both as a print statement and
        # as a print function call.
        print("\nPlease specify a valid BIOM input format. Currently we support: '%s'.\n" % "', '".join(informats))
        # Nothing can be loaded; return explicitly instead of falling through
        # to a `return` of a variable that was never assigned.
        return None

    # Imported only after validation so that an unsupported format is
    # reported without requiring the biom package.
    from biom.table import Table

    if v:
        print("\nSpecified BIOM input format '%s' - ok!" % (informat))

    if informat == 'json':
        with open(table) as data_file:
            data = json.load(data_file)
        return Table.from_json(data)

    # informat == 'tsv': read from `table` (the original referenced an
    # undefined name here) and make sure the handle is closed even if
    # parsing fails.
    with open(table) as tsv:
        return Table.from_tsv(tsv, obs_mapping=None, sample_mapping=None,
                              process_func=lambda x: x)
def BIOM_tsv_to_R_transpose(in_tsv, out_csv):
    """
    Parse a biom table in tsv format and transpose it for input into R

    Parameters
    ----------
    in_tsv : str
        Path of the tab-separated BIOM table to read.
    out_csv : str
        Path of the comma-separated file to write.

    Notes
    -----
    The transposed table is first written to 'transposed.tsv' in the current
    working directory and then read back; that intermediate file is left in
    place. While converting, the first line of the intermediate file is
    dropped, '#OTU ID' in '#'-prefixed lines is renamed to 'Sample', and all
    tabs become commas.
    """
    from biom import Table

    with open(in_tsv) as tsv:
        intable = Table.from_tsv(tsv, obs_mapping=None, sample_mapping=None,
                                 process_func=lambda x: x)

    outtable = intable.transpose()
    with open("transposed.tsv", "w") as out:
        out.write(outtable.to_tsv(header_key=None, header_value=None))

    # refine
    converted = []
    with open('transposed.tsv', 'r') as transposed:
        # Skip the first line (presumably the comment line biom puts at the
        # top of its tsv output - TODO confirm).
        next(transposed, None)
        for line in transposed:
            if line.startswith('#'):
                # NOTE(review): 'taxomomy' is kept exactly as it was; it looks
                # like a typo for 'taxonomy', so this branch likely never
                # fires. Correcting it here alone would trim only the header
                # and leave the data rows one column wider - confirm intent
                # before changing.
                if line.strip().endswith('taxomomy'):
                    print("Removing taxonomy")
                    line = ",".join(line.strip().split("\t")[:-1]) + '\n'
                line = line.replace('#OTU ID', 'Sample')
            converted.append(line.replace('\t', ','))

    with open(out_csv, 'w') as csv_out:
        csv_out.write("".join(converted))
def convert_table_to_biom(table_f, sample_mapping, obs_mapping, process_func,
                          **kwargs):
    """Build a biom table from a contingency table and serialise it

    table_f : the contingency table, handed to Table.from_tsv
    sample_mapping : dict of {'sample_id':metadata} or None
    obs_mapping : dict of {'obs_id':metadata} or None
    process_func: a function to transform observation metadata
    kwargs : forwarded unchanged to Table.from_tsv (e.g. dtype, the type of
        table data)

    Returns the result of Table.to_json for the parsed table, called with
    generatedby().
    """
    # Argument order matters: the observation mapping is passed ahead of the
    # sample mapping, the reverse of this function's own parameter order.
    parsed = Table.from_tsv(table_f, obs_mapping, sample_mapping,
                            process_func, **kwargs)
    creator = generatedby()
    return parsed.to_json(creator)
def convert_table_to_biom(table_f, sample_mapping, obs_mapping, process_func,
                          **kwargs):
    """Turn a contingency table into the JSON form of a biom table

    table_f : the contingency table, passed straight to Table.from_tsv
    sample_mapping : dict of {'sample_id':metadata} or None
    obs_mapping : dict of {'obs_id':metadata} or None
    process_func: a function to transform observation metadata
    kwargs : extra keyword arguments for Table.from_tsv (e.g. dtype, the
        type of table data)

    Returns what Table.to_json yields when given generatedby().
    """
    # The table is parsed first and generatedby() is evaluated afterwards;
    # obs_mapping deliberately precedes sample_mapping in the from_tsv call.
    return Table.from_tsv(
        table_f, obs_mapping, sample_mapping, process_func, **kwargs
    ).to_json(generatedby())
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list, or str
        File alike object storing the BIOM table, a list of lines of the
        table, or a string holding the JSON document
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception was previously constructed but never raised, so an
        # invalid axis went unnoticed.
        raise UnknownAxisError(axis)

    # Try HDF5 first; these two errors mean "not usable as HDF5 here", so
    # fall through to the text formats.
    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = fp.read(1)
        while c.isspace():
            c = fp.read(1)

        # Rewind so the chosen parser sees the content from where we started.
        fp.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(fp, object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp, object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        # Keep the requested ids, then filter the opposite axis to the
        # entries that still have any non-zero value.
        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list, or str
        File alike object storing the BIOM table, a list of lines of the
        table, or a string holding the JSON document
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception was previously constructed but never raised, so an
        # invalid axis went unnoticed.
        raise UnknownAxisError(axis)

    # Best-effort HDF5 probe: any ordinary failure means "try the text
    # formats instead". `Exception` rather than a bare except, so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: rewind and parse the same content as tab-delimited.
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    if ids is not None:
        # Keep the requested ids, then filter the opposite axis to the
        # entries that still have any non-zero value.
        def subset_ids(data, id_, md):
            return id_ in ids

        def gt_zero(vals, id_, md):
            return np.any(vals)

        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list, or str
        File alike object storing the BIOM table, a list of lines of the
        table, or a string holding the JSON document
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # Actually raise: the exception object used to be created and then
        # discarded, letting an invalid axis through.
        raise UnknownAxisError(axis)

    # HDF5 is attempted first on a best-effort basis. Catching `Exception`
    # keeps the fall-through for ordinary failures while letting
    # KeyboardInterrupt / SystemExit propagate (a bare except hid those).
    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            # JSON parsing failed: rewind and treat the content as
            # tab-delimited.
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        joined = ''.join(fp)
        try:
            t = Table.from_json(json.loads(joined),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    if ids is not None:
        # First restrict `axis` to the requested ids, then filter the other
        # axis to entries that still hold any non-zero value.
        def subset_ids(data, id_, md):
            return id_ in ids

        def gt_zero(vals, id_, md):
            return np.any(vals)

        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
def parse_biom_table(file_obj, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `file_obj`

    Parameters
    ----------
    file_obj : file-like object, list, or str
        file-like object storing the BIOM table (tab-delimited or JSON), a
        list of lines of the BIOM table in tab-delimited or JSON format, or
        a string holding the JSON document
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at file_obj

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception was previously constructed but never raised, so an
        # invalid axis went unnoticed.
        raise UnknownAxisError(axis)

    # Try HDF5 first; these two errors mean "not usable as HDF5 here", so
    # fall through to the text formats.
    try:
        return Table.from_hdf5(file_obj, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        pass

    if hasattr(file_obj, 'read'):
        old_pos = file_obj.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = file_obj.read(1)
        while c.isspace():
            c = file_obj.read(1)

        # Rewind so the chosen parser sees the content from where we started.
        file_obj.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(file_obj,
                                          object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    elif isinstance(file_obj, list):
        try:
            t = Table.from_json(json.loads(''.join(file_obj),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(file_obj,
                                       object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        # Keep the requested ids, then filter the opposite axis to the
        # entries that still have any non-zero value.
        t.filter(subset_ids, axis=axis)
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t