Пример #1
0
def parse_biom_table(fp, input_is_dense=False):
    """Parse a BIOM table from `fp`, trying HDF5 first and JSON second.

    Parameters
    ----------
    fp : object
        First handed to ``Table.from_hdf5``. If that fails, `fp` is treated
        as JSON: an object with a ``read`` attribute is read with
        ``json.load``, a list is joined into one string, and anything else
        is passed directly to ``json.loads``.
    input_is_dense : bool, optional
        Forwarded to ``Table.from_json``; not used on the HDF5 path.

    Returns
    -------
    Table
        The table built by ``Table.from_hdf5`` or ``Table.from_json``.
    """
    try:
        return Table.from_hdf5(fp)
    except Exception:
        # Best effort: any ordinary failure means "not HDF5", so fall back to
        # JSON. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        pass

    if hasattr(fp, 'read'):
        payload = json.load(fp)
    elif isinstance(fp, list):
        payload = json.loads(''.join(fp))
    else:
        payload = json.loads(fp)

    return Table.from_json(payload, input_is_dense=input_is_dense)
Пример #2
0
def load_BIOM(table, informat='json', v=1):
    """
    Load a BIOM table from the file at path `table`.

    Parameters:
        table: path of the file to open.
        informat: input format, either 'json' (default) or 'tsv'.
        v: when truthy, print a confirmation of the selected format.

    Returns the ``biom.table.Table`` built from the file.

    Raises ValueError if `informat` is not one of the supported formats.
    """
    import json

    informats = ['json', 'tsv']
    if informat not in informats:
        print("\nPlease specify a valid BIOM input format. Currently we support: '%s'.\n" % "', '".join(informats))
        # Previously execution continued and failed later on an unbound
        # local; fail here with an explicit error instead.
        raise ValueError("unsupported BIOM input format: %r" % (informat,))
    if v:
        print("\nSpecified BIOM input format '%s' - ok!" % (informat))

    # Imported after validation so a bad `informat` is reported even when
    # the biom package is unavailable.
    from biom.table import Table

    # The context manager closes the handle even if parsing raises.
    with open(table) as data_file:
        if informat == 'json':
            return Table.from_json(json.load(data_file))
        # informat == 'tsv'; identity process_func leaves metadata untouched.
        return Table.from_tsv(data_file, obs_mapping=None,
                              sample_mapping=None,
                              process_func=lambda x: x)
Пример #3
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file-like object, list, or str
        Source of the BIOM table. It is first handed to ``Table.from_hdf5``;
        if that raises ``ValueError`` or ``RuntimeError`` it is read as text:
        a file-like object (JSON or tab-delimited), a list of lines (JSON or
        tab-delimited), or otherwise a JSON string.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. For
    text inputs, after subsetting on `axis` the opposite axis is filtered
    with ``np.any`` over its values.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception used to be constructed but never raised.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        # Not readable as HDF5; fall through to the text formats.
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = fp.read(1)
        while c.isspace():
            c = fp.read(1)
        # Rewind so the chosen parser sees the input from where we started.
        fp.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(fp, object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp, object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
Пример #4
0
from __future__ import division
import argparse
from biom.table import Table
import json

# Merge one or more JSON BIOM profiles given on the command line into a
# single JSON BIOM file.
parser = argparse.ArgumentParser()
# required=True: without it a missing -i leaves args.input as None and the
# script dies with a TypeError instead of printing a usage message.
parser.add_argument('-i', '--input', type=argparse.FileType('r'), nargs='+',
                    required=True, help='profile list separated by space')
parser.add_argument('-biom_out', default='combined.biom', help='Output biom file name')
args = parser.parse_args()

biomFile = args.biom_out
biomList = list(args.input)


def _load_profile(handle):
    """Parse one JSON BIOM profile from `handle`, then close the handle."""
    try:
        return Table.from_json(json.load(handle))
    finally:
        handle.close()


if len(biomList) <= 1:
    print('Found only one biom profile, will still give you a (brand) new one.')
    biomProfile = _load_profile(biomList[0])
else:
    print('Found {0} biom profiles under.'.format(len(biomList)))
    biomProfile = _load_profile(biomList[0])

    for f in biomList[1:]:
        biomProfile = biomProfile.concat([_load_profile(f)])

    # Sort before opening the output so a failure here does not leave a
    # truncated output file behind.
    biomProfile = biomProfile.sort()

with open(biomFile, 'w') as f:
    biomProfile.to_json('Generated_by_almighty_metaSeq', f)

if len(biomList) > 1:
    print('Concatenated {0} profiles into {1}.'.format(len(biomList), biomFile))
Пример #5
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file-like object, list, or str
        Source of the BIOM table. It is first handed to ``Table.from_hdf5``;
        if that fails it is read as text: a file-like object (JSON, falling
        back to tab-delimited), a list of lines (JSON, falling back to
        tab-delimited), or otherwise a JSON string.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. For
    text inputs, after subsetting on `axis` the opposite axis is filtered
    with ``np.any`` over its values.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception used to be constructed but never raised.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        # Best effort: any ordinary failure means "not HDF5", so fall back to
        # the text formats. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: rewind and re-read the same input as tab-delimited.
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def any_nonzero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(any_nonzero, axis=axis)

    return t
Пример #6
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file-like object, list, or str
        Source of the BIOM table. It is first handed to ``Table.from_hdf5``;
        if that fails it is read as text: a file-like object (JSON, falling
        back to tab-delimited), a list of lines (JSON, falling back to
        tab-delimited), or otherwise a JSON string.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. For
    text inputs, after subsetting on `axis` the opposite axis is filtered
    with ``np.any`` over its values.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception used to be constructed but never raised.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        # Best effort: any ordinary failure means "not HDF5", so fall back to
        # the text formats. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: rewind and re-read the same input as tab-delimited.
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def any_nonzero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(any_nonzero, axis=axis)

    return t
Пример #7
0
def parse_biom_table(file_obj, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `file_obj`

    Parameters
    ----------
    file_obj : file-like object, or list
        file-like object storing the BIOM table (tab-delimited or JSON), or
        a list of lines of the BIOM table in tab-delimited or JSON format.
        It is first handed to ``Table.from_hdf5``; anything that is neither
        file-like nor a list is finally passed to ``json.loads``.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at file_obj

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. For
    text inputs, after subsetting on `axis` the opposite axis is filtered
    with ``np.any`` over its values.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception used to be constructed but never raised.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(file_obj, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        # Not readable as HDF5; fall through to the text formats.
        pass

    if hasattr(file_obj, 'read'):
        old_pos = file_obj.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = file_obj.read(1)
        while c.isspace():
            c = file_obj.read(1)
        # Rewind so the chosen parser sees the input from where we started.
        file_obj.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(file_obj,
                                          object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    elif isinstance(file_obj, list):
        try:
            t = Table.from_json(json.loads(''.join(file_obj),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines.
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(file_obj,
                                       object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t