Пример #1
0
def load_BIOM(table, informat='json', v=1):
    """
    Load a BIOM table from a file on disk. Default format is 'json'.

    Parameters
    ----------
    table : str
        Path of the file holding the BIOM table.
    informat : str, optional
        Format of the file, either 'json' (default) or 'tsv'.
    v : int or bool, optional
        When truthy, print a confirmation of the accepted input format.

    Returns
    -------
    biom.table.Table
        The table built by ``Table.from_json`` or ``Table.from_tsv``.

    Raises
    ------
    ValueError
        If `informat` is not one of the supported formats.
    """
    informats = ['json', 'tsv']
    if informat not in informats:
        # Fail right away: carrying on would only end with an unbound
        # result variable at the return statement.
        raise ValueError("Please specify a valid BIOM input format. "
                         "Currently we support: '%s'."
                         % "', '".join(informats))
    if v:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\nSpecified BIOM input format '%s' - ok!" % (informat))

    # Imported after validation so that a bad format is reported before
    # any parsing machinery is loaded.
    import json
    from biom.table import Table

    if informat == 'json':
        with open(table) as data_file:
            data = json.load(data_file)
        return Table.from_json(data)

    # informat == 'tsv': read from `table` (the path argument) and let the
    # context manager close the handle even if parsing fails.
    with open(table) as tsv:
        return Table.from_tsv(tsv, obs_mapping=None, sample_mapping=None,
                              process_func=lambda x: x)
Пример #2
0
def BIOM_tsv_to_R_transpose(in_tsv, out_csv):
    """
    Parse a biom table in tsv format and transpose it for input into R.

    The table read from `in_tsv` is transposed and written, still as tsv, to
    'transposed.tsv' in the current working directory. That intermediate file
    is then read back: its first line is dropped, '#OTU ID' is renamed to
    'Sample' on lines starting with '#', every tab becomes a comma, and the
    result is written to `out_csv`.

    Parameters
    ----------
    in_tsv : str
        Path of the tab-separated BIOM table to read.
    out_csv : str
        Path of the comma-separated file to write.

    Returns
    -------
    None
    """

    from biom import Table

    # Context managers make sure every handle is closed, also on errors.
    with open(in_tsv) as tsv:
        intable = Table.from_tsv(tsv, obs_mapping=None, sample_mapping=None,
                                 process_func=lambda x: x)
    outtable = intable.transpose()
    with open("transposed.tsv", "w") as out:
        out.write(outtable.to_tsv(header_key=None, header_value=None))

    # refine
    converted = []
    with open('transposed.tsv', 'r') as transposed:
        # Drop the first line of the intermediate file (presumably the
        # comment line emitted by to_tsv - confirm against biom's output).
        next(transposed, None)

        for line in transposed:
            if line.startswith('#'):
                # NOTE(review): 'taxomomy' looks like a typo for 'taxonomy',
                # so this branch probably never fires. It is left unchanged
                # on purpose: it trims the header line only, so enabling it
                # would leave the data rows with one column too many.
                if line.strip().endswith('taxomomy'):
                    print("Removing taxonomy")
                    line = ",".join(line.strip().split("\t")[:-1]) + '\n'
                line = line.replace('#OTU ID', 'Sample')
            converted.append(line.replace('\t', ','))

    with open(out_csv, 'w') as result:
        result.write("".join(converted))
0
def convert_table_to_biom(table_f, sample_mapping, obs_mapping,
                          process_func, **kwargs):
    """Build a biom table from a contingency table and return it as JSON

    table_f : tab-delimited table input handed to ``Table.from_tsv``
    sample_mapping : dict of {'sample_id':metadata} or None
    obs_mapping : dict of {'obs_id':metadata} or None
    process_func: a function to transform observation metadata
    kwargs : extra keyword arguments forwarded unchanged to
        ``Table.from_tsv`` (presumably where ``dtype``, the type of the
        table data, is supplied - confirm against callers)

    Returns whatever ``Table.to_json`` produces when given ``generatedby()``.
    """
    # Note the argument order expected by from_tsv: observations first.
    parsed = Table.from_tsv(
        table_f, obs_mapping, sample_mapping, process_func, **kwargs
    )
    generator_tag = generatedby()
    return parsed.to_json(generator_tag)
Пример #4
0
def convert_table_to_biom(table_f, sample_mapping, obs_mapping, process_func,
                          **kwargs):
    """Turn a contingency table into the JSON form of a biom table

    table_f : tab-delimited table input passed on to ``Table.from_tsv``
    sample_mapping : dict of {'sample_id':metadata} or None
    obs_mapping : dict of {'obs_id':metadata} or None
    process_func: a function to transform observation metadata
    kwargs : additional keyword arguments passed straight through to
        ``Table.from_tsv`` (presumably including ``dtype``, the type of the
        table data - confirm against callers)

    The return value is the output of ``Table.to_json`` called with the
    value of ``generatedby()``.
    """
    # from_tsv takes the observation mapping before the sample mapping.
    positional = (table_f, obs_mapping, sample_mapping, process_func)
    biom_table = Table.from_tsv(*positional, **kwargs)
    return biom_table.to_json(generatedby())
Пример #5
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list or str
        Source of the BIOM table. ``Table.from_hdf5`` is tried on it first;
        if that fails, an object with a ``read`` method is parsed as JSON or
        tab-delimited text, a list is joined and parsed as JSON (falling back
        to tab-delimited lines), and anything else is handed to
        ``json.loads``.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. After the
    requested axis has been subset by `ids`, the opposite axis is filtered
    with a predicate that is true when any of its values is non-zero.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception must be raised; merely constructing it is a no-op.
        raise UnknownAxisError(axis)

    try:
        # HDF5 is attempted first; other inputs fall through to the
        # text-based parsers below.
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        pass

    if hasattr(fp, 'read'):
        old_pos = fp.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = fp.read(1)
        while c.isspace():
            c = fp.read(1)
        # Rewind so the chosen parser sees the input from where it started.
        fp.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(fp, object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines instead.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp, object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
Пример #6
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list or str
        Source of the BIOM table. ``Table.from_hdf5`` is tried on it first;
        if that fails, an object with a ``read`` method is parsed as JSON
        (falling back to tab-delimited text), a list is joined and parsed as
        JSON (falling back to tab-delimited lines), and anything else is
        handed to ``json.loads``.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. After the
    requested axis has been subset by `ids`, the opposite axis is filtered
    with a predicate that is true when any of its values is non-zero.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception must be raised; merely constructing it is a no-op.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        # Best effort: any failure means "not HDF5", so fall through to the
        # text parsers. Unlike a bare except, this lets KeyboardInterrupt
        # and SystemExit propagate.
        pass

    if hasattr(fp, 'read'):
        # Remember the position so the TSV fallback can re-read the input.
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines instead.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    if ids is not None:
        def subset_ids(data, id_, md):
            return id_ in ids

        def gt_zero(vals, id_, md):
            return np.any(vals)

        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
Пример #7
0
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `fp`

    Parameters
    ----------
    fp : file like, list or str
        Source of the BIOM table. ``Table.from_hdf5`` is tried on it first;
        if that fails, an object with a ``read`` method is parsed as JSON
        (falling back to tab-delimited text), a list is joined and parsed as
        JSON (falling back to tab-delimited lines), and anything else is
        handed to ``json.loads``.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at fp

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. After the
    requested axis has been subset by `ids`, the opposite axis is filtered
    with a predicate that is true when any of its values is non-zero.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # Constructing the exception without raising it does nothing.
        raise UnknownAxisError(axis)

    try:
        return Table.from_hdf5(fp, ids=ids, axis=axis)
    except Exception:
        # Best effort: a failure here is taken to mean the input is not
        # HDF5. Catching Exception rather than everything keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        pass

    if hasattr(fp, 'read'):
        # Keep the starting offset so the TSV fallback can rewind.
        old_pos = fp.tell()
        try:
            t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
        except ValueError:
            fp.seek(old_pos)
            t = Table.from_tsv(fp, None, None, lambda x: x)
    elif isinstance(fp, list):
        try:
            t = Table.from_json(json.loads(''.join(fp)),
                                input_is_dense=input_is_dense)
        except ValueError:
            # The joined lines were not JSON; parse them as tab-delimited.
            t = Table.from_tsv(fp, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)

    if ids is not None:
        def subset_ids(data, id_, md):
            return id_ in ids

        def gt_zero(vals, id_, md):
            return np.any(vals)

        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t
Пример #8
0
def parse_biom_table(file_obj, ids=None, axis='sample', input_is_dense=False):
    r"""Parses the biom table stored in `file_obj`

    Parameters
    ----------
    file_obj : file-like object, or list
        file-like object storing the BIOM table (tab-delimited or JSON), or
        a list of lines of the BIOM table in tab-delimited or JSON format.
        ``Table.from_hdf5`` is tried on it first; an input that is neither
        file-like nor a list is handed to ``json.loads``.
    ids : iterable
        The sample/observation ids of the samples/observations that we need
        to retrieve from the biom table
    axis : {'sample', 'observation'}, optional
        The axis to subset on
    input_is_dense : boolean
        Indicates if the BIOM table is dense or sparse. Valid only for JSON
        tables.

    Returns
    -------
    Table
        The BIOM table stored at file_obj

    Raises
    ------
    UnknownAxisError
        If `axis` is neither 'sample' nor 'observation'.

    Notes
    -----
    Subsetting from the BIOM table is only supported in one axis. After the
    requested axis has been subset by `ids`, the opposite axis is filtered
    with a predicate that is true when any of its values is non-zero.

    Examples
    --------
    Parse a hdf5 biom table

    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f) # doctest: +SKIP

    Parse a hdf5 biom table subsetting observations
    >>> from h5py import File # doctest: +SKIP
    >>> from biom.parse import parse_biom_table
    >>> f = File('rich_sparse_otu_table_hdf5.biom') # doctest: +SKIP
    >>> t = parse_biom_table(f, ids=["GG_OTU_1"],
    ...                      axis='observation') # doctest: +SKIP
    """
    if axis not in ['observation', 'sample']:
        # The exception must be raised; merely constructing it is a no-op.
        raise UnknownAxisError(axis)

    try:
        # HDF5 is attempted first; other inputs fall through to the
        # text-based parsers below.
        return Table.from_hdf5(file_obj, ids=ids, axis=axis)
    except (ValueError, RuntimeError):
        pass

    if hasattr(file_obj, 'read'):
        old_pos = file_obj.tell()
        # Read in characters until first non-whitespace
        # If it is a {, then this is (most likely) JSON
        c = file_obj.read(1)
        while c.isspace():
            c = file_obj.read(1)
        # Rewind so the chosen parser sees the input from where it started.
        file_obj.seek(old_pos)
        if c == '{':
            t = Table.from_json(json.load(file_obj,
                                          object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        else:
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    elif isinstance(file_obj, list):
        try:
            t = Table.from_json(json.loads(''.join(file_obj),
                                           object_pairs_hook=OrderedDict),
                                input_is_dense=input_is_dense)
        except ValueError:
            # Not JSON: treat the list as tab-delimited lines instead.
            t = Table.from_tsv(file_obj, None, None, lambda x: x)
    else:
        t = Table.from_json(json.loads(file_obj,
                                       object_pairs_hook=OrderedDict),
                            input_is_dense=input_is_dense)

    def subset_ids(data, id_, md):
        return id_ in ids

    def gt_zero(vals, id_, md):
        return np.any(vals)

    if ids is not None:
        t.filter(subset_ids, axis=axis)
        # Then filter the opposite axis on whether any value is non-zero.
        axis = 'observation' if axis == 'sample' else 'sample'
        t.filter(gt_zero, axis=axis)

    return t