Пример #1
0
def parse_biom(biom):
    """Load a BIOM file and build an HTML summary of its contents.

    Args:
        biom: path of the BIOM file to open.

    Returns:
        A ``(df, summary_table)`` tuple: ``df`` is the result of the parsed
        table's ``to_dataframe()``; ``summary_table`` is an HTML table
        (id ``otuTable``) with one row holding the sample count, the
        observation count, the table sum and the table density, in which
        every newline is followed by ten spaces.
    """
    with open(biom) as handle:
        table = parse_table(handle)
        df = table.to_dataframe()

    header = ["Samples", "OTUs", "OTU Total Count", "OTU Table Density"]
    values = [
        table.length("sample"),
        table.length("observation"),
        table.sum(),
        "{:f}".format(table.get_table_density()),
    ]

    # The header is loaded as data row 0, promoted to the column labels and
    # then removed from the body.
    stats = pd.DataFrame([header, values])
    stats = stats.rename(columns=stats.iloc[0])
    stats = stats.reindex(stats.index.drop(0))

    html = stats.to_html(
        index=False,
        bold_rows=False,
        classes=["table", "table-bordered"],
        table_id="otuTable",
    )

    # Indent continuation lines so the markup can be embedded in rst.
    indent = " " * 10
    return df, html.replace("\n", "\n" + indent)
Пример #2
0
    def read_biom_file(self):
        """Parse the BIOM file named by ``self.args.biom_file``.

        The leftover ``pdb.set_trace()`` breakpoint, which halted every call
        waiting for an interactive debugger, has been removed.

        Returns:
            The table produced by ``parse_table`` (previously the parsed
            table was discarded and ``None`` was returned).
        """
        with open(self.args.biom_file) as f:
            return parse_table(f)
Пример #3
0
def mothur_counts_to_biom(mothur_f):
    """Parse a mothur count table and drop the entry whose id is 'total'.

    Args:
        mothur_f: open file-like object (or lines) accepted by
            ``parse_table``.

    Returns:
        The result of ``Table.filter`` after removing the id ``'total'``,
        with the table ``type`` set to ``'OTU table'``. The filter runs with
        ``Table.filter``'s default axis.
    """
    mothur_biom = parse_table(mothur_f)
    mothur_biom.type = u'OTU table'

    # Compare with ==, not `in`: `id_ in 'total'` is a substring test and
    # would also discard ids such as 't', 'to', 'tal' or the empty string.
    # invert=True removes the ids for which the predicate is true.
    filter_biom = mothur_biom.filter(
        lambda val, id_, metadata: id_ == 'total', invert=True)

    return filter_biom
Пример #4
0
def get_biom_table_from_file(path):
    """Open ``path`` and return the table parsed from it.

    Args:
        path: path of the BIOM file to read.

    Returns:
        The table returned by ``biom.parse_table``. Errors from ``open`` or
        the parser propagate to the caller; the function never returns
        ``None`` (the former trailing ``return None`` was unreachable).
    """
    # biom is imported lazily with warnings silenced so that any warning
    # emitted while importing it does not reach the user.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from biom import parse_table

    with open(path) as f:
        return parse_table(f)
Пример #5
0
def mothur_counts_to_biom(mothur_f):
    """Parse a mothur count table and drop the entry whose id is 'total'.

    Args:
        mothur_f: open file-like object (or lines) accepted by
            ``parse_table``.

    Returns:
        The result of ``Table.filter`` after removing the id ``'total'``,
        with the table ``type`` set to ``'OTU table'``. The filter runs with
        ``Table.filter``'s default axis.
    """
    mothur_biom = parse_table(mothur_f)
    mothur_biom.type = u'OTU table'

    # Compare with ==, not `in`: `id_ in 'total'` is a substring test and
    # would also discard ids such as 't', 'to', 'tal' or the empty string.
    # invert=True removes the ids for which the predicate is true.
    filter_biom = mothur_biom.filter(
        lambda val, id_, metadata: id_ == 'total', invert=True)

    return filter_biom
Пример #6
0
def parse_table_to_biom(table_lines, table_format="tab-delimited",
                        biom_format='otu table'):
    """Turn the lines of an open trait table file into a .biom table object.

    table_lines -- lines of the trait table, handed directly to parse_table
    table_format -- documented as 'tab-delimited' or 'biom'; accepted for
        interface compatibility but not inspected here
    biom_format -- likewise accepted but not used by this function

    """
    biom_table = parse_table(table_lines)
    return biom_table
Пример #7
0
def parse_table_to_biom(table_lines, table_format="tab-delimited",
                        biom_format='otu table'):
    """Turn the lines of an open trait table file into a .biom table object.

    table_lines -- lines of the trait table, handed directly to parse_table
    table_format -- documented as 'tab-delimited' or 'biom'; accepted for
        interface compatibility but not inspected here
    biom_format -- likewise accepted but not used by this function

    """
    biom_table = parse_table(table_lines)
    return biom_table
Пример #8
0
    def test_filter_contaminated_libraries(self):
        """Libraries that are heavily contaminated get filtered out"""

        # Expected table comes from the pre-filtered fixture.
        expected = parse_table(test_biom_file_filt)
        contaminants = {"otu3", "contam1", "contam2", "contam4"}

        observed = filter_contaminated_libraries(
            self.test_biom, contaminants, 0.8)

        self.assertEqual(expected, observed)
Пример #9
0
def _read_biom(fp, transpose=True):
    '''Load a biom table and return its ids, data and feature metadata.

    Parameters
    ----------
    fp : str or file object
        Objects exposing a ``read`` attribute are handed to
        ``biom.parse_table``; anything else is handed to
        ``biom.load_table``.
    transpose : bool (True by default)
        When true, the returned data is ``matrix_data.transpose()``, which
        puts samples on the rows as sklearn and similar packages expect
        (biom tables keep samples on the columns).

    Returns
    -------
    sid : sequence of str
        the sample ids (``table.ids(axis='sample')``)
    fid : sequence of str
        the feature ids (``table.ids(axis='observation')``)
    data : 2d matrix
        ``table.matrix_data``, transposed when requested
    feature_md : pandas.DataFrame
        the feature metadata (if available in the table), as returned by
        ``_get_md_from_biom``
    '''
    logger.debug('loading biom table %s' % fp)

    # File-like input is parsed in place; paths go through the loader.
    loader = biom.parse_table if hasattr(fp, 'read') else biom.load_table
    table = loader(fp)

    sample_ids = table.ids(axis='sample')
    feature_ids = table.ids(axis='observation')
    logger.info('loaded %d samples, %d features'
                % (len(sample_ids), len(feature_ids)))

    matrix = table.matrix_data
    feature_md = _get_md_from_biom(table)

    if not transpose:
        return sample_ids, feature_ids, matrix, feature_md

    logger.debug('transposing table')
    return sample_ids, feature_ids, matrix.transpose(), feature_md
Пример #10
0
#!/usr/bin/env python

import sys

from skbio import TreeNode
from biom import parse_table

# Usage: script.py <tree file> <biom table file>
# Renames every tree tip to "<otu>_<sample>[_<sample>...]" using the samples
# in which that OTU has a nonzero count, clears internal node names and
# prints the resulting tree.
tree_fp = sys.argv[1]
biom_fp = sys.argv[2]

tree = TreeNode.read(tree_fp)

# Close the table file deterministically instead of leaking the handle.
with open(biom_fp, 'r') as biom_fh:
    biom = parse_table(biom_fh)

# Append each sample id to the OTU's name; the first occurrence starts from
# the bare OTU id.
new_names = {}
for otu, sample in biom.nonzero():
    new_names[otu] = '{0}_{1}'.format(new_names.get(otu, otu), sample)

# A tip whose name has no nonzero entry in the table raises KeyError here.
for tip in tree.tips():
    tip.name = new_names[tip.name]

for node in tree.non_tips():
    node.name = None

# Function-call form works on both Python 2 and Python 3.
print(tree)

Пример #11
0
import numpy.core._methods
import numpy.lib.format
import numpy as np
from io import StringIO

# https://github.com/biocore/biom-format
# http://biom-format.org/documentation/quick_usage_examples.html#examples

from biom import parse_table, Table

# Location of the example input files.
dir = "C:/Projekti/PycharmProjects/UMCGMicrobiomeWeb/example_data"
path_tax = dir + '/example1_metaphlan.txt'
# The '/' separator was missing here, which produced
# ".../example_dataexample_metaphlan_merged.tsv".
path_tax_marged = dir + '/example_metaphlan_merged.tsv'

# Parse the single-sample table and dump its descriptive attributes.
with open(path_tax) as f:
    table = parse_table(f)
    print("version ", table.format_version)
    print("id ", table.table_id)
    print("dtype ", table.dtype)
    print("type ", table.type)
    print("version ", table.format_version)
    print("generated_by ", table.generated_by)
    print("table_id ", table.table_id)
    print("ids ", table.ids())  # ['Metaphlan2_Analysis']
    # print("ids ", table.to_json())

    # with open('c:/tmp/metha.json', 'w') as file:
    #     file.write(table.to_json("janko"))

    # Rebind `table` to a normalized copy (inplace=False leaves the parsed
    # table object untouched).
    table = table.norm(axis='sample', inplace=False)
Пример #12
0
    def setUp(self):
        """Parse the shared biom fixture used by the tests"""

        parsed_fixture = parse_table(test_biom_file)
        self.test_biom = parsed_fixture
Пример #13
0
#!/usr/bin/env python

import sys

from skbio import TreeNode
from biom import parse_table

# Usage: script.py <tree file> <biom table file>
# Renames every tree tip to "<otu>_<sample>[_<sample>...]" using the samples
# in which that OTU has a nonzero count, clears internal node names and
# prints the resulting tree.
tree_fp = sys.argv[1]
biom_fp = sys.argv[2]

tree = TreeNode.read(tree_fp)

# Close the table file deterministically instead of leaking the handle.
with open(biom_fp, 'r') as biom_fh:
    biom = parse_table(biom_fh)

# Append each sample id to the OTU's name; the first occurrence starts from
# the bare OTU id.
new_names = {}
for otu, sample in biom.nonzero():
    new_names[otu] = '{0}_{1}'.format(new_names.get(otu, otu), sample)

# A tip whose name has no nonzero entry in the table raises KeyError here.
for tip in tree.tips():
    tip.name = new_names[tip.name]

for node in tree.non_tips():
    node.name = None

# Function-call form works on both Python 2 and Python 3.
print(tree)
Пример #14
0
def load_biom(biom_filepath):
    """Open ``biom_filepath`` and return the table parsed from it."""
    with open(biom_filepath) as biom_fh:
        return parse_table(biom_fh)
Пример #15
0
#!/usr/bin/env python

import sys
from biom import parse_table

# Usage: script.py <deblur biom table>
# Emits one FASTA-style record per counted read: for every nonzero
# (sequence, sample) cell the sequence is printed as many times as its
# integer count, labelled ">sample_<running index within that sample>".

# Close the table file deterministically instead of leaking the handle.
with open(sys.argv[1], 'r') as biom_fh:
    deblur_biom = parse_table(biom_fh)

# Running number of records emitted so far for each sample.
sample_counts = dict.fromkeys(deblur_biom.ids(axis='sample'), 0)

for seq, sample in deblur_biom.nonzero():
    for _ in range(int(deblur_biom.get_value_by_ids(seq, sample))):
        sample_counts[sample] += 1
        # Function-call form works on both Python 2 and Python 3.
        print(">{0}_{1}\n{2}\n".format(sample, sample_counts[sample], seq))