def main():
    """Convert a BIOM file carrying taxonomy metadata into a taxtable and seqinfo.

    Command-line arguments (all positional):
        biom      input BIOM file, parsed as JSON
        taxtable  output file the taxonomy tree is written to
        seqinfo   output CSV with one ``seqname,tax_id`` row per BIOM row

    Each entry of the BIOM ``rows`` list is expected to provide ``id`` and
    ``metadata.taxonomy`` (a list of strings such as ``s__name``).  Levels
    that consist of the rank prefix only are ignored.  A row with no named
    level left is assigned to the root node instead of aborting the run.
    """
    logging.basicConfig(level=logging.INFO,
                        format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        description=
        "Turn a BIOM file with a taxonomy into a taxtable and seqinfo.")

    parser.add_argument('biom',
                        type=argparse.FileType('r'),
                        help='input BIOM file')
    parser.add_argument('taxtable',
                        type=argparse.FileType('w'),
                        help='output taxtable')
    parser.add_argument('seqinfo',
                        type=argparse.FileType('w'),
                        help='output seqinfo')

    args = parser.parse_args()

    log.info('loading biom')
    with args.biom:
        j = json.load(args.biom)

    root = TaxNode('root', root_id, name='Root')
    root.ranks = rank_order

    # Close (and thereby flush) the seqinfo file deterministically, the same
    # way the biom and taxtable handles are managed.
    with args.seqinfo:
        seqinfo = csv.writer(args.seqinfo)
        seqinfo.writerow(('seqname', 'tax_id'))

        log.info('determining tax_ids')
        for leaf in j['rows']:
            # Drop nodes containing only rank (e.g. `s__`)
            leaf_taxonomy = [i for i in leaf['metadata']['taxonomy'] if i[3:]]
            # Each lineage entry unpacks to (tax_id, node, parent).
            leaf_lineages = list(lineages(leaf_taxonomy))

            if not leaf_lineages:
                # Nothing below the root is named for this row; indexing
                # leaf_lineages[-1] would raise IndexError.
                log.warning('no taxonomy for %s; assigning to root',
                            leaf['id'])
                seqinfo.writerow((leaf['id'], root_id))
                continue

            # The last lineage entry is the most specific classification.
            seqinfo.writerow((leaf['id'], leaf_lineages[-1][0]))

            for tax_id, node, parent in leaf_lineages:
                if tax_id in root.index:
                    # Already added while processing an earlier row.
                    continue
                # The first character of the node selects its rank; the text
                # after the three-character prefix is the node name.
                root.get_node(parent).add_child(
                    TaxNode(ranks[node[0]], tax_id, name=node[3:] or node))

    log.info('writing taxtable')
    with args.taxtable:
        root.write_taxtable(args.taxtable)
Пример #2
0
    def setUp(self):
        """Build the fixture taxonomy used by the tests.

        Layout: a root node (tax_id '1') with two genus children, g1
        (tax_id '2') and g2 (tax_id '3').  g1 holds species s1 and s2;
        g2 holds species s3 and s4.  Sequence ids are attached to g1
        ({'s1', 's2'}) and to species s3 ({'s3', 's4'}).
        """
        def species(label):
            # Species nodes in this fixture share one string for name and tax_id.
            return TaxNode(rank='species', name=label, tax_id=label)

        tree = TaxNode(rank='root', name='root', tax_id='1')
        self.taxonomy = tree
        tree.ranks = ['root', 'class', 'genus', 'species']

        # First genus: sequences are attached to the genus node itself.
        first_genus = TaxNode(rank='genus', name='g1', tax_id='2')
        self.g1 = first_genus
        first_genus.sequence_ids = {'s1', 's2'}
        tree.add_child(first_genus)
        for label in ('s1', 's2'):
            first_genus.add_child(species(label))

        # Second genus: sequences are attached to one of its species.
        second_genus = TaxNode(rank='genus', name='g2', tax_id='3')
        tree.add_child(second_genus)
        third_species = species('s3')
        third_species.sequence_ids = {'s3', 's4'}
        second_genus.add_child(third_species)
        second_genus.add_child(species('s4'))