def main():
    """Turn a BIOM file with a taxonomy into a taxtable and seqinfo.

    Command-line arguments (all positional):
        biom      input BIOM file, parsed as JSON
        taxtable  output file handed to ``root.write_taxtable``
        seqinfo   output CSV with the columns ``seqname`` and ``tax_id``

    One seqinfo row is written per entry of the BIOM ``rows`` list, using
    the tax_id of the last element of that row's lineage.  Every tax_id
    not yet present in the tree is added beneath its parent node.
    """
    logging.basicConfig(level=logging.INFO,
                        format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        description=
        "Turn a BIOM file with a taxonomy into a taxtable and seqinfo.")

    parser.add_argument('biom',
                        type=argparse.FileType('r'),
                        help='input BIOM file')
    parser.add_argument('taxtable',
                        type=argparse.FileType('w'),
                        help='output taxtable')
    parser.add_argument('seqinfo',
                        type=argparse.FileType('w'),
                        help='output seqinfo')

    args = parser.parse_args()

    log.info('loading biom')
    with args.biom:
        j = json.load(args.biom)

    root = TaxNode('root', root_id, name='Root')
    root.ranks = rank_order

    # Both outputs are closed together at the very end.  Closing seqinfo
    # guarantees the CSV is flushed; closing them together (rather than
    # seqinfo first) keeps things working when both arguments are `-` and
    # therefore share sys.stdout.
    with args.seqinfo, args.taxtable:
        seqinfo = csv.writer(args.seqinfo)
        seqinfo.writerow(('seqname', 'tax_id'))

        log.info('determining tax_ids')
        for leaf in j['rows']:
            # Drop nodes containing only rank (e.g. `s__`)
            leaf_taxonomy = [i for i in leaf['metadata']['taxonomy']
                             if i[3:]]
            # Each lineage entry unpacks as (tax_id, node, parent).
            leaf_lineages = list(lineages(leaf_taxonomy))

            if leaf_lineages:
                # The last lineage entry is taken as the row's own tax_id.
                leaf_tax_id = leaf_lineages[-1][0]
            else:
                # NOTE(review): assumes lineages() yields nothing when no
                # rank is classified; such rows are assigned to the root
                # instead of aborting the run with an IndexError.
                log.warning('no classified ranks for %s; assigning to root',
                            leaf['id'])
                leaf_tax_id = root_id

            seqinfo.writerow((leaf['id'], leaf_tax_id))

            for tax_id, node, parent in leaf_lineages:
                if tax_id in root.index:
                    continue
                # node looks like `<rank letter>__<name>`: the first
                # character selects the rank, the text after the 3-char
                # prefix is the name.
                root.get_node(parent).add_child(
                    TaxNode(ranks[node[0]], tax_id, name=node[3:] or node))

        log.info('writing taxtable')
        root.write_taxtable(args.taxtable)
Пример #2
0
def main():
    """Turn a BIOM file with a taxonomy into a taxtable and seqinfo.

    Command-line arguments (all positional):
        biom      input BIOM file, parsed as JSON
        taxtable  output file handed to ``root.write_taxtable``
        seqinfo   output CSV with the columns ``seqname`` and ``tax_id``

    One seqinfo row is written per entry of the BIOM ``rows`` list, using
    the tax_id of the last element of that row's lineage.  Every tax_id
    not yet present in the tree is added beneath its parent node.
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        description="Turn a BIOM file with a taxonomy into a taxtable and seqinfo.")

    parser.add_argument(
        'biom', type=argparse.FileType('r'), help='input BIOM file')
    parser.add_argument(
        'taxtable', type=argparse.FileType('w'), help='output taxtable')
    parser.add_argument(
        'seqinfo', type=argparse.FileType('w'), help='output seqinfo')

    args = parser.parse_args()

    log.info('loading biom')
    with args.biom:
        j = json.load(args.biom)

    root = TaxNode('root', root_id, name='Root')
    root.ranks = rank_order

    # Both outputs are closed together at the very end.  Closing seqinfo
    # guarantees the CSV is flushed; closing them together (rather than
    # seqinfo first) keeps things working when both arguments are `-` and
    # therefore share sys.stdout.
    with args.seqinfo, args.taxtable:
        seqinfo = csv.writer(args.seqinfo)
        seqinfo.writerow(('seqname', 'tax_id'))

        log.info('determining tax_ids')
        for leaf in j['rows']:
            # Drop nodes containing only rank (e.g. `s__`)
            leaf_taxonomy = [
                i for i in leaf['metadata']['taxonomy'] if i[3:]]
            # Each lineage entry unpacks as (tax_id, node, parent).
            leaf_lineages = list(lineages(leaf_taxonomy))

            if leaf_lineages:
                # The last lineage entry is taken as the row's own tax_id.
                leaf_tax_id = leaf_lineages[-1][0]
            else:
                # NOTE(review): assumes lineages() yields nothing when no
                # rank is classified; such rows are assigned to the root
                # instead of aborting the run with an IndexError.
                log.warning(
                    'no classified ranks for %s; assigning to root',
                    leaf['id'])
                leaf_tax_id = root_id

            seqinfo.writerow((leaf['id'], leaf_tax_id))

            for tax_id, node, parent in leaf_lineages:
                if tax_id in root.index:
                    continue
                # node looks like `<rank letter>__<name>`: the first
                # character selects the rank, the text after the 3-char
                # prefix is the name.
                root.get_node(parent).add_child(
                    TaxNode(ranks[node[0]], tax_id, name=node[3:] or node))

        log.info('writing taxtable')
        root.write_taxtable(args.taxtable)