Пример #1
0
def action(args):
    """Write the lineage table for ``args.tax_ids`` to ``args.outfile`` as CSV.

    The database engine is created from ``args.url``; SQL statements are
    echoed when ``args.verbosity`` is greater than 3.  ``args.schema`` is
    passed through to ``Taxonomy``.
    """
    engine = sqlalchemy.create_engine(args.url, echo=args.verbosity > 3)
    try:
        tax = Taxonomy(engine, schema=args.schema)
        writer = csv.writer(args.outfile)
        writer.writerows(tax._get_lineage_table(args.tax_ids))
    finally:
        # release the engine's connections even if the query or write fails
        engine.dispose()
Пример #2
0
def action(args):
    """Write a taxtable for the lineages of the requested tax_ids.

    The tax_ids are taken from the first of ``args.tax_ids``,
    ``args.tax_id_file`` (whitespace-delimited) or ``args.seq_info``
    (CSV with a ``tax_id`` column) that is set.  The process exits with
    an error message when none of them is provided, or when the lineage
    table for the requested tax_ids is empty.  The result is written to
    ``args.outfile`` as fully-quoted CSV.
    """
    log.info('reading tax_ids')
    if args.tax_ids:
        tax_ids = set(args.tax_ids)
    elif args.tax_id_file:
        tax_ids = set(args.tax_id_file.read().split())
    elif args.seq_info:
        tax_ids = {row['tax_id'] for row in csv.DictReader(args.seq_info)}
    else:
        sys.exit('Error: no tax_ids were specified')

    engine = sqlalchemy.create_engine(args.url, echo=args.verbosity > 3)
    try:
        tax = Taxonomy(engine, schema=args.schema)

        rows = tax._get_lineage_table(tax_ids)

        log.info('grouping lineages')
        all_ranks = set()
        taxtable = {}
        # stays None when the lineage table is empty, so the root lookup
        # below is never attempted with an unbound name
        tax_id = None
        for tax_id, grp in groupby(rows, lambda row: row[0]):
            ranks, tax_rows = as_taxtable_rows(grp, seen=taxtable)
            taxtable.update(dict(tax_rows))
            all_ranks |= set(ranks)

        if tax_id is None:
            sys.exit('Error: no lineages were found for the specified tax_ids')

        # guppy requires that tax_id == parent_id for the root node;
        # identify the root node by calculating an arbitrary lineage.
        root_id = tax.lineage(tax_id)['root']
        taxtable[root_id]['parent_id'] = root_id

        sorted_ranks = sorted(all_ranks, key=order_ranks(tax.ranks[::-1]))
    finally:
        # all database access is done (or has failed) by this point
        engine.dispose()

    # guppy requires this column order
    fieldnames = ['tax_id', 'parent_id', 'rank', 'tax_name'] + sorted_ranks

    log.info('sorting lineages')
    # order rows by their value in each rank column; missing or empty
    # ranks sort as ''
    output = sorted(
        taxtable.values(),
        key=lambda row: tuple(row.get(rank) or '' for rank in sorted_ranks)
    )

    log.info('writing taxtable')
    writer = csv.DictWriter(
        args.outfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    writer.writerows(output)
Пример #3
0
def action(args):
    """Dump the nodes and names of one source as YAML documents.

    Selects the rows of the nodes and names tables whose source name
    equals ``args.source_name`` and writes them to ``args.outfile``:
    first one document per node (``type: node``, with a ``names`` list
    when names exist for its tax_id), then one document (``type: name``)
    per tax_id that has names but no node in the selection.
    """
    engine = sqlalchemy.create_engine(args.url, echo=args.verbosity > 2)
    tax = Taxonomy(engine, schema=args.schema)

    with engine.connect() as con:
        cmd = """
        select nodes.*, source.name as source_name
        from {nodes}
        join {source} on nodes.source_id = source.id
        where source.name = {x}
        """.format(x=tax.placeholder, nodes=tax.nodes, source=tax.source)

        result = con.execute(cmd, (args.source_name,))
        keys = list(result.keys())
        nodes = [clean_dict(keys, vals) for vals in result.fetchall()]

        # get the complete lineage for each node, and provide an
        # ordering for all nodes so that children may be placed after
        # parents.
        tax_ids = list(map(itemgetter('tax_id'), nodes))
        lineages = tax._get_lineage_table(tax_ids)
        ordering = {}
        for i, lineage in enumerate(lineages):
            # rank each tax_id by the first lineage row that mentions it
            ordering.setdefault(lineage[1], i)

        nodes = sorted(nodes, key=lambda n: ordering[n['tax_id']])

        cmd = """
        select names.*, source.name as source_name
        from {names}
        join {source} on names.source_id = source.id
        where source.name = {x}
        """.format(x=tax.placeholder, names=tax.names, source=tax.source)

        result = con.execute(cmd, (args.source_name,))
        keys = list(result.keys())
        names = [clean_dict(keys, vals) for vals in result.fetchall()]

        # Group by tax_id explicitly: the query has no ORDER BY, and
        # itertools.groupby would split non-adjacent rows of one tax_id
        # into several groups, of which only the last would survive.
        namedict = {}
        for name in names:
            namedict.setdefault(name['tax_id'], []).append(name)

        for node in nodes:
            node['type'] = 'node'
            tax_id = node['tax_id']
            if tax_id in namedict:
                node['names'] = namedict.pop(tax_id)

        yaml.safe_dump_all(nodes, args.outfile, default_flow_style=False,
                           explicit_start=True, indent=2)

        # prepare remaining names (those whose tax_id matched no node)
        remaining_names = []
        for tax_id, grouped_names in namedict.items():
            for name in grouped_names:
                # tax_id is stated once on the enclosing document
                del name['tax_id']

            remaining_names.append({
                'tax_id': tax_id,
                'type': 'name',
                'names': grouped_names
            })

        yaml.safe_dump_all(remaining_names, args.outfile, default_flow_style=False,
                           explicit_start=True, indent=2)
Пример #4
0
def action(args):
    """Export the nodes and names of a single source as YAML documents.

    Rows of the nodes and names tables whose source name equals
    ``args.source_name`` are written to ``args.outfile``.  Node
    documents (``type: node``) come first, each carrying a ``names``
    list when names exist for its tax_id; names whose tax_id matched no
    selected node follow as ``type: name`` documents.
    """
    engine = sqlalchemy.create_engine(args.url, echo=args.verbosity > 2)
    tax = Taxonomy(engine, schema=args.schema)

    with engine.connect() as con:
        cmd = """
        select nodes.*, source.name as source_name
        from {nodes}
        join {source} on nodes.source_id = source.id
        where source.name = {x}
        """.format(x=tax.placeholder, nodes=tax.nodes, source=tax.source)

        result = con.execute(cmd, (args.source_name, ))
        keys = list(result.keys())
        nodes = [clean_dict(keys, vals) for vals in result.fetchall()]

        # get the complete lineage for each node, and provide an
        # ordering for all nodes so that children may be placed after
        # parents.
        tax_ids = list(map(itemgetter('tax_id'), nodes))
        lineages = tax._get_lineage_table(tax_ids)
        ordering = {}
        for i, lineage in enumerate(lineages):
            tax_id = lineage[1]
            # keep the position of the first row mentioning this tax_id
            if tax_id not in ordering:
                ordering[tax_id] = i

        nodes = sorted(nodes, key=lambda n: ordering[n['tax_id']])

        cmd = """
        select names.*, source.name as source_name
        from {names}
        join {source} on names.source_id = source.id
        where source.name = {x}
        """.format(x=tax.placeholder, names=tax.names, source=tax.source)

        result = con.execute(cmd, (args.source_name, ))
        keys = list(result.keys())
        names = [clean_dict(keys, vals) for vals in result.fetchall()]

        # Accumulate names per tax_id by hand.  itertools.groupby only
        # merges adjacent rows, and this query does not order by tax_id,
        # so a dict built from its groups could silently drop names.
        namedict = {}
        for name in names:
            namedict.setdefault(name['tax_id'], []).append(name)

        for node in nodes:
            node['type'] = 'node'
            tax_id = node['tax_id']
            if tax_id in namedict:
                node['names'] = namedict.pop(tax_id)

        yaml.safe_dump_all(nodes,
                           args.outfile,
                           default_flow_style=False,
                           explicit_start=True,
                           indent=2)

        # prepare remaining names: whatever is left had no matching node
        remaining_names = []
        for tax_id, grouped_names in namedict.items():
            for name in grouped_names:
                # the tax_id is recorded once on the enclosing document
                del name['tax_id']

            remaining_names.append({
                'tax_id': tax_id,
                'type': 'name',
                'names': grouped_names
            })

        yaml.safe_dump_all(remaining_names,
                           args.outfile,
                           default_flow_style=False,
                           explicit_start=True,
                           indent=2)