def action(args):
    """Report tax ids missing from the taxonomy, with any merged replacement.

    Tax ids are collected from two optional sources:

    * ``args.taxids`` -- when it names an existing path it is read through
      ``getlines``; otherwise the string itself is split. In both cases ids
      are separated by whitespace, commas or semicolons.
    * ``args.seq_info`` -- an open CSV file; non-empty ``tax_id`` values are
      used. The file is closed once it has been read.

    For every id that ``tax._node`` rejects with ``ValueError`` a row
    ``[tax_id, merged_id]`` is written to ``args.out``. ``merged_id`` is
    ``None`` (an empty CSV field) when ``tax._get_merged`` returns nothing
    or returns the same id.

    Returns 0.
    """
    engine = create_engine(args.url, echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, schema=args.schema)

        taxids = set()
        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                for line in getlines(args.taxids):
                    taxids.update(re.split(r'[\s,;]+', line))
            else:
                taxids.update(
                    x.strip() for x in re.split(r'[\s,;]+', args.taxids))
            # re.split returns '' when the text starts or ends with a
            # separator; an empty string is not a tax id and must not be
            # reported as a missing one.
            taxids.discard('')

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        writer = csv.writer(args.out)
        for t in taxids:
            try:
                tax._node(t)
            except ValueError:
                # Not a current node: check whether it was merged into
                # another tax id.
                m = tax._get_merged(t)
                writer.writerow([t, m if m and m != t else None])
    finally:
        # Release pooled connections even when a lookup or write fails.
        engine.dispose()
    return 0
def action(args):
    """Export a taxonomy table covering the lineages of the requested taxa.

    Tax ids are collected from three optional sources:

    * ``args.taxids`` -- when it names an existing path it is read through
      ``getlines``; otherwise the string itself is split. Ids are separated
      by whitespace, commas or semicolons.
    * ``args.taxnames`` -- read through ``getlines``; each line may hold
      several names separated by commas or semicolons, each resolved with
      ``tax.primary_from_name``.
    * ``args.seq_info`` -- an open CSV file; non-empty ``tax_id`` values are
      used. The file is closed once it has been read.

    Every id must be accepted by ``tax._node``. Ids that raise ``KeyError``
    are reported on stderr (with the replacement id when ``tax._get_merged``
    supplies a different one) and the function returns 1 without writing a
    table. Otherwise the ids found in ``tax._get_lineage`` of each taxon are
    passed to ``tax.write_table`` and the function returns 0.
    """
    engine = create_engine(
        'sqlite:///%s' % args.database_file, echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, ncbi.ranks)

        taxids = set()
        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                for line in getlines(args.taxids):
                    taxids.update(re.split(r'[\s,;]+', line))
            else:
                taxids.update(
                    x.strip() for x in re.split(r'[\s,;]+', args.taxids))
            # re.split returns '' when the text starts or ends with a
            # separator; without this an otherwise valid list would fail
            # validation on the empty "tax id".
            taxids.discard('')

        if args.taxnames:
            for taxname in getlines(args.taxnames):
                for name in re.split(r'\s*[,;]\s*', taxname):
                    tax_id, _primary_name, _is_primary = \
                        tax.primary_from_name(name.strip())
                    taxids.add(tax_id)

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        # Before digging into lineages, make sure all the taxids exist in
        # the taxonomy database.
        valid_taxids = True
        for t in taxids:
            try:
                tax._node(t)
            except KeyError:
                # Check for merged
                m = tax._get_merged(t)
                if m and m != t:
                    msg = ("Taxid {0} has been replaced by {1}. "
                           "Please update your records").format(t, m)
                else:
                    msg = "Taxid %s not found in taxonomy." % t
                # sys.stderr.write works on Python 2 and 3, unlike the
                # `print >> stream` statement form.
                sys.stderr.write(msg + '\n')
                valid_taxids = False

        if not valid_taxids:
            sys.stderr.write("Some taxids were invalid. Exiting.\n")
            return 1

        # Extract all the taxids to be exported in the CSV file.
        taxids_to_export = set()
        for t in taxids:
            taxids_to_export.update(y for (_x, y) in tax._get_lineage(t))

        tax.write_table(
            taxids_to_export, csvfile=args.out_file, full=args.full)
    finally:
        # Runs on the early `return 1` as well as on success or error, so
        # the engine's connections are always released.
        engine.dispose()
    return 0
def action(args):
    """Look up taxon names and write the matches as CSV.

    Names come from ``args.infile`` (one per line; blank lines and lines
    starting with ``#`` are skipped, and anything after a ``#`` is dropped)
    and from the comma-separated string ``args.names``.

    A header row and then one row per name are written to ``args.outfile``
    with the columns ``input, tax_name, tax_id, rank``. Names for which
    ``tax.primary_from_name`` raises ``ValueError`` are written with empty
    values only when ``args.include_unmatched`` is set. The number of
    matched names is logged at the end.
    """
    db = sqlalchemy.create_engine(args.url, echo=False)
    taxonomy = Taxonomy(db, schema=args.schema)

    queries = []
    if args.infile:
        for raw in args.infile:
            if not raw.strip() or raw.startswith('#'):
                continue
            # Keep only the text in front of a trailing comment.
            queries.append(raw.split('#', 1)[0].strip())
    if args.names:
        queries.extend(piece.strip() for piece in args.names.split(','))

    out = csv.writer(args.outfile)
    out.writerow(['input', 'tax_name', 'tax_id', 'rank'])

    matched = 0
    for query in queries:
        try:
            tax_id, tax_name, _is_primary = taxonomy.primary_from_name(query)
        except ValueError:
            if args.include_unmatched:
                out.writerow([query, None, None, None])
            continue

        matched += 1
        _parent, rank = taxonomy._node(tax_id)
        out.writerow([query, tax_name, tax_id, rank])

    log.warning('found {} of {} names'.format(matched, len(queries)))
def test_species_below():
    """Exercise ``Taxonomy.species_below`` against the test taxonomy database.

    For each query id, any taxon returned must have rank ``'species'`` and,
    when the query is not ``None``, must satisfy
    ``tax.is_ancestor_of(result, query)``.
    """
    db = create_engine('sqlite:///../testfiles/taxonomy.db', echo=False)
    taxonomy = Taxonomy(db, taxtastic.ncbi.RANKS)

    # The result for '1239' must be something _node can resolve and unpack
    # into two values.
    _parent_id, _rank = taxonomy._node(taxonomy.species_below('1239'))

    for query in (None, '1239', '186801', '1117'):
        species = taxonomy.species_below(query)
        if species is None:
            # Nothing was returned for this query, so there is nothing to
            # check.
            continue
        assert query is None or taxonomy.is_ancestor_of(species, query)
        assert taxonomy.rank(species) == 'species'
def action(args):
    """Write a CSV of tax ids that the taxonomy database does not contain.

    Tax ids are collected from two optional sources:

    * ``args.taxids`` -- when it names an existing path it is read through
      ``getlines``; otherwise the string itself is split. In both cases ids
      are separated by whitespace, commas or semicolons.
    * ``args.seq_info`` -- an open CSV file; non-empty ``tax_id`` values are
      used. The file is closed once it has been read.

    The database is the SQLite file ``args.database_file``. For every id
    that ``tax._node`` rejects with ``ValueError`` a row
    ``[tax_id, merged_id]`` is written to ``args.out_file``. ``merged_id``
    is ``None`` (an empty CSV field) when ``tax._get_merged`` returns
    nothing or returns the same id.

    Returns 0.
    """
    engine = create_engine('sqlite:///%s' % args.database_file,
                           echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, ncbi.RANKS)

        taxids = set()
        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                for line in getlines(args.taxids):
                    taxids.update(re.split(r'[\s,;]+', line))
            else:
                taxids.update(
                    x.strip() for x in re.split(r'[\s,;]+', args.taxids))
            # A leading or trailing separator makes re.split emit ''.
            # Drop it so no row is written for an empty tax id.
            taxids.discard('')

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        writer = csv.writer(args.out_file)
        for t in taxids:
            try:
                tax._node(t)
            except ValueError:
                # Unknown node: see whether the id was merged into another.
                m = tax._get_merged(t)
                writer.writerow([t, m if m and m != t else None])
    finally:
        # Dispose of the engine on every exit path, including errors.
        engine.dispose()
    return 0
def action(args):
    """Write ``tax_id # tax_name`` lines for the taxa matching the given names.

    Names come from ``args.taxnames_file`` (one per line; blank lines and
    lines starting with ``#`` are skipped, and anything after a ``#`` is
    dropped) and from the comma-separated string ``args.taxnames``.
    Duplicate names are looked up once.

    Each name is resolved with ``tax.primary_from_name`` against the SQLite
    database ``args.dbfile``:

    * a name that raises ``ValueError`` is logged as ``not found`` and
      skipped;
    * a match that is not the primary name is logged as ``not primary`` but
      still used;
    * a taxon of rank ``'species'`` is recorded directly; for any other rank
      the rows returned by ``get_children(engine, [tax_id])`` are recorded
      instead (presumably the species below that taxon -- confirm against
      ``get_children``).

    The collected taxa are written to ``args.outfile`` sorted by
    ``tax_name``.
    """
    engine = create_engine('sqlite:///%s' % args.dbfile, echo=False)
    tax = Taxonomy(engine, ncbi.RANKS)

    names = []
    if args.taxnames_file:
        names += [line.split('#', 1)[0].strip()
                  for line in args.taxnames_file
                  if line.strip() and not line.startswith('#')]
    if args.taxnames:
        names += [x.strip() for x in args.taxnames.split(',')]

    # Explicit mapping instead of locals(): the message no longer depends
    # on what the local variables happen to be called.
    warning_fmt = '%(name)20s | %(tax_id)7s %(tax_name)20s %(note)s'

    taxa = {}
    for name in set(names):
        try:
            tax_id, tax_name, is_primary = tax.primary_from_name(name)
        except ValueError:
            log.warning(warning_fmt % dict(
                name=name, tax_id='', tax_name='', note='not found'))
            # Nothing was resolved, so there is no tax id to expand; do not
            # query for the children of an empty id.
            continue

        _parent, rank = tax._node(tax_id)
        if not is_primary:
            log.warning(warning_fmt % dict(
                name=name, tax_id=tax_id, tax_name=tax_name,
                note='not primary'))

        if rank == 'species':
            taxa[tax_id] = dict(tax_id=tax_id, tax_name=tax_name, rank=rank)
        else:
            _keys, rows = get_children(engine, [tax_id])
            taxa.update((row['tax_id'], row) for row in rows)

    for d in sorted(taxa.values(), key=lambda x: x['tax_name']):
        args.outfile.write('%(tax_id)s # %(tax_name)s\n' % d)
def test__node():
    """Check ``Taxonomy._node`` against the small test taxonomy database.

    ``None`` must yield ``None``, and tax id ``'91061'`` must yield the pair
    ``('1239', 'class')`` (presumably parent id and rank).
    """
    db = create_engine(
        'sqlite:///../testfiles/small_taxonomy.db', echo=False)
    taxonomy = Taxonomy(db, taxtastic.ncbi.RANKS)

    nothing = taxonomy._node(None)
    assert nothing is None

    expected = (u'1239', u'class')
    assert taxonomy._node('91061') == expected