示例#1
0
from taxolib.taxacomponents import RankTable, Taxon
from taxolib.taxoconfig import ConfigError
from argparse import ArgumentParser


# Describe the command-line interface and parse the arguments.
argp = ArgumentParser(
    description=(
        'Searches for taxa in the taxonomy database by matching the taxon name '
        'string.  "%" can be used as a wildcard character in the search string.'
    )
)
argp.add_argument(
    '-d', '--dbconf', default='database.sqlite',
    help='the SQLite database file ("database.sqlite" by default)'
)
argp.add_argument(
    '-s', '--nosynonyms', action='store_true',
    help='do not search synonyms for taxa names'
)
argp.add_argument('search_string', help='the name search string')
# numtaxa and maxdepth have no command-line option of their own in this
# section; they are placed on the parsed namespace with a fixed value of -1.
argp.set_defaults(numtaxa=-1, maxdepth=-1)
args = argp.parse_args()

# Obtain a cursor for the taxonomy database named on the command line.  A
# ConfigError ends the program with the error text framed by blank lines.
try:
    pgcur = taxodatabase.getDBCursor(args.dbconf)
except ConfigError as conf_err:
    exit('\n{0}\n'.format(str(conf_err)))

# Populate the rank table from the database; it is handed to Taxon.find()
# below.
ranktable = RankTable()
ranktable.loadFromDB(pgcur)

# Run the name search with the user's search string.
taxa = Taxon.find(pgcur, args.search_string, ranktable)

# Group the taxa by source taxonomy: a dictionary keyed by taxonomy ID whose
# values are lists of taxa.
taxonomy_taxa = {}
for taxon in taxa:
    # Ensure this taxon's taxonomy has a (possibly still empty) list.
    taxonomy_taxa.setdefault(taxon.taxonomy_id, [])
示例#2
0
#!/usr/bin/python

# This program provides a simple example of how to use the approximate string
# matching search library.  Note that this requires only 2 lines of code: 1 to
# instantiate a matcher object, and 1 to actually perform the match.

import sys
import approxmatch
# A hack for now to get the local taxonomy package to import.
sys.path.append('../')
from taxolib import taxodatabase, taxoconfig

# Validate the command line before touching the database, so that a usage
# mistake is reported immediately and no database connection is opened
# needlessly.
if len(sys.argv) != 2:
    sys.exit('\nPlease provide a name to search for in the names table.\n')

searchstr = sys.argv[1]

# Get a cursor for the taxonomy database.  sys.exit() is used instead of the
# exit() helper installed by the site module, which is intended for
# interactive sessions and is not guaranteed to exist in every run mode.
try:
    pgcur = taxodatabase.getDBCursor('../database.conf')
except taxoconfig.ConfigError as e:
    sys.exit('\n' + str(e) + '\n')

# Database table and column that are searched.  ('names' is the alternative
# table name.)
tablename, colname = 'ftest_genus_names', 'namestr'

# Create the q-gram/DL hybrid matcher for that table and column.
matcher = approxmatch.HybridMatcher(tablename, colname, pgcur)
示例#3
0
# Options controlling timing, matching strategy and output.  Each option
# declared in this section carries its own default value.
argp.add_argument(
    '-tr', '--timer_runs', type=int, default=3,
    help=(
        'The number of complete search runs to execute when '
        'running in timer mode.  The best time among all runs is taken as the '
        'final run time.  The default is 3.'
    )
)
argp.add_argument(
    '-m', '--method', default='qgram',
    help=(
        'the matching method to use ("exact", "qgram", "neighbor", '
        '"wcneighbor", "dmetaphone", "soundex", or "hybrid")'
    )
)
argp.add_argument(
    '-qgt', '--qgram_threshold', type=float, default=0.3,
    help=(
        'The similarity threshold to use for '
        'qgram-based matching.  The default is 0.3.'
    )
)
argp.add_argument(
    '-fo', '--output_format', default='text',
    help=(
        'The format for reporting results, either "text" '
        '[the default] or "json".'
    )
)
argp.add_argument('csv_file', help='the input CSV file')
# dbconf, table and write_failed are not declared in this section, so their
# defaults are still supplied through set_defaults().
argp.set_defaults(dbconf='../database.conf', table='ftest_genus_names', write_failed='')
args = argp.parse_args()

# Obtain a cursor for the taxonomy database named by the dbconf argument.  A
# ConfigError ends the program with the error text framed by blank lines.
try:
    pgcur = taxodatabase.getDBCursor(args.dbconf)
except taxoconfig.ConfigError as conf_err:
    exit('\n{0}\n'.format(str(conf_err)))

#nhoodMatch(pgcur, 'Anas')
#wcNhoodMatch(pgcur, 'Ictaluris')
#qgramMatch(pgcur, 'Anas')
#exit()

# Instantiate a matcher object for the requested match strategy.
# Each matcher shown here is built from the table name given by the table
# argument, the 'namestr' column, and the database cursor.
# NOTE(review): the --method help text lists further strategies ("neighbor",
# "wcneighbor", "dmetaphone", "soundex", "hybrid"); their branches are not
# visible in this excerpt -- confirm they follow the 'exact' branch.
if args.method == 'qgram':
    matcher = approxmatch.QgramMatcher(args.table, 'namestr', pgcur)
    # Set the qgram matching similarity threshold.
    matcher.setSimilarityCutoff(args.qgram_threshold)
elif args.method == 'exact':
    matcher = approxmatch.ExactMatcher(args.table, 'namestr', pgcur)
示例#4
0
#!/usr/bin/python

# This program provides a simple example of how to use the approximate string
# matching search library.  Note that this requires only 2 lines of code: 1 to
# instantiate a matcher object, and 1 to actually perform the match.

import sys
import approxmatch
# A hack for now to get the local taxonomy package to import.
sys.path.append('../')
from taxolib import taxodatabase, taxoconfig


# Validate the command line before touching the database, so that a usage
# mistake is reported immediately and no database connection is opened
# needlessly.
if len(sys.argv) != 2:
    sys.exit('\nPlease provide a name to search for in the names table.\n')

searchstr = sys.argv[1]

# Get a cursor for the taxonomy database.  sys.exit() is used instead of the
# exit() helper installed by the site module, which is intended for
# interactive sessions and is not guaranteed to exist in every run mode.
try:
    pgcur = taxodatabase.getDBCursor('../database.conf')
except taxoconfig.ConfigError as e:
    sys.exit('\n' + str(e) + '\n')

# Database table and column that are searched.  ('names' is the alternative
# table name.)
tablename, colname = 'ftest_genus_names', 'namestr'

# Create the q-gram/DL hybrid matcher for that table and column.
matcher = approxmatch.HybridMatcher(tablename, colname, pgcur)