def _top_key(results):
    """Return the "key" of the first result, or "No results" if there is none."""
    return results[0]["key"] if len(results) > 0 else "No results"


def _index_fields(entry):
    """Build the ``add_document`` keyword arguments for one data entry.

    Dict entries contribute their "key" and a comma-joined "tags" string
    (tags are optional); any other entry is indexed as a bare key.
    """
    if isinstance(entry, dict):
        return {"key": entry["key"], "tags": ", ".join(entry.get("tags", []))}
    return {"key": entry}


def test_performance(query_str, data):
    """Time one query against Whoosh, ``searcher`` and ``fuzzle`` and print a summary.

    For each back end a line with the number of results, the elapsed wall-clock
    time and the key of the top result is printed.  Building the Whoosh index
    is not included in the Whoosh timing; only parsing and running the query is.

    Args:
        query_str: The search string handed to all three back ends.
        data: A re-iterable collection (it is traversed several times) whose
            entries are either dicts with a "key" and optional "tags" list, or
            plain strings.

    Returns:
        None.  Results are reported on stdout only.
    """
    import os

    # Whoosh's create_in() expects the index directory to exist already.
    os.makedirs("whoosh", exist_ok=True)

    # NOTE(review): tags are joined with ", " but KEYWORD is used with its
    # default settings -- confirm whether KEYWORD(commas=True) was intended.
    schema = Schema(key=TEXT(stored=True), tags=KEYWORD)
    ix = create_in("whoosh", schema)
    writer = ix.writer()
    for entry in data:
        # Accept plain strings and tag-less dicts, like the code further down.
        writer.add_document(**_index_fields(entry))
    writer.commit()

    # --- Whoosh ---------------------------------------------------------
    time_started = datetime.now()
    with ix.searcher(weighting=scoring.Frequency) as s:
        query = QueryParser("key", ix.schema).parse(query_str)
        results = s.search(query, limit=None)
        time_ended = datetime.now()
        # Read the hits while the searcher is still open.
        print("Whoosh: {} results | {} - Top Result: {}".format(
            len(results), time_ended - time_started, _top_key(results)))

    # --- old searcher (works on bare keys only) -------------------------
    time_started = datetime.now()
    data_old = [d["key"] if isinstance(d, dict) else d for d in data]
    results = searcher.find(data_old, query_str)
    time_ended = datetime.now()
    print("Searcher: {} results | {} - Top Result: {}".format(
        len(results), time_ended - time_started, _top_key(results)))

    # --- new searcher (fuzzle) ------------------------------------------
    time_started = datetime.now()
    results = fuzzle.find(data, query_str, coverage_multiplier=0.05)
    time_ended = datetime.now()
    print("New Searcher: {} results | {} - Top Result: {}\n".format(
        len(results), time_ended - time_started, _top_key(results)))
### defining a list of options with tags ###
# Each option is a dict with a "key" and a list of "tags".
options = [
    {
        "key": "GitHub",
        "tags": [
            "Nat Friedman",
            "Tom Preston-Werner",
            "Chris Wanstrath",
            "Scott Chacon",
            "P. J. Hyett",
        ],
    },
    {
        "key": "Google",
        "tags": [
            "Alphabet Inc.",
            "YouTube",
            "Sundar Pichai",
            "Larry Page",
            "Sergey Brin",
        ],
    },
    {
        "key": "Reddit",
        "tags": ["Steve Huffman", "Alexis Ohanian", "Aaron Swartz"],
    },
]

search = "GitHib"  # the searched string (a misspelling of "GitHub")
results = fuzzle.find(options, search)

if len(results) == 0:
    print("Couldn't find any results!")
elif results[0]["match"]:  # if it was a 100% identical match
    print("Found {}!".format(results[0]["key"]))
else:
    # No exact match: list every candidate that was returned.
    for result in results:
        print(result["key"])
data = companies else: data = countries search = input("Enter search query: ") if search != "": time_started = datetime.now() data_old = [d["key"] if isinstance(d, dict) else d for d in data] results = searcher.find(data_old, search) max_results = min(len(results), 20) time_ended = datetime.now() ## for result in results[:max_results]: ## print(result["key"]) print("Searcher: {} results | {}\n".format( len(results), time_ended - time_started)) time_started = datetime.now() results = fuzzle.find(data, search, return_all=True, coverage_multiplier=0.05) max_results = min(len(results), 50) time_ended = datetime.now() for result in results[:max_results]: print(result["key"].translate(non_bmp_map)) # if "tags" in result: print(result["tags"]) print("New Searcher: {} results | {}".format( len(results), time_ended - time_started)) except KeyboardInterrupt: exit(0)
import fuzzle

# Candidate strings to search through.
options = [
    "reddit",
    "subreddit",
    "diretide",
    "automod",
    "hire mods",
    "disboard",
]
search = "edit"

print("Search:", search)
print("Options:", ", ".join(options))
print("\nResults:")

# return_all=True is passed so the full result list comes back for display.
results = fuzzle.find(options, search, return_all=True)

# Print a 1-based rank, the matched key and the "construct" value of each result.
for rank, hit in enumerate(results, start=1):
    print(f"{rank}.", hit["key"], "| pseudo-string:", hit["construct"])