def search(self, w):
    """Look up card names matching the query ``w``.

    Matching is attempted in three stages, returning at the first that hits:

    1. Alias: if ``w`` equals the canonicalized form of a known alias, the
       aliased name is returned as the exact hit with every other field
       ``None``.
    2. Prefix: matches from ``find_matches_by_prefix`` on the normalized
       query; the fuzzy field is ``None``.
    3. Fuzzy: a Whoosh query combining fuzzy-normalized, fuzzy-tokenized and
       stemmed terms; the fuzzy field is a list of ``(name, score)`` tuples
       (at most 40 hits).

    Raises:
        IndexError: if the normalized analyzer yields no tokens for ``w``.
    """
    # A stale index means cards were added since the trie was built, so the
    # trie has to be rebuilt before it can be trusted.
    if not self.ix.up_to_date():
        self.initialize_trie()

    # Searching for an alias makes the aliased card the exact hit.
    for alias, name in fetcher.card_aliases():
        if w == card.canonicalize(alias):
            return SearchResult(name, None, None, None)

    normalized_tokens = list(WhooshConstants.normalized_analyzer(w))
    normalized = normalized_tokens[0].text

    # Any kind of prefix match wins over the fuzzy search.
    exact, prefix_whole_word, other_prefixed = self.find_matches_by_prefix(
        normalized)
    has_prefix_hit = (
        bool(exact)
        or len(prefix_whole_word) > 0
        or len(other_prefixed) > 0
    )
    if has_prefix_hit:
        return SearchResult(exact, prefix_whole_word, other_prefixed, None)

    # Nothing by prefix: fall back to fuzzy and stemmed queries.
    query_normalized = fuzzy_term(normalized, self.DIST, "name_normalized")

    stemmed_terms = []
    for token in WhooshConstants.stem_analyzer(w):
        stemmed_terms.append(Term('name_stemmed', token.text))
    query_stemmed = And(stemmed_terms)

    tokenized_terms = []
    for token in WhooshConstants.tokenized_analyzer(w):
        tokenized_terms.append(
            fuzzy_term(token.text, self.DIST, "name_tokenized"))
    query_tokenized = And(tokenized_terms)

    query = Or([query_normalized, query_tokenized, query_stemmed])

    with self.ix.searcher() as searcher:
        fuzzy = []
        for hit in searcher.search(query, limit=40):
            fuzzy.append((hit['name'], hit.score))

    return SearchResult(exact, prefix_whole_word, other_prefixed, fuzzy)
def update_fuzzy_matching():
    """Rebuild the ``fuzzy`` spellfix1 virtual table from scratch.

    Only does anything on a SQLite database. The table is dropped and
    recreated, then filled in three passes:

    1. one row per result of ``base_query()``, ranked by its ``pd_legal``
       column;
    2. one row per face whose lowercased name is not already present, ranked
       by whether the card has a legality row for the 'Penny Dreadful'
       format;
    3. one row per card alias, with the lowercased card name as ``word`` and
       the alias as ``soundslike``.
    """
    format_id = get_format_id('Penny Dreadful', True)

    # The fuzzy table is a spellfix1 virtual table, which is only created on
    # SQLite, so there is nothing to populate on any other backend.
    if not db().is_sqlite():
        return

    db().execute('DROP TABLE IF EXISTS fuzzy')
    db().execute(
        'CREATE VIRTUAL TABLE IF NOT EXISTS fuzzy USING spellfix1')

    # Pass 1: every name produced by the base card query.
    insert_base_names = (
        'INSERT INTO fuzzy (word, rank) '
        'SELECT LOWER(bq.name), bq.pd_legal '
        'FROM ({base_query}) AS bq '
    ).format(base_query=base_query())
    db().execute(insert_base_names)

    # Pass 2: face names that pass 1 did not already insert.
    insert_face_names = (
        'INSERT INTO fuzzy (word, rank) '
        'SELECT LOWER(f.name), '
        'SUM(CASE WHEN cl.format_id = {format_id} THEN 1 ELSE 0 END) > 0 '
        'FROM face AS f '
        'INNER JOIN card AS c ON f.card_id = c.id '
        'LEFT OUTER JOIN card_legality AS cl '
        'ON cl.card_id = c.id AND cl.format_id = {format_id} '
        'WHERE LOWER(f.name) NOT IN (SELECT word FROM fuzzy) '
        'GROUP BY f.id '
    ).format(format_id=format_id)
    db().execute(insert_face_names)

    # Pass 3: aliases, stored as an explicit soundslike value for the card.
    for alias, name in fetcher.card_aliases():
        db().execute(
            'INSERT INTO fuzzy (word, soundslike) VALUES (LOWER(?), ?)',
            [name, alias])
def reindex() -> None:
    """Rewrite the search index from all cards, with aliases added as names.

    Each ``(alias, name)`` pair from ``fetcher.card_aliases()`` is appended to
    the ``names`` list of every card whose ``name`` equals ``name``. The
    augmented cards are then passed to ``WhooshWriter.rewrite_index``.
    """
    writer = WhooshWriter()
    cs = get_all_cards()

    # Group the cards by name once, so each alias is resolved with a dict
    # lookup instead of a scan over every card.
    cards_by_name = {}
    for c in cs:
        cards_by_name.setdefault(c.name, []).append(c)

    for alias, name in fetcher.card_aliases():
        for c in cards_by_name.get(name, ()):
            c.names.append(alias)

    writer.rewrite_index(cs)
def cards_from_query(query):
    """Return the cards that best match a typed card-name query.

    A query of two characters or fewer returns ``[]`` without searching. A
    leading ``'$'`` is stripped and recorded as the mode, which is stored on
    every candidate card as ``c.mode`` (``'$'`` if present, otherwise ``0``).
    A query that matches an alias is replaced by the aliased card name.

    Candidates come from ``search(query)``, excluding tokens and Vanguard
    cards. The first non-empty stage below is returned:

    1. cards whose canonicalized ``name`` equals the query;
    2. cards with any canonicalized entry of ``names`` equal to the query;
    3. cards with a name starting with the query followed by a space or comma;
    4. cards with a name starting with the query;
    5. all candidates.

    Each card appears at most once in the result, even when several of its
    names match.
    """
    # Skip searching if the request is too short.
    # NOTE: the length is checked before any leading '$' is stripped.
    if len(query) <= 2:
        return []

    mode = 0
    if query.startswith('$'):
        mode = '$'
        query = query[1:]
    query = card.canonicalize(query)

    # If we searched for an alias, change query so we can find the card in
    # the results.
    for alias, name in fetcher.card_aliases():
        if query == card.canonicalize(alias):
            query = card.canonicalize(name)

    cards = [
        c for c in search(query)
        if c.layout != 'token' and c.type != 'Vanguard'
    ]
    for c in cards:
        c.mode = mode

    # First look for an exact match on the primary name.
    results = [c for c in cards if query == card.canonicalize(c.name)]
    if results:
        return results

    # Canonicalize each card's names once; the remaining stages all use them.
    cards_with_names = [
        (c, [card.canonicalize(name) for name in c.names]) for c in cards
    ]

    # Then an exact match on any of the card's names.
    results = [c for c, names in cards_with_names if query in names]
    if results:
        return results

    # If not found, use cards that start with the query and a punctuation
    # char. any() keeps a card with several matching names from being added
    # more than once.
    punctuated = (
        '{query} '.format(query=query),
        '{query},'.format(query=query),
    )
    results = [
        c for c, names in cards_with_names
        if any(name.startswith(punctuated) for name in names)
    ]
    if results:
        return results

    # If not found, use cards that start with the query.
    results = [
        c for c, names in cards_with_names
        if any(name.startswith(query) for name in names)
    ]
    if results:
        return results

    # If we didn't find any of those then use all search results.
    return cards