def _lookup(self, words, do_autoexpand=1):
    """Look up words in the lexicon and collect the matching documents.

    The words are passed through the configured pipeline (stopword
    removal, optional thesaurus expansion, casefolding, normalization).
    Each resulting word is resolved to a word id and the document ids
    stored for that id are merged into the result.

    Args:
        words: sequence of words to search for.  The sequence itself is
            never modified.  NOTE(review): the original docstring also
            mentioned "a word", but a bare string would be iterated
            character by character -- callers presumably always pass a
            list; confirm.
        do_autoexpand: when true and ``self.autoexpand`` is enabled,
            every word with at least ``self.autoexpand_limit`` characters
            is additionally searched using right-truncation.

    Returns:
        A ``ResultSet`` built from the merged document ids and a dict
        mapping each word that was used to its weight (1.0 for the
        processed query words, a ``TRUNC_WEIGHT`` entry for words found
        through truncation).
    """

    docids = IISet()
    used_words = {}

    # Remove stopwords from the query.
    if self.use_stopwords:
        words = self.use_stopwords.process(words)

    if self.use_thesaurus and self.thesaurus_mode == 'expand_always':
        thesaurus = ThesaurusRegistry.get(self.use_thesaurus)

        # Work on a private copy: without stopword processing ``words`` is
        # the caller's own list and must not grow as a side effect.
        words = list(words)
        related_terms = []
        for word in words:
            related = thesaurus.getTermsFor(word)
            # Skip empty results.  NOTE(review): presumably a term without
            # a thesaurus entry can yield None -- confirm.
            if related:
                related_terms.extend(related)

        # Same ordering as before: query words first, then their expansions.
        words = words + related_terms

    for word in words:

        # Perform casefolding if necessary.
        if self.splitter_casefolding:
            word = word.lower()

        if self.use_normalizer:
            word = NormalizerRegistry.get(self.use_normalizer).process(word)

        used_words[word] = 1.0

        wid = self._lexicon.getWordId(word)

        # Retrieve the docIds stored for this word id (None: unknown word).
        if wid is not None:
            docids.update(self._storage.get(wid))

        # Autoexpand the term by running a right-truncation search.
        if do_autoexpand and self.autoexpand and len(word) >= self.autoexpand_limit:
            rs = self.lookupWordsByTruncation(word, right=1)
            docids.update(rs.docIds())

            # The weight of an expanded word is looked up by how many
            # characters longer it is than the search term.
            wlen = len(word)
            for w in rs.words().keys():
                used_words[w] = TRUNC_WEIGHT[len(w) - wlen]

    return ResultSet(docids, used_words)
###########################################################################
#
# TextIndexNG                The next generation TextIndex for Zope
#
# This software is governed by a license. See
# LICENSE.txt for the terms of this license.
#
###########################################################################

from Products.TextIndexNG2.Registry import ThesaurusRegistry
from Products.TextIndexNG2 import fast_startup

from ExampleThesaurus import ExampleThesaurus
from Thesaurus_DE import Thesaurus_DE

# Register the bundled thesauri under their public names at import time.
# Registration is skipped entirely when ``fast_startup`` is set.
if not fast_startup:
    # Each thesaurus is instantiated right before it is registered, in the
    # order listed here.
    for _name, _factory in (
        ('ExampleThesaurus', ExampleThesaurus),
        ('german', Thesaurus_DE),
    ):
        ThesaurusRegistry.register(_name, _factory())

    # Do not leave the loop helpers behind in the module namespace.
    del _name, _factory