示例#1
0
    def _lookup(self, words, do_autoexpand=1):
        """ search a word or a list of words in the lexicon and 
            return a ResultSet of found documents.
        """

        docids = IISet()
        used_words = {} 

        #  remove stopwords from data
        if self.use_stopwords:
            words = self.use_stopwords.process( words ) 

        if self.use_thesaurus and self.thesaurus_mode == 'expand_always':
            TH = ThesaurusRegistry.get(self.use_thesaurus)
            for word in words[:]:
                r = TH.getTermsFor(word)
                words.extend(r)

        for word in words:

            # perform casefolding if necessary
            if self.splitter_casefolding:
                word = word.lower()

            if self.use_normalizer:
                word = NormalizerRegistry.get(self.use_normalizer).process(word)    
 
            used_words[word] = 1.0

            wid = self._lexicon.getWordId(word)

            # Retrieve list of docIds for this wordid
            if wid is not None:
                docids.update( self._storage.get(wid) )

            # perform autoexpansion of terms by performing
            # a search using right-truncation
            if do_autoexpand and self.autoexpand and len(word) >= self.autoexpand_limit:
                rs = self.lookupWordsByTruncation(word, right=1)
                docids.update(rs.docIds())
                wlen = len(word)
                for w in rs.words().keys():
                    used_words[w] = TRUNC_WEIGHT[len(w)-wlen]

        return ResultSet(docids, used_words)
示例#2
0
###########################################################################
#
# TextIndexNG                The next generation TextIndex for Zope
#
# This software is governed by a license. See
# LICENSE.txt for the terms of this license.
#
###########################################################################

from Products.TextIndexNG2.Registry import ThesaurusRegistry
from Products.TextIndexNG2 import fast_startup

from ExampleThesaurus import ExampleThesaurus
from Thesaurus_DE import Thesaurus_DE 

if not fast_startup:
    ThesaurusRegistry.register('ExampleThesaurus', ExampleThesaurus())
    ThesaurusRegistry.register('german', Thesaurus_DE())