def createComponents(self, fieldName):
    # Build the chain: tokenize, strip possessive 's, lowercase,
    # fold diacritics, drop stopwords, then stem.
    source = StandardTokenizer()
    result = EnglishPossessiveFilter(source)
    result = LowerCaseFilter(result)
    result = DiacriticFilter(result)  # project-specific filter, not stock Lucene
    result = StopFilter(result, self.stopwords)
    if not self.stemExclusionSet.isEmpty():
        # protect the excluded terms from stemming
        result = SetKeywordMarkerFilter(result, self.stemExclusionSet)
    result = PorterStemFilter(result)
    return Analyzer.TokenStreamComponents(source, result)
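For context, each snippet on this page overrides Analyzer.createComponents inside a PyLucene PythonAnalyzer subclass. Below is a minimal sketch of that wrapper for Example #1; the class name EnglishPipelineAnalyzer is illustrative, the import paths assume Lucene 7-era packages (they differ across releases), and DiacriticFilter is omitted here because it appears to be project-local rather than a stock Lucene class.

from org.apache.lucene.analysis import Analyzer, LowerCaseFilter, StopFilter
from org.apache.lucene.analysis.standard import StandardTokenizer
from org.apache.lucene.analysis.en import EnglishPossessiveFilter, PorterStemFilter
from org.apache.lucene.analysis.miscellaneous import SetKeywordMarkerFilter
from org.apache.pylucene.analysis import PythonAnalyzer

class EnglishPipelineAnalyzer(PythonAnalyzer):  # illustrative name
    def __init__(self, stopwords, stemExclusionSet):
        # both arguments are CharArraySet instances
        super(EnglishPipelineAnalyzer, self).__init__()
        self.stopwords = stopwords
        self.stemExclusionSet = stemExclusionSet

    def createComponents(self, fieldName):
        # same pipeline as Example #1, minus the project-local DiacriticFilter
        source = StandardTokenizer()
        result = EnglishPossessiveFilter(source)
        result = LowerCaseFilter(result)
        result = StopFilter(result, self.stopwords)
        if not self.stemExclusionSet.isEmpty():
            result = SetKeywordMarkerFilter(result, self.stemExclusionSet)
        result = PorterStemFilter(result)
        return Analyzer.TokenStreamComponents(source, result)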
Example #2
def createComponents(self, fieldName, reader):
    # Inline FilteringTokenFilter subclass that drops every token whose
    # text is exactly "stop"; `_self` avoids shadowing the outer `self`.
    class _stopFilter(PythonFilteringTokenFilter):
        def __init__(_self, tokenStream):
            super(_stopFilter, _self).__init__(Version.LUCENE_CURRENT,
                                               tokenStream)
            _self.termAtt = _self.addAttribute(CharTermAttribute.class_)

        def accept(_self):
            # keep the current token unless its text is "stop"
            return _self.termAtt.toString() != "stop"

    source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
    return Analyzer.TokenStreamComponents(source, _stopFilter(source))
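A hedged usage sketch for Example #2: assuming the method above lives in a PythonAnalyzer subclass (here called MyStopAnalyzer, an illustrative name), the token stream can be consumed like this, and every token whose text equals "stop" is dropped by accept().

from java.io import StringReader
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute

analyzer = MyStopAnalyzer()
stream = analyzer.tokenStream("body", StringReader("please stop here"))
termAtt = stream.addAttribute(CharTermAttribute.class_)
stream.reset()
while stream.incrementToken():
    print(termAtt.toString())   # prints "please" then "here"; "stop" is filtered
stream.end()
stream.close()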
Example #3
def createComponents(self, fieldName):
    # ICUFoldingFilter applies Unicode case folding plus accent/diacritic
    # folding (requires the Lucene ICU module).
    source = WhitespaceTokenizer()
    return Analyzer.TokenStreamComponents(source,
                                          ICUFoldingFilter(source))
Example #4
def createComponents(self, fieldName, reader):
    # ICUNormalizer2Filter normalizes tokens with ICU (NFKC case folding
    # by default); this is the older signature that still receives the Reader.
    source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
    return Analyzer.TokenStreamComponents(
        source, ICUNormalizer2Filter(source))
Example #5
def createComponents(self, fieldName):
    # `_tokenizer` is a Tokenizer factory defined in the enclosing scope
    # (not shown here); no filter is attached, so the tokenizer itself
    # serves as the whole stream.
    return Analyzer.TokenStreamComponents(_tokenizer())
Example #6
def createComponents(self, fieldName):
    # PayloadFilter (defined elsewhere in this codebase) attaches per-token
    # payloads keyed by the field name.
    source = LowerCaseTokenizer()
    return Analyzer.TokenStreamComponents(source,
                                          PayloadFilter(source, fieldName))
Example #7
def createComponents(self, fieldName, reader):
    # same pipeline as Example #6, using the older reader-taking signature
    source = LowerCaseTokenizer(Version.LUCENE_CURRENT, reader)
    return Analyzer.TokenStreamComponents(source,
                                          PayloadFilter(source, fieldName))
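Finally, a sketch of where any of these analyzers plugs in at index time; the field name is illustrative, `analyzer` is any Analyzer instance built as above, and ByteBuffersDirectory assumes Lucene 8+ (use RAMDirectory on older releases).

from org.apache.lucene.store import ByteBuffersDirectory
from org.apache.lucene.index import IndexWriter, IndexWriterConfig
from org.apache.lucene.document import Document, Field, TextField

directory = ByteBuffersDirectory()
writer = IndexWriter(directory, IndexWriterConfig(analyzer))
doc = Document()
doc.add(TextField("body", "text to analyze", Field.Store.NO))
writer.addDocument(doc)   # the analyzer tokenizes the field content here
writer.close()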