def createComponents(self, fieldName):
    """Build the English analysis chain for *fieldName*.

    Pipeline: standard tokenization, possessive stripping, lowercasing,
    diacritic folding, stopword removal, then Porter stemming (with any
    terms in ``self.stemExclusionSet`` protected from the stemmer).

    Returns an ``Analyzer.TokenStreamComponents`` pairing the tokenizer
    with the fully wrapped filter chain.
    """
    source = StandardTokenizer()
    result = EnglishPossessiveFilter(source)
    result = LowerCaseFilter(result)
    result = DiacriticFilter(result)
    result = StopFilter(result, self.stopwords)
    # Fix: use idiomatic truthiness instead of the `is False` identity
    # comparison, which is fragile against non-bool returns from the
    # Java-wrapped isEmpty().
    if not self.stemExclusionSet.isEmpty():
        # Mark excluded terms as keywords so PorterStemFilter skips them.
        result = SetKeywordMarkerFilter(result, self.stemExclusionSet)
    result = PorterStemFilter(result)
    return Analyzer.TokenStreamComponents(source, result)
def createComponents(self, fieldName, reader):
    """Whitespace-tokenize *reader* and drop the literal token "stop".

    Uses a Python-side FilteringTokenFilter subclass; the JCC bridge
    requires the nested class to be defined per call here.
    """

    class _stopFilter(PythonFilteringTokenFilter):
        # `_self` avoids shadowing the enclosing method's `self`.

        def __init__(_self, tokenStream):
            super(_stopFilter, _self).__init__(Version.LUCENE_CURRENT, tokenStream)
            # Cache the term attribute once; accept() reads it per token.
            _self.termAtt = _self.addAttribute(CharTermAttribute.class_)

        def accept(_self):
            # Keep every token except the exact string "stop".
            return _self.termAtt.toString() != "stop"

    source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
    return Analyzer.TokenStreamComponents(source, _stopFilter(source))
def createComponents(_self, fieldName):
    """Tokenize on whitespace, then apply ICU folding to each token."""
    tokenizer = WhitespaceTokenizer()
    folded = ICUFoldingFilter(tokenizer)
    return Analyzer.TokenStreamComponents(tokenizer, folded)
def createComponents(_self, fieldName, reader):
    """Whitespace tokenization followed by ICU Normalizer2 filtering."""
    tokenizer = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
    normalized = ICUNormalizer2Filter(tokenizer)
    return Analyzer.TokenStreamComponents(tokenizer, normalized)
def createComponents(_self, fieldName):
    """Delegate token production entirely to the module-level _tokenizer."""
    components = Analyzer.TokenStreamComponents(_tokenizer())
    return components
def createComponents(self, fieldName):
    """Lower-case tokenization wrapped in a field-aware PayloadFilter."""
    tokenizer = LowerCaseTokenizer()
    with_payloads = PayloadFilter(tokenizer, fieldName)
    return Analyzer.TokenStreamComponents(tokenizer, with_payloads)
def createComponents(self, fieldName, reader):
    """Lower-case tokenization of *reader*, with payloads attached per field."""
    tokenizer = LowerCaseTokenizer(Version.LUCENE_CURRENT, reader)
    with_payloads = PayloadFilter(tokenizer, fieldName)
    return Analyzer.TokenStreamComponents(tokenizer, with_payloads)