def main(cls, argv):
    """Demo entry point: print synonym-expanded tokens for two sample texts.

    argv[1] is passed straight to WordNetSynonymEngine (presumably the
    WordNet index location -- confirm against the engine's constructor).
    """
    engine = WordNetSynonymEngine(argv[1])
    samples = [
        "The quick brown fox jumps over the lazy dogs",
        "\"Oh, we get both kinds - country AND western!\" - B.B.",
    ]
    for sample in samples:
        # A fresh SynonymAnalyzer per text, matching the original call pairs.
        AnalyzerUtils.displayTokensWithPositions(SynonymAnalyzer(engine), sample)
def main(cls): query = QueryParser(Version.LUCENE_CURRENT, "content", cls.synonymAnalyzer).parse('"fox jumps"') print "\"fox jumps\" parses to ", query.toString("content") print "From AnalyzerUtils.tokensFromAnalysis: " AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"") print ''
def analyze(cls, text): print 'Analyzing "%s"' %(text) for analyzer in cls.analyzers: name = type(analyzer).__name__ print " %s:" %(name), AnalyzerUtils.displayTokens(analyzer, text) print print
def testHoles(self):
    """Stop-word removal yields the same terms however many stop words sit
    between "one" and "enough"."""
    expected = ["one", "enough"]
    phrases = [
        "one is not enough",
        "one is enough",
        "one enough",
        "one but not enough",
    ]
    for phrase in phrases:
        AnalyzerUtils.assertAnalyzesTo(self.stopAnalyzer, phrase, expected)
def addAliasesToStack(self):
    """Push one zero-increment synonym token per alias of the current term.

    Each pushed state is a copy of the current token with only the term
    text and token type overwritten, so synonyms stack at the same
    position as the original token.
    """
    synonyms = self.engine.getSynonyms(self.termAttr.term())
    if synonyms is None:
        return  # no aliases for this term
    snapshot = self.captureState()
    for synonym in synonyms:
        # Start from the original token's captured state each iteration.
        self.save.restoreState(snapshot)
        AnalyzerUtils.setTerm(self.save, synonym)
        AnalyzerUtils.setType(self.save, self.TOKEN_TYPE_SYNONYM)
        # Increment 0 places the synonym at the same token position.
        AnalyzerUtils.setPositionIncrement(self.save, 0)
        self.synonymStack.append(self.save.captureState())
# Script entry point: start the Lucene JVM, make this script's directory
# importable, then delegate to AnalyzerUtils.main.
import os, sys, lucene

# Initialize the embedded JVM before any Lucene classes are used.
lucene.initVM()
# Add the script's own directory to sys.path so the lia package resolves
# regardless of the current working directory.
sys.path.append(os.path.dirname(os.path.abspath(sys.argv[0])))
from lia.analysis.AnalyzerUtils import AnalyzerUtils
# NOTE(review): runs on import as well as direct execution (no __main__ guard).
AnalyzerUtils.main(sys.argv)
def main(cls): text = "The quick brown fox jumps over the lazy dogs" AnalyzerUtils.displayTokensWithPositions(cls.porterAnalyzer, text) print ''
def testSpanish(self):
    """The Spanish Snowball stemmer reduces "algoritmos" to "algoritm"."""
    spanish = SnowballAnalyzer(Version.LUCENE_CURRENT, "Spanish")
    AnalyzerUtils.assertAnalyzesTo(spanish, "algoritmos", ["algoritm"])
def testEnglish(self):
    """The English Snowball stemmer strips suffixes from both words."""
    english = SnowballAnalyzer(Version.LUCENE_CURRENT, "English")
    AnalyzerUtils.assertAnalyzesTo(english, "stemming algorithms",
                                   ["stem", "algorithm"])
def main(cls):
    """Dump the tokens the flawed stop analyzer produces for a short phrase."""
    analyzer = StopAnalyzerFlawed()
    AnalyzerUtils.displayTokens(analyzer, "The quick brown...")
def testStopAnalyzerFlawed(self):
    """The flawed analyzer lowercases "The" but fails to remove it."""
    tokens = ["the", "quick", "brown"]
    AnalyzerUtils.assertAnalyzesTo(StopAnalyzerFlawed(),
                                   "The quick brown...",
                                   tokens)
def testStopAnalyzer2(self):
    """StopAnalyzer2 drops the stop word "The" and lowercases the rest."""
    tokens = ["quick", "brown"]
    AnalyzerUtils.assertAnalyzesTo(StopAnalyzer2(),
                                   "The quick brown...",
                                   tokens)