def testProcess(self):
    """Check stopword filtering of a German sentence.

    With language 'de' the words 'der', 'die', 'das' and 'und' are
    expected to be removed; with language 'en' the word list is
    expected to come back unchanged.
    """
    stopwords = Stopwords()
    encoding = 'iso-8859-15'
    sentence = unicode(
        'der die das mondauto foobar gehen gut und überhaupt', encoding)

    # A fresh split is passed to every call so that no list object is
    # shared between the filter input and the expected value.
    expected_de = [
        u'mondauto',
        u'foobar',
        u'gehen',
        u'gut',
        unicode('überhaupt', encoding),
    ]
    self.assertEqual(stopwords.process(sentence.split(' '), 'de'),
                     expected_de)

    filtered_en = stopwords.process(sentence.split(' '), 'en')
    self.assertEqual(filtered_en, sentence.split(' '))
Exemplo n.º 2
0
 def testStopwords(self):
     """Check the stopword entries for known and unknown languages.

     Every entry returned for 'en' and 'de' must be exactly of type
     ``unicode``; the unknown language code 'xx' must yield a result
     of length zero.
     """
     stopwords = Stopwords()
     for language in ('en', 'de'):
         for word in stopwords.stopwordsForLanguage(language):
             self.assertEqual(type(word), unicode)
     unknown = stopwords.stopwordsForLanguage('xx')
     self.assertEqual(len(unknown), 0)
 def testStopwords(self):
     """Verify the type of stopword entries and the unknown-language case.

     Each word listed for 'en' and for 'de' has to be a ``unicode``
     object (exact type match), and asking for the language 'xx' has to
     return something whose length is 0.
     """
     provider = Stopwords()
     for code in ('en', 'de'):
         entries = provider.stopwordsForLanguage(code)
         for entry in entries:
             self.assertEqual(type(entry), unicode)
     self.assertEqual(len(provider.stopwordsForLanguage('xx')), 0)
Exemplo n.º 4
0
 def testProcess(self):
     """Filter a German sentence with the 'de' and 'en' stopword lists.

     The 'de' run is expected to drop 'der', 'die', 'das' and 'und';
     the 'en' run is expected to return all words untouched.
     """
     sw = Stopwords()
     charset = 'iso-8859-15'
     text = unicode('der die das mondauto foobar gehen gut und überhaupt',
                    charset)

     # German filtering: only the non-stopwords should survive.
     german = sw.process(text.split(' '), 'de')
     survivors = [u'mondauto', u'foobar', u'gehen', u'gut']
     survivors.append(unicode('überhaupt', charset))
     self.assertEqual(german, survivors)

     # English filtering: the word list should be unchanged.
     english = sw.process(text.split(' '), 'en')
     self.assertEqual(english, text.split(' '))
Exemplo n.º 5
0
 def setUp(self):
     """Prepare the fixture by registering the utilities used by the tests.

     Calls the bare name ``setUp()`` first (presumably a module-level
     helper imported elsewhere in the file -- confirm), then registers
     the converter, splitter factories, parser, lexicon/storage
     factories, stopwords and normalizer via ``provideUtility`` in the
     same order as before.
     """
     setUp()
     provideUtility(PDFConverter, IConverter, name='application/pdf')

     splitters = (
         (SplitterFactory, 'txng.splitters.default'),
         (SimpleSplitterFactory, 'txng.splitters.simple'),
     )
     for factory, label in splitters:
         provideUtility(factory, IFactory, name=label)

     # The parser instance is created here, after the splitters are
     # registered, to keep the original instantiation order.
     provideUtility(EnglishParser(), IParser, name='txng.parsers.en')

     backends = (
         (LexiconFactory, 'txng.lexicons.default'),
         (StorageFactory, 'txng.storages.default'),
     )
     for factory, label in backends:
         provideUtility(factory, IFactory, name=label)

     # Unnamed utilities, registered as instances.
     provideUtility(Stopwords(), IStopwords)
     provideUtility(Normalizer(), INormalizer)
Exemplo n.º 6
0
 def testStopwordReader(self):
     """Smoke test: request the stopwords of every available language.

     Nothing is asserted about the results; the test passes as long as
     none of the calls raises.
     """
     reader = Stopwords()
     for language in reader.availableLanguages():
         reader.stopwordsForLanguage(language)
Exemplo n.º 7
0
 def testAvailableLanguages(self):
     """Check that 'en' and 'fr' are available languages and 'xx' is not.

     TestCase assertion methods are used instead of bare ``assert``
     statements: ``assert`` is removed when Python runs with -O, which
     would let this test pass without checking anything.
     NOTE(review): assumes the enclosing class is a unittest.TestCase.
     """
     languages = Stopwords().availableLanguages()
     self.assertTrue('en' in languages)
     self.assertTrue('fr' in languages)
     self.assertFalse('xx' in languages)
 def testStopwordReader(self):
     """Read the stopword list of each available language once.

     This is a smoke test without assertions: it only fails if looking
     up the languages or fetching one of the lists raises.
     """
     stopwords = Stopwords()
     for code in stopwords.availableLanguages():
         stopwords.stopwordsForLanguage(code)