Пример #1
0
 def _getTerms(self, text):
     '''
     Extracts the terms (keywords) contained in a piece of text.

     The text is lower-cased and every match of self.word_pattern is
     collected. Each callable in self.filter_func_list is then applied, in
     order, to the running list of terms, and the outcome is handed to
     s_text_filter.suffix_stem.

     NOTE(review): what counts as a term is decided by self.word_pattern,
     which is defined outside this method -- presumably alphabetical strings
     of at least three characters; confirm against the pattern.

     @param text: A string.
     @return: The terms as returned by s_text_filter.suffix_stem.
     '''
     matcher = self.word_pattern
     candidates = re.findall(matcher, text.lower())

     # Every filter receives the previous filter's output and replaces it.
     for apply_filter in self.filter_func_list:
         candidates = apply_filter(candidates)

     stemmed = s_text_filter.suffix_stem(candidates)
     return stemmed
Пример #2
0
 def test_suffix_stem_dont_lose_words(self):
     '''
     Checks that suffix stemming returns as many terms as it was given.

     The input terms are produced by running s_text_filter.word_filter over
     self.long_text.
     '''
     words = s_text_filter.word_filter(self.long_text)
     stemmed_words = s_text_filter.suffix_stem(words)

     # Both lengths are measured after stemming has run.
     word_count = len(words)
     stemmed_count = len(stemmed_words)
     self.assertEqual(word_count, stemmed_count)
Пример #3
0
 def test_suffix_stem_complex_example4(self):
     '''
     Checks that stemming the single term 'inging' yields ['ing'].
     '''
     stemmed = s_text_filter.suffix_stem(['inging'])
     self.assertEqual(stemmed, ['ing'])
Пример #4
0
 def test_suffix_stem_complex_example3(self):
     '''
     Checks that stemming the single term 'eeesseseed' yields ['eeessesee'].
     '''
     stemmed = s_text_filter.suffix_stem(['eeesseseed'])
     self.assertEqual(stemmed, ['eeessesee'])
Пример #5
0
 def test_suffix_stem_complex_example1(self):
     '''
     Checks that stemming the single term 'ssesessses' yields ['ssesesss'].
     '''
     stemmed = s_text_filter.suffix_stem(['ssesessses'])
     self.assertEqual(stemmed, ['ssesesss'])
Пример #6
0
 def test_suffix_stem_simple_examples(self):
     '''
     Checks s_text_filter.suffix_stem against a list of short terms.

     The expected list is positional: each entry is the stemmed form of the
     input term at the same index, and some terms are expected to stem to
     the empty string.
     '''
     raw_terms = ['sses', 'ies', 'ss', 's', 'meed', 'ed', 'ing', 'at', 'bl', 'iz']
     expected = ['ss', 'i', 'ss', '', 'mee', '', '', 'ate', 'ble', 'ize']

     self.assertEqual(s_text_filter.suffix_stem(raw_terms), expected)