def _getTerms(self, text):
    '''
    Extracts the terms (keywords) contained in the given text.

    The text is lower-cased and every match of self.word_pattern is
    collected. Each callable in self.filter_func_list is then applied in
    order, each one receiving the output of the previous one, and the
    final list is handed to s_text_filter.suffix_stem.

    NOTE(review): a term was previously described as an alphabetical
    string of at least three characters; that depends on
    self.word_pattern, which is defined elsewhere - confirm.

    @param text: A string.
    @return: The result of s_text_filter.suffix_stem on the filtered
             terms (described by the original author as a list of terms).
    '''
    pattern = self.word_pattern
    candidates = re.findall(pattern, text.lower())

    # Filters are chained: every filter sees the previous filter's output.
    for apply_filter in self.filter_func_list:
        candidates = apply_filter(candidates)

    stemmed = s_text_filter.suffix_stem(candidates)
    return stemmed
def test_suffix_stem_dont_lose_words(self):
    '''
    Checks that suffix_stem returns as many terms as it was given,
    using the word-filtered contents of self.long_text as input.
    '''
    filtered = s_text_filter.word_filter(self.long_text)
    stemmed = s_text_filter.suffix_stem(filtered)

    # Lengths are taken after stemming, exactly as in the comparison below.
    count_before = len(filtered)
    count_after = len(stemmed)
    self.assertEqual(count_before, count_after)
def test_suffix_stem_complex_example4(self):
    '''
    Checks that suffix_stem turns the single term 'inging' into 'ing'.
    '''
    words = ['inging']
    result = s_text_filter.suffix_stem(words)
    self.assertEqual(result, ['ing'])
def test_suffix_stem_complex_example3(self):
    '''
    Checks that suffix_stem turns the single term 'eeesseseed' into
    'eeessesee'.
    '''
    words = ['eeesseseed']
    result = s_text_filter.suffix_stem(words)
    self.assertEqual(result, ['eeessesee'])
def test_suffix_stem_complex_example1(self):
    '''
    Checks that suffix_stem turns the single term 'ssesessses' into
    'ssesesss'.
    '''
    words = ['ssesessses']
    result = s_text_filter.suffix_stem(words)
    self.assertEqual(result, ['ssesesss'])
def test_suffix_stem_simple_examples(self):
    '''
    Checks suffix_stem against a list of short inputs.

    The returned list is compared as a whole with the expected list, so
    both the values and their order must match.
    '''
    inputs = [
        'sses', 'ies', 'ss', 's', 'meed',
        'ed', 'ing', 'at', 'bl', 'iz',
    ]
    stemmed = s_text_filter.suffix_stem(inputs)

    # Expected outputs, position by position, for the inputs above.
    expected = [
        'ss', 'i', 'ss', '', 'mee',
        '', '', 'ate', 'ble', 'ize',
    ]
    self.assertEqual(stemmed, expected)