def _split(self, text, removed_words=()):
    """Break unicode text into a tuple of unicode terms.

    The candidate terms come from ``text2words(text)``. Terms that are a
    single character long (or empty) and terms contained in
    ``removed_words`` are left out; the rest keep their original order.

    @param text: unicode text to split
    @param removed_words: words to remove from the split result
    @return: tuple of the terms that were kept
    """
    kept_terms = []
    for term in text2words(text):
        # Skip one-character terms and explicitly excluded words.
        if len(term) <= 1 or term in removed_words:
            continue
        kept_terms.append(term)
    return tuple(kept_terms)
def test_text2words(self):
    # Each pair is (input text, expected tuple of words). The cases cover
    # plain text, text wrapped in or split by tags, tags with attributes
    # or embedded newlines, and input made of tags only.
    expectations = (
        ('x', ('x',)),
        ('x Y', ('x', 'Y')),
        ('foo bar', ('foo', 'bar')),
        ('<p>foo bar</p>', ('foo', 'bar')),
        ('foo<sub>bar</sub>', ('foo', 'bar')),
        ('<p class="shiny">text</p>', ('text',)),
        ('<p \n>text</\np>', ('text',)),
        ('<br/> <br />', ()),
    )
    for source_text, expected_words in expectations:
        self.assertEqual(text2words(source_text), expected_words)
def test_text2words(self):
    # Inputs and their expected word tuples are kept in parallel sequences
    # and checked pairwise, in the listed order.
    inputs = (
        'x',
        'x Y',
        'foo bar',
        '<p>foo bar</p>',
        'foo<sub>bar</sub>',
        '<p class="shiny">text</p>',
        '<p \n>text</\np>',
        '<br/> <br />',
    )
    outputs = (
        ('x', ),
        ('x', 'Y'),
        ('foo', 'bar'),
        ('foo', 'bar'),
        ('foo', 'bar'),
        ('text', ),
        ('text', ),
        (),  # markup only: no words expected
    )
    for given, wanted in zip(inputs, outputs):
        self.assertEqual(text2words(given), wanted)
def _split(self, text, removed_words=()):
    """Turn unicode text into a tuple of unicode terms.

    ``text2words(text)`` supplies the raw terms; only those longer than one
    character and not present in ``removed_words`` are returned, in the
    order they were produced.

    @param text: unicode text to split
    @param removed_words: words to remove from the split result
    @return: tuple of the accepted terms
    """
    def _is_kept(term):
        # Length is checked first, then membership in the exclusion list.
        return len(term) > 1 and term not in removed_words

    return tuple(term for term in text2words(text) if _is_kept(term))