class TestSentenceTokenizer(unittest.TestCase):
    """Tests for SentenceTokenizer.tokenize sentence splitting."""

    def setUp(self):
        # Fresh tokenizer and a simple two-sentence fixture for each test.
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        """Sentences split on '. ' boundaries; terminal periods are kept."""
        # Idiomatic unittest assertion (self.assertEqual) instead of the
        # external assert_equal helper — gives richer failure diffs.
        self.assertEqual(
            self.tokenizer.tokenize(self.text),
            ["Beautiful is better than ugly.", "Simple is better than complex."],
        )

    def test_tokenize_with_multiple_punctuation(self):
        """Runs of terminal punctuation ('?!', '...', '!!!') stay attached."""
        text = "Hello world. How do you do?! My name's Steve..."
        self.assertEqual(
            self.tokenizer.tokenize(text),
            ["Hello world.", "How do you do?!", "My name's Steve..."],
        )
        text2 = 'OMG! I am soooo LOL!!!'
        tokens = self.tokenizer.tokenize(text2)
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens, ["OMG!", "I am soooo LOL!!!"])
class TestSentenceTokenizer(unittest.TestCase):
    """Tests for SentenceTokenizer.tokenize sentence splitting."""

    def setUp(self):
        # Fresh tokenizer and a simple two-sentence fixture for each test.
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        """Sentences split on '. ' boundaries; terminal periods are kept."""
        # Idiomatic unittest assertion (self.assertEqual) instead of the
        # external assert_equal helper — gives richer failure diffs.
        self.assertEqual(
            self.tokenizer.tokenize(self.text),
            ["Beautiful is better than ugly.", "Simple is better than complex."],
        )
class TestSentenceTokenizer(unittest.TestCase):
    """Tests for SentenceTokenizer.tokenize sentence splitting."""

    def setUp(self):
        # Fresh tokenizer and a simple two-sentence fixture for each test.
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        """Sentences split on '. ' boundaries; terminal periods are kept."""
        # Idiomatic unittest assertion (self.assertEqual) instead of the
        # external assert_equal helper — gives richer failure diffs.
        self.assertEqual(
            self.tokenizer.tokenize(self.text),
            ["Beautiful is better than ugly.", "Simple is better than complex."],
        )

    def test_tokenize_with_multiple_punctuation(self):
        """Runs of terminal punctuation ('?!', '...', '!!!') stay attached."""
        text = "Hello world. How do you do?! My name's Steve..."
        self.assertEqual(
            self.tokenizer.tokenize(text),
            ["Hello world.", "How do you do?!", "My name's Steve..."],
        )
        text2 = 'OMG! I am soooo LOL!!!'
        tokens = self.tokenizer.tokenize(text2)
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens, ["OMG!", "I am soooo LOL!!!"])