def test_01_tokenize(self):
    # Baseline case: a plain space-separated string is expected to come
    # back as one token per word, in the original order.
    tokens = tokenize('a b c')
    self.assertEqual(tokens, ['a', 'b', 'c'])
def test_05_tokenize_apostrophes(self):
    # Quotes wrapping a whole word ('wow') are expected to be dropped,
    # while an apostrophe inside a word (l'automne, Bob's) is expected
    # to stay part of the token.
    text = u" 'wow' l'automne Bob's "
    tokens = tokenize(text)
    expected = [u'wow', u"l'automne", u"Bob's"]
    self.assertEqual(tokens, expected)
def test_03_tokenize_unicode(self):
    # Accented / non-ASCII letters are expected to remain inside their
    # tokens, and the hyphen in "Saint-Saëns" is expected to act as a
    # separator between two tokens.
    text = u'Saint-Saëns Martinů tästä'
    tokens = tokenize(text)
    expected = [
        u'Saint',
        u'Saëns',
        u'Martinů',
        u'tästä',
    ]
    self.assertEqual(tokens, expected)
def test_04_tokenize_digits(self):
    # Runs of digits are expected to form their own tokens: "457" stands
    # alone, and "BWV3" is expected to split into letters and digits.
    text = u' KV 457 BWV3 '
    tokens = tokenize(text)
    expected = [u'KV', u'457', u'BWV', u'3']
    self.assertEqual(tokens, expected)
def test_02_tokenize_unstripped(self):
    # Leading and trailing whitespace around the input is expected to
    # produce no extra (empty) tokens.
    padded = ' a b c '
    tokens = tokenize(padded)
    self.assertEqual(tokens, ['a', 'b', 'c'])