def _split_text(text):
    '''Tokenize the given text, one line at a time.

    The text is cut at every newline character and each resulting line
    is passed to `split_string` together with the delimiters found in
    `_resources.NWS_DELIMITERS`.

    This is a generator: it yields `(line_num, token)` pairs, where
    `line_num` is the 1-indexed number of the line the token came from.
    '''
    # Counting from 1 gives the 1-indexed line number directly.
    for number, row in enumerate(text.split('\n'), start=1):
        pieces = split_string(row, *_resources.NWS_DELIMITERS)
        for piece in pieces:
            yield (number, piece)
def test_split_string(self):
    '''Check `prosl_utils.split_string` against a table of cases.

    Every table entry pairs an input string with a list of
    `(delimiters, options, expected)` triples. The delimiters are passed
    as extra positional arguments, the options as keyword arguments, and
    the result is compared with `expected` via `assertEqual`.
    '''
    keep_ws = {'split_whitespace': False}
    table = [
        # Input without any whitespace or delimiter characters.
        ('abcdefg', [
            ((), {}, ['abcdefg']),
            ((' ',), {}, ['abcdefg']),
            ((), keep_ws, ['abcdefg']),
        ]),
        # NOTE(review): the last case expects an empty middle token even
        # though the literal shows a single space -- confirm the spacing
        # inside this string literal is intact.
        ('abc defg', [
            ((), {}, ['abc', 'defg']),
            ((' ',), {}, ['abc', 'defg']),
            ((' ',), keep_ws, ['abc', '', 'defg']),
        ]),
        # Input mixing whitespace with single- and multi-character
        # delimiters.
        ('a b`c!d--e><f\\g ', [
            ((), {}, ['a', 'b`c!d--e><f\\g']),
            (('`', '--', '\\'), {}, ['a', 'b', 'c!d', 'e><f', 'g']),
            (('><',), keep_ws, ['a b`c!d--e', 'f\\g ']),
        ]),
        # Empty input is expected to give an empty result in every mode.
        ('', [
            ((), {}, []),
            ((' ',), {}, []),
            ((), keep_ws, []),
            (('a', 'b'), {}, []),
        ]),
    ]
    for s, cases in table:
        for delimiters, options, expected in cases:
            actual = prosl_utils.split_string(s, *delimiters, **options)
            self.assertEqual(actual, expected)