def test_string_too_long_warning(self):
    # NOTE(review): a method with this exact name is defined again later in
    # this file; under Python class semantics the later definition shadows
    # this one, so this version never runs. One of the two should be renamed.
    # This tests the maximum string length implemented in Python, which is used
    # to detect input errors.
    test_input = """
      ;; This is a typical error that should get detected for long strings.
      2014-01-01 note Assets:Temporary "Bla bla" "

      2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
      2014-02-02 open Liabilities:US:BankWithLongName:Credit-Card:Account02
      2014-02-03 open Liabilities:US:BankWithLongName:Credit-Card:Account03
      2014-02-04 open Liabilities:US:BankWithLongName:Credit-Card:Account04
      2014-02-05 open Liabilities:US:BankWithLongName:Credit-Card:Account05
      2014-02-06 open Liabilities:US:BankWithLongName:Credit-Card:Account06
      2014-02-07 open Liabilities:US:BankWithLongName:Credit-Card:Account07
      2014-02-08 open Liabilities:US:BankWithLongName:Credit-Card:Account08
      2014-02-09 open Liabilities:US:BankWithLongName:Credit-Card:Account09
      2014-02-10 open Liabilities:US:BankWithLongName:Credit-Card:Account10

      2014-02-02 note Assets:Temporary "Bla bla"
    """
    builder = lexer.LexBuilder()
    # Lower the maximum number of lines a string may span so the unterminated
    # quote above trips the "String too long" check quickly.
    builder.long_string_maxlines_default = 8
    list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    self.assertLessEqual(1, len(builder.errors))
    self.assertRegex(builder.errors[0].message, 'String too long')
def test_parser_lex_filename(self):
    # Verify the reference-counting contract of Parser.lex() when an explicit
    # filename object is passed: the parser holds references to the file and
    # the filename while iterating, and drops them once the parser and the
    # iterator are deleted. The exact getrefcount() values depend on the
    # statement order below, so do not rearrange or add bindings.
    # Do not use a string to avoid issues due to string interning.
    name = object()
    # sys.getrefcount() counts its own argument reference, so the minimum
    # observable count for a plain local is 2.
    self.assertEqual(sys.getrefcount(name), 2)
    f = io.BytesIO(b"")
    f.name = object()
    self.assertEqual(sys.getrefcount(f.name), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    iterator = parser.lex(f, filename=name)
    tokens = list(iterator)
    # The Parser object keeps references to the input file and to
    # the name while iterating over the tokens in the input file.
    self.assertEqual(sys.getrefcount(name), 3)
    self.assertEqual(sys.getrefcount(f), 3)
    # The name attribute of the file object is not referenced.
    self.assertEqual(sys.getrefcount(f.name), 2)
    del parser
    del iterator
    # Once the Parser object is gone we should have just the local
    # reference to the file object and two references to name.
    self.assertEqual(sys.getrefcount(name), 2)
    self.assertEqual(sys.getrefcount(f), 2)
def test_lex_lineno(self):
    """Check that the lineno= argument sets the reported token line number."""
    infile = io.BytesIO(b"1.0")
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    all_tokens = list(parser.lex(infile, lineno=42))
    # Only the line number of the very first token is of interest here.
    _token, first_lineno, _matched, _value = all_tokens[0]
    self.assertEqual(first_lineno, 42)
def test_very_long_string(self):
    """Lex a single very long (256 KiB) string literal."""
    contents = '1234567890ABCDEF' * (256 * 64)
    test_input = '"' + contents + '"'
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    # The STRING token's value is the literal without the enclosing quotes.
    self.assertEqual(tokens[0][3], contents)
    self.assertLessEqual(0, len(builder.errors))
def test_bytes_encoded_utf8(self):
    """UTF-8 encoded byte input decodes into the expected STRING token."""
    # NOTE(review): a method with this exact name is defined again later in
    # the file; that later definition shadows this one, so it never runs.
    encoded = self.test_utf8_string.encode('utf8')
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(encoded, builder))
    self.assertFalse(builder.errors)
    string_tokens = [tok for tok in tokens if tok[0] == 'STRING']
    self.assertEqual(self.expected_utf8_string, string_tokens[0][3])
def test_bytes_encoded_latin1_invalid(self):
    """Latin-1 bytes decoded with the default codec report a decode error."""
    # NOTE(review): a method with this exact name is defined again later in
    # the file; that later definition shadows this one, so it never runs.
    encoded = self.test_utf8_string.encode('latin1')
    builder = lexer.LexBuilder()
    list(lexer.lex_iter_string(encoded, builder))
    found = builder.errors
    self.assertTrue(found)
    self.assertRegex(found[0].message, "^UnicodeDecodeError: 'utf-8' codec ")
def test_string_newline_toolong(self):
    """A quoted string spanning too many lines yields an error token.

    Bug fix: the original assertions were ``assertTrue(tokens[0], 'error')``
    and ``assertTrue(tokens[1], 'EOL')``. The second argument of assertTrue
    is the failure *message*, not an expected value, so both assertions were
    vacuous -- any non-empty token tuple passed. They are replaced with real
    equality checks on the token type.
    """
    # Testing a string that busts the limits.
    line = 'a' * 127 + '\n'
    string = '"' + line * 128 + '"'
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(string, builder))
    # The over-long string is reported as a lexer error token, followed by
    # an end-of-line token.
    self.assertEqual('LEX_ERROR', tokens[0][0])
    self.assertEqual('EOL', tokens[1][0])
def _run_lexer_with_raising_builder_method(self, test_input, method_name,
                                           expected_tokens):
    """Patch one builder callback to raise, lex, and check tokens and errors.

    Replaces the builder method named ``method_name`` with a function that
    raises ValueError, lexes ``test_input``, and asserts that the scanner
    produced ``expected_tokens`` and recorded exactly one error.
    """
    builder = lexer.LexBuilder()

    def failing_method(string):
        raise ValueError

    setattr(builder, method_name, failing_method)
    actual_tokens = list(
        lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    self.assertEqual(expected_tokens, actual_tokens)
    self.assertEqual(1, len(builder.errors))
def test_lexer_builder_returns_none(self):
    """A builder callback that returns None is reported as a lexer error."""
    builder = lexer.LexBuilder()

    def return_none(string):
        return None

    builder.STRING = return_none
    tokens = list(lexer.lex_iter_string('"Something"', builder))
    self.assertEqual([('LEX_ERROR', 1, '"', None),
                      ('EOL', 1, '\x00', None)], tokens)
    self.assertEqual(1, len(builder.errors))
    self.assertRegex(builder.errors[0].message, "None result from lexer")
def test_bytes_encoded_utf8(self):
    """UTF-8 encoded byte input decodes cleanly into the expected string."""
    # NOTE(review): this method name duplicates an earlier definition in the
    # file; only this later definition is actually collected and run.
    raw = self.test_utf8_string.encode('utf8')
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(raw, builder))
    # The lexer outputs no errors.
    self.assertFalse(builder.errors)
    # Check that the lexer correctly parsed the UTF8 string.
    strings = [tok for tok in tokens if tok[0] == 'STRING']
    self.assertEqual(self.expected_utf8_string, strings[0][3])
def test_bytes_encoded_latin1(self):
    """With encoding='latin1', latin-1 bytes decode to the expected string."""
    raw = self.test_latin1_string.encode('latin1')
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(raw, builder, encoding='latin1'))
    # The lexer outputs no errors.
    self.assertFalse(builder.errors)
    # Check that the lexer correctly parsed the latin1 string.
    strings = [tok for tok in tokens if tok[0] == 'STRING']
    self.assertEqual(self.expected_latin1_string, strings[0][3])
def test_bytes_encoded_latin1_invalid(self):
    """Latin-1 bytes under the default decoder yield a wrong string, no errors."""
    # NOTE(review): this method name duplicates an earlier definition in the
    # file; only this later definition is actually collected and run. The two
    # bodies also disagree: the earlier one expects decode errors, this one
    # expects none -- they likely come from different revisions of the suite.
    raw = self.test_utf8_string.encode('latin1')
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(raw, builder))
    # The lexer outputs no errors.
    self.assertFalse(builder.errors)
    # The conversion does not reproduce the original text, but it must not
    # cause any other failure either.
    strings = [tok for tok in tokens if tok[0] == 'STRING']
    self.assertNotEqual(self.expected_utf8_string, strings[0][3])
def test_string_too_long_warning(self):
    # NOTE(review): this method name duplicates an earlier definition in the
    # file; only this later definition is actually collected and run.
    # An unterminated string followed by 64 blank lines should trip the
    # maximum-string-length check; the expected message pins the exact line
    # count (68), so the input's line structure must not be changed.
    test_input = """
      ;; This is a typical error that should get detected for long strings.
      2014-01-01 note Assets:Temporary "Bla bla" "
      2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
    """ + "\n" * 64 + """
      2014-02-02 note Assets:Temporary "Bla bla"
    """
    builder = lexer.LexBuilder()
    tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    self.assertLessEqual(1, len(builder.errors))
    self.assertEqual(builder.errors[0].message,
                     'ValueError: String too long (68 lines)')
def test_lexer_exception_CURRENCY(self):
    """An exception inside the CURRENCY callback surfaces as a LEX_ERROR."""
    test_input = """
      USD
    """
    builder = lexer.LexBuilder()
    # Replacing commodities with a plain dict forces an exception, because
    # the parser calls add() on it.
    builder.commodities = {}
    produced = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    self.assertEqual([('EOL', 2, '\n', None),
                      ('LEX_ERROR', 2, 'USD', None),
                      ('EOL', 3, '\n', None),
                      ('EOL', 3, '\x00', None)], produced)
    self.assertEqual(1, len(builder.errors))
def test_lexer_exception_ACCOUNT(self):
    """An account that fails the configured regexp surfaces as a LEX_ERROR."""
    test_input = """
      Invalid:Something
    """
    builder = lexer.LexBuilder()
    # This mirrors what the options processing does, and will cause a
    # ValueError exception to be raised in the lexer.
    builder.account_regexp = re.compile('(Assets|Liabilities|Equity)'
                                        '(:[A-Z][A-Za-z0-9-]*)*$')
    produced = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    self.assertEqual([('EOL', 2, '\n', None),
                      ('LEX_ERROR', 2, 'Invalid:Something', None),
                      ('EOL', 3, '\n', None),
                      ('EOL', 3, '\x00', None)], produced)
    self.assertEqual(1, len(builder.errors))
def test_lexer_exception_substring_with_quotes(self):
    """A double-quoted string containing single quotes lexes without errors."""
    test_input = """
      2016-07-15 query "hotels" "SELECT * WHERE account ~ 'Expenses:Accommodation'"
    """
    builder = lexer.LexBuilder()
    produced = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
    expected = [
        ('EOL', 2, '\n', None),
        ('DATE', 2, '2016-07-15', datetime.date(2016, 7, 15)),
        ('QUERY', 2, 'query', None),
        ('STRING', 2, '"', 'hotels'),
        ('STRING', 2, '"', "SELECT * WHERE account ~ 'Expenses:Accommodation'"),
        ('EOL', 3, '\n', None),
        ('EOL', 3, '\x00', None),
    ]
    self.assertEqual(expected, produced)
    self.assertEqual(0, len(builder.errors))
def test_parser_lex(self):
    # Verify the reference-counting contract of Parser.lex() when the
    # filename is taken from the file object's .name attribute. The exact
    # getrefcount() values depend on the statement order below, so do not
    # rearrange or introduce extra bindings.
    # Do not use a string to avoid issues due to string interning.
    name = object()
    # Note that passing name as an argument to sys.getrefcount()
    # counts as one reference, thus the minimum reference count
    # returned for any object is 2.
    self.assertEqual(sys.getrefcount(name), 2)
    f = io.BytesIO(b"")
    f.name = name
    # One more reference from the 'name' attribute.
    self.assertEqual(sys.getrefcount(name), 3)
    # Just one reference to the BytesIO object.
    self.assertEqual(sys.getrefcount(f), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    iterator = parser.lex(f)
    # The Parser object keeps references to the input file and to
    # the name while iterating over the tokens in the input file.
    self.assertEqual(sys.getrefcount(name), 4)
    self.assertEqual(sys.getrefcount(f), 3)
    # The iterator holds one reference to the parser.
    self.assertEqual(sys.getrefcount(parser), 3)
    tokens = list(iterator)
    # Just the EOL token.
    self.assertEqual(len(tokens), 1)
    # Once done scanning is completed the Parser object still has
    # references to the input file and to the name.
    self.assertEqual(sys.getrefcount(name), 4)
    self.assertEqual(sys.getrefcount(f), 3)
    del parser
    del iterator
    # Once the Parser object is gone we should have just the local
    # reference to the file object and two references to name.
    self.assertEqual(sys.getrefcount(name), 3)
    self.assertEqual(sys.getrefcount(f), 2)
    del f
    # With the file object gone there is one reference to name.
    self.assertEqual(sys.getrefcount(name), 2)
def test_parser_lex_multi(self):
    # Verify that reusing one Parser to lex two different files does not leak
    # references to either file or to their name attributes once the parser
    # is deleted. The getrefcount() values depend on the exact statement
    # order, so do not rearrange or introduce extra bindings.
    file1 = io.BytesIO(b"")
    file1.name = object()
    self.assertEqual(sys.getrefcount(file1.name), 2)
    file2 = io.BytesIO(b"")
    file2.name = object()
    self.assertEqual(sys.getrefcount(file2.name), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    tokens = list(parser.lex(file1))
    tokens = list(parser.lex(file2))
    del parser
    # Once the Parser object is gone we should have just the local
    # references to the file objects and one reference to each name.
    self.assertEqual(sys.getrefcount(file1), 2)
    self.assertEqual(sys.getrefcount(file1.name), 2)
    self.assertEqual(sys.getrefcount(file2), 2)
    self.assertEqual(sys.getrefcount(file2.name), 2)
def test_bytes_encoded_utf16_invalid(self):
    """UTF-16 bytes are not valid default-codec input and must report errors."""
    raw = self.test_utf8_string.encode('utf16')
    builder = lexer.LexBuilder()
    list(lexer.lex_iter_string(raw, builder))
    self.assertTrue(builder.errors)
def test_bytes_encoded_utf16(self):
    """Feeding UTF-16 bytes makes the lexer raise SystemError outright."""
    # NOTE(review): this pins the current behavior (a hard SystemError) as
    # opposed to the graceful error reporting checked by the *_invalid test.
    raw = self.test_utf8_string.encode('utf16')
    builder = lexer.LexBuilder()
    with self.assertRaises(SystemError):
        list(lexer.lex_iter_string(raw, builder))
def wrapped(self):
    """Lex the decorated function's docstring and delegate to it.

    The docstring of ``fun`` (a free variable from the enclosing decorator)
    is dedented, lexed, and the resulting tokens and accumulated errors are
    passed to ``fun`` along with self.
    """
    source = fun.__doc__
    builder = lexer.LexBuilder()
    produced = list(lexer.lex_iter_string(textwrap.dedent(source), builder))
    return fun(self, produced, builder.errors)