def parse_file(file, report_filename=None, report_firstline=1, encoding=None, **kw):
    """Parse a beancount input file and return Ledger with the list of
    transactions and tree of accounts.

    Args:
      file: A file object or a path to the file to be parsed, or ``'-'``
        to read from standard input.
      report_filename: Name to report in error messages instead of the
        real input file name.
      report_firstline: Line number to report for the first line of the
        input (default 1).
      encoding: Expected input encoding; only ``None`` or an alias of
        UTF-8 is accepted.
      kw: A dict of keywords to be applied to the C parser.
    Returns:
      A tuple of (
        list of entries parsed in the file,
        list of errors that were encountered during parsing, and
        a dict of the option values that were parsed from the file.)
    Raises:
      ValueError: If ``encoding`` names anything other than UTF-8.
    """
    # The C parser only understands UTF-8; reject any other encoding
    # up front rather than producing garbled tokens.
    if encoding is not None and codecs.lookup(encoding).name != 'utf-8':
        raise ValueError('Only UTF-8 encoded files are supported.')

    with contextlib.ExitStack() as stack:
        if file == '-':
            file = sys.stdin.buffer
        elif not isinstance(file, io.IOBase):
            # Checking for io.RawIOBase would be more precise here, but
            # io.BytesIO fails that check despite implementing the
            # readinto() method, so test against io.IOBase instead.
            file = stack.enter_context(open(file, 'rb'))
        builder = grammar.Builder()
        parser = _parser.Parser(builder)
        parser.parse(file, filename=report_filename, lineno=report_firstline, **kw)
        return builder.finalize()
def lex_iter(file, builder=None, encoding=None):
    """An iterator that yields all the tokens in the given file.

    Args:
      file: A string, the filename to run the lexer on, or a file object.
      builder: A builder of your choice. If not specified, a LexBuilder
        is used and discarded (along with its errors).
      encoding: A string (or None), the default encoding to use for
        strings.
    Yields:
      All the tokens in the input file as ``(token, lineno, text, value)``
      tuples where ``token`` is a string representing the token kind,
      ``lineno`` is the line number in the input file where the token was
      matched, ``text`` is a bytes object containing the exact text
      matched, and ``value`` is the semantic value of the token or None.
    """
    # Checking for io.RawIOBase would be more precise here, but
    # io.BytesIO fails that check despite implementing the readinto()
    # method, so test against io.IOBase instead.
    close_file = None
    if not isinstance(file, io.IOBase):
        # We opened this file ourselves, so we are responsible for
        # closing it once the generator is exhausted or discarded.
        close_file = file = open(file, 'rb')
    if builder is None:
        builder = LexBuilder()
    parser = _parser.Parser(builder)
    try:
        yield from parser.lex(file, encoding=encoding)
    finally:
        if close_file is not None:
            close_file.close()
def test_parser_parse(self):
    """Verify the Parser releases its references to the input file and its name."""
    # Do not use a string to avoid issues due to string interning.
    name = object()
    # Passing an object to sys.getrefcount() itself adds one reference,
    # so the minimum count for any object is 2.
    self.assertEqual(sys.getrefcount(name), 2)
    f = io.BytesIO(b"")
    f.name = name
    self.assertEqual(sys.getrefcount(f.name), 3)
    builder = grammar.Builder()
    parser = _parser.Parser(builder)
    parser.parse(f)
    # The Parser object keeps a reference to the input file.
    self.assertEqual(sys.getrefcount(f), 3)
    # There are references to the file name from the Parser object
    # and from the parsing results. In the case of an empty
    # input file from the options dictionary stored in the builder.
    self.assertEqual(sys.getrefcount(name), 5)
    # Dropping the builder options releases one of those references.
    builder.options = {}
    self.assertEqual(sys.getrefcount(name), 4)
    del parser
    # Once the Parser object is gone we should have just the local
    # reference to the file object and two references to name.
    self.assertEqual(sys.getrefcount(name), 3)
    self.assertEqual(sys.getrefcount(f), 2)
def parse_file(file, report_filename=None, report_firstline=1, **kw):
    """Parse a beancount input file and return Ledger with the list of
    transactions and tree of accounts.

    Args:
      file: A file object or a path to the file to be parsed, or ``'-'``
        to read from standard input.
      report_filename: Name to report in error messages instead of the
        real input file name.
      report_firstline: Line number to report for the first line of the
        input (default 1).
      kw: A dict of keywords to be applied to the C parser.
    Returns:
      A tuple of (
        list of entries parsed in the file,
        list of errors that were encountered during parsing, and
        a dict of the option values that were parsed from the file.)
    """
    close_file = None
    if file == '-':
        # Do not close sys.stdin.buffer: it is owned by the interpreter
        # and the caller may still need it.
        file = sys.stdin.buffer
    # It would be more appropriate here to check for io.RawIOBase but
    # that does not work for io.BytesIO despite it implementing the
    # readinto() method.
    elif not isinstance(file, io.IOBase):
        close_file = file = open(file, 'rb')
    try:
        builder = grammar.Builder()
        parser = _parser.Parser(builder)
        parser.parse(file, filename=report_filename, lineno=report_firstline, **kw)
        return builder.finalize()
    finally:
        # Close the file even if parsing raised, but only if we opened
        # it ourselves.
        if close_file is not None:
            close_file.close()
def test_parser_lex_filename(self):
    """Verify reference counting when an explicit filename is passed to lex()."""
    # Do not use a string to avoid issues due to string interning.
    name = object()
    # Passing an object to sys.getrefcount() itself adds one reference,
    # so the minimum count for any object is 2.
    self.assertEqual(sys.getrefcount(name), 2)
    f = io.BytesIO(b"")
    f.name = object()
    self.assertEqual(sys.getrefcount(f.name), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    iterator = parser.lex(f, filename=name)
    tokens = list(iterator)
    # The Parser object keeps references to the input file and to
    # the name while iterating over the tokens in the input file.
    self.assertEqual(sys.getrefcount(name), 3)
    self.assertEqual(sys.getrefcount(f), 3)
    # The name attribute of the file object is not referenced.
    self.assertEqual(sys.getrefcount(f.name), 2)
    del parser
    del iterator
    # Once the Parser object is gone we should have just the local
    # reference to the file object and two references to name.
    self.assertEqual(sys.getrefcount(name), 2)
    self.assertEqual(sys.getrefcount(f), 2)
def test_lex_lineno(self):
    """The lineno argument offsets the line numbers reported for tokens."""
    stream = io.BytesIO(b"1.0")
    lex_builder = lexer.LexBuilder()
    scanner = _parser.Parser(lex_builder)
    all_tokens = list(scanner.lex(stream, lineno=42))
    _token, reported_lineno, _matched, _value = all_tokens[0]
    self.assertEqual(reported_lineno, 42)
def test_parser_lex(self):
    """Verify the reference counts held by the Parser during and after lexing."""
    # Do not use a string to avoid issues due to string interning.
    name = object()
    # Note that passing name as an argument to sys.getrefcount()
    # counts as one reference, thus the minimum reference count
    # returned for any object is 2.
    self.assertEqual(sys.getrefcount(name), 2)
    f = io.BytesIO(b"")
    f.name = name
    # One more reference from the 'name' attribute.
    self.assertEqual(sys.getrefcount(name), 3)
    # Just one reference to the BytesIO object.
    self.assertEqual(sys.getrefcount(f), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    iterator = parser.lex(f)
    # The Parser object keeps references to the input file and to
    # the name while iterating over the tokens in the input file.
    self.assertEqual(sys.getrefcount(name), 4)
    self.assertEqual(sys.getrefcount(f), 3)
    # The iterator holds one reference to the parser.
    self.assertEqual(sys.getrefcount(parser), 3)
    tokens = list(iterator)
    # Just the EOL token.
    self.assertEqual(len(tokens), 1)
    # Once done scanning is completed the Parser object still has
    # references to the input file and to the name.
    self.assertEqual(sys.getrefcount(name), 4)
    self.assertEqual(sys.getrefcount(f), 3)
    del parser
    del iterator
    # Once the Parser object is gone we should have just the local
    # reference to the file object and two references to name.
    self.assertEqual(sys.getrefcount(name), 3)
    self.assertEqual(sys.getrefcount(f), 2)
    del f
    # With the file object gone there is one reference to name.
    self.assertEqual(sys.getrefcount(name), 2)
def lex_iter(file, builder=None, encoding=None):
    """An iterator that yields all the tokens in the given file.

    Args:
      file: A string, the filename to run the lexer on, or a file object.
      builder: A builder of your choice. If not specified, a LexBuilder
        is used and discarded (along with its errors).
      encoding: A string (or None), the default encoding to use for
        strings.
    Yields:
      All the tokens in the input file as ``(token, lineno, text, value)``
      tuples where ``token`` is a string representing the token kind,
      ``lineno`` is the line number in the input file where the token was
      matched, ``text`` is the exact text matched, and ``value`` is the
      semantic value of the token or None.
    """
    # It would be more appropriate here to check for io.RawIOBase but
    # that does not work for io.BytesIO despite it implementing the
    # readinto() method.
    close_file = None
    if not isinstance(file, io.IOBase):
        # We opened this file ourselves, so we are responsible for
        # closing it once the generator is exhausted or discarded.
        close_file = file = open(file, 'rb')
    if builder is None:
        builder = LexBuilder()
    parser = _parser.Parser(builder)
    try:
        yield from parser.lex(file, encoding=encoding)
    finally:
        if close_file is not None:
            close_file.close()
def test_parser_lex_multi(self):
    """Verify no references leak when one Parser lexes multiple files in turn."""
    file1 = io.BytesIO(b"")
    # Do not use a string to avoid issues due to string interning.
    file1.name = object()
    # Passing an object to sys.getrefcount() itself adds one reference,
    # so the minimum count for any object is 2.
    self.assertEqual(sys.getrefcount(file1.name), 2)
    file2 = io.BytesIO(b"")
    file2.name = object()
    self.assertEqual(sys.getrefcount(file2.name), 2)
    builder = lexer.LexBuilder()
    parser = _parser.Parser(builder)
    # Lex two files back to back with the same Parser instance.
    tokens = list(parser.lex(file1))
    tokens = list(parser.lex(file2))
    del parser
    # Once the Parser object is gone we should have just the local
    # references to the file objects and one references to the names.
    self.assertEqual(sys.getrefcount(file1), 2)
    self.assertEqual(sys.getrefcount(file1.name), 2)
    self.assertEqual(sys.getrefcount(file2), 2)
    self.assertEqual(sys.getrefcount(file2.name), 2)
def test_parse_lineno(self):
    """The lineno argument offsets the line numbers recorded in entry metadata."""
    source = io.BytesIO(b"2020-07-30 open Assets:Test")
    ledger_builder = grammar.Builder()
    file_parser = _parser.Parser(ledger_builder)
    file_parser.parse(source, lineno=42)
    first_entry = ledger_builder.entries[0]
    self.assertEqual(first_entry.meta['lineno'], 42)