def read_file(arguments):
    """Read the file(s) in arguments and return the file contents."""
    try:
        with open(arguments.filename) as c_file:
            return c_file.read(), arguments.filename
    except IOError:
        descr = "could not read file: '{}'"
        error_collector.add(CompilerError(descr.format(arguments.filename)))

def write_asm(asm_source, asm_filename):
    """Save the given assembly source to disk at asm_filename.

    asm_source (str) - Full assembly source code.
    asm_filename (str) - Filename to which to save the generated assembly.
    """
    try:
        with open(asm_filename, "w") as s_file:
            s_file.write(asm_source)
    except IOError:
        descr = "could not write output file '{}'"
        error_collector.add(CompilerError(descr.format(asm_filename)))

def parse(tokens_to_parse):
    """Parse the given tokens into an AST.

    Also, as the entry point for the myparser module, this function is
    responsible for setting the tokens global variable.
    """
    p.best_error = None
    p.tokens = tokens_to_parse

    # If parse_root succeeds, its result is returned directly. If it raises,
    # log_error suppresses the exception and execution falls through to
    # report the best error recorded during parsing.
    with log_error():
        return parse_root(0)[0]

    error_collector.add(p.best_error)
    return None

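
# A hedged, illustrative sketch (not part of the original module) showing how
# the helpers above might be chained into a front-end pass: read a source
# file, tokenize it, and parse it into an AST. The `_Args` class and the
# `_example_frontend` name are hypothetical stand-ins; only read_file,
# tokenize, and parse are taken from the surrounding code. The back-end
# stages that would eventually feed write_asm are not shown in this section.
def _example_frontend(path):
    class _Args:
        # Minimal stand-in for the command-line arguments object; read_file
        # above only needs a `filename` attribute.
        pass

    args = _Args()
    args.filename = path

    result = read_file(args)
    if result is None:
        # read_file already logged a CompilerError in error_collector.
        return None

    code, filename = result
    tokens = tokenize(code, filename)
    return parse(tokens)
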
def parse_abstract_declarator(index):
    """Parse an abstract declarator into a decl_nodes.Node.

    This function saves a CompilerError if the parsed entity is a
    declarator, rather than an abstract declarator.
    """
    root, index = parse_declarator(index)

    # Walk down the declarator chain to its terminal Identifier node.
    node = root
    while not isinstance(node, decl_nodes.Identifier):
        node = node.child

    if node.identifier:
        # Add to the error_collector because this is more of a semantic
        # error than a parsing error.
        err = "expected abstract declarator, but identifier name was provided"
        error_collector.add(CompilerError(err, node.identifier.r))

    return root, index

def tokenize(code, filename):
    """Convert given code into a flat list of Tokens.

    code - String of source code to tokenize.
    filename - Name of the file being tokenized.
    return - List of Token objects.
    """
    # Store tokens as they are generated
    tokens = []

    lines = split_to_tagged_lines(code, filename)
    join_extended_lines(lines)

    in_comment = False
    for line in lines:
        try:
            line_tokens, in_comment = tokenize_line(line, in_comment)
            tokens += line_tokens
        except CompilerError as e:
            error_collector.add(e)

    return tokens

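
# A small, hedged illustration of the comment handling above: the in_comment
# flag returned by tokenize_line carries a /* ... */ comment body across
# physical lines, so the comment text below contributes no tokens. The file
# name "example.c" and the function name are made up for the example.
def _example_tokenize_comment():
    source = "int x;  /* a comment\n    spanning two lines */  int y;\n"
    # Expected result: tokens for `int x ;` followed by `int y ;`.
    return tokenize(source, "example.c")
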
def parse_decl_specifiers(index, spec_qual=False):
    """Parse a declaration specifier.

    Examples: int, const char

    The returned `specs` list may contain two types of elements: tokens and
    Node objects. A Node object will be included for a struct declaration,
    and a token for all other declaration specifiers.
    """
    type_specs = set(ctypes.simple_types.keys())
    type_specs |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    type_quals = {token_kinds.const_kw}

    storage_specs = {token_kinds.auto_kw, token_kinds.static_kw,
                     token_kinds.extern_kw, token_kinds.typedef_kw}

    specs = []

    # The type specifier class, either SIMPLE, STRUCT, or TYPEDEF, represents
    # the allowed kinds of type specifiers. Once the first specifier is
    # parsed, the type specifier class is set. If the type specifier class is
    # set to STRUCT or TYPEDEF, no further type specifiers are permitted in
    # the type specifier list. If it is set to SIMPLE, more simple type
    # specifiers are permitted. This is important for typedef parsing.
    SIMPLE = 1
    STRUCT = 2
    TYPEDEF = 3
    type_spec_class = None

    while True:
        # Parse a struct specifier if there is one.
        if not type_spec_class and token_is(index, token_kinds.struct_kw):
            node, index = parse_struct(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Match a typedef name.
        elif (not type_spec_class
              and token_is(index, token_kinds.identifier)
              and p.symbols.is_typedef(p.tokens[index])):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = TYPEDEF

        elif (type_spec_class in {None, SIMPLE}
              and token_in(index, type_specs)):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = SIMPLE

        elif token_in(index, type_quals):
            specs.append(p.tokens[index])
            index += 1

        elif token_in(index, storage_specs):
            if not spec_qual:
                specs.append(p.tokens[index])
            else:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            index += 1

        else:
            break

    if specs:
        return specs, index
    else:
        raise_error("expected declaration specifier", index, ParserError.AT)

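
# A hedged, hypothetical driver for parse_decl_specifiers: tokenize a
# declaration, point the parser's global token list at the result (as parse()
# does above), and read the leading specifiers. For this input, `specs`
# should hold the `static`, `const`, and `unsigned` tokens in source order,
# and `index` should point at the identifier `x`. Nothing in this function is
# part of the original module.
def _example_decl_specifiers():
    p.tokens = tokenize("static const unsigned x;", "example.c")
    p.best_error = None
    specs, index = parse_decl_specifiers(0)
    return specs, index
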
def tokenize_line(line, in_comment):
    """Tokenize the given single line.

    line - List of Tagged objects.
    in_comment - Whether the first character in this line is part of a
    C-style comment body.
    return - List of Token objects, and boolean indicating whether the next
    character is part of a comment body.
    """
    tokens = []

    # line[block_start:block_end] is the section of the line currently being
    # considered for conversion into a token; this string will be called the
    # 'block'. Everything before the block has already been tokenized, and
    # everything after has not yet been examined.
    block_start = 0
    block_end = 0

    while block_end < len(line):
        symbol_kind = match_symbol_kind_at(line, block_end)
        next_ = match_symbol_kind_at(line, block_end + 1)

        if in_comment:
            # If next characters end the comment...
            if symbol_kind == token_kinds.star and next_ == token_kinds.slash:
                in_comment = False
                block_start = block_end + 2
                block_end = block_start
            # Otherwise, just skip one character.
            else:
                block_start = block_end + 1
                block_end = block_start

        # If next characters start a comment, process previous block and set
        # in_comment to True.
        elif symbol_kind == token_kinds.slash and next_ == token_kinds.star:
            add_block(line[block_start:block_end], tokens)
            in_comment = True

        # If next two characters are //, we skip the rest of this line.
        elif symbol_kind == token_kinds.slash and next_ == token_kinds.slash:
            break

        # Skip spaces and process previous block.
        elif line[block_end].c.isspace():
            add_block(line[block_start:block_end], tokens)
            block_start = block_end + 1
            block_end = block_start

        # If next character is a quote, we read the whole string as a token.
        elif symbol_kind in {token_kinds.dquote, token_kinds.squote}:
            if symbol_kind == token_kinds.dquote:
                quote_str = '"'
                kind = token_kinds.string
                add_null = True
            else:
                quote_str = "'"
                kind = token_kinds.char_string
                add_null = False

            chars, end = read_string(line, block_end + 1, quote_str, add_null)
            rep = block_to_str(line[block_end:end + 1])
            r = Range(line[block_end].p, line[end].p)

            if kind == token_kinds.char_string and len(chars) == 0:
                err = "empty character constant"
                error_collector.add(CompilerError(err, r))
            elif kind == token_kinds.char_string and len(chars) > 1:
                err = "multiple characters in character constant"
                error_collector.add(CompilerError(err, r))

            tokens.append(Token(kind, chars, rep, r=r))

            block_start = end + 1
            block_end = block_start

        # If next character is another symbol, add previous block and then
        # add the symbol.
        elif symbol_kind:
            symbol_start_index = block_end
            symbol_end_index = block_end + len(symbol_kind.text_repr) - 1

            r = Range(line[symbol_start_index].p, line[symbol_end_index].p)
            symbol_token = Token(symbol_kind, r=r)

            add_block(line[block_start:block_end], tokens)
            tokens.append(symbol_token)

            block_start = block_end + len(symbol_kind.text_repr)
            block_end = block_start

        # Include another character in the block.
        else:
            block_end += 1

    # Flush out anything that is left in the block to the output.
    add_block(line[block_start:block_end], tokens)

    return tokens, in_comment

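
# A hedged walk-through of the block logic above for the line `x = 10;`: the
# scanner extends the block over `x`, flushes it via add_block at the space,
# emits the `=` symbol token directly, extends the block over `10`, flushes
# it when it reaches `;`, and finally emits the `;` token. As a result,
# add_block only ever receives identifier/keyword/number text, with symbols
# and whitespace already stripped out.
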
# report_err bare-yields and catches CompilerError around the yield, which
# matches the contextlib.contextmanager pattern, so it can be used as a
# `with report_err():` block.
@contextmanager
def report_err():
    """Catch and add any errors to the error collector."""
    try:
        yield
    except CompilerError as e:
        error_collector.add(e)

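
# Usage sketch for report_err (relying on the contextlib.contextmanager
# decorator noted above): a CompilerError raised inside the `with` body is
# recorded in the error collector instead of propagating to the caller. The
# function name and the error message are made up for the example.
def _example_report_err():
    with report_err():
        raise CompilerError("example error: logged, not raised")
    # Execution resumes here; the error has been added to error_collector.
    return error_collector
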