def function_parameter_declarations(tokens, symbol_table):
    """Parse '(' parameter_type_list? ')' and build a FunctionType.

    The location of the opening parenthesis is used both for the fresh
    CType passed as the first FunctionType argument and for the
    FunctionType itself.
    """
    location = loc(error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS))
    if peek(tokens, TOKENS.RIGHT_PARENTHESIS) == TOKENS.RIGHT_PARENTHESIS:
        parameter_types_decl = ()  # nothing between the parentheses
    else:
        parameter_types_decl = tuple(parameter_type_list(tokens, symbol_table))
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and FunctionType(CType(location), parameter_types_decl, location)
def compound_statement(tokens, symbol_table):
    """Lazily parse '{' statement* '}'.

    A new scope is pushed right after the opening brace; each statement is
    yielded as it is parsed and the scope is popped once the closing brace
    has been consumed.
    """
    error_if_not_value(tokens, TOKENS.LEFT_BRACE)
    symbol_table = push(symbol_table)
    parse_statement = symbol_table['__ statement __']
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        yield parse_statement(tokens, symbol_table)
    if error_if_not_value(tokens, TOKENS.RIGHT_BRACE):
        pop(symbol_table)
def while_stmnt(tokens, symbol_table):
    """Parse 'while' '(' expression ')' statement and yield a WhileStatement."""
    location = loc(error_if_not_value(tokens, TOKENS.WHILE))
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    condition = symbol_table['__ expression __'](tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    loop_body = symbol_table['__ statement __'](tokens, symbol_table)
    yield WhileStatement(condition, loop_body, location)
def range_designated_expr(start, tokens, symbol_table):
    """Parse the '...' constant_expression ']' tail of a range designator.

    `start` is the already parsed lower designation; the result pairs it
    with the upper designation and the designated expression that follows.
    """
    constant_expression = symbol_table['__ constant_expression __']
    ellipsis_token = error_if_not_value(tokens, TOKENS.ELLIPSIS)
    end = ellipsis_token and NumericalDesignation(
        exp(constant_expression(tokens, symbol_table))
    )
    closing = error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
    return closing and RangeDesignatedExpression(
        start, end, _expr_or_designated_expr(tokens, symbol_table), loc(end)
    )
def function_parameter_declarations(tokens, symbol_table):
    """Parse '(' parameter_type_list? ')' and build a FunctionType.

    The location of the opening parenthesis is used both for the fresh
    CType passed as the first FunctionType argument and for the
    FunctionType itself.
    """
    location = loc(error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS))
    if peek(tokens, TOKENS.RIGHT_PARENTHESIS) == TOKENS.RIGHT_PARENTHESIS:
        parameter_types_decl = ()  # nothing between the parentheses
    else:
        parameter_types_decl = tuple(parameter_type_list(tokens, symbol_table))
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and FunctionType(CType(location), parameter_types_decl, location)
def range_designated_expr(start, tokens, symbol_table):
    """Parse the '...' constant_expression ']' tail of a range designator.

    `start` is the already parsed lower designation; the result pairs it
    with the upper designation and the designated expression that follows.
    """
    constant_expression = symbol_table['__ constant_expression __']
    ellipsis_token = error_if_not_value(tokens, TOKENS.ELLIPSIS)
    end = ellipsis_token and NumericalDesignation(
        exp(constant_expression(tokens, symbol_table))
    )
    closing = error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
    return closing and RangeDesignatedExpression(
        start, end, _expr_or_designated_expr(tokens, symbol_table), loc(end)
    )
def exp(tokens, symbol_table):
    """Lazily parse 'while' '(' expression ')' ';' and yield the expression.

    Being a generator, nothing is consumed from `tokens` until the result
    is iterated.
    """
    parse_expression = symbol_table['__ expression __']
    error_if_not_value(tokens, TOKENS.WHILE)
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    condition = parse_expression(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    error_if_not_value(tokens, TOKENS.SEMICOLON)
    yield condition
def offset_designated_expr(tokens, symbol_table):
    """Parse '[' positive_integral (... positive_integral)? ']' and its value.

    Hands over to range_designated_expr when an ellipsis follows the first
    constant expression; otherwise builds an OffsetDesignatedExpression.
    """
    opening = error_if_not_value(tokens, TOKENS.LEFT_BRACKET)
    constant_expression = opening and symbol_table['__ constant_expression __']
    designation = NumericalDesignation(
        exp(constant_expression(tokens, symbol_table))
    )
    if peek_or_terminal(tokens) == TOKENS.ELLIPSIS:
        return range_designated_expr(designation, tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
    return closing and OffsetDesignatedExpression(
        designation, _expr_or_designated_expr(tokens, symbol_table)
    )
def switch(tokens, symbol_table):
    """Parse 'switch' '(' expression ')' statement and yield a SwitchStatement.

    A new scope is pushed for the body; it records the switch expression
    and a fresh SymbolTable under '__ SWITCH STATEMENT __' for the case and
    default parsers to fill in.
    """
    def _pop_symbol_table(scope):
        # Chained after the body so the scope is only popped once the whole
        # (lazily parsed) body has been consumed.
        pop(scope)
        yield EmptyStatement()

    parse_expression = symbol_table['__ expression __']
    parse_statement = symbol_table['__ statement __']

    location = loc(consume(tokens))
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    expr = parse_expression(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)

    symbol_table = push(symbol_table)
    # Table used to track the cases (duplicates are expected to be rejected by it).
    symbol_table['__ SWITCH STATEMENT __'] = SymbolTable()
    symbol_table['__ SWITCH EXPRESSION __'] = expr

    body = chain(parse_statement(tokens, symbol_table), _pop_symbol_table(symbol_table))
    yield SwitchStatement(expr, body, location)
def offset_designated_expr(tokens, symbol_table):
    """Parse '[' positive_integral (... positive_integral)? ']' and its value.

    Hands over to range_designated_expr when an ellipsis follows the first
    constant expression; otherwise builds an OffsetDesignatedExpression.
    """
    opening = error_if_not_value(tokens, TOKENS.LEFT_BRACKET)
    constant_expression = opening and symbol_table['__ constant_expression __']
    designation = NumericalDesignation(
        exp(constant_expression(tokens, symbol_table))
    )
    if peek_or_terminal(tokens) == TOKENS.ELLIPSIS:
        return range_designated_expr(designation, tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
    return closing and OffsetDesignatedExpression(
        designation, _expr_or_designated_expr(tokens, symbol_table)
    )
def default(tokens, symbol_table):
    """Parse 'default' ':' statement and register it with the enclosing switch.

    The statement is stored under the 'default' key of the table found at
    '__ SWITCH STATEMENT __' and then yielded from that table.
    """
    location = loc(consume(tokens))
    error_if_not_value(tokens, TOKENS.COLON)
    switch_table = symbol_table['__ SWITCH STATEMENT __']
    body = symbol_table['__ statement __'](tokens, symbol_table)
    switch_table['default'] = DefaultStatement(body, location)
    yield switch_table['default']
def char_literal(char_stream, location):
    """Tokenize a single-quoted character literal into a CHAR token.

    An immediately closing quote produces an empty CHAR; a backslash is
    resolved through `escape_characters`, falling back to the escaped
    character itself when it has no entry.
    """
    opening_quote = consume(char_stream)
    char = opening_quote and consume(char_stream)
    if char == TOKENS.SINGLE_QUOTE:  # empty char ...
        return CHAR('', location)
    if char == '\\':
        # Look at the escaped character first, then consume it so it can
        # double as the fallback value.
        escaped = peek(char_stream)
        char = escape_characters.get(escaped, consume(char_stream))
    closing_quote = error_if_not_value(char_stream, TOKENS.SINGLE_QUOTE)
    return closing_quote and CHAR(char, location)
def _values(char_stream):
    """Yield the characters up to the closing double quote, resolving escapes.

    The closing double quote itself is consumed (and required) once the
    loop ends.
    """
    while peek(char_stream, TOKENS.DOUBLE_QUOTE) != TOKENS.DOUBLE_QUOTE:
        value = consume(char_stream)
        if value == '\\':
            # Peek the escaped character for the lookup, then consume it so
            # it is the fallback when no mapping exists.
            escaped = peek(char_stream)
            value = escape_characters.get(escaped, consume(char_stream))
        yield value
    error_if_not_value(char_stream, TOKENS.DOUBLE_QUOTE)
def function_call(tokens, symbol_table, primary_exp):
    """Parse the argument list of a call applied to `primary_exp`.

    `c_type(c_type(primary_exp))` must be a FunctionType; the call
    expression is typed by applying `c_type(func_type)` to the location of
    the consumed opening token.
    """
    location = loc(consume(tokens))
    func_type = error_if_not_type(c_type(c_type(primary_exp)), FunctionType)
    call_arguments = tuple(get_args(tokens, symbol_table, func_type))
    expression_argument_list = ArgumentExpressionList(call_arguments, location)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and FunctionCallExpression(
        primary_exp, expression_argument_list, c_type(func_type)(location), location
    )
def parse_struct_members(tokens, symbol_table):
    """Parse the members between '{' and '}' into an OrderedDict keyed by name.

    Each member line is a specifier_qualifier_list followed by comma
    separated declarators and a ';'. Raises ValueError on a duplicate
    member name.
    """
    parse_declarator = symbol_table['__ declarator __']
    _ = loc(consume(tokens))  # opening token; its location is not used further
    members = OrderedDict()
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        type_spec = specifier_qualifier_list(tokens, symbol_table)
        while peek(tokens, TOKENS.SEMICOLON) != TOKENS.SEMICOLON:
            decl = parse_declarator(tokens, symbol_table)
            set_core_type(decl, type_spec)
            if name(decl) in members:
                message = '{l} Duplicate struct member {name} previous at {at}'.format(
                    l=loc(decl), name=name(decl), at=loc(members[name(decl)])
                )
                raise ValueError(message)
            members[name(decl)] = decl
            # declarators are comma separated until the terminating ';'
            if peek_or_terminal(tokens) != TOKENS.SEMICOLON:
                error_if_not_value(tokens, TOKENS.COMMA)
        error_if_not_value(tokens, TOKENS.SEMICOLON)
    error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return members
def char_literal(char_stream, location):
    """Tokenize a single-quoted character literal into a CHAR token.

    An immediately closing quote produces an empty CHAR; a backslash is
    resolved through `escape_characters`, falling back to the escaped
    character itself when it has no entry.
    """
    opening_quote = consume(char_stream)
    char = opening_quote and consume(char_stream)
    if char == TOKENS.SINGLE_QUOTE:  # empty char ...
        return CHAR('', location)
    if char == '\\':
        # Look at the escaped character first, then consume it so it can
        # double as the fallback value.
        escaped = peek(char_stream)
        char = escape_characters.get(escaped, consume(char_stream))
    closing_quote = error_if_not_value(char_stream, TOKENS.SINGLE_QUOTE)
    return closing_quote and CHAR(char, location)
def pointer(tokens, symbol_table):
    """Parse one or more '*' (each with optional qualifiers) into PointerTypes.

    The pointed-to type comes from pointer_or_ctype; the parsed
    const/volatile flags are set on the resulting PointerType.
    """
    location = loc(error_if_not_value(tokens, TOKENS.STAR))
    qualifiers = symbol_table['__ type_qualifiers __'](tokens, symbol_table, (False, False))
    is_const, is_volatile = qualifiers
    pointer_type = PointerType(pointer_or_ctype(tokens, symbol_table), location=location)
    pointer_type.const = is_const
    pointer_type.volatile = is_volatile
    return pointer_type
def subscript_oper(tokens, symbol_table, primary_exp):
    """Parse a '[' expression ']' subscript applied to `primary_exp`.

    `primary_exp` must have a PointerType and the subscript an
    IntegralType; the result is typed with the pointed-to type
    instantiated at the bracket's location.
    """
    pointer_type = error_if_not_type(c_type(primary_exp), PointerType)
    location = pointer_type and loc(consume(tokens))
    expr = symbol_table['__ expression __'](tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
    if closing:
        error_if_not_type(c_type(expr), IntegralType)  # subscript must be integral
    element_type = c_type(c_type(primary_exp))(location)
    return ArraySubscriptingExpression(primary_exp, expr, element_type, location)
def function_call(tokens, symbol_table, primary_exp):
    """Parse the argument list of a call applied to `primary_exp`.

    `c_type(c_type(primary_exp))` must be a FunctionType; the call
    expression is typed by applying `c_type(func_type)` to the location of
    the consumed opening token.
    """
    location = loc(consume(tokens))
    func_type = error_if_not_type(c_type(c_type(primary_exp)), FunctionType)
    call_arguments = tuple(get_args(tokens, symbol_table, func_type))
    expression_argument_list = ArgumentExpressionList(call_arguments, location)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and FunctionCallExpression(
        primary_exp, expression_argument_list, c_type(func_type)(location), location
    )
def statement(tokens, symbol_table):
    """
        Parse a single statement and return an iterable producing it.

        : declaration
        | labeled_statement
        | compound_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        | expression_statement
        | expression ';'
        | ;
    """
    def _expression_statement(token_seq):
        # expression ';' wrapped in a one-item iterable like the other rules.
        expr = symbol_table['__ expression __'](token_seq, symbol_table)
        error_if_not_value(token_seq, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    # A token with a registered rule always wins.
    if peek_or_terminal(tokens) in rules(statement):
        return rules(statement)[peek(tokens)](tokens, symbol_table)

    # Checking for a type name is comparatively expensive, so it comes second.
    if is_declaration(tokens, symbol_table):
        return declaration(tokens, symbol_table)

    # Both labels and expressions may start with an identifier: consume it,
    # look one token ahead, then push it back in front of the stream.
    if isinstance(peek_or_terminal(tokens), IDENTIFIER):
        label_name = consume(tokens)
        if peek_or_terminal(tokens) == TOKENS.COLON:
            labeled_statement = symbol_table['__ labeled_statement __']
            return labeled_statement(chain((label_name,), consume_all(tokens)), symbol_table)
        # TODO: figure out a way to do this without the dangerous chain!
        return _expression_statement(chain((label_name,), consume_all(tokens)))

    if peek_or_terminal(tokens) is not terminal:
        return _expression_statement(tokens)

    raise ValueError('{l} No rule could be found to create statement, got {got}'.format(
        l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
    ))
def for_stmnt(tokens, symbol_table):
    """Parse 'for' '(' init? ';' condition? ';' update? ')' statement.

    Missing init/update clauses become EmptyExpression and a missing
    condition becomes TrueExpression, all located at the 'for' keyword.
    """
    location = loc(error_if_not_value(tokens, TOKENS.FOR))
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    statement = symbol_table['__ statement __']
    expression = symbol_table['__ expression __']

    def _clause(default, terminator):
        # Keep `default` when the clause is empty, then require `terminator`.
        clause = default
        if peek_or_terminal(tokens) != terminator:
            clause = expression(tokens, symbol_table)
        error_if_not_value(tokens, terminator)
        return clause

    init_exp = _clause(EmptyExpression(VoidType(location), location), TOKENS.SEMICOLON)
    conditional_exp = _clause(TrueExpression(location), TOKENS.SEMICOLON)
    update_exp = _clause(EmptyExpression(VoidType(location), location), TOKENS.RIGHT_PARENTHESIS)

    loop_body = statement(tokens, symbol_table)
    yield ForStatement(init_exp, conditional_exp, update_exp, loop_body, location)
def jump_statement(tokens, symbol_table):
    """
        Parse a jump statement through its registered rule, then the ';'.

        : 'goto' IDENTIFIER ';'
        | 'continue' ';'
        | 'break' ';'
        | 'return' ';'
        | 'return' expression ';'
    """
    parse_jump = rules(jump_statement)[peek(tokens)]
    stmnt = parse_jump(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.SEMICOLON)
    yield stmnt
def do_while_stmnt(tokens, symbol_table):
    """Parse 'do' statement 'while' '(' expression ')' ';'.

    The condition is handed to DoWhileStatement as a lazy generator, so
    its tokens are only consumed when that generator is iterated.
    """
    location = loc(error_if_not_value(tokens, TOKENS.DO))

    def _condition(token_seq, table):
        # 'while' '(' expression ')' ';' -- evaluated lazily.
        parse_expression = table['__ expression __']
        error_if_not_value(token_seq, TOKENS.WHILE)
        error_if_not_value(token_seq, TOKENS.LEFT_PARENTHESIS)
        expr = parse_expression(token_seq, table)
        error_if_not_value(token_seq, TOKENS.RIGHT_PARENTHESIS)
        error_if_not_value(token_seq, TOKENS.SEMICOLON)
        yield expr

    condition = _condition(tokens, symbol_table)
    loop_body = symbol_table['__ statement __'](tokens, symbol_table)
    yield DoWhileStatement(condition, loop_body, location)
def parse_struct_members(tokens, symbol_table):
    """Parse the members between '{' and '}' into an OrderedDict keyed by name.

    Each member line is a specifier_qualifier_list followed by comma
    separated declarators and a ';'. Raises ValueError on a duplicate
    member name.
    """
    parse_declarator = symbol_table['__ declarator __']
    _ = loc(consume(tokens))  # opening token; its location is not used further
    members = OrderedDict()
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        type_spec = specifier_qualifier_list(tokens, symbol_table)
        while peek(tokens, TOKENS.SEMICOLON) != TOKENS.SEMICOLON:
            decl = parse_declarator(tokens, symbol_table)
            set_core_type(decl, type_spec)
            if name(decl) in members:
                message = '{l} Duplicate struct member {name} previous at {at}'.format(
                    l=loc(decl), name=name(decl), at=loc(members[name(decl)])
                )
                raise ValueError(message)
            members[name(decl)] = decl
            # declarators are comma separated until the terminating ';'
            if peek_or_terminal(tokens) != TOKENS.SEMICOLON:
                error_if_not_value(tokens, TOKENS.COMMA)
        error_if_not_value(tokens, TOKENS.SEMICOLON)
    error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return members
def conditional_expression(tokens, symbol_table):
    """Parse logical_or_expression ('?' assignment_expression ':' conditional_expression)?

    Returns the plain logical-or expression when no ternary operator
    follows. Otherwise the condition must have a NumericType and the
    result type is whichever branch type the other can be safely coerced
    to, instantiated at the location of the '?'.

    Raises:
        ValueError: if neither branch type can be safely coerced to the other.
    """
    exp = logical_or_expression(tokens, symbol_table)
    if peek(tokens, '') not in rules(conditional_expression):
        return exp

    location = loc(error_if_not_value(tokens, TOKENS.QUESTION))
    _ = error_if_not_type(c_type(exp), NumericType)
    if_exp_is_true = assignment_expression(tokens, symbol_table)
    _ = error_if_not_value(tokens, TOKENS.COLON)
    if_exp_is_false = conditional_expression(tokens, symbol_table)

    ctype_1, ctype_2 = c_type(if_exp_is_true), c_type(if_exp_is_false)
    if safe_type_coercion(ctype_1, ctype_2):
        ctype = ctype_1(location)
    elif safe_type_coercion(ctype_2, ctype_1):
        ctype = ctype_2(location)
    else:
        # The template references {l}; it must be supplied, otherwise
        # str.format raises KeyError and hides the real diagnostic.
        raise ValueError(
            '{l} Could not determine type for ternary-expr, giving the types {t1} and {t2}'.format(
                l=location, t1=ctype_1, t2=ctype_2
            )
        )
    return TernaryExpression(exp, if_exp_is_true, if_exp_is_false, ctype, location)
def body(self, location, arguments=(), macros=()):
    """Yield INTEGER '1' or '0' depending on whether the named macro is in `macros`.

    The name may be given either as `( NAME )` or as a bare NAME, where
    NAME is an IDENTIFIER or KEYWORD token. Raises ValueError for anything
    else.
    """
    name_types = (IDENTIFIER, KEYWORD)
    if peek_or_terminal(arguments) == TOKENS.LEFT_PARENTHESIS and consume(arguments):
        macro_name = error_if_not_type(consume(arguments, EOFLocation), name_types)
        error_if_not_value(arguments, TOKENS.RIGHT_PARENTHESIS)
    elif isinstance(peek_or_terminal(arguments), name_types):
        macro_name = consume(arguments)
    else:
        message = '{l} Expected either LEFT_PARENTHESIS or IDENTIFIER for function macro defined got {g}'
        raise ValueError(message.format(l=location or EOLLocation, g=peek(arguments, '')))
    is_defined = int(macro_name in macros)
    yield INTEGER(str(is_defined), loc(macro_name))
def conditional_expression(tokens, symbol_table):
    """Parse logical_or_expression ('?' assignment_expression ':' conditional_expression)?

    Returns the plain logical-or expression when no ternary operator
    follows. Otherwise the condition must have a NumericType and the
    result type is whichever branch type the other can be safely coerced
    to, instantiated at the location of the '?'.

    Raises:
        ValueError: if neither branch type can be safely coerced to the other.
    """
    exp = logical_or_expression(tokens, symbol_table)
    if peek(tokens, '') not in rules(conditional_expression):
        return exp

    location = loc(error_if_not_value(tokens, TOKENS.QUESTION))
    _ = error_if_not_type(c_type(exp), NumericType)
    if_exp_is_true = assignment_expression(tokens, symbol_table)
    _ = error_if_not_value(tokens, TOKENS.COLON)
    if_exp_is_false = conditional_expression(tokens, symbol_table)

    ctype_1, ctype_2 = c_type(if_exp_is_true), c_type(if_exp_is_false)
    if safe_type_coercion(ctype_1, ctype_2):
        ctype = ctype_1(location)
    elif safe_type_coercion(ctype_2, ctype_1):
        ctype = ctype_2(location)
    else:
        # The template references {l}; it must be supplied, otherwise
        # str.format raises KeyError and hides the real diagnostic.
        raise ValueError(
            '{l} Could not determine type for ternary-expr, giving the types {t1} and {t2}'.format(
                l=location, t1=ctype_1, t2=ctype_2
            )
        )
    return TernaryExpression(exp, if_exp_is_true, if_exp_is_false, ctype, location)
def dimensions(tokens):
    """Yield a (size, location) pair for each consecutive '[' size? ']'.

    `size` is None for an empty pair of brackets. Raises ValueError when
    the constant size is negative.
    NOTE(review): `symbol_table` is not a parameter here -- presumably this
    is a nested helper closing over the enclosing parser's table; confirm.
    """
    while peek(tokens) == TOKENS.LEFT_BRACKET:
        location = loc(consume(tokens))
        if peek(tokens) == TOKENS.RIGHT_BRACKET:
            size = None  # unsized dimension
        else:
            const_exp = constant_expression(tokens, symbol_table)
            error_if_not_type(c_type(const_exp), IntegralType)
            if exp(const_exp) < 0:
                message = '{l} array size is negative'.format(l=loc(const_exp))
                raise ValueError(message)
            size = exp(const_exp)
        error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
        yield size, location
def dimensions(tokens):
    """Yield a (size, location) pair for each consecutive '[' size? ']'.

    `size` is None for an empty pair of brackets. Raises ValueError when
    the constant size is negative.
    NOTE(review): `symbol_table` is not a parameter here -- presumably this
    is a nested helper closing over the enclosing parser's table; confirm.
    """
    while peek(tokens) == TOKENS.LEFT_BRACKET:
        location = loc(consume(tokens))
        if peek(tokens) == TOKENS.RIGHT_BRACKET:
            size = None  # unsized dimension
        else:
            const_exp = constant_expression(tokens, symbol_table)
            error_if_not_type(c_type(const_exp), IntegralType)
            if exp(const_exp) < 0:
                message = '{l} array size is negative'.format(l=loc(const_exp))
                raise ValueError(message)
            size = exp(const_exp)
        error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
        yield size, location
def statement(tokens, symbol_table):
    """
        Parse a single statement and return an iterable producing it.

        : declaration
        | labeled_statement
        | compound_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        | expression_statement
        | expression ';'
        | ;
    """
    def _expression_statement(token_seq):
        # expression ';' wrapped in a one-item iterable like the other rules.
        expr = symbol_table['__ expression __'](token_seq, symbol_table)
        error_if_not_value(token_seq, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    # A token with a registered rule always wins.
    if peek_or_terminal(tokens) in rules(statement):
        return rules(statement)[peek(tokens)](tokens, symbol_table)

    # Checking for a type name is comparatively expensive, so it comes second.
    if is_declaration(tokens, symbol_table):
        return declaration(tokens, symbol_table)

    # Both labels and expressions may start with an identifier: consume it,
    # look one token ahead, then push it back in front of the stream.
    if isinstance(peek_or_terminal(tokens), IDENTIFIER):
        label_name = consume(tokens)
        if peek_or_terminal(tokens) == TOKENS.COLON:
            labeled_statement = symbol_table['__ labeled_statement __']
            return labeled_statement(chain((label_name,), consume_all(tokens)), symbol_table)
        # TODO: figure out a way to do this without the dangerous chain!
        return _expression_statement(chain((label_name,), consume_all(tokens)))

    if peek_or_terminal(tokens) is not terminal:
        return _expression_statement(tokens)

    raise ValueError('{l} No rule could be found to create statement, got {got}'.format(
        l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
    ))
def declarations(tokens, symbol_table):
    """Lazily parse one declaration (or function definition) and yield its parts.

    Grammar: storage_class_specifier? type_name? init_declarator_list
    (';' or compound_statement).

    Yields an EmptyDeclaration for a bare ';', otherwise each declarator;
    typedef names are re-bound in the symbol table to their c_type, and a
    declarator followed by '{' is yielded as a FunctionDefinition parsed in
    a freshly pushed scope.
    """
    parse_storage_class = symbol_table['__ storage_class_specifier __']
    parse_specifiers = symbol_table['__ specifier_qualifier_list __']
    parse_statement = symbol_table['__ statement __']

    storage_class = parse_storage_class(tokens, symbol_table)
    base_type = parse_specifiers(tokens, symbol_table)

    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif peek_or_terminal(tokens) is terminal:
        message = '{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'
        raise_error(message.format(l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))
    else:
        declarators = init_declarator_list(
            tokens, symbol_table, base_type=base_type, storage_class=storage_class
        )
        for dec in declarators:
            dec.storage_class = storage_class
            if isinstance(storage_class, TypeDef):
                # init_declarator_list registered the symbol as a declaration;
                # replace that entry by the declared type.
                symbol_table.pop(name(dec))
                symbol_table[name(dec)] = c_type(dec)
            elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE \
                    and not error_if_not_type(c_type(dec), FunctionType):
                symbol_table = push(symbol_table)
                # add the non variable-list parameters to the new scope ...
                parameters = (
                    (name(param), param)
                    for param in c_type(dec)
                    if not isinstance(c_type(param), VAListType)
                )
                bookkeeping = (
                    ('__ RETURN_TYPE __', c_type(c_type(dec))),
                    ('__ LABELS __', SymbolTable()),
                )
                symbol_table.update(chain(parameters, bookkeeping))
                yield FunctionDefinition(dec, next(parse_statement(tokens, symbol_table)))
                pop(symbol_table)
                expecting_token = ''  # a function body is not followed by ';'
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
        if expecting_token:
            error_if_not_value(tokens, expecting_token)
def arguments(token_seq, parameters, l=LocationNotSet):
    """Lazily split a function-macro call's tokens into one group per argument.

    Each yielded item is an iterable of tokens; an omitted argument is
    represented by a one-element tuple holding an IGNORE token. The closing
    right parenthesis is consumed (and required) at the very end.
    """
    def _not_at_right_parenthesis(seq):
        return peek(seq, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS

    parameters = iter(parameters)

    # No arguments at all while at least one parameter is expected: emit a
    # single empty argument.
    if peek(token_seq, TOKENS.RIGHT_PARENTHESIS) == TOKENS.RIGHT_PARENTHESIS \
            and consume(parameters, terminal) is not terminal:
        yield (IGNORE(location=(loc(peek(token_seq)) or l)),)

    while _not_at_right_parenthesis(token_seq):
        if isinstance(peek_or_terminal(parameters), FunctionMacroVariadicArgument):
            if peek(token_seq) == TOKENS.RIGHT_PARENTHESIS:
                # at the end of the arguments emit an empty one ...
                arg_tokens = (IGNORE(location=loc(peek(token_seq))),)
            else:
                # a variadic parameter takes everything, commas included ...
                arg_tokens = argument(token_seq, takewhile=_not_at_right_parenthesis)
        elif peek_or_terminal(token_seq) == TOKENS.COMMA:
            # a comma straight away means the argument is empty ...
            arg_tokens = (IGNORE(location=loc(peek(token_seq))),)
        else:
            arg_tokens = argument(token_seq)
        consume(parameters, None)
        yield arg_tokens

        if peek_or_terminal(token_seq) != TOKENS.RIGHT_PARENTHESIS:
            # not at the end, so a comma is expected ...
            location = loc(error_if_not_value(token_seq, TOKENS.COMMA, l))
            if peek_or_terminal(token_seq) == TOKENS.RIGHT_PARENTHESIS \
                    and isinstance(peek_or_terminal(parameters), FunctionMacroArgument):
                # trailing comma with a parameter still pending: empty argument
                consume(parameters)
                yield (IGNORE(location=location),)

    if isinstance(consume(parameters, None), FunctionMacroVariadicArgument):
        yield (IGNORE(location=(loc(peek(token_seq)) or l)),)
    error_if_not_value(token_seq, TOKENS.RIGHT_PARENTHESIS, location=l)
def for_stmnt(tokens, symbol_table):
    """Parse 'for' '(' init? ';' condition? ';' update? ')' statement.

    Missing init/update clauses become EmptyExpression and a missing
    condition becomes TrueExpression, all located at the 'for' keyword.
    """
    location = loc(error_if_not_value(tokens, TOKENS.FOR))
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    statement = symbol_table['__ statement __']
    expression = symbol_table['__ expression __']

    def _clause(default, terminator):
        # Keep `default` when the clause is empty, then require `terminator`.
        clause = default
        if peek_or_terminal(tokens) != terminator:
            clause = expression(tokens, symbol_table)
        error_if_not_value(tokens, terminator)
        return clause

    init_exp = _clause(EmptyExpression(VoidType(location), location), TOKENS.SEMICOLON)
    conditional_exp = _clause(TrueExpression(location), TOKENS.SEMICOLON)
    update_exp = _clause(EmptyExpression(VoidType(location), location), TOKENS.RIGHT_PARENTHESIS)

    loop_body = statement(tokens, symbol_table)
    yield ForStatement(init_exp, conditional_exp, update_exp, loop_body, location)
def _func_macro_arguments(line):
    """Yield the parameters of a function-like macro definition.

    Stops before the right parenthesis or right after a variadic
    parameter ('...' or 'name...'). Every parameter is also stored in a
    local SymbolTable, which is relied on to reject duplicate names.
    """
    seen = SymbolTable()
    while peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS:
        if peek(line) == TOKENS.ELLIPSIS:
            # anonymous variadic parameter
            variadic_name = IDENTIFIER('__VA_ARGS__', loc(consume(line)))
            arg = FunctionMacroVariadicArgument(variadic_name)
        else:
            name_token = error_if_not_type(consume(line, EOFLocation), (IDENTIFIER, KEYWORD))
            arg = FunctionMacroArgument(name_token)
            if peek_or_terminal(line) == TOKENS.ELLIPSIS:
                # named variadic parameter
                arg = FunctionMacroVariadicArgument(IDENTIFIER(arg, loc(consume(line))))
        seen[arg] = arg  # check for duplicate argument name
        yield arg
        if isinstance(arg, FunctionMacroVariadicArgument):
            break  # nothing may follow a variadic parameter
        # a comma is expected unless the parameter list ends here
        if peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS:
            error_if_not_value(line, TOKENS.COMMA, loc(arg))
def do_while_stmnt(tokens, symbol_table):
    """Parse 'do' statement 'while' '(' expression ')' ';'.

    The condition is handed to DoWhileStatement as a lazy generator, so
    its tokens are only consumed when that generator is iterated.
    """
    location = loc(error_if_not_value(tokens, TOKENS.DO))

    def _condition(token_seq, table):
        # 'while' '(' expression ')' ';' -- evaluated lazily.
        parse_expression = table['__ expression __']
        error_if_not_value(token_seq, TOKENS.WHILE)
        error_if_not_value(token_seq, TOKENS.LEFT_PARENTHESIS)
        expr = parse_expression(token_seq, table)
        error_if_not_value(token_seq, TOKENS.RIGHT_PARENTHESIS)
        error_if_not_value(token_seq, TOKENS.SEMICOLON)
        yield expr

    condition = _condition(tokens, symbol_table)
    loop_body = symbol_table['__ statement __'](tokens, symbol_table)
    yield DoWhileStatement(condition, loop_body, location)
def argument(
        token_seq,
        takewhile=lambda token_seq: peek(token_seq, TOKENS.COMMA) not in {TOKENS.COMMA, TOKENS.RIGHT_PARENTHESIS}):
    """Yield the tokens of one macro argument while `takewhile(token_seq)` holds.

    By default an argument ends at a comma or a right parenthesis. A
    nested parenthesised group is emitted whole, including its enclosing
    parentheses and any commas inside it.
    """
    def _inside_group(seq):
        return peek(seq, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS

    while takewhile(token_seq):
        if peek_or_terminal(token_seq) == TOKENS.LEFT_PARENTHESIS:
            yield consume(token_seq)  # the opening parenthesis
            # recurse for everything up to the matching right parenthesis
            for nested_token in argument(token_seq, takewhile=_inside_group):
                yield nested_token
            yield error_if_not_value(token_seq, TOKENS.RIGHT_PARENTHESIS)
        else:
            yield consume(token_seq)
def case(tokens, symbol_table):
    """Parse 'case' constant_expression ':' statement inside a switch.

    The case expression must be integral and have the same c_type as the
    switch expression (ValueError otherwise). The CaseStatement is stored
    in the switch's case table under the expression's value and yielded
    from that table.
    """
    location = loc(loc(consume(tokens)))
    parse_constant_expression = symbol_table['__ constant_expression __']
    parse_statement = symbol_table['__ statement __']

    expr = parse_constant_expression(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.COLON)
    error_if_not_type(c_type(expr), IntegralType)

    switch_cases = symbol_table['__ SWITCH STATEMENT __']
    switch_exp = symbol_table['__ SWITCH EXPRESSION __']
    if c_type(expr) != c_type(switch_exp):
        message = '{l} case exp type {g} differs from switch exp type {e}'.format(
            l=location, g=c_type(expr), e=c_type(switch_exp)
        )
        raise ValueError(message)

    case_statement = CaseStatement(expr, parse_statement(tokens, symbol_table), location)
    switch_cases[exp(expr)] = case_statement
    yield switch_cases[exp(expr)]
def parse_enum_members(tokens, symbol_table):
    """Parse the enumerators between '{' and '}' into an OrderedDict of Definitions.

    An enumerator without an explicit '= constant_expression' takes the
    value of the previous enumerator plus one (zero for the first one), as
    C requires. Each enumerator's value is also registered in
    `symbol_table` under its identifier.

    Returns:
        OrderedDict mapping each identifier to its Definition.
    """
    constant_expression = symbol_table['__ constant_expression __']
    location, members, current_value = loc(consume(tokens)), OrderedDict(), 0
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        ident = error_if_not_type(consume(tokens, ''), IDENTIFIER)
        # implicit value, used unless an explicit one follows
        value = ConstantExpression(current_value, IntegerType(location), location)
        if peek_or_terminal(tokens) == TOKENS.EQUAL and consume(tokens):
            value = constant_expression(tokens, symbol_table)
            _ = error_if_not_type(c_type(value), IntegerType)
        # The next implicit enumerator continues from this one; without
        # the + 1 every implicit enumerator repeated its predecessor.
        current_value = error_if_not_type(exp(value), (int, long)) + 1
        symbol_table[ident] = value  # Add value to symbol_table
        members[ident] = Definition(ident, c_type(value), value, location)
        _ = peek_or_terminal(tokens) == TOKENS.COMMA and consume(tokens)
    _ = error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return members
def _func_macro_arguments(line):
    """Yield the parameters of a function-like macro definition.

    Stops before the right parenthesis or right after a variadic
    parameter ('...' or 'name...'). Every parameter is also stored in a
    local SymbolTable, which is relied on to reject duplicate names.
    """
    seen = SymbolTable()
    while peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS:
        if peek(line) == TOKENS.ELLIPSIS:
            # anonymous variadic parameter
            variadic_name = IDENTIFIER('__VA_ARGS__', loc(consume(line)))
            arg = FunctionMacroVariadicArgument(variadic_name)
        else:
            name_token = error_if_not_type(consume(line, EOFLocation), (IDENTIFIER, KEYWORD))
            arg = FunctionMacroArgument(name_token)
            if peek_or_terminal(line) == TOKENS.ELLIPSIS:
                # named variadic parameter
                arg = FunctionMacroVariadicArgument(IDENTIFIER(arg, loc(consume(line))))
        seen[arg] = arg  # check for duplicate argument name
        yield arg
        if isinstance(arg, FunctionMacroVariadicArgument):
            break  # nothing may follow a variadic parameter
        # a comma is expected unless the parameter list ends here
        if peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS:
            error_if_not_value(line, TOKENS.COMMA, loc(arg))
def declarations(tokens, symbol_table):
    """Lazily parse one declaration (or function definition) and yield its parts.

    Grammar: storage_class_specifier? type_name? init_declarator_list
    (';' or compound_statement).

    Yields an EmptyDeclaration for a bare ';', otherwise each declarator;
    typedef names are re-bound in the symbol table to their c_type, and a
    declarator followed by '{' is yielded as a FunctionDefinition parsed in
    a freshly pushed scope.
    """
    parse_storage_class = symbol_table['__ storage_class_specifier __']
    parse_specifiers = symbol_table['__ specifier_qualifier_list __']
    parse_statement = symbol_table['__ statement __']

    storage_class = parse_storage_class(tokens, symbol_table)
    base_type = parse_specifiers(tokens, symbol_table)

    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif peek_or_terminal(tokens) is terminal:
        message = '{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'
        raise_error(message.format(l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))
    else:
        declarators = init_declarator_list(
            tokens, symbol_table, base_type=base_type, storage_class=storage_class
        )
        for dec in declarators:
            dec.storage_class = storage_class
            if isinstance(storage_class, TypeDef):
                # init_declarator_list registered the symbol as a declaration;
                # replace that entry by the declared type.
                symbol_table.pop(name(dec))
                symbol_table[name(dec)] = c_type(dec)
            elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE \
                    and not error_if_not_type(c_type(dec), FunctionType):
                symbol_table = push(symbol_table)
                # add the non variable-list parameters to the new scope ...
                parameters = (
                    (name(param), param)
                    for param in c_type(dec)
                    if not isinstance(c_type(param), VAListType)
                )
                bookkeeping = (
                    ('__ RETURN_TYPE __', c_type(c_type(dec))),
                    ('__ LABELS __', SymbolTable()),
                )
                symbol_table.update(chain(parameters, bookkeeping))
                yield FunctionDefinition(dec, next(parse_statement(tokens, symbol_table)))
                pop(symbol_table)
                expecting_token = ''  # a function body is not followed by ';'
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
        if expecting_token:
            error_if_not_value(tokens, expecting_token)
def main():
    """Command-line entry point of the compiler driver.

    Parses the options and runs exactly one mode, checked in this order:
    preprocess (-E), assembly listing (-S), compile only (-c), archive
    (-a), shared (not implemented), and finally the default of building
    the instructions and either running them on the VM (--vm) or pickling
    them to the output file ('a.out.p' when none is given).
    """
    cli = argparse.ArgumentParser(description='C Compiler ...')
    cli.add_argument('files', nargs='+')
    cli.add_argument('-O', '--optimize', default=0, nargs=1, help='Optimization Level')

    # Plain on/off switches: (option strings, default, help text).
    switches = (
        (('-E', '--preprocess'), False, 'Output preprocessor and stop.'),
        (('-S', '--assembly'), False, 'Output instructions readable text.'),
        (('-c', '--compile'), False, 'Compile, but not link.'),
        (('-static', '--static'), True, 'Static Linking (default).'),
        (('-shared', '--shared'), False, 'Shared Linking.'),
        (('--vm',), False, 'Execute code on Virtual Machine.'),
        (('-a', '--archive'), False, 'Archive files into a single output'),
    )
    for option_strings, default, help_text in switches:
        cli.add_argument(*option_strings, action='store_true', default=default, help=help_text)

    # Repeatable options collecting their values into a list.
    list_options = (
        (('-o', '--output'), 'Name of output, file(s) default is the original'),
        (('-I', '--Include'), 'Directories to be used by the preprocessor when searching for files.'),
        (('-L', '--Libraries'), 'Directories to be used by the linker when searching for libraries'),
        (('-l', '--libraries'), 'Name of libraries to be used when searching for symbols.'),
    )
    for option_strings, help_text in list_options:
        cli.add_argument(*option_strings, default=[], nargs='?', action='append', help=help_text)

    args = cli.parse_args()

    # Extend the search paths with the standard ones and the input directories.
    args.Include += std_include_dirs + list(set(imap(os.path.dirname, args.files)))
    args.Libraries += std_libraries_dirs
    args.libraries += std_libraries

    # Every existing <library dir>/<library name> combination (lazy).
    candidates = starmap(os.path.join, product(args.Libraries, args.libraries))
    libraries = ifilter(os.path.isfile, candidates)

    if args.optimize and args.optimize[0] == '1':
        optimizer = lambda instrs: optimize(instrs, first_level_optimization)
    else:
        optimizer = lambda instrs: optimize(instrs, zero_level_optimization)

    if args.preprocess:
        exhaust(imap(sys.stdout.write, preprocess(args.files, args.Include)))
    elif args.assembly:
        listing = assembly(args.files, args.Include, libraries, optimizer)
        exhaust(imap(sys.stdout.write, listing))
    elif args.compile:
        if args.output:
            # explicit outputs must match the number of inputs ...
            count_ok = error_if_not_value(repeat(len(args.output), 1), len(args.files))
            output_files = count_ok and args.output
        else:
            stems = imap(lambda f: os.path.splitext(f)[0], args.files)
            output_files = imap('{0}.o.p'.format, stems)
        for input_file, output_file in izip(args.files, output_files):
            symbol_table = linker.library(symbols(input_file, args.Include, optimizer))
            with open(output_file, 'wb') as file_obj:
                pickle.dump(symbol_table, file_obj)
    elif args.archive:
        symbol_table = SymbolTable()
        # archives require a single output which has no default ...
        error_if_not_value(repeat(len(args.output), 1), 1)
        for input_file in args.files:  # fold all files into a single symbol_table ...
            symbol_table = linker.library(
                symbols(input_file, args.Include, optimizer), symbol_table
            )
        with open(args.output[0], 'wb') as file_obj:
            pickle.dump(symbol_table, file_obj)
    elif args.shared:
        raise NotImplementedError
    else:
        # default: compile and statically link ...
        instructions = instrs(args.files, args.Include, libraries, optimizer)
        if args.vm:
            vm.start(instructions)
        else:
            if args.output:
                error_if_not_value(repeat(len(args.output), 1), 1, Location('cc.py', '', ''))
            file_output = 'a.out.p'  # default when no usable output name is given
            if args.output and args.output[0]:
                file_output = args.output[0]
            with open(file_output, 'wb') as file_obj:
                pickle.dump(tuple(instructions), file_obj)
def nested_declarator(tokens, symbol_table):
    """Parse '(' declarator ')' and return the inner declarator.

    The inner part is parsed by the rule stored under '__ declarator __'
    in symbol_table.
    """
    opening = error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    # the rule only runs when the '(' check produced a truthy value ...
    declarator = opening and symbol_table['__ declarator __'](tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and declarator
def _empty_statement(tokens, *_):
    """Yield one EmptyStatement for a lone ';'.

    The statement's location is taken from whatever the ';' check returns;
    any extra positional arguments are ignored.
    """
    semicolon = error_if_not_value(tokens, TOKENS.SEMICOLON)
    location = loc(semicolon)
    yield EmptyStatement(location)
def nested_abstract_declarator(tokens, symbol_table):
    """Parse '(' abstract_declarator ')' and return the inner declarator."""
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    declarator = abstract_declarator(tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and declarator
def subscript_oper(tokens, symbol_table, primary_exp):
    """Parse the subscript that follows primary_exp and build the expression node.

    primary_exp is type-checked against PointerType and the index expression
    against IntegralType; the resulting ArraySubscriptingExpression is typed
    with the c_type of primary_exp's c_type, instantiated at the location of
    the consumed opening token.
    """
    pointer_checked = error_if_not_type(c_type(primary_exp), PointerType)
    # the opening token is only consumed once the pointer check came back truthy ...
    location = pointer_checked and loc(consume(tokens))

    index = symbol_table['__ expression __'](tokens, symbol_table)

    # subscript must be of Integral Type.
    if error_if_not_value(tokens, TOKENS.RIGHT_BRACKET):
        error_if_not_type(c_type(index), IntegralType)

    element_type = c_type(c_type(primary_exp))(location)
    return ArraySubscriptingExpression(primary_exp, index, element_type, location)
def nested_block(token_seq):
    """Yield the tokens of an entire nested block, closing token included.

    Emits what get_line and then get_block produce for token_seq, followed by
    the value returned by the TOKENS.PENDIF check.
    """
    # both sources are created up front, exactly as chain's arguments would be ...
    line_tokens = get_line(token_seq)
    block_tokens = get_block(token_seq)
    for token in chain(line_tokens, block_tokens):  # get the entire block ...
        yield token
    yield error_if_not_value(token_seq, TOKENS.PENDIF)
def _func_macro_definition(name, line):
    """Build the FunctionMacro called name from the rest of its definition line.

    Reads the arguments, requires a ')' (errors are reported at the location
    of name), and uses the remaining non-empty tokens of line as the body.
    """
    parameters = tuple(_func_macro_arguments(line))  # defining function macro
    error_if_not_value(line, TOKENS.RIGHT_PARENTHESIS, loc(name))
    body = tuple(filter_out_empty_tokens(line))
    return FunctionMacro(name, parameters, body)
def exp(tokens, symbol_table):
    """Parse 'while' '(' expression ')' ';' and yield the expression.

    NOTE(review): the trailing ';' suggests this is the tail of a do-while
    statement -- confirm against the caller.
    """
    parse_expression = symbol_table['__ expression __']

    error_if_not_value(tokens, TOKENS.WHILE)
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    condition = parse_expression(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    error_if_not_value(tokens, TOKENS.SEMICOLON)

    yield condition
def expression_or_compound_literal(tokens, symbol_table):
    """Parse what follows a '(': a compound literal or a parenthesized expression.

    When the token after '(' is a type name the '__ compound_literal __' rule
    takes over; otherwise an expression is parsed, a ')' is required and the
    expression is returned.
    """
    opening = error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)
    if opening and is_type_name(peek_or_terminal(tokens), symbol_table):
        return symbol_table['__ compound_literal __'](tokens, symbol_table)

    expression = symbol_table['__ expression __'](tokens, symbol_table)
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    return closing and expression
def main():
    """Parse the command line and drive the compiler in the selected mode.

    Modes are mutually exclusive and tested in this order: preprocess only
    (-E, to stdout), assembly listing (-S, to stdout), compile only (-c, one
    pickled symbol table per input), archive (-a, one pickled symbol table for
    all inputs), shared (not implemented), and finally the default of building
    the instructions, which are either run on the vm (--vm) or pickled to the
    -o name ('a.out.p' when none is given).
    """
    parser = argparse.ArgumentParser(description='C Compiler ...')

    def switch(names, text, default=False):
        # plain on/off flag
        parser.add_argument(*names, action='store_true', default=default, help=text)

    def repeatable(names, text):
        # option that may be given several times; the list literal is evaluated
        # per call so no two options ever share their default list
        parser.add_argument(*names, default=[], nargs='?', action='append', help=text)

    parser.add_argument('files', nargs='+')
    parser.add_argument('-O', '--optimize', default=0, nargs=1, help='Optimization Level')
    switch(('-E', '--preprocess'), 'Output preprocessor and stop.')
    switch(('-S', '--assembly'), 'Output instructions readable text.')
    switch(('-c', '--compile'), 'Compile, but not link.')
    switch(('-static', '--static'), 'Static Linking (default).', default=True)
    switch(('-shared', '--shared'), 'Shared Linking.')
    switch(('--vm',), 'Execute code on Virtual Machine.')
    switch(('-a', '--archive'), 'Archive files into a single output')
    repeatable(('-o', '--output'), 'Name of output, file(s) default is the original')
    repeatable(('-I', '--Include'), 'Directories to be used by the preprocessor when searching for files.')
    repeatable(('-L', '--Libraries'), 'Directories to be used by the linker when searching for libraries')
    repeatable(('-l', '--libraries'), 'Name of libraries to be used when searching for symbols.')

    args = parser.parse_args()

    # standard search locations are appended after the user supplied ones ...
    input_dirs = set(imap(os.path.dirname, args.files))
    args.Include += std_include_dirs + list(input_dirs)
    args.Libraries += std_libraries_dirs
    args.libraries += std_libraries

    # lazily keep the (directory, name) combinations that are existing files ...
    library_paths = starmap(os.path.join, product(args.Libraries, args.libraries))
    libraries = ifilter(os.path.isfile, library_paths)

    first_level_requested = args.optimize and args.optimize[0] == '1'
    if first_level_requested:
        optimizer = lambda instructions: optimize(instructions, first_level_optimization)
    else:
        optimizer = lambda instructions: optimize(instructions, zero_level_optimization)

    if args.preprocess:
        text = preprocess(args.files, args.Include)
        exhaust(imap(sys.stdout.write, text))
    elif args.assembly:
        text = assembly(args.files, args.Include, libraries, optimizer)
        exhaust(imap(sys.stdout.write, text))
    elif args.compile:
        if args.output:
            # given output(s) must match the number of inputs ...
            expected = len(args.files)
            output_files = error_if_not_value(repeat(len(args.output), 1), expected) and args.output
        else:
            stems = imap(lambda path: os.path.splitext(path)[0], args.files)
            output_files = imap('{0}.o.p'.format, stems)
        for input_file, output_file in izip(args.files, output_files):
            compiled = symbols(input_file, args.Include, optimizer)
            symbol_table = linker.library(compiled)
            with open(output_file, 'wb') as file_obj:
                pickle.dump(symbol_table, file_obj)
    elif args.archive:
        symbol_table = SymbolTable()
        # archives require a single output which has no default ...
        error_if_not_value(repeat(len(args.output), 1), 1)
        for input_file in args.files:  # compile all files into a single symbol_table ...
            compiled = symbols(input_file, args.Include, optimizer)
            symbol_table = linker.library(compiled, symbol_table)
        archive_name = args.output[0]
        with open(archive_name, 'wb') as file_obj:  # dump symbol_table ...
            pickle.dump(symbol_table, file_obj)
    elif args.shared:
        raise NotImplementedError
    else:  # default: compile and statically link ...
        instructions = instrs(args.files, args.Include, libraries, optimizer)
        if args.vm:  # a vm was requested, execute the instructions ...
            vm.start(instructions)
        else:  # otherwise emit a single executable file ...
            if args.output:
                error_if_not_value(repeat(len(args.output), 1), 1, Location('cc.py', '', ''))
            # without a (usable) output name fall back to the default a.out.p
            given_name = args.output and args.output[0]
            file_output = given_name if given_name else 'a.out.p'
            with open(file_output, 'wb') as file_obj:
                pickle.dump(tuple(instructions), file_obj)
def if_block(token_seq, macros):
    """Yield the preprocessed tokens of a conditional block, then require its end.

    The rule registered for the upcoming token (looked up in rules(if_block))
    produces the body, which is run through macros['__ preprocess __'];
    once that is exhausted the TOKENS.PENDIF check is made.
    """
    expand = macros['__ preprocess __']
    handler = rules(if_block)[peek(token_seq)]
    for token in expand(handler(token_seq, macros), macros):
        yield token
    error_if_not_value(token_seq, TOKENS.PENDIF)
def _values(char_stream):
    """Yield the characters found before the closing double quote.

    A backslash is not yielded itself: the character after it is consumed and
    looked up in escape_characters, falling back to that consumed character
    when there is no entry. The closing double quote is checked at the end.
    """
    while peek(char_stream, TOKENS.DOUBLE_QUOTE) != TOKENS.DOUBLE_QUOTE:
        char = consume(char_stream)
        if char == '\\':
            # peek first, then consume: presumably both see the same
            # character, which doubles as key and as fallback value ...
            escaped = peek(char_stream)
            char = escape_characters.get(escaped, consume(char_stream))
        yield char
    error_if_not_value(char_stream, TOKENS.DOUBLE_QUOTE)
def type_name_or_compound_literal(tokens, symbol_table):
    """Parse type_name ')' and, if a '{' follows, the compound literal it starts.

    Returns the parsed type name, or a CompoundLiteral wrapping the parsed
    initializer and that type name (located at the type name) when the next
    token is TOKENS.LEFT_BRACE.
    """
    type_name = symbol_table['__ type_name __'](tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)

    if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:
        initializer = symbol_table['__ initializer __'](tokens, symbol_table)
        return CompoundLiteral(initializer, type_name, loc(type_name))
    return type_name
def _if(tokens, symbol_table):
    """Parse an if statement and yield the resulting IfStatement.

    Consumes the leading token (its location becomes the statement's), then
    '(' expression ')' statement, followed by whatever else_statement parses.
    """
    location = loc(consume(tokens))
    error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)

    # both rules are looked up before any of the condition is parsed ...
    parse_expression = symbol_table['__ expression __']
    parse_statement = symbol_table['__ statement __']

    condition = parse_expression(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)

    then_branch = parse_statement(tokens, symbol_table)
    else_branch = else_statement(tokens, symbol_table)
    yield IfStatement(condition, then_branch, else_branch, location)
def standard_lib_file_path(token_seq, _):
    """Return the path of an include written between angle brackets.

    The leading token is consumed, everything up to (not including) the
    TOKENS.GREATER_THAN token is joined into the path, and that closing
    token is then required. The second argument is ignored.
    """
    opening = consume(token_seq)
    # NOTE(review): the closing token is checked right after this, so the
    # takewhile in scope must leave it in token_seq (itertools.takewhile
    # would swallow it) -- presumably a project helper, confirm.
    file_path = opening and ''.join(takewhile(TOKENS.GREATER_THAN.__ne__, token_seq))
    error_if_not_value(token_seq, TOKENS.GREATER_THAN)
    return file_path