def init_declarator(tokens, symbol_table, base_type=CType(''), storage_class=None):
    """
    : declarator ('=' assignment_expression or initializer)?

    Parse one declarator, apply base_type to it and register the result in symbol_table under its name.
    A Definition is returned when an '=' follows the declarator, otherwise a Declaration.
    """
    parse_declarator = symbol_table['__ declarator __']
    decl = set_core_type(parse_declarator(tokens, symbol_table), base_type)

    if not (peek_or_terminal(tokens) == TOKENS.EQUAL and consume(tokens)):
        # no initializer follows, register a plain declaration ...
        plain = Declaration(name(decl), c_type(decl), loc(decl))
        symbol_table[name(decl)] = plain
        return plain

    decl = Definition(name(decl), c_type(decl), EmptyExpression(c_type(decl)), loc(decl), storage_class)
    # the symbol has to be visible while its own initializer is parsed, for things like `int a = a;`
    symbol_table[name(decl)] = decl
    expr = initializer_or_assignment_expression(tokens, symbol_table)

    # an array initialized by a string is converted to an Initializer so both are parsed the same way
    if isinstance(c_type(decl), ArrayType) and isinstance(c_type(expr), StringType):
        string_type = c_type(expr)
        expr = Initializer(
            enumerate(exp(expr)),
            ArrayType(c_type(string_type), len(string_type), loc(expr)),
            loc(expr)
        )

    if isinstance(expr, Initializer):
        decl.initialization = parse_initializer(expr, decl)
    else:
        decl.initialization = expr
    return decl
def definition(dec, symbol_table):
    """
    Global (non-function) definition.

    Binds the load instructions to dec, stores the result in symbol_table under its name and attaches
    a Data symbol to that entry; the attached symbol is returned.
    """
    assert not isinstance(c_type(dec), FunctionType)
    symbol_table[name(dec)] = bind_load_instructions(dec)

    # Add reference of symbol to definition to keep track of references
    # static binaries, (packed binaries since machine may require alignment ...)
    data_symbol = Data(name(dec), static_def_binaries(dec), size(c_type(dec)), dec.storage_class, loc(dec))

    entry = symbol_table[name(dec)]
    entry.symbol = data_symbol
    return entry.symbol
def declaration(stmnt, symbol_table):
    """
    Non-global declarations: they don't require any space but they could be referenced
    (extern, or function type).

    Attaches a Code (function type) or Data symbol to stmnt, registers stmnt in symbol_table
    and yields a single Pass instruction.
    """
    if isinstance(c_type(stmnt), FunctionType):
        symbol_type = Code
    else:
        symbol_type = Data

    ident = declarations.name(stmnt)
    stmnt.symbol = symbol_type(ident, (), None, stmnt.storage_class, loc(stmnt))
    # function types are given no size (override maps FunctionType to None)
    stmnt.symbol.size = size(c_type(stmnt), overrides={FunctionType: None})
    symbol_table[ident] = stmnt
    yield Pass(loc(stmnt))
def get_declaration_or_definition(decl, storage_class):
    """
    Build either a Declaration or a Definition out of decl.

    A Declaration is returned for extern storage class or for an unnamed function/struct type;
    anything else becomes a Definition whose storage class defaults to Auto.
    An initialized extern is reported through raise_error.
    """
    is_extern = isinstance(storage_class, Extern)
    if initialization(decl) and is_extern:
        raise_error(
            '{l} {ident} has both initialization expr and extern storage class'.format(
                l=loc(decl), ident=name(decl)
            )
        )

    unnamed_function_or_struct = isinstance(c_type(decl), (FunctionType, StructType)) and not name(decl)
    if unnamed_function_or_struct or is_extern:
        return Declaration(name(decl), c_type(decl), loc(decl), storage_class)

    return Definition(
        name(decl), c_type(decl), initialization(decl), loc(decl), storage_class or Auto(loc(decl))
    )
def function_definition(dec, symbol_table):
    """
    Emit the Code symbol of a function definition.

    Function Call Convention:
        Allocate enough space on the stack for the return type.

        Push a new Frame (saves (base & stack ptr))
        Push all parameters on the stack from right to left. (The values won't be popped but referenced on stack ...)
        Calculate & Push pointer where to return value.
        Push pointer where to store return value.
        Push the return Address so the callee knows where to return to.
        (Reset Base pointer) creating a new Frame.
        Jump to callee code segment

        Callee references values passed on the stack by pushing the base_stack_pointer,
        (+offsets) for previous frame and (-offset) for current frame ...

        Callee will place the return value in the specified pointer.
        Caller Pops frame, and uses the set (returned) value.
    """
    symbol = Code(name(dec), None, None, dec.storage_class, loc(dec))
    # bind load/reference instructions, add to symbol table.
    symbol_table[name(dec)] = bind_load_instructions(dec)
    symbol_table[name(dec)].symbol = symbol

    def binaries(body, symbol_table):
        symbol_table = push(symbol_table)
        # Each function call has its own Frame which is nothing more than a stack
        symbol_table['__ stack __'] = Stack()

        # Skip return address ...
        # if function has zero return size then the return pointer will be omitted ...
        return_size = size_arrays_as_pointers(c_type(c_type(dec)), overrides={VoidType: 0})
        offset = size_arrays_as_pointers(void_pointer_type) + (
            size_arrays_as_pointers(void_pointer_type) * bool(return_size)
        )

        for parameter in c_type(dec):
            # monkey patch declarator objects add Load commands according to stack state; add to symbol table.
            symbol_table[name(parameter)] = bind_instructions(parameter, offset)
            assert not type(parameter) is ArrayType  # TODO: fix this.
            offset += size_arrays_as_pointers(c_type(parameter))

        symbol_table.update((
            ('__ CURRENT FUNCTION __', dec),
            ('__ LABELS __', SymbolTable()),
            ('__ GOTOS __', defaultdict(list)),
        ))

        def pop_symbol_table(symbol_table, location=loc(dec)):
            # pop symbol table once all binaries have been emitted
            pop(symbol_table)
            yield Pass(location)

        emit_statement = symbol_table['__ statement __']
        body_instrs = chain.from_iterable(imap(emit_statement, chain.from_iterable(body), repeat(symbol_table)))
        return chain(
            body_instrs,  # body of function ...
            return_instrs(loc(dec)),  # default return instructions, in case one was not given ...
            pop_symbol_table(symbol_table)  # pop symbol_table once complete ...
        )

    symbol.binaries = binaries(initialization(dec), symbol_table)
    return symbol
def get_declaration_or_definition(decl, storage_class):
    """
    Turn decl into a Declaration or a Definition depending on storage_class and type.

    extern storage class, or an unnamed function/struct type, gives a Declaration; everything else
    gives a Definition (storage class falls back to Auto). raise_error is called when an extern
    carries an initialization expression.
    """
    extern = isinstance(storage_class, Extern)
    if initialization(decl) and extern:
        message = '{l} {ident} has both initialization expr and extern storage class'
        raise_error(message.format(l=loc(decl), ident=name(decl)))

    if isinstance(c_type(decl), (FunctionType, StructType)) and not name(decl):
        return Declaration(name(decl), c_type(decl), loc(decl), storage_class)
    if extern:
        return Declaration(name(decl), c_type(decl), loc(decl), storage_class)

    effective_storage_class = storage_class or Auto(loc(decl))
    return Definition(name(decl), c_type(decl), initialization(decl), loc(decl), effective_storage_class)
def definition(dec, symbol_table):
    """
    Global definition (function types are not accepted here).

    The definition, with load instructions bound, is added to symbol_table and receives a Data
    symbol, which is also the return value.
    """
    assert not isinstance(c_type(dec), FunctionType)

    key = name(dec)
    symbol_table[key] = bind_load_instructions(dec)

    # Add reference of symbol to definition to keep track of references
    # static binaries, (packed binaries since machine may require alignment ...)
    symbol = Data(key, static_def_binaries(dec), size(c_type(dec)), dec.storage_class, loc(dec))

    registered = symbol_table[key]
    registered.symbol = symbol
    return registered.symbol
def declarations(tokens, symbol_table):
    """
    storage_class_specifier? type_name? init_declarator_list (';' or compound_statement)

    Generator over the declarations/definitions found; a function body, when present, is parsed
    within a pushed symbol table holding the (non variable list) parameters.
    """
    storage_class_specifier = symbol_table['__ storage_class_specifier __']
    specifier_qualifier_list = symbol_table['__ specifier_qualifier_list __']
    statement = symbol_table['__ statement __']

    storage_class = storage_class_specifier(tokens, symbol_table)
    base_type = specifier_qualifier_list(tokens, symbol_table)

    expecting_token = TOKENS.SEMICOLON
    upcoming = peek_or_terminal(tokens)
    if upcoming == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif upcoming is terminal:
        raise_error('{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))
    else:
        for dec in init_declarator_list(tokens, symbol_table, base_type=base_type, storage_class=storage_class):
            dec.storage_class = storage_class
            if isinstance(storage_class, TypeDef):
                # init_declarator_list adds the symbol as a decl to symbol_table, replace dec by ctype
                symbol_table.pop(name(dec))
                symbol_table[name(dec)] = c_type(dec)
            elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE and not error_if_not_type(c_type(dec), FunctionType):
                symbol_table = push(symbol_table)
                # add non variable list parameters to the symbol table ...
                parameters = (
                    (name(param), param) for param in c_type(dec) if not isinstance(c_type(param), VAListType)
                )
                scope_entries = (('__ RETURN_TYPE __', c_type(c_type(dec))), ('__ LABELS __', SymbolTable()))
                symbol_table.update(chain(parameters, scope_entries))
                yield FunctionDefinition(dec, next(statement(tokens, symbol_table)))
                pop(symbol_table)
                expecting_token = ''  # a function body is not followed by a terminator
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
        # NOTE(review): placed after the loop inside this branch since the empty declaration above
        # has already consumed its ';' -- confirm against the original layout.
        if expecting_token:
            error_if_not_value(tokens, expecting_token)
def label_statement(stmnt, symbol_table):
    """
    Emit a label: a Pass instruction marking the label followed by the labelled statement.

    The label is recorded together with the current stack pointer and every goto that was waiting
    on it is patched to target the Pass instruction.
    """
    instr = Pass(loc(stmnt))
    labels = symbol_table['__ LABELS __']
    gotos = symbol_table['__ GOTOS __']
    stack = symbol_table['__ stack __']
    statement = symbol_table['__ statement __']

    label = name(stmnt)
    labels[label] = (instr, stack.stack_pointer)

    # update all previous gotos referring to this lbl
    pending = gotos[label]
    for alloc_instr, rel_jump_instr, goto_stack_pointer in pending:
        # TODO: bug! set_address uses obj.address.
        alloc_instr[0].obj.address = alloc_instr.address + (stack.stack_pointer - goto_stack_pointer)
        rel_jump_instr[0].obj = instr
    del pending[:]  # all of them have been resolved

    return chain((instr,), statement(stmnt.statement, symbol_table))
def binaries(body, symbol_table):
    """
    Instructions for a function body (dec is taken from the enclosing scope).

    A new symbol table level with its own Stack is pushed, parameters are bound at increasing
    offsets, and the returned chain emits the body, the default return instructions and finally
    pops the symbol table.
    """
    symbol_table = push(symbol_table)
    # Each function call has its own Frame which is nothing more than a stack
    symbol_table['__ stack __'] = Stack()

    # Skip return address ...
    # if function has zero return size then the return pointer will be omitted ...
    return_size = size_arrays_as_pointers(c_type(c_type(dec)), overrides={VoidType: 0})
    offset = size_arrays_as_pointers(void_pointer_type) + (
        size_arrays_as_pointers(void_pointer_type) * bool(return_size)
    )

    for parameter in c_type(dec):
        # monkey patch declarator objects add Load commands according to stack state; add to symbol table.
        symbol_table[name(parameter)] = bind_instructions(parameter, offset)
        assert not type(parameter) is ArrayType  # TODO: fix this.
        offset += size_arrays_as_pointers(c_type(parameter))

    symbol_table.update((
        ('__ CURRENT FUNCTION __', dec),
        ('__ LABELS __', SymbolTable()),
        ('__ GOTOS __', defaultdict(list)),
    ))

    def pop_symbol_table(symbol_table, location=loc(dec)):
        # pop symbol table once all binaries have been emitted
        pop(symbol_table)
        yield Pass(location)

    emit_statement = symbol_table['__ statement __']
    body_instrs = chain.from_iterable(imap(emit_statement, chain.from_iterable(body), repeat(symbol_table)))
    return chain(
        body_instrs,  # body of function ...
        return_instrs(loc(dec)),  # default return instructions, in case one was not given ...
        pop_symbol_table(symbol_table)  # pop symbol_table once complete ...
    )
def composite_specifier(
        tokens,
        symbol_table,
        obj_type=StructType,
        member_parse_func=parse_struct_members,
        terminal=object()
):
    """
    : 'composite type' IDENTIFIER
    | 'composite type' IDENTIFIER '{' members '}'
    | 'composite type' '{' members '}'
    """
    location = loc(consume(tokens))
    upcoming = peek_or_terminal(tokens)

    if upcoming == TOKENS.LEFT_BRACE:  # anonymous composite ...
        return obj_type(None, member_parse_func(tokens, symbol_table), location)

    if not isinstance(upcoming, IDENTIFIER):
        raise ValueError('{l} Expected IDENTIFIER or LEFT_BRACE got {got}'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))

    key = obj_type.get_name(peek(tokens))
    # built unconditionally: the identifier has to be consumed even if the type is already known
    fresh = obj_type(consume(tokens), None, location)
    obj = symbol_table.get(key, fresh)

    # some composites are a bit tricky such as Struct/Union ...
    # since any of its members may contain itself as a reference, so we'll add the type to
    # the symbol table before adding the members ...
    # TODO: make types immutable, right now they are being shared.
    if symbol_table.get(obj.name, terminal) is terminal:
        symbol_table[name(obj)] = obj
    if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:
        obj.members = member_parse_func(tokens, symbol_table)
    return obj
def init_declarator(tokens, symbol_table, base_type=CType(''), storage_class=None):
    """
    : declarator ('=' assignment_expression or initializer)?

    The parsed declarator gets base_type as its core type and is stored in symbol_table by name,
    as a Definition if it is followed by '=' and an initializer, as a Declaration if not.
    """
    declarator = symbol_table['__ declarator __']
    decl = set_core_type(declarator(tokens, symbol_table), base_type)

    has_initializer = peek_or_terminal(tokens) == TOKENS.EQUAL and consume(tokens)
    if not has_initializer:
        declared = Declaration(name(decl), c_type(decl), loc(decl))
        symbol_table[name(decl)] = declared
        return declared

    decl = Definition(name(decl), c_type(decl), EmptyExpression(c_type(decl)), loc(decl), storage_class)
    symbol_table[name(decl)] = decl  # we have to add it to the symbol table for things like `int a = a;`
    expr = initializer_or_assignment_expression(tokens, symbol_table)

    # if declaration is an array type and the expression is of string_type then convert to initializer for parsing
    if isinstance(c_type(decl), ArrayType) and isinstance(c_type(expr), StringType):
        literal_type = c_type(expr)
        array_type = ArrayType(c_type(literal_type), len(literal_type), loc(expr))
        expr = Initializer(enumerate(exp(expr)), array_type, loc(expr))

    if isinstance(expr, Initializer):
        decl.initialization = parse_initializer(expr, decl)
    else:
        decl.initialization = expr
    return decl
def composite_specifier(tokens, symbol_table, obj_type=StructType,
                        member_parse_func=parse_struct_members, terminal=object()):
    """
    : 'composite type' IDENTIFIER
    | 'composite type' IDENTIFIER '{' members '}'
    | 'composite type' '{' members '}'
    """
    location = loc(consume(tokens))
    following = peek_or_terminal(tokens)

    if following == TOKENS.LEFT_BRACE:
        # anonymous composite ...
        members = member_parse_func(tokens, symbol_table)
        return obj_type(None, members, location)

    if isinstance(following, IDENTIFIER):
        lookup_name = obj_type.get_name(peek(tokens))
        # always constructed so the identifier token gets consumed, even when a previous entry exists
        candidate = obj_type(consume(tokens), None, location)
        obj = symbol_table.get(lookup_name, candidate)

        # some composites are a bit tricky such as Struct/Union ...
        # since any of its members may contain itself as a reference, so we'll add the type to
        # the symbol table before adding the members ...
        # TODO: make types immutable, right now they are being shared.
        if symbol_table.get(obj.name, terminal) is terminal:
            symbol_table[name(obj)] = obj
        if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:
            obj.members = member_parse_func(tokens, symbol_table)
        return obj

    message = '{l} Expected IDENTIFIER or LEFT_BRACE got {got}'
    raise ValueError(message.format(l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))
def __init__(self, c_decl, body):
    """
    Build a function definition out of a function-typed declaration and its body.

    Raises ValueError when a parameter is not a Declarator or body is not a CompoundStatement.
    """
    function_type = c_type(c_decl)
    _ = error_if_not_type(function_type, FunctionType)

    for arg in function_type:
        if not isinstance(arg, Declarator):
            raise ValueError(
                '{l} FunctionDef must have concrete declarators as params'.format(l=loc(c_type(c_decl)))
            )

    if not isinstance(body, CompoundStatement):
        raise ValueError(
            '{l} FunctionDef body is not a compound statement, got {g}'.format(l=loc(c_decl), g=body)
        )

    super(FunctionDefinition, self).__init__(
        name(c_decl), c_type(c_decl), body, loc(c_decl), c_decl.storage_class
    )
def parse_struct_members(tokens, symbol_table):
    """
    Parse the member declarations of a composite type up to and including the closing brace.

    Returns an OrderedDict mapping each member name to its declarator; a repeated member name
    raises ValueError.
    """
    declarator = symbol_table['__ declarator __']
    location = loc(consume(tokens))  # leading token, presumably the opening brace
    members = OrderedDict()

    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        type_spec = specifier_qualifier_list(tokens, symbol_table)
        while peek(tokens, TOKENS.SEMICOLON) != TOKENS.SEMICOLON:
            decl = declarator(tokens, symbol_table)
            set_core_type(decl, type_spec)
            member_name = name(decl)
            if member_name in members:
                raise ValueError('{l} Duplicate struct member {name} previous at {at}'.format(
                    l=loc(decl), name=member_name, at=loc(members[member_name])
                ))
            members[member_name] = decl
            # declarators sharing a type specifier are comma separated
            if peek_or_terminal(tokens) != TOKENS.SEMICOLON:
                error_if_not_value(tokens, TOKENS.COMMA)
        error_if_not_value(tokens, TOKENS.SEMICOLON)

    error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return members
def identifier_expression(expr, symbol_table):
    """
    Instructions for an identifier; defaults to Load, assignment expression will update it to set.

    Function/Array types are nothing more than addresses, so only the address is loaded for them.
    """
    dec = symbol_table[name(expr)]
    address_only = isinstance(c_type(dec), (FunctionType, ArrayType))
    address_instrs = dec.load_address(loc(expr))
    if address_only:
        return address_instrs
    return load(address_instrs, size_arrays_as_pointers(c_type(expr)), loc(expr))
def convert_declaration_to_definition(decl):
    """
    Convert a plain declaration found within a compound statement into a definition.

    All non-function declarations within compound statements are definitions, unless they are
    declared extern; a missing storage class is set to Auto. Anything else is returned unchanged.
    Nested function definitions are reported through raise_error.
    """
    if isinstance(decl, FunctionDefinition):
        raise_error('{l} Nested function definitions are not allowed.'.format(l=loc(decl)))

    is_plain_declaration = type(decl) is Declaration and not isinstance(c_type(decl), FunctionType)
    # storage classes are instances (see the isinstance(storage_class, Extern) checks elsewhere),
    # so an identity comparison against the Extern class itself could never detect an extern.
    if is_plain_declaration and not isinstance(decl.storage_class, Extern):
        decl = Definition(
            name(decl),
            c_type(decl),
            EmptyExpression(c_type(decl), loc(decl)),
            loc(decl),
            decl.storage_class or Auto(loc(decl))
        )
    return decl
def static_definition(stmnt, symbol_table):
    """
    Emit a static definition: its data is embedded in the instruction stream and jumped over.

    stmnt gets a load_address bound to it that pushes the address of the first emitted data
    element, and is registered in symbol_table.
    """
    def load_address(self, location):
        return push(Address(self._initial_data, location), location)

    data = static_def_binaries(stmnt, (Pass(loc(stmnt)),))
    stmnt._initial_data = peek(data)  # first element of the data, target of load_address
    stmnt.end_of_data = Pass(loc(stmnt))
    stmnt.load_address = bind_load_address_func(load_address, stmnt)
    symbol_table[declarations.name(stmnt)] = stmnt

    # jump over embedded data ...
    skip_data = relative_jump(Offset(stmnt.end_of_data, loc(stmnt)), loc(stmnt))
    embedded = consume_all(data)
    return chain(skip_data, embedded, (stmnt.end_of_data,))
def parse_struct_members(tokens, symbol_table):
    """
    Collect the members of a composite type, consuming tokens through the closing brace.

    The result is an OrderedDict of member name to declarator. ValueError is raised for a
    duplicated member name.
    """
    parse_declarator = symbol_table['__ declarator __']
    location = loc(consume(tokens))  # first token of the member list, presumably the opening brace
    members = OrderedDict()

    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        type_spec = specifier_qualifier_list(tokens, symbol_table)

        while peek(tokens, TOKENS.SEMICOLON) != TOKENS.SEMICOLON:
            decl = parse_declarator(tokens, symbol_table)
            set_core_type(decl, type_spec)

            ident = name(decl)
            if ident in members:
                message = '{l} Duplicate struct member {name} previous at {at}'
                raise ValueError(message.format(l=loc(decl), name=ident, at=loc(members[ident])))
            members[ident] = decl

            # anything but the terminating ';' has to be a ',' separating declarators
            if peek_or_terminal(tokens) != TOKENS.SEMICOLON:
                error_if_not_value(tokens, TOKENS.COMMA)

        error_if_not_value(tokens, TOKENS.SEMICOLON)

    error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return members
def __init__(self, c_decl, body):
    """
    Initialize the function definition from declaration c_decl and compound statement body.

    ValueError is raised if any parameter is not a Declarator or if body is not a CompoundStatement.
    """
    _ = error_if_not_type(c_type(c_decl), FunctionType)

    parameters_are_concrete = True
    for arg in c_type(c_decl):
        if not isinstance(arg, Declarator):
            parameters_are_concrete = False
            break
    if not parameters_are_concrete:
        message = '{l} FunctionDef must have concrete declarators as params'
        raise ValueError(message.format(l=loc(c_type(c_decl))))

    if not isinstance(body, CompoundStatement):
        message = '{l} FunctionDef body is not a compound statement, got {g}'
        raise ValueError(message.format(l=loc(c_decl), g=body))

    super(FunctionDefinition, self).__init__(
        name(c_decl), c_type(c_decl), body, loc(c_decl), c_decl.storage_class
    )
def convert_declaration_to_definition(decl):
    """
    Promote a non-function, non-extern Declaration to a Definition.

    Within compound statements every non-function declaration is a definition; one without a
    storage class is set to Auto. Extern declarations, function declarations and objects that
    are not exactly of type Declaration are returned as they are. A FunctionDefinition is
    rejected through raise_error since nested function definitions are not allowed.
    """
    _ = isinstance(decl, FunctionDefinition) and raise_error(
        '{l} Nested function definitions are not allowed.'.format(l=loc(decl)))

    if type(decl) is not Declaration or isinstance(c_type(decl), FunctionType):
        return decl
    # the storage class is an instance, compare by type: `storage_class is not Extern` was always
    # true and turned extern declarations into definitions as well.
    if isinstance(decl.storage_class, Extern):
        return decl

    return Definition(
        name(decl),
        c_type(decl),
        EmptyExpression(c_type(decl), loc(decl)),
        loc(decl),
        decl.storage_class or Auto(loc(decl))
    )
def declarations(tokens, symbol_table):
    """
    storage_class_specifier? type_name? init_declarator_list (';' or compound_statement)

    Yields an EmptyDeclaration, or each declarator of the list; a declarator followed by '{' is
    yielded as a FunctionDefinition whose body is parsed in a pushed symbol table.
    """
    parse_storage_class = symbol_table['__ storage_class_specifier __']
    parse_specifiers = symbol_table['__ specifier_qualifier_list __']
    parse_statement = symbol_table['__ statement __']

    storage_class = parse_storage_class(tokens, symbol_table)
    base_type = parse_specifiers(tokens, symbol_table)

    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
        return
    if peek_or_terminal(tokens) is terminal:
        raise_error('{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))
        return

    for dec in init_declarator_list(tokens, symbol_table, base_type=base_type, storage_class=storage_class):
        dec.storage_class = storage_class
        if isinstance(storage_class, TypeDef):
            # init_declarator_list adds the symbol as a decl to symbol_table, replace dec by ctype
            symbol_table.pop(name(dec))
            symbol_table[name(dec)] = c_type(dec)
        elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE and not error_if_not_type(c_type(dec), FunctionType):
            symbol_table = push(symbol_table)
            # add non variable list parameters to the symbol table ...
            named_parameters = [
                (name(param), param) for param in c_type(dec) if not isinstance(c_type(param), VAListType)
            ]
            named_parameters.append(('__ RETURN_TYPE __', c_type(c_type(dec))))
            named_parameters.append(('__ LABELS __', SymbolTable()))
            symbol_table.update(named_parameters)
            yield FunctionDefinition(dec, next(parse_statement(tokens, symbol_table)))
            pop(symbol_table)
            expecting_token = ''  # nothing is expected after a function body
        else:
            yield dec
            expecting_token = TOKENS.SEMICOLON

    # NOTE(review): only checked on the declarator-list path, the empty declaration has already
    # consumed its ';' -- confirm against the original layout.
    if expecting_token:
        error_if_not_value(tokens, expecting_token)
def call_function(function_call_expr, symbol_table):
    """
    Instructions transferring control to the callee of function_call_expr.

    A simple identifier of function type needs no AbsoluteJump, a RelativeJump to its address
    object is emitted; any other callee expression is evaluated and jumped to absolutely.
    """
    location = loc(function_call_expr)
    callee = left_exp(function_call_expr)

    if isinstance(callee, IdentifierExpression) and isinstance(c_type(callee), FunctionType):
        new_frame = set_base_stack_pointer(load_stack_pointer(location), location)
        target = symbol_table[name(callee)].get_address_obj(location).obj
        return chain(new_frame, relative_jump(Offset(target, location), location))

    callee_address = symbol_table['__ expression __'](callee, symbol_table)  # load callee address
    # calculate new base stack pointer excluding the callees address ...
    # give the callee a new frame... if we were to reset the base stack ptr before evaluating the left_expr
    # we run the risk of failing to properly load function address if it was stored as a local function pointer
    new_base = set_base_stack_pointer(
        add(load_stack_pointer(location), push(size(void_pointer_type), location), location),
        location
    )
    return absolute_jump(chain(callee_address, new_base), location)
def executable(symbols, symbol_table=None, entry_point=default_entry_point, libraries=(), linker=static):
    """
    Chain together the instructions of a complete program.

    The sequence is: call to entry_point, halt, binaries of all symbols (plus the two heap pointer
    symbols added here) passed through linker, then default binaries of the symbols in
    symbol_table that have none, then the end-of-program word the heap pointers refer to.
    """
    location = '__SOP__'  # Start of Program
    if symbol_table is None:
        symbol_table = SymbolTable()
    end_of_program = Word(0, location)
    libs = tuple(imap(Library, libraries))

    # add heap pointer(s) ...
    heap_pointers = (
        object_file.Data(
            '__base_heap_ptr__', (Address(end_of_program, location),), size(void_pointer_type), None, location
        ),
        object_file.Data(
            '__heap_ptr__', (Address(end_of_program, location),), size(void_pointer_type), None, location
        ),
    )
    symbols = chain(symbols, heap_pointers)

    def emit_declarations(symbol_table):
        # iterate over all symbols within symbol_table that do not have binaries (they should be declarations)
        for symbol in symbol_table.itervalues():
            if not symbol.binaries:
                for instr in set_binaries(symbol):
                    yield instr  # emit default binaries for declarations ...

    # inject declaration into temp symbol_table to generate entry point function call instructions ...
    st = {'__ expression __': expression}
    _ = declaration(entry_point, st)

    entry_call = FunctionCallExpression(
        IdentifierExpression(name(entry_point), c_type(entry_point), location),
        (),
        c_type(c_type(entry_point)),
        location
    )
    instr_seq = chain(
        statement(entry_call, st),  # call entry point ...
        halt(location),  # Halt machine on return ...
        chain.from_iterable(imap(binaries, symbols, repeat(symbol_table))),
    )
    # link all foreign symbols and emit binaries for declarations ...
    return chain(linker(instr_seq, symbol_table, libs), emit_declarations(symbol_table), (end_of_program,))
def call_function(function_call_expr, symbol_table):
    """
    Emit the jump to the function being called.

    If the called expression is a simple identifier of function type a RelativeJump is used,
    otherwise the callee address is computed and an AbsoluteJump is used.
    """
    l = loc(function_call_expr)
    expr = left_exp(function_call_expr)
    direct_call = isinstance(expr, IdentifierExpression) and isinstance(c_type(expr), FunctionType)

    if not direct_call:
        load_callee = symbol_table['__ expression __'](expr, symbol_table)  # load callee address
        # calculate new base stack pointer excluding the callees address ...
        # give the callee a new frame... if we were to reset the base stack ptr before evaluating the left_expr
        # we run the risk of failing to properly load function address if it was stored as a local function pointer
        frame_base = add(load_stack_pointer(l), push(size(void_pointer_type), l), l)
        return absolute_jump(chain(load_callee, set_base_stack_pointer(frame_base, l)), l)

    # no callee address on the stack, the current stack pointer is the new base
    reset_base = set_base_stack_pointer(load_stack_pointer(l), l)
    callee_obj = symbol_table[name(expr)].get_address_obj(l).obj
    return chain(reset_base, relative_jump(Offset(callee_obj, l), l))
def __setitem__(self, key, value):
    """
    Store value under key, checking declarations against what is already stored.

    C allows multiple declarations, as long as they are all consistent with previous declarations
    AND there is a single definition. Possible scenarios:
      1) Given a declaration, check it is consistent with a previous declaration or definition if any.
      2) Given a definition, check it is consistent with a previous declaration if any.

    Raises ValueError when value is a Declaration and the existing entry is not a Declaration of
    the same type and name.
    """
    if isinstance(value, Declaration) and key in self:
        # either function definition, definition or declaration or constant_expression(for enums) ...
        prev = self[key]
        consistent = (
            isinstance(prev, Declaration)
            and c_type(value) == c_type(prev)
            and name(value) == name(prev)
        )
        if not consistent:
            raise ValueError('{l} inconsistent def/dec with previous at {a}'.format(l=loc(value), a=loc(prev)))
        # TODO: check storage class, extern vs static declarations/definitions ...
        # a previous plain declaration is popped before inserting the new def or dec,
        # anything more specific is left in place ...
        if type(prev) is Declaration:
            self.pop(key)
    super(SymbolTable, self).__setitem__(key, value)
def executable(symbols, symbol_table=None, entry_point=default_entry_point, libraries=(), linker=static):
    """
    Build the instruction sequence of an executable.

    Emits the call to entry_point followed by a halt and the binaries of every symbol (two heap
    pointer Data symbols are appended to symbols), all run through linker; afterwards come the
    default binaries for symbol_table entries lacking binaries and the closing Word that both
    heap pointers address.
    """
    location = '__SOP__'  # Start of Program
    symbol_table = SymbolTable() if symbol_table is None else symbol_table
    program_end = Word(0, location)
    libs = tuple(imap(Library, libraries))

    def heap_pointer(symbol_name):
        # heap pointers initially refer to the end of the program
        return object_file.Data(
            symbol_name, (Address(program_end, location),), size(void_pointer_type), None, location
        )

    # add heap pointer(s) ...
    symbols = chain(symbols, (heap_pointer('__base_heap_ptr__'), heap_pointer('__heap_ptr__')))

    def declarations(symbol_table):
        # iterate over all symbols within symbol_table that do not have binaries (they should be declarations)
        without_binaries = ifilterfalse(lambda s: s.binaries, symbol_table.itervalues())
        for default_binaries in imap(set_binaries, without_binaries):
            for v in default_binaries:
                yield v  # emit default binaries for declarations ...

    # inject declaration into temp symbol_table to generate entry point function call instructions ...
    st = {'__ expression __': expression}
    _ = declaration(entry_point, st)

    entry_identifier = IdentifierExpression(name(entry_point), c_type(entry_point), location)
    call_entry_point = statement(
        FunctionCallExpression(entry_identifier, (), c_type(c_type(entry_point)), location),
        st
    )
    stop = halt(location)  # Halt machine on return ...
    symbol_binaries = chain.from_iterable(starmap(binaries, izip(symbols, repeat(symbol_table))))
    instr_seq = chain(call_entry_point, stop, symbol_binaries)

    # link all foreign symbols and emit binaries for declarations ...
    return chain(linker(instr_seq, symbol_table, libs), declarations(symbol_table), (program_end,))
def binaries(body, symbol_table):
    """
    Build the instruction chain for the body of the function dec (free variable of this block).

    Pushes a symbol table level holding a fresh Stack, the parameters bound to their offsets and
    the current function/labels/gotos entries; the level is popped by the last element of the
    returned chain.
    """
    symbol_table = push(symbol_table)
    symbol_table['__ stack __'] = Stack()  # Each function call has its own Frame which is nothing more than a stack

    # Skip return address ...
    offset = size_arrays_as_pointers(void_pointer_type)
    # if function has zero return size then the return pointer will be omitted ...
    if size_arrays_as_pointers(c_type(c_type(dec)), overrides={VoidType: 0}):
        offset = offset + size_arrays_as_pointers(void_pointer_type)

    for parameter in c_type(dec):
        # monkey patch declarator objects add Load commands according to stack state; add to symbol table.
        symbol_table[name(parameter)] = bind_instructions(parameter, offset)
        assert not type(parameter) is ArrayType  # TODO: fix this.
        offset += size_arrays_as_pointers(c_type(parameter))

    function_scope = (
        ('__ CURRENT FUNCTION __', dec),
        ('__ LABELS __', SymbolTable()),
        ('__ GOTOS __', defaultdict(list)),
    )
    symbol_table.update(function_scope)

    def pop_symbol_table(symbol_table, location=loc(dec)):
        # pop symbol table once all binaries have been emitted
        pop(symbol_table)
        yield Pass(location)

    statement = symbol_table['__ statement __']
    statements = chain.from_iterable(body)
    return chain(
        # body of function ...
        chain.from_iterable(imap(statement, statements, repeat(symbol_table))),
        # default return instructions, in case one was not given ...
        return_instrs(loc(dec)),
        # pop symbol_table once complete ...
        pop_symbol_table(symbol_table)
    )
def identifier_expression(expr, symbol_table):
    """
    Load the value named by expr (defaults to Load, assignment expression will update it to set).

    For Function/Array types, which are nothing more than addresses, the address itself is the result.
    """
    dec = symbol_table[name(expr)]
    if not isinstance(c_type(dec), (FunctionType, ArrayType)):
        address = dec.load_address(loc(expr))
        return load(address, size_arrays_as_pointers(c_type(expr)), loc(expr))
    return dec.load_address(loc(expr))
def function_definition(dec, symbol_table):
    """
    Create the Code symbol for function definition dec and register dec in symbol_table.

    Function Call Convention:
        Allocate enough space on the stack for the return type.

        Push a new Frame (saves (base & stack ptr))
        Push all parameters on the stack from right to left. (The values won't be popped but referenced on stack ...)
        Calculate & Push pointer where to return value.
        Push pointer where to store return value.
        Push the return Address so the callee knows where to return to.
        (Reset Base pointer) creating a new Frame.
        Jump to callee code segment

        Callee references values passed on the stack by pushing the base_stack_pointer,
        (+offsets) for previous frame and (-offset) for current frame ...

        Callee will place the return value in the specified pointer.
        Caller Pops frame, and uses the set (returned) value.
    """
    symbol = Code(name(dec), None, None, dec.storage_class, loc(dec))
    symbol_table[name(dec)] = bind_load_instructions(dec)  # bind load/reference instructions, add to symbol table.
    symbol_table[name(dec)].symbol = symbol

    def binaries(body, symbol_table):
        symbol_table = push(symbol_table)
        symbol_table['__ stack __'] = Stack()  # Each function call has its own Frame, nothing more than a stack

        # Skip return address ...
        offset = size_arrays_as_pointers(void_pointer_type)
        # if function has zero return size then the return pointer will be omitted ...
        if size_arrays_as_pointers(c_type(c_type(dec)), overrides={VoidType: 0}):
            offset = offset + size_arrays_as_pointers(void_pointer_type)

        for parameter in c_type(dec):
            # monkey patch declarator objects add Load commands according to stack state; add to symbol table.
            symbol_table[name(parameter)] = bind_instructions(parameter, offset)
            assert not type(parameter) is ArrayType  # TODO: fix this.
            offset += size_arrays_as_pointers(c_type(parameter))

        function_scope = (
            ('__ CURRENT FUNCTION __', dec),
            ('__ LABELS __', SymbolTable()),
            ('__ GOTOS __', defaultdict(list)),
        )
        symbol_table.update(function_scope)

        def pop_symbol_table(symbol_table, location=loc(dec)):
            # pop symbol table once all binaries have been emitted
            pop(symbol_table)
            yield Pass(location)

        statement = symbol_table['__ statement __']
        statements = chain.from_iterable(body)
        return chain(
            # body of function ...
            chain.from_iterable(imap(statement, statements, repeat(symbol_table))),
            # default return instructions, in case one was not given ...
            return_instrs(loc(dec)),
            # pop symbol_table once complete ...
            pop_symbol_table(symbol_table)
        )

    symbol.binaries = binaries(initialization(dec), symbol_table)
    return symbol
def declaration(dec, symbol_table):
    """
    Register declaration dec in symbol_table and attach a symbol without binaries to it.

    Function types get a Code symbol, everything else a Data symbol carrying the size of its type;
    the attached symbol is returned.
    """
    symbol_table[name(dec)] = bind_load_instructions(dec)

    if isinstance(c_type(dec), FunctionType):
        symbol = Code(name(dec), (), None, dec.storage_class, loc(dec))
    else:
        symbol = Data(name(dec), (), size(c_type(dec)), dec.storage_class, loc(dec))

    entry = symbol_table[name(dec)]
    entry.symbol = symbol
    return entry.symbol
def non_static_definition(stmnt, symbol_table):
    """
    Emit a non-static definition: allocate it on the current stack, register it in symbol_table
    and dispatch on the type of its c_type to the rule producing its instructions.
    """
    stack = symbol_table['__ stack __']
    stmnt = stack_allocation(stack, stmnt)
    symbol_table[declarations.name(stmnt)] = stmnt

    handlers = rules(non_static_definition)
    emit = handlers[type(c_type(stmnt))]
    return emit(stmnt, symbol_table)