def build_obj_file_symbols(_symbol_table=None, _symbols=None):
    """Serialize the symbols section of an object file.

    Every name in the symbol table except index 0 (the global scope) is
    written in table order. For a symbol that exists in ``_symbols`` the
    record is, each number encoded with ``binutils.word_to_le``:

    * the machine code size,
    * the symbol's ``proc_index``,
    * the machine code itself,
    * the number of relocation entries,
    * per relocation entry: ``machine_code_offset`` then ``symbol_index``.

    A name that is in the symbol table but not in ``_symbols`` is treated as
    an external symbol and written as the two bytes ``0xff 0xff``.

    :param _symbol_table: passed through to ``symbol_table.get_symbol_table``.
    :param _symbols: passed through to the ``symbols`` lookups.
    :return: a ``bytearray`` with the serialized records.
    """
    names = symbol_table.get_symbol_table(_symbol_table)
    out = bytearray()

    # index 0 is the global scope and is never written
    for name in names[1:]:
        if not symbols.symbol_exists(name, _symbols):
            # external symbol
            out.extend([0xff, 0xff])
            continue

        entry = symbols.get_symbol(name, _symbols)

        code = entry['machine_code']
        out.extend(binutils.word_to_le(len(code)))
        out.extend(binutils.word_to_le(entry['proc_index']))
        out.extend(code)

        relocations = entry['relocation_table']
        out.extend(binutils.word_to_le(len(relocations)))
        for relocation in relocations:
            out.extend(binutils.word_to_le(relocation['machine_code_offset']))
            out.extend(binutils.word_to_le(relocation['symbol_index']))

    return out
def build_cpu_symbols(errors=None,
                      _symbol_table=None,
                      _symbols=None,
                      link_base=None):
    """Concatenate the relocated machine code of all symbols.

    For every symbol, each relocation entry is resolved to an absolute
    address and that address is written into the symbol's machine code at the
    entry's ``machine_code_offset`` (first byte) and the following position
    (second byte). The machine code object obtained from the symbol is
    patched in place and then appended to the result.

    :param errors: optional list; an ``UNKNOWN_SYMBOL`` entry is appended
        when a relocation refers to a symbol that does not exist.
    :param _symbol_table: passed through to ``symbol_table.get_symbol_name``.
    :param _symbols: passed through to the ``symbols`` lookups.
    :param link_base: memory address to which the program is loaded before
        the cpu executes it; ``default_link_base`` is used when ``None``.
    :return: a ``bytearray`` with the relocated code, or ``None`` as soon as
        an unknown symbol is encountered.
    """
    _symbols = symbols.get_symbols(_symbols)
    base = default_link_base if link_base is None else link_base

    image = bytearray()
    for name in _symbols.keys():
        entry = symbols.get_symbol(name, _symbols)
        code = entry['machine_code']

        # do the relocation...
        for relocation in entry['relocation_table']:
            target_name = symbol_table.get_symbol_name(
                relocation['symbol_index'], _symbol_table)

            if not symbols.symbol_exists(target_name, _symbols):
                if errors is not None:
                    errors.append({
                        'name': 'UNKNOWN_SYMBOL',
                        'info': [target_name]
                    })
                return None

            # absolute address of the relocated symbol = link base + its
            # machine code base (set when the relocated symbol was linked)
            target = symbols.get_symbol(target_name, _symbols)
            address = base + target['machine_code_base']
            address_bytes = binutils.word_to_le(address)

            # patch the address into the current symbol's machine code at
            # the recorded offset
            offset = relocation['machine_code_offset']
            code[offset] = address_bytes[0]
            code[offset + 1] = address_bytes[1]

        image.extend(code)

    return image
def build_obj_file_header(link_base=None):
    """Build the header of an object file.

    The header consists of the character codes of ``obj_file_signature``,
    one byte holding ``max_obj_file_version`` and the link base encoded with
    ``binutils.word_to_le``.

    :param link_base: link base to store; when ``None`` the value ``0xffff``
        is written, which stands for "use default".
    :return: a ``bytearray`` with the header.
    """
    # 0xffff = use default
    stored_base = 0xffff if link_base is None else link_base

    header = bytearray()
    header.extend(ord(char) for char in obj_file_signature)
    header.append(max_obj_file_version)
    header.extend(binutils.word_to_le(stored_base))
    return header
def build_obj_file_symbol_table(_symbol_table=None):
    """Serialize the symbol table section of an object file.

    Index 0 of the symbol table (the global scope) is left out. The output
    starts with the number of remaining names encoded with
    ``binutils.word_to_le``; each name follows as one length byte and the
    character codes of the name.

    :param _symbol_table: passed through to ``symbol_table.get_symbol_table``.
    :return: a ``bytearray`` with the serialized symbol table.
    """
    # remove index 0 (global scope)
    names = symbol_table.get_symbol_table(_symbol_table)[1:]

    out = bytearray()
    out.extend(binutils.word_to_le(len(names)))
    for name in names:
        out.append(len(name))
        out.extend(ord(char) for char in name)
    return out
def mnemonic_stoa(operands, errors=None):
    """Assemble a store-to-address instruction.

    Storing is supported to a 16-bit address or a symbol name (using
    relocation), but only from an 8-bit register.

    :param operands: sequence of two operand strings: the destination
        address or symbol name, then the source register.
    :param errors: optional list that the validation helpers append to.
    :return: ``{'machine_code': bytearray, 'relocation_table': list}`` on
        success. ``None`` when ``errors`` is non-empty after validation or
        when no opcode could be produced; the latter also covers callers
        that pass no ``errors`` list, which previously crashed on
        ``bytearray.append(None)``.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        operand2 = operands[1].lower()

        if validate_operand_addr_size(operand1, 16, errors):
            addr_value = get_addr_value(operand1)
            if addr_value is None:
                # not a literal address: emit a two-byte placeholder and
                # record a relocation; offset 1 because the address follows
                # the single opcode byte
                operand1 = expand_local_symbol_name(operand1)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand1)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

            if validate_operand_register_size(operand2, 8, errors):
                register_opcode = get_register_opcode(operand2)
                opcode = 0b11100001 | (register_opcode << 1)
            else:
                opcode_operands.clear()

    # opcode stays None whenever a validation step failed; checking it keeps
    # failure detection working even when the caller passed no errors list
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
def mnemonics_db_dw(mnemonic, operands, errors=None):
    """Assemble the data emitted by a ``db`` or ``dw`` directive.

    An operand may end with three whitespace-separated tokens ``( N )``; in
    that case ``N`` is a multiplier that repeats the operand's data. The
    multiplier must not be a string or character, must have a value of at
    least 1 and a size of at most 16.

    * ``db``: bytes support max. 8-bit data, a single character or a string.
    * ``dw``: words support a symbol name (using relocation), max. 16-bit
      data, a single character or a string, both including unicode. A
      multiplier on a symbol-name operand is not applied.

    Processing stops at the first invalid operand and the data collected so
    far is discarded.

    :param mnemonic: ``'db'`` or ``'dw'``; operands of any other mnemonic
        produce no data.
    :param operands: sequence of operand strings; an empty or missing
        sequence is reported as ``NO_DATA``.
    :param errors: optional list that receives error entries.
    :return: ``None`` when ``errors`` is non-empty, otherwise
        ``{'machine_code': bytearray, 'relocation_table': list}``.
    """
    emitted = bytearray()
    relocations = []

    def report(name, info):
        # errors are only recorded when the caller supplied a list
        if errors is not None:
            errors.append({'name': name, 'info': info})

    if not operands:
        report('NO_DATA', [])

    for raw_operand in (operands or ()):
        pieces = raw_operand.rsplit(None, 3)
        if len(pieces) == 4 and pieces[1] == '(' and pieces[3] == ')':
            operand = pieces[0]
            multiplier = pieces[2]

            if data.is_valid_str(multiplier) or data.is_valid_chr(multiplier):
                report('UNSUPPORTED_MULTIPLIER', [multiplier])
                emitted.clear()
                break
            if (data.get_size(multiplier) is None
                    or data.get_value(multiplier) < 1):
                report('INVALID_MULTIPLIER', [multiplier])
                emitted.clear()
                break
            if data.get_size(multiplier) > 16:
                report('UNSUPPORTED_MULTIPLIER_SIZE',
                       [data.get_size(multiplier), 16])
                emitted.clear()
                break

            repeat = data.get_value(multiplier)
        else:
            operand = raw_operand
            repeat = 1

        if mnemonic == 'db':
            if not validate_operand_data_size(operand, 8, errors):
                emitted.clear()
                break
            if data.is_valid_str(operand):
                emitted.extend(data.get_value(operand) * repeat)
            else:
                emitted.extend([data.get_value(operand)] * repeat)
        elif mnemonic == 'dw':
            if is_valid_name(operand):
                # symbol name: two-byte placeholder plus a relocation entry
                # pointing at the placeholder just appended
                symbol_name = expand_local_symbol_name(operand)
                emitted.extend([0, 0])
                relocations.append({
                    'machine_code_offset': len(emitted) - 2,
                    'symbol_index': symbol_table.get_index(symbol_name)
                })
            elif validate_operand_data_size(operand, 16, errors):
                if data.is_valid_str(operand):
                    for word in data.get_value(operand) * repeat:
                        emitted.extend(binutils.word_to_le(word))
                else:
                    emitted.extend(
                        binutils.word_to_le(data.get_value(operand)) * repeat)
            else:
                emitted.clear()
                break

    if errors:
        return None

    return {
        'machine_code': bytearray(emitted),
        'relocation_table': relocations
    }
# Opcode template and bit position of the "M vs. address/symbol name" flip-bit
# for every jump and call mnemonic handled by mnemonics_jmps_calls.
_JMPS_CALLS_OPCODES = {
    'jmp': (0b01110101, 1),
    'jc': (0b01111001, 1), 'jb': (0b01111001, 1), 'jnae': (0b01111001, 1),
    'jnc': (0b01111101, 1), 'jnb': (0b01111101, 1), 'jae': (0b01111101, 1),
    'jz': (0b10001111, 4), 'je': (0b10001111, 4),
    'jnz': (0b10101111, 4), 'jne': (0b10101111, 4),
    'ja': (0b00000001, 1), 'jnbe': (0b00000001, 1),
    'jna': (0b00000110, 0), 'jbe': (0b00000110, 0),
    'call': (0b11000001, 1),
    'cc': (0b11000101, 1), 'cb': (0b11000101, 1), 'cnae': (0b11000101, 1),
    'cnc': (0b11001011, 2), 'cnb': (0b11001011, 2), 'cae': (0b11001011, 2),
    'cz': (0b11010001, 1), 'ce': (0b11010001, 1),
    'cnz': (0b11010101, 1), 'cne': (0b11010101, 1),
    'ca': (0b00010010, 0), 'cnbe': (0b00010010, 0),
    'cna': (0b00010110, 0), 'cbe': (0b00010110, 0),
}


def mnemonics_jmps_calls(mnemonic, operands, errors=None):
    """Assemble a jump or call instruction.

    Jumps and calls are supported to M, a 16-bit address or a symbol name
    (using relocation). M vs. address/symbol name is distinguished by a
    flip-bit in the opcode: 0 for M, 1 for an address or symbol name. The
    opcode template and the position of the flip-bit per mnemonic come from
    ``_JMPS_CALLS_OPCODES``. For a mnemonic that is not in that table the
    bare flip-bit value is emitted as the opcode.

    :param mnemonic: the jump/call mnemonic, e.g. ``'jmp'`` or ``'cnz'``.
    :param operands: sequence containing the single target operand.
    :param errors: optional list that the validation helpers append to.
    :return: ``{'machine_code': bytearray, 'relocation_table': list}`` on
        success. ``None`` when ``errors`` is non-empty after validation or
        when no opcode could be produced; the latter also covers callers
        that pass no ``errors`` list, which previously crashed on
        ``bytearray.append(None)``.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 1, errors):
        operand = operands[0].lower()

        if 'm' == operand:
            opcode = 0b0
        elif validate_operand_addr_size(operand, 16, errors):
            opcode = 0b1
            addr_value = get_addr_value(operand)
            if addr_value is None:
                # not a literal address: emit a two-byte placeholder and
                # record a relocation; offset 1 because the address follows
                # the single opcode byte
                operand = expand_local_symbol_name(operand)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

        # merge the flip-bit into the mnemonic's opcode template
        if opcode is not None:
            opcode_form = _JMPS_CALLS_OPCODES.get(mnemonic)
            if opcode_form is not None:
                template, flip_shift = opcode_form
                opcode = template | (opcode << flip_shift)

    # opcode stays None whenever a validation step failed; checking it keeps
    # failure detection working even when the caller passed no errors list
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
def mnemonic_mov(operands, errors=None):
    """Assemble a ``mov`` instruction.

    Supported forms:

    * move into M: only from an 8-bit register;
    * move into an 8-bit register: from M, another 8-bit register, or max.
      8-bit data or a single character, but no string;
    * move into a 16-bit register: from a symbol name (using relocation),
      another 16-bit register, or max. 16-bit data or a single character
      including unicode, but no string.

    :param operands: sequence of two operand strings: destination, source.
    :param errors: optional list that receives error entries; every append
        in this function is skipped when it is ``None``.
    :return: ``{'machine_code': bytearray, 'relocation_table': list}`` on
        success. ``None`` when ``errors`` is non-empty after validation or
        when no opcode could be produced, e.g. a failed validation without
        an ``errors`` list or a register size other than 8 or 16.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        # NOTE(review): operand2 is lower-cased before it is parsed as data,
        # so a literal containing upper-case characters reaches
        # data.get_value in lower case; confirm this is intended.
        operand2 = operands[1].lower()

        if 'm' == operand1:
            # move into M is only supported from an 8-bit register
            register1_opcode = 0b110
            if validate_operand_register_size(operand2, 8, errors):
                register2_opcode = get_register_opcode(operand2)
                opcode = 0b10000000 | (register1_opcode << 4) | (
                    register2_opcode << 1)
        elif validate_operand_register(operand1, errors):
            register1_size = get_register_size(operand1)
            register1_opcode = get_register_opcode(operand1)

            if 8 == register1_size:
                register2_opcode = None
                if 'm' == operand2:
                    register2_opcode = 0b110
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2,
                                                      register1_size, errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    # 0b111 selects immediate data following the opcode
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.append(data_value)

                if register2_opcode is not None:
                    opcode = 0b10000000 | (register1_opcode << 4) | (
                        register2_opcode << 1)
            elif 16 == register1_size:
                register2_opcode = None
                if is_valid_name(operand2):
                    # symbol name: two-byte placeholder plus a relocation;
                    # offset 1 because the data follows the opcode byte
                    operand2 = expand_local_symbol_name(operand2)
                    register2_opcode = 0b111
                    opcode_operands.extend([0, 0])
                    _relocation_table.append({
                        'machine_code_offset': 1,
                        'symbol_index': symbol_table.get_index(operand2)
                    })
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2,
                                                      register1_size, errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    # guarded like the 8-bit branch so that a missing errors
                    # list does not raise AttributeError
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.extend(binutils.word_to_le(data_value))

                if register2_opcode is not None:
                    opcode = (register1_opcode << 4) | (register2_opcode << 1)

    # opcode stays None whenever no valid form was recognised; checking it
    # avoids bytearray.append(None) when errors is None or still empty
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }