def rebuild(relocation_table, old_symbol_table, new_symbol_table=None):
    """Re-point every relocation entry at its symbol's index in another table.

    Each entry's ``'symbol_index'`` is translated to a symbol name using
    ``old_symbol_table`` and then replaced, in place, by the index that
    ``symbol_table.get_index`` reports for that name in the target table.

    Args:
        relocation_table: Iterable of relocation dicts carrying a
            ``'symbol_index'`` key; the dicts are mutated in place.
        old_symbol_table: Symbol table the current indexes refer to.
        new_symbol_table: Passed through ``symbol_table.get_symbol_table`` to
            obtain the target table (``None`` by default).
    """
    target_table = symbol_table.get_symbol_table(new_symbol_table)
    for entry in relocation_table:
        name = symbol_table.get_symbol_name(entry['symbol_index'],
                                            old_symbol_table)
        entry['symbol_index'] = symbol_table.get_index(name, target_table)
def add_symbol(symbol_name, proc_name=None, symbols=None, _symbol_table=None):
    """Return the entry for ``symbol_name``, creating an empty one if needed.

    A newly created entry holds the index of ``proc_name`` (as reported by
    ``symbol_table.get_index``), an empty ``bytearray`` of machine code and an
    empty relocation table. An existing entry is returned untouched.

    Args:
        symbol_name: Key of the symbol to look up or create.
        proc_name: Name whose index is stored as ``'proc_index'`` on creation.
        symbols: Passed through ``get_symbols`` to obtain the collection used.
        _symbol_table: Forwarded to ``symbol_table.get_index``.

    Returns:
        Whatever ``get_symbol`` returns for ``symbol_name``.
    """
    registry = get_symbols(symbols)
    already_known = symbol_exists(symbol_name, registry)
    if not already_known:
        fresh_entry = {
            'proc_index': symbol_table.get_index(proc_name, _symbol_table),
            'machine_code': bytearray(),
            'relocation_table': [],
        }
        registry[symbol_name] = fresh_entry
    return get_symbol(symbol_name, registry)
def mnemonic_stoa(operands, errors=None):
    """Assemble a store-to-address instruction.

    The first operand is the destination: a 16-bit address or a symbol name
    (emitted as a zeroed word plus a relocation entry). The second operand is
    the source and must be an 8-bit register.

    Args:
        operands: Sequence of operand strings; exactly two are expected.
        errors: Optional list handed to the validation helpers.

    Returns:
        A dict with ``'machine_code'`` (``bytearray``: opcode followed by the
        little-endian address word) and ``'relocation_table'`` (list), or
        ``None`` when validation failed.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        operand2 = operands[1].lower()

        if validate_operand_addr_size(operand1, 16, errors):
            # store to address is supported to an address or a symbol name
            # (using relocation) but only from an 8-bit register
            addr_value = get_addr_value(operand1)
            if addr_value is None:
                # no literal address: emit a placeholder word and record a
                # relocation for it
                operand1 = expand_local_symbol_name(operand1)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    # offset 1: the word follows the single opcode byte
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand1)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

            if validate_operand_register_size(operand2, 8, errors):
                register_opcode = get_register_opcode(operand2)
                opcode = 0b11100001 | (register_opcode << 1)
            else:
                opcode_operands.clear()

    # A failed validation leaves ``opcode`` unset. Checking it explicitly
    # (rather than relying on ``errors`` alone) avoids appending ``None`` to
    # the bytearray when the caller passed no error list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
def mnemonics_db_dw(mnemonic, operands, errors=None):
    """Assemble the data bytes for a ``db`` or ``dw`` line.

    Every operand may carry a trailing repeat count written as
    ``<value> ( <count> )``. ``db`` emits one byte per value and ``dw`` one
    little-endian word per value. A ``dw`` operand that is a valid name is
    emitted as a single zeroed word with a relocation entry; a repeat count on
    such an operand is parsed but not applied to it.

    Args:
        mnemonic: ``'db'`` or ``'dw'``; operands of any other mnemonic emit
            nothing.
        operands: Sequence of operand strings.
        errors: Optional list that receives error dicts.

    Returns:
        A dict with ``'machine_code'`` (``bytearray``) and
        ``'relocation_table'`` (list), or ``None`` when ``errors`` is
        non-empty.
    """
    emitted = bytearray()
    relocations = []

    if not operands:
        if errors is not None:
            errors.append({'name': 'NO_DATA', 'info': []})
    else:
        for operand in operands:
            # split off an optional "( count )" suffix, working from the right
            parts = operand.rsplit(None, 3)
            if len(parts) == 4 and parts[1] == '(' and parts[3] == ')':
                operand, multiplier = parts[0], parts[2]
                if data.is_valid_str(multiplier) or data.is_valid_chr(
                        multiplier):
                    if errors is not None:
                        errors.append({
                            'name': 'UNSUPPORTED_MULTIPLIER',
                            'info': [multiplier]
                        })
                    emitted.clear()
                    break
                elif data.get_size(multiplier) is None or data.get_value(
                        multiplier) < 1:
                    if errors is not None:
                        errors.append({
                            'name': 'INVALID_MULTIPLIER',
                            'info': [multiplier]
                        })
                    emitted.clear()
                    break
                elif data.get_size(multiplier) > 16:
                    if errors is not None:
                        errors.append({
                            'name': 'UNSUPPORTED_MULTIPLIER_SIZE',
                            'info': [data.get_size(multiplier), 16]
                        })
                    emitted.clear()
                    break
                repeat = data.get_value(multiplier)
            else:
                repeat = 1

            if 'db' == mnemonic:
                # bytes support max. 8-bit data, a single character or a string
                if not validate_operand_data_size(operand, 8, errors):
                    emitted.clear()
                    break
                if data.is_valid_str(operand):
                    emitted.extend(data.get_value(operand) * repeat)
                else:
                    emitted.extend([data.get_value(operand)] * repeat)
            elif 'dw' == mnemonic:
                # words support a symbol name (using relocation), max. 16-bit
                # data, a single character or a string, both including unicode
                if is_valid_name(operand):
                    operand = expand_local_symbol_name(operand)
                    placeholder_offset = len(emitted)
                    emitted.extend([0, 0])
                    relocations.append({
                        'machine_code_offset': placeholder_offset,
                        'symbol_index': symbol_table.get_index(operand)
                    })
                elif validate_operand_data_size(operand, 16, errors):
                    if data.is_valid_str(operand):
                        for word in data.get_value(operand) * repeat:
                            emitted.extend(binutils.word_to_le(word))
                    else:
                        word_bytes = binutils.word_to_le(
                            data.get_value(operand))
                        emitted.extend(word_bytes * repeat)
                else:
                    emitted.clear()
                    break

    if errors:
        return None

    machine_code = bytearray()
    machine_code.extend(emitted)
    return {
        'machine_code': machine_code,
        'relocation_table': relocations
    }
# Opcode layout per jump/call mnemonic: alias -> (base opcode, bit position at
# which the M-vs-address flip bit is OR-ed into the base).
_JMPS_CALLS_OPCODES = {
    alias: (base, flip_shift)
    for aliases, base, flip_shift in (
        (('jmp',), 0b01110101, 1),
        (('jc', 'jb', 'jnae'), 0b01111001, 1),
        (('jnc', 'jnb', 'jae'), 0b01111101, 1),
        (('jz', 'je'), 0b10001111, 4),
        (('jnz', 'jne'), 0b10101111, 4),
        (('ja', 'jnbe'), 0b00000001, 1),
        (('jna', 'jbe'), 0b00000110, 0),
        (('call',), 0b11000001, 1),
        (('cc', 'cb', 'cnae'), 0b11000101, 1),
        (('cnc', 'cnb', 'cae'), 0b11001011, 2),
        (('cz', 'ce'), 0b11010001, 1),
        (('cnz', 'cne'), 0b11010101, 1),
        (('ca', 'cnbe'), 0b00010010, 0),
        (('cna', 'cbe'), 0b00010110, 0),
    )
    for alias in aliases
}


def mnemonics_jmps_calls(mnemonic, operands, errors=None):
    """Assemble a jump or call instruction.

    The single operand is either ``M``, a 16-bit address, or a symbol name
    (emitted as a zeroed word plus a relocation entry). A flip bit in the
    opcode distinguishes ``M`` (0) from an address or symbol name (1).

    Args:
        mnemonic: Jump/call mnemonic. A mnemonic missing from the opcode table
            leaves the bare flip bit as the opcode.
        operands: Sequence of operand strings; exactly one is expected.
        errors: Optional list handed to the validation helpers.

    Returns:
        A dict with ``'machine_code'`` (``bytearray``) and
        ``'relocation_table'`` (list), or ``None`` when validation failed.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 1, errors):
        # jumps and calls are supported to M, an address or a symbol name
        # (using relocation); note: M vs. address/symbol name is distinguished
        # using a flip-bit in the opcode
        operand = operands[0].lower()
        if 'm' == operand:
            opcode = 0b0
        elif validate_operand_addr_size(operand, 16, errors):
            opcode = 0b1
            addr_value = get_addr_value(operand)
            if addr_value is None:
                operand = expand_local_symbol_name(operand)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    # offset 1: the word follows the single opcode byte
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

        # merge the flip bit into the mnemonic's base opcode
        if opcode is not None:
            layout = _JMPS_CALLS_OPCODES.get(mnemonic)
            if layout is not None:
                base, flip_shift = layout
                opcode = base | (opcode << flip_shift)

    # A failed validation leaves ``opcode`` unset. Checking it explicitly
    # avoids appending ``None`` to the bytearray when the caller passed no
    # error list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
def mnemonic_mov(operands, errors=None):
    """Assemble a ``mov`` instruction from its two operands.

    Supported forms, as implemented below:

    * ``mov M, <8-bit register>``
    * ``mov <8-bit register>, M | <8-bit register> | <8-bit data or char>``
    * ``mov <16-bit register>, <symbol name> | <16-bit register> |
      <16-bit data or char>``

    A string as the source operand is rejected with
    ``INCOMPATIBLE_DATA_TYPE``. A symbol name is emitted as a zeroed word plus
    a relocation entry.

    Args:
        operands: Sequence of operand strings; exactly two are expected.
        errors: Optional list that receives error dicts.

    Returns:
        A dict with ``'machine_code'`` (``bytearray``) and
        ``'relocation_table'`` (list), or ``None`` when validation failed.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        operand2 = operands[1].lower()

        if 'm' == operand1:
            # move into M is only supported from an 8-bit register
            register1_opcode = 0b110
            if validate_operand_register_size(operand2, 8, errors):
                register2_opcode = get_register_opcode(operand2)
                opcode = 0b10000000 | (register1_opcode << 4) | (
                    register2_opcode << 1)
        elif validate_operand_register(operand1, errors):
            register1_size = get_register_size(operand1)
            register1_opcode = get_register_opcode(operand1)

            if 8 == register1_size:
                # move into an 8-bit register is supported from M, another
                # 8-bit register or using max. 8-bit data or a single
                # character but no string
                register2_opcode = None
                if 'm' == operand2:
                    register2_opcode = 0b110
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2,
                                                      register1_size, errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    # 0b111 in the source field marks immediate data
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.append(data_value)

                if register2_opcode is not None:
                    opcode = 0b10000000 | (register1_opcode << 4) | (
                        register2_opcode << 1)
            elif 16 == register1_size:
                # move into a 16-bit register is supported from a symbol name
                # (using relocation), another 16-bit register or using max.
                # 16-bit data or a single character including unicode but no
                # string
                register2_opcode = None
                if is_valid_name(operand2):
                    operand2 = expand_local_symbol_name(operand2)
                    register2_opcode = 0b111
                    opcode_operands.extend([0, 0])
                    _relocation_table.append({
                        # offset 1: the word follows the single opcode byte
                        'machine_code_offset': 1,
                        'symbol_index': symbol_table.get_index(operand2)
                    })
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2,
                                                      register1_size, errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    # guard like the 8-bit branch: ``errors`` defaults to None
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.extend(binutils.word_to_le(data_value))

                if register2_opcode is not None:
                    # unlike the 8-bit forms, the top bit is not set here
                    opcode = (register1_opcode << 4) | (register2_opcode << 1)

    # A failed validation leaves ``opcode`` unset. Checking it explicitly
    # avoids appending ``None`` to the bytearray when the caller passed no
    # error list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
def assemble_asm_file(file_name):
    """Assemble one source file, line by line, into the shared symbol state.

    For every line the function handles, in this order, an optional directive
    (``base``, ``proc``, ``endproc``, ``end``), an optional symbol definition
    and an optional instruction mnemonic. Assembled machine code and
    relocation entries are appended to the entry of the current symbol.
    Problems are reported through ``show_error`` and stop processing of the
    file.

    The module globals ``current_file_name``, ``current_line_num``,
    ``current_line_str``, ``current_symbol_name`` and ``current_proc_name``
    are updated while the file is processed.

    Args:
        file_name: Path of the assembly source file.
    """
    global current_file_name, current_line_num, current_line_str
    global current_symbol_name, current_proc_name

    if not os.path.isfile(file_name):
        show_error({'name': 'FILE_NOT_FOUND', 'info': [file_name]})
        return

    with open(file_name, 'r') as asm:
        current_file_name = file_name
        current_line_num = 0
        # iterate the file lazily instead of loading every line up front
        for line_num, line_str in enumerate(asm, start=1):
            current_line_str = line_str
            current_line_num = line_num
            errors = []
            line = parse_asm_line_str(current_line_str, errors)

            if not errors and line['directive']:
                directive = line['directive']
                directive_lower = directive.lower()
                if not is_valid_directive(directive_lower):
                    show_error({
                        'name': 'INVALID_DIRECTIVE',
                        'info': [directive]
                    })
                    return
                elif 'base' == directive_lower:
                    directive_base(line['operands'], errors)
                elif 'proc' == directive_lower:
                    if current_proc_name is None:
                        current_proc_name = directive_proc(
                            line['operands'], errors)
                        if not errors:
                            # the procedure name is then defined as a symbol
                            # by the symbol handling below
                            line['symbol_name'] = current_proc_name
                    else:
                        show_error({'name': 'UNEXPECTED_PROC', 'info': []})
                        return
                elif 'endproc' == directive_lower:
                    if current_proc_name is not None:
                        current_proc_name = None
                        current_symbol_name = None
                    else:
                        show_error({
                            'name': 'UNEXPECTED_ENDPROC',
                            'info': []
                        })
                        return
                elif 'end' == directive_lower:
                    # the .end directive simply exits the line-by-line loop
                    # (skipping the rest of the file)
                    break

            if not errors and line['symbol_name']:
                symbol_name = line['symbol_name']
                if not is_valid_name(symbol_name):
                    show_error(
                        {
                            'name': 'INVALID_SYMBOL_NAME',
                            'info': [symbol_name]
                        }, '')
                    return
                symbol_name = expand_local_symbol_name(symbol_name)
                if symbols.symbol_exists(symbol_name):
                    show_error(
                        {
                            'name': 'DUPLICATE_SYMBOL',
                            'info': [symbol_name]
                        }, '')
                    return
                else:
                    current_symbol_name = symbol_name
                    if symbol_table.symbol_exists(current_symbol_name):
                        # if the current symbol was already used as an operand
                        # (hence it already exists in the symbol table, but
                        # with a lower index), move it to the end of the
                        # symbol table to keep the symbols in the order of
                        # their definition
                        old_symbol_table = symbol_table.get_symbol_table(
                        ).copy()
                        symbol_table.remove_symbol(current_symbol_name)
                        symbol_table.add_symbol(current_symbol_name)
                        # rebuild all symbols to use the new symbol indexes
                        _symbols = symbols.get_symbols()
                        for symbol in _symbols.values():
                            # procedure
                            proc_name = symbol_table.get_symbol_name(
                                symbol['proc_index'], old_symbol_table)
                            symbol['proc_index'] = symbol_table.get_index(
                                proc_name)
                            # relocation table
                            relocation_table.rebuild(
                                symbol['relocation_table'], old_symbol_table)
                    else:
                        symbol_table.add_symbol(current_symbol_name)
                    symbols.add_symbol(current_symbol_name, current_proc_name)

            if not errors and line['mnemonic']:
                if not current_symbol_name:
                    show_error({
                        'name': 'INSTRUCTION_WITHOUT_SYMBOL',
                        'info': []
                    })
                    return
                else:
                    assembly = assemble_asm_line(line, errors)
                    if not errors:
                        # dump_assembly(assembly)
                        symbol = symbols.get_symbol(current_symbol_name)
                        for relocation in assembly['relocation_table']:
                            # adjust the machine code offset to be relative to
                            # the current symbol
                            relocation['machine_code_offset'] += len(
                                symbol['machine_code'])
                        symbol['relocation_table'].extend(
                            assembly['relocation_table'])
                        symbol['machine_code'].extend(
                            assembly['machine_code'])

            # end of line: report the first collected error and stop
            if errors:
                show_error(errors[0])
                break