Пример #1
0
def rebuild(relocation_table, old_symbol_table, new_symbol_table=None):
    """Re-point relocation entries from an old symbol table to a new one.

    Each entry's ``'symbol_index'`` is resolved to a symbol name through
    ``old_symbol_table`` and then replaced, in place, by the index that
    ``symbol_table.get_index`` reports for that name in the new table.

    Args:
        relocation_table: Iterable of relocation dicts; each must contain a
            ``'symbol_index'`` key. The dicts are mutated in place.
        old_symbol_table: Symbol table the current indexes refer to.
        new_symbol_table: Symbol table to re-index against. It is passed
            through ``symbol_table.get_symbol_table`` first, so ``None`` is
            resolved by that helper.
    """
    target_table = symbol_table.get_symbol_table(new_symbol_table)

    for entry in relocation_table:
        old_index = entry['symbol_index']
        name = symbol_table.get_symbol_name(old_index, old_symbol_table)
        entry['symbol_index'] = symbol_table.get_index(name, target_table)
Пример #2
0
def add_symbol(symbol_name, proc_name=None, symbols=None, _symbol_table=None):
    """Return the record for ``symbol_name``, creating it if it is missing.

    A newly created record holds the symbol-table index of ``proc_name``, an
    empty ``bytearray`` for machine code and an empty relocation table. An
    already existing record is returned untouched.

    Args:
        symbol_name: Name of the symbol to look up or create.
        proc_name: Name whose index is stored as ``'proc_index'``.
        symbols: Symbols mapping to operate on; resolved via ``get_symbols``.
        _symbol_table: Symbol table handed to ``symbol_table.get_index``.

    Returns:
        Whatever ``get_symbol(symbol_name, symbols)`` returns.
    """
    symbols = get_symbols(symbols)

    # guard clause: nothing to create when the symbol is already known
    if symbol_exists(symbol_name, symbols):
        return get_symbol(symbol_name, symbols)

    proc_index = symbol_table.get_index(proc_name, _symbol_table)
    symbols[symbol_name] = {
        'proc_index': proc_index,
        'machine_code': bytearray(),
        'relocation_table': [],
    }
    return get_symbol(symbol_name, symbols)
Пример #3
0
def mnemonic_stoa(operands, errors=None):
    """Assemble a store-to-address instruction.

    The first operand is a 16-bit address or a symbol name (resolved later
    through a relocation entry); the second operand must be an 8-bit
    register.

    Args:
        operands: Sequence of operand strings; exactly two are expected.
        errors: Optional list handed to the validators to collect errors.

    Returns:
        ``None`` if validation failed, otherwise a dict with
        ``'machine_code'`` (opcode byte followed by the two address bytes)
        and ``'relocation_table'``.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        operand2 = operands[1].lower()
        if validate_operand_addr_size(operand1, 16, errors):
            # store to address is supported to an address or a symbol name
            # (using relocation) but only from an 8-bit register
            addr_value = get_addr_value(operand1)
            if addr_value is None:
                # not a literal address: emit a two-byte placeholder and
                # record a relocation pointing at it (offset 1 = the byte
                # right after the opcode)
                operand1 = expand_local_symbol_name(operand1)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand1)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

            if validate_operand_register_size(operand2, 8, errors):
                register_opcode = get_register_opcode(operand2)
                opcode = 0b11100001 | (register_opcode << 1)
            else:
                opcode_operands.clear()

    # A failed validation leaves opcode unset. Checking it explicitly keeps
    # the function from raising TypeError on bytearray.append(None) when the
    # caller did not pass an errors list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
Пример #4
0
def mnemonics_db_dw(mnemonic, operands, errors=None):
    """Assemble the data-definition mnemonics ``db`` (bytes) and ``dw`` (words).

    Every operand may carry a trailing repeat count written as
    ``<value> ( <count> )`` (whitespace-separated tokens). ``dw`` also
    accepts a symbol name, which is emitted as a two-byte placeholder plus a
    relocation entry.

    Args:
        mnemonic: ``'db'`` or ``'dw'``; any other value emits nothing for
            the operand.
        operands: Sequence of operand strings.
        errors: Optional list that collects error dicts.

    Returns:
        ``None`` if ``errors`` ends up non-empty, otherwise a dict with
        ``'machine_code'`` and ``'relocation_table'``. When processing is
        aborted, both are left empty so they never disagree.
    """
    opcode_operands = bytearray()
    _relocation_table = []

    def _discard(error_name=None, info=None):
        # Record the error (if any and if the caller collects errors) and
        # drop everything emitted so far. The relocation table is cleared
        # together with the bytes so no entry can point past the (now empty)
        # machine code.
        if error_name is not None and errors is not None:
            errors.append({'name': error_name, 'info': info})
        opcode_operands.clear()
        _relocation_table.clear()

    if operands:
        for operand in operands:
            multiplier_value = 1

            # "<value> ( <count> )" -> ['<value>', '(', '<count>', ')']
            operand_splits = operand.rsplit(None, 3)
            if (len(operand_splits) == 4 and operand_splits[-3] == '('
                    and operand_splits[-1] == ')'):
                operand = operand_splits[0]
                multiplier = operand_splits[-2]

                if data.is_valid_str(multiplier) or data.is_valid_chr(
                        multiplier):
                    _discard('UNSUPPORTED_MULTIPLIER', [multiplier])
                    break

                multiplier_size = data.get_size(multiplier)
                if multiplier_size is None or data.get_value(multiplier) < 1:
                    _discard('INVALID_MULTIPLIER', [multiplier])
                    break
                if multiplier_size > 16:
                    _discard('UNSUPPORTED_MULTIPLIER_SIZE',
                             [multiplier_size, 16])
                    break

                multiplier_value = data.get_value(multiplier)

            if 'db' == mnemonic:
                # bytes support max. 8-bit data, a single character or a
                # string
                if not validate_operand_data_size(operand, 8, errors):
                    _discard()
                    break
                if data.is_valid_str(operand):
                    opcode_operands.extend(
                        data.get_value(operand) * multiplier_value)
                else:
                    opcode_operands.extend(
                        [data.get_value(operand)] * multiplier_value)
            elif 'dw' == mnemonic:
                # words support a symbol name (using relocation), max. 16-bit
                # data, a single character or a string both including unicode
                if is_valid_name(operand):
                    # NOTE(review): a repeat count on a symbol name is
                    # parsed above but not applied here - confirm whether
                    # that is intended.
                    operand = expand_local_symbol_name(operand)
                    opcode_operands.extend([0, 0])
                    _relocation_table.append({
                        # offset of the two placeholder bytes just appended
                        'machine_code_offset': len(opcode_operands) - 2,
                        'symbol_index': symbol_table.get_index(operand)
                    })
                elif validate_operand_data_size(operand, 16, errors):
                    if data.is_valid_str(operand):
                        data_values = data.get_value(
                            operand) * multiplier_value
                        for data_value in data_values:
                            opcode_operands.extend(
                                binutils.word_to_le(data_value))
                    else:
                        data_value = data.get_value(operand)
                        opcode_operands.extend(
                            binutils.word_to_le(data_value) * multiplier_value)
                else:
                    _discard()
                    break
    elif errors is not None:
        errors.append({'name': 'NO_DATA', 'info': []})

    if errors:
        return None

    machine_code = bytearray()
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
Пример #5
0
# Base opcode and the bit position of the "M vs. address/symbol name" flip
# bit for every jump/call mnemonic (including aliases).
_JMP_CALL_OPCODES = {
    'jmp': (0b01110101, 1),
    'jc': (0b01111001, 1),
    'jb': (0b01111001, 1),
    'jnae': (0b01111001, 1),
    'jnc': (0b01111101, 1),
    'jnb': (0b01111101, 1),
    'jae': (0b01111101, 1),
    'jz': (0b10001111, 4),
    'je': (0b10001111, 4),
    'jnz': (0b10101111, 4),
    'jne': (0b10101111, 4),
    'ja': (0b00000001, 1),
    'jnbe': (0b00000001, 1),
    'jna': (0b00000110, 0),
    'jbe': (0b00000110, 0),
    'call': (0b11000001, 1),
    'cc': (0b11000101, 1),
    'cb': (0b11000101, 1),
    'cnae': (0b11000101, 1),
    'cnc': (0b11001011, 2),
    'cnb': (0b11001011, 2),
    'cae': (0b11001011, 2),
    'cz': (0b11010001, 1),
    'ce': (0b11010001, 1),
    'cnz': (0b11010101, 1),
    'cne': (0b11010101, 1),
    'ca': (0b00010010, 0),
    'cnbe': (0b00010010, 0),
    'cna': (0b00010110, 0),
    'cbe': (0b00010110, 0),
}


def mnemonics_jmps_calls(mnemonic, operands, errors=None):
    """Assemble a jump or call instruction.

    The single operand is ``M``, a 16-bit address or a symbol name (resolved
    later through a relocation entry). Whether the target is ``M`` or an
    address/symbol is encoded as a flip bit in the opcode.

    Args:
        mnemonic: Jump/call mnemonic, compared as given (no case folding).
        operands: Sequence of operand strings; exactly one is expected.
        errors: Optional list handed to the validators to collect errors.

    Returns:
        ``None`` if validation failed, otherwise a dict with
        ``'machine_code'`` and ``'relocation_table'``.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 1, errors):
        operand = operands[0].lower()
        if 'm' == operand:
            opcode = 0b0
        elif validate_operand_addr_size(operand, 16, errors):
            opcode = 0b1
            addr_value = get_addr_value(operand)
            if addr_value is None:
                # not a literal address: emit a two-byte placeholder and
                # record a relocation pointing at it (offset 1 = the byte
                # right after the opcode)
                operand = expand_local_symbol_name(operand)
                opcode_operands.extend([0, 0])
                _relocation_table.append({
                    'machine_code_offset': 1,
                    'symbol_index': symbol_table.get_index(operand)
                })
            else:
                opcode_operands.extend(binutils.word_to_le(addr_value))

        # merge the flip bit into the mnemonic's base opcode; a mnemonic
        # missing from the table leaves the bare flip bit as the opcode
        if opcode is not None:
            encoding = _JMP_CALL_OPCODES.get(mnemonic)
            if encoding is not None:
                base_opcode, flip_bit_shift = encoding
                opcode = base_opcode | (opcode << flip_bit_shift)

    # A failed validation leaves opcode unset. Checking it explicitly keeps
    # the function from raising TypeError on bytearray.append(None) when the
    # caller did not pass an errors list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
Пример #6
0
def mnemonic_mov(operands, errors=None):
    """Assemble a ``mov`` instruction.

    Supported forms, as implemented below:

    * ``M`` <- 8-bit register
    * 8-bit register <- ``M``, 8-bit register, or immediate data (max. 8 bit)
    * 16-bit register <- symbol name (via relocation), 16-bit register, or
      immediate data (max. 16 bit)

    String data is rejected with ``INCOMPATIBLE_DATA_TYPE``.

    Args:
        operands: Sequence of operand strings; exactly two are expected.
        errors: Optional list that collects error dicts.

    Returns:
        ``None`` if validation failed, otherwise a dict with
        ``'machine_code'`` and ``'relocation_table'``.
    """
    opcode = None
    opcode_operands = bytearray()
    _relocation_table = []

    if validate_operands_count(operands, 2, errors):
        operand1 = operands[0].lower()
        operand2 = operands[1].lower()
        if 'm' == operand1:
            # move into M is only supported from an 8-bit register
            register1_opcode = 0b110
            if validate_operand_register_size(operand2, 8, errors):
                register2_opcode = get_register_opcode(operand2)
                opcode = 0b10000000 | (register1_opcode << 4) | (
                    register2_opcode << 1)
        elif validate_operand_register(operand1, errors):
            register1_size = get_register_size(operand1)
            register1_opcode = get_register_opcode(operand1)
            if 8 == register1_size:
                # move into an 8-bit register is supported from M, another
                # 8-bit register or using max. 8-bit data or a single
                # character but no string
                register2_opcode = None
                if 'm' == operand2:
                    register2_opcode = 0b110
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2, register1_size,
                                                      errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    # 0b111 in the source-register field marks immediate data
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.append(data_value)

                if register2_opcode is not None:
                    opcode = 0b10000000 | (register1_opcode << 4) | (
                        register2_opcode << 1)
            elif 16 == register1_size:
                # move into a 16-bit register is supported from a symbol name
                # (using relocation), another 16-bit register or using max.
                # 16-bit data or a single character including unicode but no
                # string
                register2_opcode = None
                if is_valid_name(operand2):
                    # emit a two-byte placeholder and record a relocation
                    # pointing at it (offset 1 = right after the opcode)
                    operand2 = expand_local_symbol_name(operand2)
                    register2_opcode = 0b111
                    opcode_operands.extend([0, 0])
                    _relocation_table.append({
                        'machine_code_offset': 1,
                        'symbol_index': symbol_table.get_index(operand2)
                    })
                elif is_valid_register(operand2):
                    if validate_operand_register_size(operand2, register1_size,
                                                      errors):
                        register2_opcode = get_register_opcode(operand2)
                elif data.is_valid_str(operand2):
                    # guard added: errors is optional, exactly as in the
                    # 8-bit branch above
                    if errors is not None:
                        errors.append({
                            'name': 'INCOMPATIBLE_DATA_TYPE',
                            'info': []
                        })
                elif validate_operand_data_size(operand2, register1_size,
                                                errors):
                    register2_opcode = 0b111
                    data_value = data.get_value(operand2)
                    opcode_operands.extend(binutils.word_to_le(data_value))

                if register2_opcode is not None:
                    opcode = (register1_opcode << 4) | (register2_opcode << 1)

    # A failed validation leaves opcode unset. Checking it explicitly keeps
    # the function from raising TypeError on bytearray.append(None) when the
    # caller did not pass an errors list.
    if errors or opcode is None:
        return None

    machine_code = bytearray([opcode])
    machine_code.extend(opcode_operands)
    return {
        'machine_code': machine_code,
        'relocation_table': _relocation_table
    }
Пример #7
0
def assemble_asm_file(file_name):
    """Assemble one assembly source file, line by line.

    Each line is parsed and then processed in three stages, each of which is
    skipped once an error has been collected for the line: directive, symbol
    definition, and mnemonic. Machine code and relocation entries produced
    for a mnemonic are appended to the record of the current symbol.

    Processing stops at the first error (reported through ``show_error``),
    at an ``end`` directive, or at end of file. A missing file is reported
    as ``FILE_NOT_FOUND``.

    The function updates the module-level ``current_*`` variables as it
    goes; ``current_proc_name`` and ``current_symbol_name`` are read before
    being assigned here, so their initial values come from the module.

    Args:
        file_name: Path of the assembly source file.
    """
    global current_file_name, current_line_num, current_line_str, current_symbol_name, current_proc_name

    if os.path.isfile(file_name):
        with open(file_name, 'r') as asm:
            current_file_name = file_name
            line_num = 0
            current_line_num = line_num

            for line_str in asm.readlines():
                current_line_str = line_str
                line_num += 1
                current_line_num = line_num

                # errors collected for this line only
                errors = []

                line = parse_asm_line_str(current_line_str, errors)

                # stage 1: directive
                if not errors and line['directive']:
                    directive = line['directive']
                    directive_lower = directive.lower()

                    if not is_valid_directive(directive_lower):
                        show_error({
                            'name': 'INVALID_DIRECTIVE',
                            'info': [directive]
                        })
                        return
                    elif 'base' == directive_lower:
                        directive_base(line['operands'], errors)
                    elif 'proc' == directive_lower:
                        # procedures cannot be nested
                        if current_proc_name is None:
                            current_proc_name = directive_proc(
                                line['operands'], errors)
                            if not errors:
                                # the procedure name is then handled as a
                                # symbol definition by stage 2 below
                                line['symbol_name'] = current_proc_name
                        else:
                            show_error({'name': 'UNEXPECTED_PROC', 'info': []})
                            return
                    elif 'endproc' == directive_lower:
                        if current_proc_name is not None:
                            current_proc_name = None
                            current_symbol_name = None
                        else:
                            show_error({
                                'name': 'UNEXPECTED_ENDPROC',
                                'info': []
                            })
                            return
                    elif 'end' == directive_lower:
                        # the .end directive simply exits the line-by-line loop (skipping the rest of the file)
                        break

                # stage 2: symbol definition
                if not errors and line['symbol_name']:
                    symbol_name = line['symbol_name']

                    if not is_valid_name(symbol_name):
                        # NOTE(review): meaning of the second show_error
                        # argument ('') is not visible here - confirm
                        show_error(
                            {
                                'name': 'INVALID_SYMBOL_NAME',
                                'info': [symbol_name]
                            }, '')
                        return

                    symbol_name = expand_local_symbol_name(symbol_name)

                    if symbols.symbol_exists(symbol_name):
                        show_error(
                            {
                                'name': 'DUPLICATE_SYMBOL',
                                'info': [symbol_name]
                            }, '')
                        return
                    else:
                        current_symbol_name = symbol_name

                        if symbol_table.symbol_exists(current_symbol_name):
                            # if the current symbol was already used as an operand (hence it already exists in the
                            # symbol table, but with a lower index), move it to the end of the symbol table to keep
                            # the symbols in the order of their definition
                            # the copy preserves the old index -> name
                            # mapping needed for the rebuild below
                            old_symbol_table = symbol_table.get_symbol_table(
                            ).copy()
                            symbol_table.remove_symbol(current_symbol_name)
                            symbol_table.add_symbol(current_symbol_name)

                            # rebuild all symbols to use the new symbol indexes
                            _symbols = symbols.get_symbols()
                            for symbol in _symbols.values():
                                # procedure
                                proc_name = symbol_table.get_symbol_name(
                                    symbol['proc_index'], old_symbol_table)
                                symbol['proc_index'] = symbol_table.get_index(
                                    proc_name)

                                # relocation table
                                relocation_table.rebuild(
                                    symbol['relocation_table'],
                                    old_symbol_table)
                        else:
                            symbol_table.add_symbol(current_symbol_name)

                        symbols.add_symbol(current_symbol_name,
                                           current_proc_name)

                # stage 3: mnemonic
                if not errors and line['mnemonic']:
                    # an instruction must belong to a symbol
                    if not current_symbol_name:
                        show_error({
                            'name': 'INSTRUCTION_WITHOUT_SYMBOL',
                            'info': []
                        })
                        return
                    else:
                        assembly = assemble_asm_line(line, errors)

                        if not errors:
                            # dump_assembly(assembly)

                            symbol = symbols.get_symbol(current_symbol_name)

                            for relocation in assembly['relocation_table']:
                                # adjust the machine code offset to be relative to the current symbol
                                relocation['machine_code_offset'] += len(
                                    symbol['machine_code'])
                            # offsets are shifted above before the new bytes
                            # are appended, so len() is still the start
                            # offset of this instruction
                            symbol['relocation_table'].extend(
                                assembly['relocation_table'])
                            symbol['machine_code'].extend(
                                assembly['machine_code'])

                # end of line

                # only the first error collected for the line is reported
                if errors:
                    show_error(errors[0])
                    break

            # end of file
    else:
        show_error({'name': 'FILE_NOT_FOUND', 'info': [file_name]})