class InstructionTranslator(object):
    """Base class for translators that lower native instructions to REIL.

    Subclasses implement ``_translate``; ``translate`` wraps it with
    logging and error conversion.
    """

    def __init__(self):
        # An instance of a *VariableNamer*. This is used so all the
        # temporary REIL registers are unique.
        self._ir_name_generator = VariableNamer("t", separator="")

    def translate(self, instruction):
        """Return REIL representation of an instruction.

        Any exception raised by ``_translate`` is logged and replaced by a
        ``TranslationError``.
        """
        try:
            trans_instrs = self._translate(instruction)
        except Exception:
            self._log_translation_exception(instruction)

            raise TranslationError("Unknown error")

        return trans_instrs

    def reset(self):
        """Restart REIL register name generator.
        """
        self._ir_name_generator.reset()

    def _translate(self, instruction):
        """Translate *instruction*; must be overridden by subclasses."""
        raise NotImplementedError()

    # Auxiliary functions
    # ======================================================================== #
    @staticmethod
    def _bytes_to_hex(raw_bytes):
        """Return *raw_bytes* as a lowercase hex string without separators.

        Iterating a Python 3 ``bytes`` object yields ints, whereas a Python 2
        ``str`` yields one-character strings; both are handled so the log
        helpers never fail while reporting an error.
        """
        return "".join(
            "%02x" % (item if isinstance(item, int) else ord(item))
            for item in raw_bytes
        )

    def _log_not_supported_instruction(self, instruction):
        """Log (level INFO) that *instruction* has no translation."""
        logger.info(
            "Instruction not supported: %s (%s [%s])",
            instruction.mnemonic,
            instruction,
            self._bytes_to_hex(instruction.bytes),
            exc_info=True
        )

    def _log_translation_exception(self, instruction):
        """Log (level ERROR) that translating *instruction* failed."""
        logger.error(
            "Error translating instruction: %s (%s [%s])",
            instruction.mnemonic,
            instruction,
            self._bytes_to_hex(instruction.bytes),
            exc_info=True
        )
class InstructionTranslator(object):
    """Common scaffolding for native-instruction to REIL translators.

    Concrete translators provide ``_translate``; this class supplies the
    public ``translate`` wrapper, the temporary-register namer and logging.
    """

    def __init__(self):
        # One *VariableNamer* per translator keeps every temporary REIL
        # register name unique.
        self._ir_name_generator = VariableNamer("t", separator="")

    def translate(self, instruction):
        """Translate *instruction* and return its REIL representation.

        A failure inside ``_translate`` is logged and re-raised as a
        ``TranslationError``.
        """
        try:
            return self._translate(instruction)
        except Exception:
            self._log_translation_exception(instruction)

            raise TranslationError("Unknown error")

    def reset(self):
        """Reset the REIL temporary-register name generator."""
        namer = self._ir_name_generator
        namer.reset()

    def _translate(self, instruction):
        """Architecture-specific translation hook; subclasses override it."""
        raise NotImplementedError()

    # Auxiliary functions
    # ======================================================================== #
    def _log_not_supported_instruction(self, instruction):
        """Emit a warning saying *instruction* is not supported."""
        details = (
            instruction.mnemonic,
            instruction,
            codecs.encode(instruction.bytes, 'hex'),
        )

        logger.warning("Instruction not supported: %s (%s [%s])", *details, exc_info=True)

    def _log_translation_exception(self, instruction):
        """Emit an error saying the translation of *instruction* failed."""
        details = (
            instruction.mnemonic,
            instruction,
            codecs.encode(instruction.bytes, 'hex'),
        )

        logger.error("Error translating instruction: %s (%s [%s])", *details, exc_info=True)
class X86Translator(object):

    """x86 to IR Translator."""

    def __init__(self, architecture_mode=ARCH_X86_MODE_32, translation_mode=FULL_TRANSLATION):

        # Set *Architecture Mode*. The translation of each instruction
        # into the REIL language is based on this.
        self.arch_mode = architecture_mode

        # Set *Translation Mode*.
        self._translation_mode = translation_mode

        # An instance of a ReilInstructionBuilder.
        self.ir_builder = ReilInstructionBuilder()

        # An instance of *ArchitectureInformation*.
        self.arch_info = X86ArchitectureInformation(self.arch_mode)

        # An instance of a *VariableNamer*. This is used so all the
        # temporary REIL registers are unique.
        self.ir_reg_name_generator = VariableNamer("t", separator="")

        # An instance of a X86InstructionTranslator
        self.instr_translator = X86InstructionTranslator(
            self.ir_reg_name_generator,
            self.arch_mode,
            self._translation_mode
        )

    @add_register_size
    def translate(self, instruction):
        """Return IR representation of an instruction.

        The result is the concatenation of the source-operand reads, the
        instruction body and the destination-operand writes. An instruction
        without translator yields a single ``unkn`` instruction. Any other
        failure is reported through ``print_translation_exception`` and
        whatever was generated before the failure is returned.
        """
        trans_instrs = []
        src_read_instrs = []
        dst_write_instrs = []

        try:
            src_regs, src_read_instrs = self._translate_src_oprnds(instruction)
            dst_regs, dst_write_instrs = self._translate_dst_oprnds(instruction)

            trans_instrs = self.instr_translator.translate(instruction, src_regs, dst_regs)
        except NotImplementedError:
            # Drop the operand accesses: the instruction collapses to 'unkn'.
            src_read_instrs = []
            dst_write_instrs = []

            trans_instrs = [self.ir_builder.gen_unkn()]

            logger.debug(
                "[E] x86 Translator :: Instruction not supported : '%s' (%s)",
                instruction,
                instruction.mnemonic
            )
        except Exception as err:
            print_translation_exception(instruction, err)

        translation = src_read_instrs + trans_instrs + dst_write_instrs

        self._translate_instr_addresses(instruction.address, translation)

        return translation

    def reset(self):
        """Restart IR register name generator.
        """
        self.ir_reg_name_generator.reset()

    @property
    def translation_mode(self):
        """Get translation mode.
        """
        return self._translation_mode

    @translation_mode.setter
    def translation_mode(self, value):
        """Set translation mode.

        The value is also propagated to the instruction translator.
        """
        self._translation_mode = value
        self.instr_translator._translation_mode = value

    # Auxiliary functions
    # ======================================================================== #
    def _translate_instr_addresses(self, base_address, translation):
        """Set the address of each REIL instruction in *translation*.

        Each address is ``(base_address << 8) | index``, where *index* is
        the position of the instruction within the translation.
        """
        # NOTE(review): a falsy base address (None, but also 0) leaves the
        # instructions untouched -- confirm address 0 is meant to be skipped.
        if base_address:
            for index, instr in enumerate(translation):
                instr.address = (base_address << 8) | (index & 0xff)

    def _translate_src_oprnds(self, instruction):
        """Return instruction sources access translation.

        Returns a pair ``(src_regs, src_read_instrs)``: the REIL operands
        holding each source value and the REIL instructions needed to load
        them (only memory operands generate instructions).
        """
        src_regs = []
        src_read_instrs = []

        for src, acc_mem in instruction.source_operands:
            if isinstance(src, barf.arch.x86.x86base.X86ImmediateOperand):
                read_src_reg = ReilImmediateOperand(src.immediate, src.size)
            elif isinstance(src, barf.arch.x86.x86base.X86RegisterOperand):
                read_src_reg = ReilRegisterOperand(src.name, src.size)
            elif isinstance(src, barf.arch.x86.x86base.X86MemoryOperand):
                read_src_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), src.size)

                src_read_instrs += self._generate_read_instr(src, read_src_reg, acc_mem)
            else:
                raise Exception("Unknown source operand type: %r" % (src,))

            src_regs.append(read_src_reg)

        return src_regs, src_read_instrs

    def _translate_dst_oprnds(self, instruction):
        """Return instruction destination access translation.

        Returns a pair ``(dst_regs, dst_write_instrs)``: the REIL operands
        the instruction body must write to and the REIL instructions that
        commit those values afterwards.
        """
        dst_regs = []
        dst_write_instrs = []

        # Names of the registers that are read by the instruction. Built as
        # a list (not a lazy filter object) because it is consulted once per
        # destination operand.
        src_reg_names = [
            src.name
            for src, _ in instruction.source_operands
            if isinstance(src, barf.arch.x86.x86base.X86RegisterOperand)
        ]

        for dst in instruction.destination_operands:
            if isinstance(dst, barf.arch.x86.x86base.X86RegisterOperand):
                if dst.name in src_reg_names:
                    # The register is both read and written: compute into a
                    # temporary and copy it back once the body has run.
                    write_dst_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), dst.size)
                    dst_reg = ReilRegisterOperand(dst.name, dst.size)

                    dst_write_instrs.append(self.ir_builder.gen_str(write_dst_reg, dst_reg))
                else:
                    write_dst_reg = ReilRegisterOperand(dst.name, dst.size)
            elif isinstance(dst, barf.arch.x86.x86base.X86MemoryOperand):
                write_dst_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), dst.size)

                dst_write_instrs += self._generate_write_instrs(dst, write_dst_reg)
            else:
                raise Exception("Unknown destination operand type: %r" % (dst,))

            dst_regs.append(write_dst_reg)

        return dst_regs, dst_write_instrs

    def _generate_read_instr(self, operand, dst_reg, access_memory):
        """Return operand read memory access translation.

        When *access_memory* is true the effective address is computed and
        the value stored there is loaded into *dst_reg*. Otherwise the
        effective address itself is placed in *dst_reg*.
        """
        if access_memory:
            addr_reg, instrs = self._compute_memory_address(operand, None)

            instrs.append(self.ir_builder.gen_ldm(addr_reg, dst_reg))
        else:
            addr_reg, instrs = self._compute_memory_address(operand, dst_reg)

            # With a single address component the address is not computed
            # into dst_reg, so it has to be copied explicitly. This includes
            # a lone scaled index, whose value lives in the 'mul' temporary.
            if len(instrs) == 0 or addr_reg is not dst_reg:
                instrs.append(self.ir_builder.gen_str(addr_reg, dst_reg))

        return instrs

    def _generate_write_instrs(self, operand, dst_reg):
        """Return operand write memory access translation.

        The effective address of *operand* is computed and the value of
        *dst_reg* is stored there.
        """
        addr_reg, instrs = self._compute_memory_address(operand, None)

        return instrs + [self.ir_builder.gen_stm(dst_reg, addr_reg)]

    def _compute_memory_address(self, mem_operand, dst_reg):
        """Return operand memory access translation.

        Returns a pair ``(addr_reg, instrs)`` where *instrs* compute the
        effective address and *addr_reg* is the operand holding it. When
        *dst_reg* is given and an addition is needed, the result is written
        to *dst_reg*; otherwise a fresh temporary is used.
        """
        # reil code generation:
        #   add base, disp, t0
        #   mul index, scale, t1
        #   add t1, t0, t2

        size = self.arch_info.architecture_size

        regs, instrs = self._unpack_memory_operand(mem_operand)

        addr_reg = dst_reg if dst_reg else None

        if len(regs) == 3:
            temp_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), size)

            if not dst_reg:
                addr_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), size)

            instrs.append(self.ir_builder.gen_add(regs[0], regs[1], temp_reg))
            instrs.append(self.ir_builder.gen_add(temp_reg, regs[2], addr_reg))
        elif len(regs) == 2:
            if not dst_reg:
                addr_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), size)

            instrs.append(self.ir_builder.gen_add(regs[0], regs[1], addr_reg))
        elif len(regs) == 1:
            # Nothing to add: the single component already is the address.
            addr_reg = regs[0]

        return addr_reg, instrs

    def _unpack_memory_operand(self, operand):
        """Return memory operand components.

        Returns a pair ``(regs, instrs)``: the list of present address
        components (base, scaled index, displacement -- in that order) and
        the instructions needed to compute the scaled index.
        """
        # [base + scale * index + disp] ->
        #   [base, index * scale, disp], [index * scale instr]

        size = self.arch_info.architecture_size
        instrs = []

        base_reg, index_reg, disp_reg = None, None, None

        if operand.base:
            base_reg = ReilRegisterOperand(operand.base, size)

        if operand.index and operand.scale != 0x0:
            index_temp_reg = ReilRegisterOperand(operand.index, size)
            scale_temp_reg = ReilImmediateOperand(operand.scale, size)
            index_reg = ReilRegisterOperand(self.ir_reg_name_generator.get_next(), size)

            instrs.append(self.ir_builder.gen_mul(index_temp_reg, scale_temp_reg, index_reg))

        if operand.displacement and operand.displacement != 0x0:
            disp_reg = ReilImmediateOperand(operand.displacement, size)

        # A real list is required: callers use len() and indexing on it.
        regs = [reg for reg in (base_reg, index_reg, disp_reg) if reg]

        return regs, instrs
class ArmTranslator(object):

    """ARM to IR Translator."""

    def __init__(self, architecture_mode=ARCH_ARM_MODE_32, translation_mode=FULL_TRANSLATION):

        # Set *Architecture Mode*. The translation of each instruction
        # into the REIL language is based on this.
        self._arch_mode = architecture_mode

        # An instance of *ArchitectureInformation*.
        self._arch_info = ArmArchitectureInformation(architecture_mode)

        # Set *Translation Mode*.
        self._translation_mode = translation_mode

        # An instance of a *VariableNamer*. This is used so all the
        # temporary REIL registers are unique.
        self._ir_name_generator = VariableNamer("t", separator="")

        self._builder = ReilInstructionBuilder()

        # One-bit REIL registers modelling the N, Z, C and V flags.
        self._flags = {
            "nf" : ReilRegisterOperand("nf", 1),
            "zf" : ReilRegisterOperand("zf", 1),
            "cf" : ReilRegisterOperand("cf", 1),
            "vf" : ReilRegisterOperand("vf", 1),
        }

        # NOTE(review): for any other architecture mode _sp, _pc, _lr and
        # _ws are left undefined.
        if self._arch_mode == ARCH_ARM_MODE_32:
            self._sp = ReilRegisterOperand("r13", 32)   # TODO: Implement alias
            self._pc = ReilRegisterOperand("r15", 32)
            self._lr = ReilRegisterOperand("r14", 32)

            self._ws = ReilImmediateOperand(4, 32)  # word size
        elif self._arch_mode == ARCH_ARM_MODE_64:
            self._sp = ReilRegisterOperand("r13", 64)
            self._pc = ReilRegisterOperand("r15", 64)
            self._lr = ReilRegisterOperand("r14", 64)

            self._ws = ReilImmediateOperand(8, 64)  # word size

    def translate(self, instruction):
        """Return IR representation of an instruction.

        An instruction without translator yields a single ``unkn``
        instruction. Any other translation error is logged and re-raised.
        Every generated instruction is checked with ``check_operands_size``.
        """
        try:
            trans_instrs = self._translate(instruction)
        except NotImplementedError:
            unkn_instr = self._builder.gen_unkn()
            unkn_instr.address = instruction.address << 8 | (0x0 & 0xff)
            trans_instrs = [unkn_instr]

            self._log_not_supported_instruction(instruction)
        except Exception:
            self._log_translation_exception(instruction)

            raise

        # Some sanity check....
        for instr in trans_instrs:
            try:
                check_operands_size(instr, self._arch_info.architecture_size)
            except Exception:
                logger.error(
                    "Invalid operand size: %s (%s)",
                    instr,
                    instruction
                )

                raise

        return trans_instrs

    def _translate(self, instruction):
        """Translate a arm instruction into REIL language.

        :param instruction: a arm instruction
        :type instruction: ArmInstruction
        """

        # Retrieve translation function.
        translator_name = "_translate_" + instruction.mnemonic
        translator_fn = getattr(self, translator_name, self._not_implemented)

        # Translate instruction.
        tb = ArmTranslationBuilder(self._ir_name_generator, self._arch_mode)

        # Pre-processing: evaluate flags
        nop_cc_lbl = tb.label('condition_code_not_met')
        if instruction.condition_code is not None:
            self._evaluate_condition_code(tb, instruction, nop_cc_lbl)

        translator_fn(tb, instruction)

        tb.add(nop_cc_lbl)
        tb.add(self._builder.gen_nop())  # Added NOP so there is a REIL instruction to jump to

        return tb.instanciate(instruction.address)

    def reset(self):
        """Restart IR register name generator.
        """
        self._ir_name_generator.reset()

    @property
    def translation_mode(self):
        """Get translation mode.
        """
        return self._translation_mode

    @translation_mode.setter
    def translation_mode(self, value):
        """Set translation mode.
        """
        self._translation_mode = value

    @staticmethod
    def _format_instruction_bytes(raw_bytes):
        """Return *raw_bytes* as space separated two-digit hex values.

        Iterating a Python 3 ``bytes`` object yields ints, whereas a Python 2
        ``str`` yields one-character strings; both are accepted.
        """
        return " ".join(
            "%02x" % (item if isinstance(item, int) else ord(item))
            for item in raw_bytes
        )

    def _log_not_supported_instruction(self, instruction):
        """Log (level INFO) that *instruction* has no translation."""
        bytes_str = self._format_instruction_bytes(instruction.bytes)

        logger.info(
            "Instruction not supported: %s (%s [%s])",
            instruction.mnemonic,
            instruction,
            bytes_str
        )

    def _log_translation_exception(self, instruction):
        """Log (level ERROR, with traceback) a failed translation."""
        bytes_str = self._format_instruction_bytes(instruction.bytes)

        logger.error(
            "Failed to translate arm to REIL: %s (%s)",
            instruction,
            bytes_str,
            exc_info=True
        )

# ============================================================================ #

    def _not_implemented(self, tb, instruction):
        """Fallback translator used when a mnemonic has no handler."""
        raise NotImplementedError("Instruction Not Implemented")

# Translators
# ============================================================================ #
# ============================================================================ #

# "Flags"
# ============================================================================ #
    def _update_nf(self, tb, oprnd0, oprnd1, result):
        """Set N to bit ``oprnd0.size - 1`` of *result*."""
        sign = tb._extract_bit(result, oprnd0.size - 1)
        tb.add(self._builder.gen_str(sign, self._flags["nf"]))

    def _carry_from_uf(self, tb, oprnd0, oprnd1, result):
        """Set C to bit ``oprnd0.size`` of the double-width *result*."""
        assert (result.size == oprnd0.size * 2)

        carry = tb._extract_bit(result, oprnd0.size)
        tb.add(self._builder.gen_str(carry, self._flags["cf"]))

    def _borrow_from_uf(self, tb, oprnd0, oprnd1, result):
        """Set C from the borrow of a subtraction."""
        # BorrowFrom as defined in the ARM Reference Manual has the same implementation as CarryFrom
        self._carry_from_uf(tb, oprnd0, oprnd1, result)

    def _overflow_from_add_uf(self, tb, oprnd0, oprnd1, result):
        """Set V when both operands share a sign that differs from the result's."""
        op1_sign = tb._extract_bit(oprnd0, oprnd0.size - 1)
        op2_sign = tb._extract_bit(oprnd1, oprnd0.size - 1)
        res_sign = tb._extract_bit(result, oprnd0.size - 1)

        overflow = tb._and_regs(tb._equal_regs(op1_sign, op2_sign), tb._unequal_regs(op1_sign, res_sign))
        tb.add(self._builder.gen_str(overflow, self._flags["vf"]))

    # Evaluate overflow and update the flag
    def _overflow_from_sub_uf(self, tb, oprnd0, oprnd1, result):
        """Set V from ``tb._overflow_from_sub``."""
        tb.add(self._builder.gen_str(tb._overflow_from_sub(oprnd0, oprnd1, result), self._flags["vf"]))

    def _update_zf(self, tb, oprnd0, oprnd1, result):
        """Set Z when the low ``oprnd0.size`` bits of *result* are zero."""
        zf = self._flags["zf"]

        imm0 = tb.immediate((2**oprnd0.size)-1, result.size)
        tmp0 = tb.temporal(oprnd0.size)

        tb.add(self._builder.gen_and(result, imm0, tmp0))  # filter low part of result
        tb.add(self._builder.gen_bisz(tmp0, zf))

    def _carry_out(self, tb, carry_operand, oprnd0, oprnd1, result):
        """Set C to the shifter carry out of *carry_operand*.

        Immediate and plain register operands leave C untouched. Only the
        'lsl' shift type is implemented; other shift types raise
        ``NotImplementedError``.
        """
        if isinstance(carry_operand, ArmImmediateOperand):
            return
        elif isinstance(carry_operand, ArmRegisterOperand):
            return
        elif isinstance(carry_operand, ArmShiftedRegisterOperand):
            base = ReilRegisterOperand(carry_operand.base_reg.name, carry_operand.size)
            shift_type = carry_operand.shift_type
            shift_amount = carry_operand.shift_amount

            if shift_type == 'lsl':
                if isinstance(shift_amount, ArmImmediateOperand):
                    if shift_amount.immediate == 0:
                        return
                    else:
                        # carry_out = Rm[32 - shift_imm]
                        shift_carry_out = tb._extract_bit(base, 32 - shift_amount.immediate)
                elif isinstance(shift_amount, ArmRegisterOperand):
                    # Rs: register with shift amount
                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    # else if Rs[7:0] <= 32 then
                    #     carry_out = Rm[32 - Rs[7:0]]
                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0

                    shift_carry_out = tb.temporal(1)
                    tb.add(self._builder.gen_str(self._flags["cf"], shift_carry_out))
                    rs = ReilRegisterOperand(shift_amount.name, shift_amount.size)
                    rs_7_0 = tb._and_regs(rs, tb.immediate(0xFF, rs.size))

                    end_label = tb.label('end_label')
                    rs_greater_32_label = tb.label('rs_greater_32_label')

                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    tb._jump_if_zero(rs_7_0, end_label)  # shift_carry_out already has the C flag set, so do nothing

                    tb.add(self._builder.gen_jcc(tb._greater_than_or_equal(rs_7_0, tb.immediate(33, rs_7_0.size)),
                                                 rs_greater_32_label))

                    # Rs > 0 and Rs <= 32
                    #     carry_out = Rm[32 - Rs[7:0]]
                    extract_bit_number = tb.temporal(rs_7_0.size)
                    tb.add(self._builder.gen_sub(tb.immediate(32, rs_7_0.size), rs_7_0, extract_bit_number))
                    tb.add(self._builder.gen_str(tb._extract_bit_with_register(base, extract_bit_number),
                                                 shift_carry_out))
                    tb._jump_to(end_label)

                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0
                    tb.add(rs_greater_32_label)
                    tb.add(self._builder.gen_str(tb.immediate(0, 1), shift_carry_out))

                    tb.add(end_label)
                else:
                    raise Exception("carry_out: Unknown shift amount type.")
            else:
                # TODO: Implement other shift types
                raise NotImplementedError("Instruction Not Implemented: carry_out: shift type " + carry_operand.shift_type)
        else:
            raise Exception("carry_out: Unknown operand type.")

        tb.add(self._builder.gen_str(shift_carry_out, self._flags["cf"]))

    def _update_flags_data_proc_add(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after an addition."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_from_uf(tb, oprnd0, oprnd1, result)
        self._overflow_from_add_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_sub(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after a subtraction."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._borrow_from_uf(tb, oprnd0, oprnd1, result)
        # C Flag = NOT BorrowFrom (to be used by subsequent instructions like SBC and RSC)
        tb.add(self._builder.gen_str(tb._negate_reg(self._flags["cf"]), self._flags["cf"]))
        self._overflow_from_sub_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_other(self, tb, second_operand, oprnd0, oprnd1, result):
        """Update Z, N and C (shifter carry out of *second_operand*)."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_out(tb, second_operand, oprnd0, oprnd1, result)
        # Overflow Flag (V) unaffected

    def _update_flags_other(self, tb, oprnd0, oprnd1, result):
        """Update Z and N only."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        # Carry Flag (C) unaffected
        # Overflow Flag (V) unaffected

    def _undefine_flag(self, tb, flag):
        """Model an undefined *flag* by setting it to 0."""
        # NOTE: In every test I've made, each time a flag is left
        # undefined it is always set to 0.

        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _clear_flag(self, tb, flag):
        """Set *flag* to 0."""
        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _set_flag(self, tb, flag):
        """Set *flag* to 1."""
        imm = tb.immediate(1, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    # EQ: Z set
    def _evaluate_eq(self, tb):
        return self._flags["zf"]

    # NE: Z clear
    def _evaluate_ne(self, tb):
        return tb._negate_reg(self._flags["zf"])

    # CS: C set
    def _evaluate_cs(self, tb):
        return self._flags["cf"]

    # CC: C clear
    def _evaluate_cc(self, tb):
        return tb._negate_reg(self._flags["cf"])

    # MI: N set
    def _evaluate_mi(self, tb):
        return self._flags["nf"]

    # PL: N clear
    def _evaluate_pl(self, tb):
        return tb._negate_reg(self._flags["nf"])

    # VS: V set
    def _evaluate_vs(self, tb):
        return self._flags["vf"]

    # VC: V clear
    def _evaluate_vc(self, tb):
        return tb._negate_reg(self._flags["vf"])

    # HI: C set and Z clear
    def _evaluate_hi(self, tb):
        return tb._and_regs(self._flags["cf"], tb._negate_reg(self._flags["zf"]))

    # LS: C clear or Z set
    def _evaluate_ls(self, tb):
        return tb._or_regs(tb._negate_reg(self._flags["cf"]), self._flags["zf"])

    # GE: N == V
    def _evaluate_ge(self, tb):
        return tb._equal_regs(self._flags["nf"], self._flags["vf"])

    # LT: N != V
    def _evaluate_lt(self, tb):
        return tb._negate_reg(self._evaluate_ge(tb))

    # GT: (Z == 0) and (N == V)
    def _evaluate_gt(self, tb):
        return tb._and_regs(tb._negate_reg(self._flags["zf"]), self._evaluate_ge(tb))

    # LE: (Z == 1) or (N != V)
    def _evaluate_le(self, tb):
        return tb._or_regs(self._flags["zf"], self._evaluate_lt(tb))

    def _evaluate_condition_code(self, tb, instruction, nop_label):
        """Emit a jump to *nop_label* taken when the condition code is not met.

        Nothing is emitted for the 'always' condition code.
        """
        if instruction.condition_code == ARM_COND_CODE_AL:
            return

        eval_cc_fn = {
            ARM_COND_CODE_EQ : self._evaluate_eq,
            ARM_COND_CODE_NE : self._evaluate_ne,
            ARM_COND_CODE_CS : self._evaluate_cs,
            ARM_COND_CODE_HS : self._evaluate_cs,
            ARM_COND_CODE_CC : self._evaluate_cc,
            ARM_COND_CODE_LO : self._evaluate_cc,
            ARM_COND_CODE_MI : self._evaluate_mi,
            ARM_COND_CODE_PL : self._evaluate_pl,
            ARM_COND_CODE_VS : self._evaluate_vs,
            ARM_COND_CODE_VC : self._evaluate_vc,
            ARM_COND_CODE_HI : self._evaluate_hi,
            ARM_COND_CODE_LS : self._evaluate_ls,
            ARM_COND_CODE_GE : self._evaluate_ge,
            ARM_COND_CODE_LT : self._evaluate_lt,
            ARM_COND_CODE_GT : self._evaluate_gt,
            ARM_COND_CODE_LE : self._evaluate_le,
        }

        neg_cond = tb._negate_reg(eval_cc_fn[instruction.condition_code](tb))

        tb.add(self._builder.gen_jcc(neg_cond, nop_label))

        return

# "Data-processing Instructions"
# ============================================================================ #
    def _translate_mov(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], oprnd1)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[1], oprnd1, None, oprnd1)

    def _translate_and(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_and(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2], oprnd1, oprnd2, result)

    def _translate_orr(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_or(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2], oprnd1, oprnd2, result)

    def _translate_eor(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_xor(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2], oprnd1, oprnd2, result)

    def _translate_add(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        # Double-width result so the carry can be read from it.
        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_add(tb, oprnd1, oprnd2, result)

    def _translate_sub(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        # Double-width result so the borrow can be read from it.
        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_sub(tb, oprnd1, oprnd2, result)

    def _translate_mul(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_mul(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_zf(tb, oprnd1, oprnd2, result)
            self._update_nf(tb, oprnd1, oprnd2, result)

    def _translate_cmn(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[0])
        oprnd2 = tb.read(instruction.operands[1])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

        self._update_flags_data_proc_add(tb, oprnd1, oprnd2, result)  # S = 1 (implied in the instruction)

    def _translate_cmp(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[0])
        oprnd2 = tb.read(instruction.operands[1])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

        self._update_flags_data_proc_sub(tb, oprnd1, oprnd2, result)  # S = 1 (implied in the instruction)

# "Load/store word and unsigned byte Instructions"
# ============================================================================ #
    def _translate_ldr(self, tb, instruction):

        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], oprnd1)

    def _translate_str(self, tb, instruction):

        oprnd0 = tb.read(instruction.operands[0])

        tb.write(instruction.operands[1], oprnd0)

# "Load/store multiple Instructions"
# ============================================================================ #
    def _translate_ldm(self, tb, instruction):
        self._translate_ldm_stm(tb, instruction, True)

    def _translate_stm(self, tb, instruction):
        self._translate_ldm_stm(tb, instruction, False)

    # LDM and STM have exactly the same logic except one loads and the other stores
    # It is assumed that the disassembler (for example Capstone) writes the register list in increasing order
    def _translate_ldm_stm(self, tb, instruction, load = True):
        """Translate a load (``load=True``) or store multiple instruction.

        Note that ``instruction.ldm_stm_addr_mode`` is normalised in place:
        a missing mode defaults to IA and stack modes are mapped to their
        non-stack equivalents.
        """
        base = tb.read(instruction.operands[0])
        reg_list = tb.read(instruction.operands[1])

        if instruction.ldm_stm_addr_mode is None:
            instruction.ldm_stm_addr_mode = ARM_LDM_STM_IA  # default mode for load and store

        if load:
            load_store_fn = self._load_value
            # Convert stack addressing modes to non-stack addressing modes
            if instruction.ldm_stm_addr_mode in ldm_stack_am_to_non_stack_am:
                instruction.ldm_stm_addr_mode = ldm_stack_am_to_non_stack_am[instruction.ldm_stm_addr_mode]
        else:  # Store
            load_store_fn = self._store_value
            if instruction.ldm_stm_addr_mode in stm_stack_am_to_non_stack_am:
                instruction.ldm_stm_addr_mode = stm_stack_am_to_non_stack_am[instruction.ldm_stm_addr_mode]

        pointer = tb.temporal(base.size)
        tb.add(self._builder.gen_str(base, pointer))
        reg_list_size_bytes = ReilImmediateOperand(self._ws.immediate * len(reg_list), base.size)

        if instruction.ldm_stm_addr_mode == ARM_LDM_STM_IA:
            for reg in reg_list:
                load_store_fn(tb, pointer, reg)
                pointer = tb._add_to_reg(pointer, self._ws)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_IB:
            for reg in reg_list:
                pointer = tb._add_to_reg(pointer, self._ws)
                load_store_fn(tb, pointer, reg)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DA:
            reg_list.reverse()  # Assuming the registry list was in increasing registry number
            for reg in reg_list:
                load_store_fn(tb, pointer, reg)
                pointer = tb._sub_to_reg(pointer, self._ws)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DB:
            reg_list.reverse()
            for reg in reg_list:
                pointer = tb._sub_to_reg(pointer, self._ws)
                load_store_fn(tb, pointer, reg)
        else:
            raise Exception("Unknown addressing mode.")

        # Write-back
        if instruction.operands[0].wb:
            if instruction.ldm_stm_addr_mode == ARM_LDM_STM_IA or instruction.ldm_stm_addr_mode == ARM_LDM_STM_IB:
                tmp = tb._add_to_reg(base, reg_list_size_bytes)
            elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DA or instruction.ldm_stm_addr_mode == ARM_LDM_STM_DB:
                tmp = tb._sub_to_reg(base, reg_list_size_bytes)
            tb.add(self._builder.gen_str(tmp, base))

    def _load_value(self, tb, mem_dir, value):
        """Load the memory at *mem_dir* into *value*."""
        tb.add(self._builder.gen_ldm(mem_dir, value))

    def _store_value(self, tb, mem_dir, value):
        """Store *value* into the memory at *mem_dir*."""
        tb.add(self._builder.gen_stm(value, mem_dir))

    # PUSH and POP are equivalent to STM and LDM in FD mode with the SP (and write-back)
    # Instructions are modified to adapt it to the LDM/STM interface
    def _translate_push_pop(self, tb, instruction, translate_fn):
        """Rewrite a PUSH/POP as STM/LDM on SP and translate it.

        The operands and addressing mode of *instruction* are modified in
        place.
        """
        sp_name = "r13"  # TODO: Use self._sp
        sp_size = instruction.operands[0].reg_list[0][0].size  # Infer it from the registers list
        sp_reg = ArmRegisterOperand(sp_name, sp_size)
        sp_reg.wb = True
        instruction.operands = [sp_reg, instruction.operands[0]]
        instruction.ldm_stm_addr_mode = ARM_LDM_STM_FD
        translate_fn(tb, instruction)

    def _translate_push(self, tb, instruction):
        self._translate_push_pop(tb, instruction, self._translate_stm)

    def _translate_pop(self, tb, instruction):
        self._translate_push_pop(tb, instruction, self._translate_ldm)

# "Branch Instructions"
# ============================================================================ #
    def _translate_b(self, tb, instruction):
        self._translate_branch(tb, instruction, link = False)

    def _translate_bl(self, tb, instruction):
        self._translate_branch(tb, instruction, link = True)

    # TODO: Thumb
    def _translate_bx(self, tb, instruction):
        self._translate_branch(tb, instruction, link = False)

    def _translate_blx(self, tb, instruction):
        self._translate_branch(tb, instruction, link = True)

    def _translate_branch(self, tb, instruction, link):
        """Translate a branch, storing the return address in LR when *link*.

        The jump target is the native address shifted left by 8 bits, which
        matches the ``address << 8`` REIL address layout used by
        ``translate``.
        """
        arm_operand = instruction.operands[0]

        if isinstance(arm_operand, ArmImmediateOperand):
            target = ReilImmediateOperand(arm_operand.immediate << 8, self._pc.size + 8)
        elif isinstance(arm_operand, ArmRegisterOperand):
            target = ReilRegisterOperand(arm_operand.name, arm_operand.size)
            # Clear bit 0 of the register value before using it as target.
            target = tb._and_regs(target, ReilImmediateOperand(0xFFFFFFFE, target.size))

            tmp0 = tb.temporal(target.size + 8)
            tmp1 = tb.temporal(target.size + 8)

            tb.add(self._builder.gen_str(target, tmp0))
            tb.add(self._builder.gen_bsh(tmp0, ReilImmediateOperand(8, target.size + 8), tmp1))

            target = tmp1
        else:
            raise NotImplementedError("Instruction Not Implemented: Unknown operand for branch operation.")

        if link:
            tb.add(self._builder.gen_str(ReilImmediateOperand(instruction.address + instruction.size, self._pc.size), self._lr))

        tb._jump_to(target)
class ArmTranslator(Translator):

    """ARM to IR Translator.

    Each ``_translate_<mnemonic>`` method appends the REIL instructions for
    one ARM instruction to a translation builder. The order in which
    temporaries are requested and instructions are added determines the
    generated code, so statements in these methods must not be reordered.
    """

    def __init__(self, architecture_mode=ARCH_ARM_MODE_THUMB, translation_mode=FULL_TRANSLATION):
        super(ArmTranslator, self).__init__()

        # Set *Architecture Mode*. The translation of each instruction
        # into the REIL language is based on this.
        self._arch_mode = architecture_mode

        # An instance of *ArchitectureInformation*.
        self._arch_info = ArmArchitectureInformation(architecture_mode)

        # Set *Translation Mode*.
        self._translation_mode = translation_mode

        # An instance of a *VariableNamer*. This is used so all the
        # temporary REIL registers are unique.
        self._ir_name_generator = VariableNamer("t", separator="")

        self._builder = ReilInstructionBuilder()

        # One-bit registers modelling the N, Z, C and V condition flags.
        self._flags = {
            "nf": ReilRegisterOperand("nf", 1),
            "zf": ReilRegisterOperand("zf", 1),
            "cf": ReilRegisterOperand("cf", 1),
            "vf": ReilRegisterOperand("vf", 1),
        }

        if self._arch_mode in [ARCH_ARM_MODE_ARM, ARCH_ARM_MODE_THUMB]:
            self._sp = ReilRegisterOperand("r13", 32)  # TODO: Implement alias
            self._pc = ReilRegisterOperand("r15", 32)
            self._lr = ReilRegisterOperand("r14", 32)

            self._ws = ReilImmediateOperand(4, 32)  # word size

        # TODO: Remove this code?
        # elif self._arch_mode == ARCH_ARM_MODE_64:
        #     self._sp = ReilRegisterOperand("r13", 64)
        #     self._pc = ReilRegisterOperand("r15", 64)
        #     self._lr = ReilRegisterOperand("r14", 64)
        #     self._ws = ReilImmediateOperand(8, 64) # word size

    def translate(self, instruction):
        """Return IR representation of an instruction.

        An instruction whose translation raises ``NotImplementedError`` is
        replaced by a single UNKN instruction and logged; any other
        exception is logged and re-raised. Every resulting instruction is
        then checked with ``check_operands_size``.
        """
        try:
            trans_instrs = self._translate(instruction)
        except NotImplementedError as e:
            unkn_instr = self._builder.gen_unkn()
            unkn_instr.address = instruction.address << 8 | (0x0 & 0xff)
            trans_instrs = [unkn_instr]

            self._log_not_supported_instruction(instruction, str(e))
        except Exception:
            self._log_translation_exception(instruction)

            raise

        # Some sanity check....
        for instr in trans_instrs:
            try:
                check_operands_size(instr, self._arch_info.architecture_size)
            except Exception:
                # Only real errors are reported as size problems; a bare
                # ``except`` would also log KeyboardInterrupt/SystemExit
                # as "Invalid operand size".
                logger.error("Invalid operand size: %s (%s)", instr, instruction)

                raise

        return trans_instrs

    def _translate(self, instruction):
        """Translate a arm instruction into REIL language.

        :param instruction: a arm instruction
        :type instruction: ArmInstruction

        NOTE: for branch mnemonics a missing condition code is replaced, on
        the instruction object itself, by ``ARM_COND_CODE_AL``.
        """
        # Retrieve translation function.
        translator_name = "_translate_" + instruction.mnemonic
        translator_fn = getattr(self, translator_name, self._not_implemented)

        # Translate instruction.
        tb = ArmTranslationBuilder(self._ir_name_generator, self._arch_mode)

        # TODO: Improve this.
        if instruction.mnemonic in [
                "b", "bl", "bx", "blx", "bne", "beq", "bpl", "ble", "bcs",
                "bhs", "blt", "bge", "bhi", "blo", "bls"
        ]:
            # Branches evaluate their own condition in _translate_branch.
            if instruction.condition_code is None:
                instruction.condition_code = ARM_COND_CODE_AL  # TODO: unify translations
            translator_fn(tb, instruction)
        else:
            # Pre-processing: evaluate flags
            if instruction.condition_code is not None:
                self._evaluate_condition_code(tb, instruction)

            translator_fn(tb, instruction)

        return tb.instanciate(instruction.address)

    def reset(self):
        """Restart IR register name generator.
        """
        self._ir_name_generator.reset()

    @property
    def translation_mode(self):
        """Get translation mode.
        """
        return self._translation_mode

    @translation_mode.setter
    def translation_mode(self, value):
        """Set translation mode.
        """
        self._translation_mode = value

    def _log_not_supported_instruction(self, instruction, reason="unknown"):
        """Log (info level) an instruction that has no translation."""
        # bytearray() iterates as ints for both a Python 2 str and a
        # Python 3 bytes object; ord(b) fails on Python 3 bytes.
        bytes_str = " ".join("%02x" % b for b in bytearray(instruction.bytes))

        logger.info("Instruction not supported: %s (%s [%s]). Reason: %s",
                    instruction.mnemonic, instruction, bytes_str, reason)

    def _log_translation_exception(self, instruction):
        """Log (error level, with traceback) a failed translation."""
        bytes_str = " ".join("%02x" % b for b in bytearray(instruction.bytes))

        logger.error("Failed to translate arm to REIL: %s (%s)",
                     instruction, bytes_str, exc_info=True)

    def _not_implemented(self, tb, instruction):
        """Fallback translator used when no ``_translate_<mnemonic>`` exists."""
        raise NotImplementedError("Instruction Not Implemented")

# Translators
# ============================================================================ #
# ============================================================================ #

# "Flags"
# ============================================================================ #
    def _update_nf(self, tb, oprnd0, oprnd1, result):
        """Set N to bit ``oprnd0.size - 1`` of *result*."""
        sign = tb._extract_bit(result, oprnd0.size - 1)
        tb.add(self._builder.gen_str(sign, self._flags["nf"]))

    def _carry_from_uf(self, tb, oprnd0, oprnd1, result):
        """Set C to bit ``oprnd0.size`` of a double-width *result*."""
        assert (result.size == oprnd0.size * 2)

        carry = tb._extract_bit(result, oprnd0.size)
        tb.add(self._builder.gen_str(carry, self._flags["cf"]))

    def _borrow_from_uf(self, tb, oprnd0, oprnd1, result):
        """Set C from the borrow of a subtraction."""
        # BorrowFrom as defined in the ARM Reference Manual has the same implementation as CarryFrom
        self._carry_from_uf(tb, oprnd0, oprnd1, result)

    def _overflow_from_add_uf(self, tb, oprnd0, oprnd1, result):
        """Set V for an addition.

        Overflow is flagged when both operands have the same sign bit and
        the result's sign bit differs from it.
        """
        op1_sign = tb._extract_bit(oprnd0, oprnd0.size - 1)
        op2_sign = tb._extract_bit(oprnd1, oprnd0.size - 1)
        res_sign = tb._extract_bit(result, oprnd0.size - 1)

        overflow = tb._and_regs(tb._equal_regs(op1_sign, op2_sign),
                                tb._unequal_regs(op1_sign, res_sign))

        tb.add(self._builder.gen_str(overflow, self._flags["vf"]))

    # Evaluate overflow and update the flag
    def _overflow_from_sub_uf(self, tb, oprnd0, oprnd1, result):
        """Set V for a subtraction, as computed by ``tb._overflow_from_sub``."""
        tb.add(
            self._builder.gen_str(
                tb._overflow_from_sub(oprnd0, oprnd1, result),
                self._flags["vf"]))

    def _update_zf(self, tb, oprnd0, oprnd1, result):
        """Set Z when the low ``oprnd0.size`` bits of *result* are all zero."""
        zf = self._flags["zf"]

        imm0 = tb.immediate((2**oprnd0.size) - 1, result.size)
        tmp0 = tb.temporal(oprnd0.size)

        tb.add(self._builder.gen_and(result, imm0, tmp0))  # filter low part of result
        tb.add(self._builder.gen_bisz(tmp0, zf))

    def _carry_out(self, tb, carry_operand, oprnd0, oprnd1, result):
        """Set C to the shifter carry-out of *carry_operand*.

        Immediate and plain register operands, and an LSL by immediate 0,
        leave C untouched. Only the LSL shift type is implemented.

        :raises NotImplementedError: for shift types other than 'lsl'
        :raises Exception: for unknown operand or shift amount types
        """
        if isinstance(carry_operand, ArmImmediateOperand):
            return
        elif isinstance(carry_operand, ArmRegisterOperand):
            return
        elif isinstance(carry_operand, ArmShiftedRegisterOperand):
            base = ReilRegisterOperand(carry_operand.base_reg.name,
                                       carry_operand.size)
            shift_type = carry_operand.shift_type
            shift_amount = carry_operand.shift_amount

            if shift_type == 'lsl':

                if isinstance(shift_amount, ArmImmediateOperand):
                    if shift_amount.immediate == 0:
                        return
                    else:
                        # carry_out = Rm[32 - shift_imm]
                        shift_carry_out = tb._extract_bit(
                            base, 32 - shift_amount.immediate)

                elif isinstance(shift_amount, ArmRegisterOperand):
                    # Rs: register with shift amount
                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    # else if Rs[7:0] <= 32 then
                    #     carry_out = Rm[32 - Rs[7:0]]
                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0

                    shift_carry_out = tb.temporal(1)
                    tb.add(
                        self._builder.gen_str(self._flags["cf"],
                                              shift_carry_out))
                    rs = ReilRegisterOperand(shift_amount.name,
                                             shift_amount.size)
                    rs_7_0 = tb._and_regs(rs, tb.immediate(0xFF, rs.size))

                    end_label = tb.label('end_label')
                    rs_greater_32_label = tb.label('rs_greater_32_label')

                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    tb._jump_if_zero(
                        rs_7_0, end_label
                    )  # shift_carry_out already has the C flag set, so do nothing

                    tb.add(
                        self._builder.gen_jcc(
                            tb._greater_than_or_equal(
                                rs_7_0, tb.immediate(33, rs_7_0.size)),
                            rs_greater_32_label))

                    # Rs > 0 and Rs <= 32
                    #     carry_out = Rm[32 - Rs[7:0]]
                    extract_bit_number = tb.temporal(rs_7_0.size)
                    tb.add(
                        self._builder.gen_sub(tb.immediate(32, rs_7_0.size),
                                              rs_7_0, extract_bit_number))
                    tb.add(
                        self._builder.gen_str(
                            tb._extract_bit_with_register(
                                base, extract_bit_number), shift_carry_out))
                    tb._jump_to(end_label)

                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0
                    tb.add(rs_greater_32_label)
                    tb.add(
                        self._builder.gen_str(tb.immediate(0, 1),
                                              shift_carry_out))
                    # tb._jump_to(end_label)

                    tb.add(end_label)

                else:
                    raise Exception("carry_out: Unknown shift amount type.")

            else:
                # TODO: Implement other shift types
                raise NotImplementedError(
                    "Instruction Not Implemented: carry_out: shift type " +
                    carry_operand.shift_type)

        else:
            raise Exception("carry_out: Unknown operand type.")

        tb.add(self._builder.gen_str(shift_carry_out, self._flags["cf"]))

    def _update_flags_data_proc_add(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after an addition."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_from_uf(tb, oprnd0, oprnd1, result)
        self._overflow_from_add_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_sub(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after a subtraction."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._borrow_from_uf(tb, oprnd0, oprnd1, result)
        # C Flag = NOT BorrowFrom (to be used by subsequent instructions like SBC and RSC)
        tb.add(
            self._builder.gen_str(tb._negate_reg(self._flags["cf"]),
                                  self._flags["cf"]))
        self._overflow_from_sub_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_other(self, tb, second_operand, oprnd0,
                                      oprnd1, result):
        """Update Z, N and the shifter carry-out; V is left untouched."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_out(tb, second_operand, oprnd0, oprnd1, result)
        # Overflow Flag (V) unaffected

    def _update_flags_other(self, tb, oprnd0, oprnd1, result):
        """Update Z and N only."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        # Carry Flag (C) unaffected
        # Overflow Flag (V) unaffected

    def _undefine_flag(self, tb, flag):
        """Model an architecturally undefined flag by writing 0 to it."""
        # NOTE: In every test I've made, each time a flag is leave
        # undefined it is always set to 0.

        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _clear_flag(self, tb, flag):
        """Write 0 to *flag*."""
        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _set_flag(self, tb, flag):
        """Write 1 to *flag*."""
        imm = tb.immediate(1, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _evaluate_eq(self, tb):
        # EQ: Z set
        return self._flags["zf"]

    def _evaluate_ne(self, tb):
        # NE: Z clear
        return tb._negate_reg(self._flags["zf"])

    def _evaluate_cs(self, tb):
        # CS: C set
        return self._flags["cf"]

    def _evaluate_cc(self, tb):
        # CC: C clear
        return tb._negate_reg(self._flags["cf"])

    def _evaluate_mi(self, tb):
        # MI: N set
        return self._flags["nf"]

    def _evaluate_pl(self, tb):
        # PL: N clear
        return tb._negate_reg(self._flags["nf"])

    def _evaluate_vs(self, tb):
        # VS: V set
        return self._flags["vf"]

    def _evaluate_vc(self, tb):
        # VC: V clear
        return tb._negate_reg(self._flags["vf"])

    def _evaluate_hi(self, tb):
        # HI: C set and Z clear
        return tb._and_regs(self._flags["cf"],
                            tb._negate_reg(self._flags["zf"]))

    def _evaluate_ls(self, tb):
        # LS: C clear or Z set
        return tb._or_regs(tb._negate_reg(self._flags["cf"]),
                           self._flags["zf"])

    def _evaluate_ge(self, tb):
        # GE: N == V
        return tb._equal_regs(self._flags["nf"], self._flags["vf"])

    def _evaluate_lt(self, tb):
        # LT: N != V
        return tb._negate_reg(self._evaluate_ge(tb))

    def _evaluate_gt(self, tb):
        # GT: (Z == 0) and (N == V)
        return tb._and_regs(tb._negate_reg(self._flags["zf"]),
                            self._evaluate_ge(tb))

    def _evaluate_le(self, tb):
        # LE: (Z == 1) or (N != V)
        return tb._or_regs(self._flags["zf"], self._evaluate_lt(tb))

    def _condition_evaluators(self):
        """Return the mapping from condition code to its evaluator method.

        Shared by ``_evaluate_condition_code`` and ``_translate_branch`` so
        both always agree. ``ARM_COND_CODE_AL`` is deliberately absent:
        callers handle it before looking a code up.
        """
        return {
            ARM_COND_CODE_EQ: self._evaluate_eq,
            ARM_COND_CODE_NE: self._evaluate_ne,
            ARM_COND_CODE_CS: self._evaluate_cs,
            ARM_COND_CODE_HS: self._evaluate_cs,
            ARM_COND_CODE_CC: self._evaluate_cc,
            ARM_COND_CODE_LO: self._evaluate_cc,
            ARM_COND_CODE_MI: self._evaluate_mi,
            ARM_COND_CODE_PL: self._evaluate_pl,
            ARM_COND_CODE_VS: self._evaluate_vs,
            ARM_COND_CODE_VC: self._evaluate_vc,
            ARM_COND_CODE_HI: self._evaluate_hi,
            ARM_COND_CODE_LS: self._evaluate_ls,
            ARM_COND_CODE_GE: self._evaluate_ge,
            ARM_COND_CODE_LT: self._evaluate_lt,
            ARM_COND_CODE_GT: self._evaluate_gt,
            ARM_COND_CODE_LE: self._evaluate_le,
        }

    def _evaluate_condition_code(self, tb, instruction):
        """Guard a conditional (non-branch) instruction.

        Emits a JCC on the negated condition whose target is the address of
        the next native instruction, so the translated body is skipped when
        the condition does not hold. Nothing is emitted for
        ``ARM_COND_CODE_AL``.

        :raises KeyError: if the condition code is not a known one
        """
        if instruction.condition_code == ARM_COND_CODE_AL:
            return

        eval_cc_fn = self._condition_evaluators()

        neg_cond = tb._negate_reg(eval_cc_fn[instruction.condition_code](tb))

        end_addr = ReilImmediateOperand(
            (instruction.address + instruction.size) << 8,
            self._arch_info.address_size + 8)

        tb.add(self._builder.gen_jcc(neg_cond, end_addr))

        return

    def _compute_branch_target(self, tb, arm_operand):
        """Build the REIL jump target for a branch operand.

        An immediate is shifted left by 8 bits into an operand 8 bits wider
        than the PC. A register has bit 0 cleared, is copied into a
        temporary 8 bits wider, and is shifted by 8.

        :returns: the target operand, or ``None`` when *arm_operand* is
            neither an immediate nor a register (the caller decides which
            exception to raise)
        """
        if isinstance(arm_operand, ArmImmediateOperand):
            return ReilImmediateOperand(arm_operand.immediate << 8,
                                        self._pc.size + 8)

        if isinstance(arm_operand, ArmRegisterOperand):
            target = ReilRegisterOperand(arm_operand.name, arm_operand.size)
            target = tb._and_regs(
                target, ReilImmediateOperand(0xFFFFFFFE, target.size))

            tmp0 = tb.temporal(target.size + 8)
            tmp1 = tb.temporal(target.size + 8)

            tb.add(self._builder.gen_str(target, tmp0))
            tb.add(
                self._builder.gen_bsh(tmp0,
                                      ReilImmediateOperand(8,
                                                           target.size + 8),
                                      tmp1))

            return tmp1

        return None

# "Data-processing Instructions"
# ============================================================================ #
    def _translate_mov(self, tb, instruction):
        """Translate MOV: copy operand 1 into operand 0."""
        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], oprnd1)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[1],
                                               oprnd1, None, oprnd1)

    def _translate_mvn(self, tb, instruction):
        """Translate MVN: write the negation of operand 1 into operand 0."""
        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], tb._negate_reg(oprnd1))

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[1],
                                               oprnd1, None,
                                               tb._negate_reg(oprnd1))

    def _translate_movw(self, tb, instruction):
        """Translate MOVW: write operand 1, then keep only its low 16 bits."""
        reil_operand = ReilRegisterOperand(instruction.operands[0].name,
                                           instruction.operands[0].size)

        word_mask = ReilImmediateOperand(0x0000FFFF, reil_operand.size)
        and_temp = tb.temporal(reil_operand.size)

        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], oprnd1)

        tb.add(self._builder.gen_and(
            reil_operand, word_mask, and_temp))  # filter bits [15:0] part of result

        tb.add(self._builder.gen_str(and_temp, reil_operand))

        # It doesn't update flags

    def _translate_and(self, tb, instruction):
        """Translate AND (bitwise and of operands 1 and 2 into operand 0)."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_and(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2],
                                               oprnd1, oprnd2, result)

    def _translate_orr(self, tb, instruction):
        """Translate ORR (bitwise or of operands 1 and 2 into operand 0)."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_or(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2],
                                               oprnd1, oprnd2, result)

    def _translate_eor(self, tb, instruction):
        """Translate EOR (bitwise xor of operands 1 and 2 into operand 0)."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_xor(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_other(tb, instruction.operands[2],
                                               oprnd1, oprnd2, result)

    def _translate_add(self, tb, instruction):
        """Translate ADD using a double-width result so the carry is visible."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_add(tb, oprnd1, oprnd2, result)

    def _translate_sub(self, tb, instruction):
        """Translate SUB using a double-width result so the borrow is visible."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_flags_data_proc_sub(tb, oprnd1, oprnd2, result)

    def _translate_rsb(self, tb, instruction):
        """Translate RSB as a SUB with the two source operands swapped.

        NOTE: the swap is done in place on ``instruction.operands``.
        """
        instruction.operands[1], instruction.operands[
            2] = instruction.operands[2], instruction.operands[1]

        self._translate_sub(tb, instruction)

    def _translate_mul(self, tb, instruction):
        """Translate MUL; only Z and N are updated when flags are requested."""
        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_mul(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_zf(tb, oprnd1, oprnd2, result)
            self._update_nf(tb, oprnd1, oprnd2, result)

    def _translate_cmn(self, tb, instruction):
        """Translate CMN: add the operands and update flags, discarding the sum."""
        oprnd1 = tb.read(instruction.operands[0])
        oprnd2 = tb.read(instruction.operands[1])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

        self._update_flags_data_proc_add(
            tb, oprnd1, oprnd2, result)  # S = 1 (implied in the instruction)

    def _translate_cmp(self, tb, instruction):
        """Translate CMP: subtract the operands and update flags, discarding the difference."""
        oprnd1 = tb.read(instruction.operands[0])
        oprnd2 = tb.read(instruction.operands[1])

        result = tb.temporal(oprnd1.size * 2)

        tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

        self._update_flags_data_proc_sub(
            tb, oprnd1, oprnd2, result)  # S = 1 (implied in the instruction)

    def _translate_cbz(self, tb, instruction):
        """Translate CBZ: jump to operand 1 when operand 0 is zero.

        :raises Exception: if the target is neither an immediate nor a register
        """
        oprnd1 = tb.read(instruction.operands[0])

        target = self._compute_branch_target(tb, instruction.operands[1])

        if target is None:
            raise Exception("cbz: Unknown operand type for branch target.")

        tb._jump_if_zero(oprnd1, target)

    def _translate_cbnz(self, tb, instruction):
        """Translate CBNZ: jump to operand 1 based on the negation of operand 0.

        :raises Exception: if the target is neither an immediate nor a register
        """
        oprnd0 = tb.read(instruction.operands[0])

        target = self._compute_branch_target(tb, instruction.operands[1])

        if target is None:
            raise Exception("cbnz: Unknown operand type for branch target.")

        # NOTE(review): this is only "branch if non-zero" when _negate_reg is
        # a logical negation; if it is a bitwise NOT the jump is taken only
        # for an all-ones operand - confirm against the builder.
        neg_oprnd = tb._negate_reg(oprnd0)

        tb._jump_if_zero(neg_oprnd, target)

    def _translate_lsl(self, tb, instruction):
        """Translate LSL.

        The three-operand form with a register as operand 1 is delegated to
        ``tb._compute_shifted_register`` and returns without touching the
        flags. Otherwise the shift is emitted directly and, when flags are
        requested, Z, N and the carry-out are updated.
        """
        # LSL (register)
        if len(instruction.operands) == 3 and isinstance(
                instruction.operands[1], ArmRegisterOperand):
            sh_op = ArmShiftedRegisterOperand(instruction.operands[1], "lsl",
                                              instruction.operands[2],
                                              instruction.operands[1].size)
            disp = tb._compute_shifted_register(sh_op)
            tb.write(instruction.operands[0], disp)
            return

        if len(instruction.operands) == 2 and isinstance(
                instruction.operands[1], ArmShiftedRegisterOperand):
            # Capstone is incorrectly packing <Rm>, #<imm5> into a shifted register, unpack it
            instruction.operands.append(instruction.operands[1]._shift_amount)
            instruction.operands[1] = instruction.operands[1]._base_reg

        oprnd1 = tb.read(instruction.operands[1])
        oprnd2 = tb.read(instruction.operands[2])

        result = tb.temporal(oprnd1.size)

        tb.add(self._builder.gen_bsh(oprnd1, oprnd2, result))

        tb.write(instruction.operands[0], result)

        if instruction.update_flags:
            self._update_zf(tb, oprnd1, oprnd2, result)
            self._update_nf(tb, oprnd1, oprnd2, result)
            # TODO: Encapsulate this new kind of flag update (different from the data proc instructions like add, and, orr)
            if oprnd2.immediate == 0:
                return
            else:
                # carry_out = Rm[32 - shift_imm]
                shift_carry_out = tb._extract_bit(oprnd1,
                                                  32 - oprnd2.immediate)
                tb.add(
                    self._builder.gen_str(shift_carry_out,
                                          self._flags["cf"]))

# "Load/store word and unsigned byte Instructions"
# ============================================================================ #
    def _translate_ldr(self, tb, instruction):
        """Translate LDR: read operand 1 and write it to operand 0."""
        oprnd1 = tb.read(instruction.operands[1])

        tb.write(instruction.operands[0], oprnd1)

    def _translate_str(self, tb, instruction):
        """Translate STR: read operand 0 and write it to operand 1."""
        oprnd0 = tb.read(instruction.operands[0])

        tb.write(instruction.operands[1], oprnd0)

    # TODO: Check if the byte suffix ('b') should be coded as extra information
    # and removed from the mnemonic (handling all ldr/str translations in only
    # two functions).
    def _translate_ldrb(self, tb, instruction):
        """Translate LDRB: load through an 8-bit temporary into operand 0."""
        op0_reil = ReilRegisterOperand(instruction.operands[0].name,
                                       instruction.operands[0].size)
        addr_reg = tb._compute_memory_address(instruction.operands[1])
        byte_reg = tb.temporal(8)

        tb.add(tb._builder.gen_ldm(addr_reg, byte_reg))

        tb.add(self._builder.gen_str(byte_reg, op0_reil))

    def _translate_strb(self, tb, instruction):
        """Translate STRB: store operand 0 through an 8-bit temporary."""
        reil_operand = ReilRegisterOperand(instruction.operands[0].name,
                                           instruction.operands[0].size)
        byte_reg = tb.temporal(8)

        tb.add(self._builder.gen_str(
            reil_operand, byte_reg))  # filter bits [7:0] part of result

        addr = tb._compute_memory_address(instruction.operands[1])

        tb.add(self._builder.gen_stm(byte_reg, addr))

    # TODO: Generalize LDR to handle byte and half word in a single function
    def _translate_ldrh(self, tb, instruction):
        """Translate LDRH: load through a 16-bit temporary into operand 0."""
        op0_reil = ReilRegisterOperand(instruction.operands[0].name,
                                       instruction.operands[0].size)
        addr_reg = tb._compute_memory_address(instruction.operands[1])
        half_word_reg = tb.temporal(16)

        tb.add(tb._builder.gen_ldm(addr_reg, half_word_reg))

        tb.add(self._builder.gen_str(half_word_reg, op0_reil))

    def _translate_strh(self, tb, instruction):
        """Translate STRH: store operand 0 through a 16-bit temporary."""
        reil_operand = ReilRegisterOperand(instruction.operands[0].name,
                                           instruction.operands[0].size)
        half_word_reg = tb.temporal(16)

        tb.add(self._builder.gen_str(
            reil_operand, half_word_reg))  # filter bits [15:0] part of result

        addr = tb._compute_memory_address(instruction.operands[1])

        tb.add(self._builder.gen_stm(half_word_reg, addr))

    def _translate_ldrd(self, tb, instruction):
        """Translate LDRD: load two registers from two addresses 4 bytes apart.

        With three operands the second register is operand 1; with two it
        is derived from the first register's number plus one.
        """
        if len(instruction.operands) > 2:  # Rd2 has been specified (UAL syntax)
            addr_reg = tb._compute_memory_address(instruction.operands[2])
        else:
            addr_reg = tb._compute_memory_address(instruction.operands[1])

        reil_operand = ReilRegisterOperand(instruction.operands[0].name,
                                           instruction.operands[0].size)

        tb.add(tb._builder.gen_ldm(addr_reg, reil_operand))

        addr_reg = tb._add_to_reg(addr_reg,
                                  ReilImmediateOperand(4, reil_operand.size))

        if len(instruction.operands) > 2:  # Rd2 has been specified (UAL syntax)
            reil_operand = ReilRegisterOperand(instruction.operands[1].name,
                                               instruction.operands[0].size)
        else:
            # TODO: Assuming the register is written in its number format
            # (no alias like lr or pc).
            reil_operand = ReilRegisterOperand(
                'r' + str(int(reil_operand.name[1:]) + 1), reil_operand.size)

        tb.add(tb._builder.gen_ldm(addr_reg, reil_operand))

    def _translate_strd(self, tb, instruction):
        """Translate STRD: store two registers to two addresses 4 bytes apart.

        With three operands the second register is operand 1; with two it
        is derived from the first register's number plus one.
        """
        if len(instruction.operands) > 2:  # Rd2 has been specified (UAL syntax)
            addr_reg = tb._compute_memory_address(instruction.operands[2])
        else:
            addr_reg = tb._compute_memory_address(instruction.operands[1])

        reil_operand = ReilRegisterOperand(instruction.operands[0].name,
                                           instruction.operands[0].size)

        tb.add(tb._builder.gen_stm(reil_operand, addr_reg))

        addr_reg = tb._add_to_reg(addr_reg,
                                  ReilImmediateOperand(4, reil_operand.size))

        if len(instruction.operands) > 2:  # Rd2 has been specified (UAL syntax)
            reil_operand = ReilRegisterOperand(instruction.operands[1].name,
                                               instruction.operands[0].size)
        else:
            # TODO: Assuming the register is written in its number format
            # (no alias like lr or pc).
            reil_operand = ReilRegisterOperand(
                'r' + str(int(reil_operand.name[1:]) + 1), reil_operand.size)

        tb.add(tb._builder.gen_stm(reil_operand, addr_reg))

# "Load/store multiple Instructions"
# ============================================================================ #
    def _translate_ldm(self, tb, instruction):
        """Translate LDM (load multiple)."""
        self._translate_ldm_stm(tb, instruction, True)

    def _translate_stm(self, tb, instruction):
        """Translate STM (store multiple)."""
        self._translate_ldm_stm(tb, instruction, False)

    def _translate_ldm_stm(self, tb, instruction, load=True):
        """Translate LDM/STM for the IA, IB, DA and DB addressing modes.

        A missing addressing mode defaults to IA, and stack addressing
        modes are first mapped to their non-stack equivalents; both changes
        are written back to ``instruction.ldm_stm_addr_mode``.

        :param load: true for LDM, false for STM
        :raises Exception: if the resulting addressing mode is unknown
        """
        # LDM and STM have exactly the same logic except one loads and the
        # other stores It is assumed that the disassembler (for example
        # Capstone) writes the register list in increasing order

        base = tb.read(instruction.operands[0])
        reg_list = tb.read(instruction.operands[1])

        if instruction.ldm_stm_addr_mode is None:
            instruction.ldm_stm_addr_mode = ARM_LDM_STM_IA  # default mode for load and store

        if load:
            load_store_fn = self._load_value
            # Convert stack addressing modes to non-stack addressing modes
            if instruction.ldm_stm_addr_mode in ldm_stack_am_to_non_stack_am:
                instruction.ldm_stm_addr_mode = ldm_stack_am_to_non_stack_am[
                    instruction.ldm_stm_addr_mode]
        else:  # Store
            load_store_fn = self._store_value
            if instruction.ldm_stm_addr_mode in stm_stack_am_to_non_stack_am:
                instruction.ldm_stm_addr_mode = stm_stack_am_to_non_stack_am[
                    instruction.ldm_stm_addr_mode]

        # Work on a copy of the base so write-back can use the original value.
        pointer = tb.temporal(base.size)
        tb.add(self._builder.gen_str(base, pointer))
        reg_list_size_bytes = ReilImmediateOperand(
            self._ws.immediate * len(reg_list), base.size)

        if instruction.ldm_stm_addr_mode == ARM_LDM_STM_IA:
            for reg in reg_list:
                load_store_fn(tb, pointer, reg)
                pointer = tb._add_to_reg(pointer, self._ws)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_IB:
            for reg in reg_list:
                pointer = tb._add_to_reg(pointer, self._ws)
                load_store_fn(tb, pointer, reg)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DA:
            reg_list.reverse(
            )  # Assuming the registry list was in increasing registry number
            for reg in reg_list:
                load_store_fn(tb, pointer, reg)
                pointer = tb._sub_to_reg(pointer, self._ws)
        elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DB:
            reg_list.reverse()
            for reg in reg_list:
                pointer = tb._sub_to_reg(pointer, self._ws)
                load_store_fn(tb, pointer, reg)
        else:
            raise Exception("Unknown addressing mode.")

        # Write-back
        if instruction.operands[0].wb:
            if instruction.ldm_stm_addr_mode == ARM_LDM_STM_IA or instruction.ldm_stm_addr_mode == ARM_LDM_STM_IB:
                tmp = tb._add_to_reg(base, reg_list_size_bytes)
            elif instruction.ldm_stm_addr_mode == ARM_LDM_STM_DA or instruction.ldm_stm_addr_mode == ARM_LDM_STM_DB:
                tmp = tb._sub_to_reg(base, reg_list_size_bytes)
            tb.add(self._builder.gen_str(tmp, base))

    def _load_value(self, tb, mem_dir, value):
        """Append an LDM instruction built from (mem_dir, value)."""
        tb.add(self._builder.gen_ldm(mem_dir, value))

    def _store_value(self, tb, mem_dir, value):
        """Append an STM instruction built from (value, mem_dir)."""
        tb.add(self._builder.gen_stm(value, mem_dir))

    def _translate_push_pop(self, tb, instruction, translate_fn):
        """Rewrite PUSH/POP as an FD-mode STM/LDM on r13 and translate it.

        NOTE: ``instruction.operands`` and ``ldm_stm_addr_mode`` are
        overwritten in place.
        """
        # PUSH and POP are equivalent to STM and LDM in FD mode with the SP
        # (and write-back) Instructions are modified to adapt it to the
        # LDM/STM interface

        sp_name = "r13"  # TODO: Use self._sp
        sp_size = instruction.operands[0].reg_list[0][
            0].size  # Infer it from the registers list
        sp_reg = ArmRegisterOperand(sp_name, sp_size)
        sp_reg.wb = True
        instruction.operands = [sp_reg, instruction.operands[0]]
        instruction.ldm_stm_addr_mode = ARM_LDM_STM_FD
        translate_fn(tb, instruction)

    def _translate_push(self, tb, instruction):
        """Translate PUSH through the STM translation."""
        self._translate_push_pop(tb, instruction, self._translate_stm)

    def _translate_pop(self, tb, instruction):
        """Translate POP through the LDM translation."""
        self._translate_push_pop(tb, instruction, self._translate_ldm)

# "Branch Instructions"
# ============================================================================ #
    def _translate_b(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bl(self, tb, instruction):
        self._translate_branch(tb, instruction, link=True)

    # TODO: Thumb
    def _translate_bx(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_blx(self, tb, instruction):
        self._translate_branch(tb, instruction, link=True)

    def _translate_bne(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_beq(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bpl(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_ble(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bcs(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bhs(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_blt(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bge(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bhi(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_blo(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_bls(self, tb, instruction):
        self._translate_branch(tb, instruction, link=False)

    def _translate_branch(self, tb, instruction, link):
        """Emit a (possibly conditional) jump to the instruction's first operand.

        :param link: when true, ``address + size`` of the instruction is
            stored in ``self._lr`` before the jump is emitted
        :raises NotImplementedError: if the target is neither an immediate
            nor a register
        :raises KeyError: if the condition code is not a known one
        """
        if instruction.condition_code == ARM_COND_CODE_AL:
            cond = tb.immediate(1, 1)
        else:
            cond = self._condition_evaluators()[instruction.condition_code](tb)

        target = self._compute_branch_target(tb, instruction.operands[0])

        if target is None:
            raise NotImplementedError(
                "Instruction Not Implemented: Unknown operand for branch operation."
            )

        if link:
            tb.add(
                self._builder.gen_str(
                    ReilImmediateOperand(
                        instruction.address + instruction.size,
                        self._pc.size), self._lr))

        tb.add(self._builder.gen_jcc(cond, target))

        return
class ArmTranslator(Translator):

    """Translator from ARM instructions to REIL."""

    def __init__(self, architecture_mode=ARCH_ARM_MODE_THUMB):
        super(ArmTranslator, self).__init__()

        # Every instruction is lowered according to this architecture mode.
        self._arch_mode = architecture_mode

        # Architecture description matching the selected mode.
        self._arch_info = ArmArchitectureInformation(architecture_mode)

        # Name generator that keeps the temporary REIL registers unique.
        self._ir_name_generator = VariableNamer("t", separator="")

        self._builder = ReilBuilder()

        # One-bit REIL registers for the N, Z, C and V flags.
        self._flags = {
            name: ReilRegisterOperand(name, 1)
            for name in ("nf", "zf", "cf", "vf")
        }

        if self._arch_mode in (ARCH_ARM_MODE_ARM, ARCH_ARM_MODE_THUMB):
            self._sp = ReilRegisterOperand("r13", 32)  # TODO: Implement alias
            self._pc = ReilRegisterOperand("r15", 32)
            self._lr = ReilRegisterOperand("r14", 32)

            self._ws = ReilImmediateOperand(4, 32)  # word size

    def translate(self, instruction):
        """Return the list of REIL instructions for *instruction*.

        An instruction whose translation raises NotImplementedError is
        logged and replaced by a single UNKN instruction. Any other
        exception is logged and re-raised.
        """
        try:
            return self.__translate(instruction)
        except NotImplementedError as err:
            placeholder = self._builder.gen_unkn()
            # Instruction address in the upper bits, sub-index 0 in the
            # low byte.
            placeholder.address = (instruction.address << 8) | 0x00

            self.__log_not_supported_instruction(instruction, str(err))

            return [placeholder]
        except Exception:
            self.__log_translation_exception(instruction)

            raise

    def reset(self):
        """Reset the generator of temporary REIL register names."""
        self._ir_name_generator.reset()

    def __translate(self, instruction):
        """Lower one ARM instruction to REIL.

        :param instruction: an ARM instruction
        :type instruction: ArmInstruction
        """
        mnemonic = instruction.mnemonic

        tb = ArmTranslationBuilder(self._ir_name_generator, self._arch_mode)

        # TODO: Improve this.
        branch_mnemonics = (
            "b", "bl", "bx", "blx", "bne", "beq", "bpl", "ble", "bcs",
            "bhs", "blt", "bge", "bhi", "blo", "bls",
        )

        if mnemonic in branch_mnemonics:
            # Branches get no conditional prologue here; a missing
            # condition code is defaulted to AL on the instruction.
            if instruction.condition_code is None:
                instruction.condition_code = ARM_COND_CODE_AL  # TODO: unify translations
        elif instruction.condition_code is not None:
            # Pre-processing: skip the instruction when its condition
            # does not hold.
            self._evaluate_condition_code(tb, instruction)

        if mnemonic not in translators.dispatcher:
            raise NotImplementedError("Instruction Not Implemented")

        translators.dispatcher[mnemonic](self, tb, instruction)

        return tb.instanciate(instruction.address)

    def __log_not_supported_instruction(self, instruction, reason="unknown"):
        """Log, at INFO level, an instruction that has no translation."""
        hex_bytes = " ".join(["%02x" % ord(byte) for byte in instruction.bytes])

        logger.info(
            "Instruction not supported: %s (%s [%s]). Reason: %s",
            instruction.mnemonic,
            instruction,
            hex_bytes,
            reason
        )

    def __log_translation_exception(self, instruction):
        """Log, at ERROR level with traceback, a failed translation."""
        hex_bytes = " ".join(["%02x" % ord(byte) for byte in instruction.bytes])

        logger.error(
            "Failed to translate arm to REIL: %s (%s)",
            instruction,
            hex_bytes,
            exc_info=True
        )

    # Flag translation.
    # ======================================================================== #
    def _update_nf(self, tb, oprnd0, oprnd1, result):
        """Store the bit of *result* at index ``oprnd0.size - 1`` in N."""
        msb_index = oprnd0.size - 1
        nf_value = tb._extract_bit(result, msb_index)

        tb.add(self._builder.gen_str(nf_value, self._flags["nf"]))

    def _carry_from_uf(self, tb, oprnd0, oprnd1, result):
        """Store the bit of *result* just above the operand width in C.

        *result* must be exactly twice as wide as *oprnd0*.
        """
        assert (result.size == oprnd0.size * 2)

        cf_value = tb._extract_bit(result, oprnd0.size)

        tb.add(self._builder.gen_str(cf_value, self._flags["cf"]))

    def _borrow_from_uf(self, tb, oprnd0, oprnd1, result):
        """Update C for a subtraction.

        BorrowFrom as defined in the ARM Reference Manual has the same
        implementation as CarryFrom.
        """
        self._carry_from_uf(tb, oprnd0, oprnd1, result)

    def _overflow_from_add_uf(self, tb, oprnd0, oprnd1, result):
        """Set V when both operands share a sign that *result* lacks."""
        msb_index = oprnd0.size - 1

        lhs_sign = tb._extract_bit(oprnd0, msb_index)
        rhs_sign = tb._extract_bit(oprnd1, msb_index)
        out_sign = tb._extract_bit(result, msb_index)

        same_operand_sign = tb._equal_regs(lhs_sign, rhs_sign)
        sign_changed = tb._unequal_regs(lhs_sign, out_sign)
        vf_value = tb._and_regs(same_operand_sign, sign_changed)

        tb.add(self._builder.gen_str(vf_value, self._flags["vf"]))

    def _overflow_from_sub_uf(self, tb, oprnd0, oprnd1, result):
        """Store the builder's subtraction-overflow value in V."""
        vf_value = tb._overflow_from_sub(oprnd0, oprnd1, result)

        tb.add(self._builder.gen_str(vf_value, self._flags["vf"]))

    def _update_zf(self, tb, oprnd0, oprnd1, result):
        """Set Z from the low ``oprnd0.size`` bits of *result* via BISZ."""
        low_mask = tb.immediate((2**oprnd0.size)-1, result.size)
        low_part = tb.temporal(oprnd0.size)

        # Drop any bits above the operand width before the zero test.
        tb.add(self._builder.gen_and(result, low_mask, low_part))
        tb.add(self._builder.gen_bisz(low_part, self._flags["zf"]))

    def _carry_out(self, tb, carry_operand, oprnd0, oprnd1, result):
        """Update C with the shifter carry-out of the second operand.

        Immediate and plain register operands leave C untouched, and so
        does an LSL by an immediate of zero. Only LSL shifts are
        implemented; other shift types raise NotImplementedError.
        """
        if isinstance(carry_operand, (ArmImmediateOperand, ArmRegisterOperand)):
            return

        if not isinstance(carry_operand, ArmShiftedRegisterOperand):
            raise Exception("carry_out: Unknown operand type.")

        base = ReilRegisterOperand(carry_operand.base_reg.name, carry_operand.size)
        shift_type = carry_operand.shift_type
        shift_amount = carry_operand.shift_amount

        if shift_type != 'lsl':
            # TODO: Implement other shift types
            raise NotImplementedError("Instruction Not Implemented: carry_out: shift type " + carry_operand.shift_type)

        if isinstance(shift_amount, ArmImmediateOperand):
            if shift_amount.immediate == 0:
                return

            # carry_out = Rm[32 - shift_imm]
            shift_carry_out = tb._extract_bit(base, 32 - shift_amount.immediate)
        elif isinstance(shift_amount, ArmRegisterOperand):
            # Rs: register with shift amount
            # if Rs[7:0] == 0 then
            #     carry_out = C Flag
            # else if Rs[7:0] <= 32 then
            #     carry_out = Rm[32 - Rs[7:0]]
            # else /* Rs[7:0] > 32 */
            #     carry_out = 0

            # Start from the current C flag; this is the Rs[7:0] == 0 case.
            shift_carry_out = tb.temporal(1)
            tb.add(self._builder.gen_str(self._flags["cf"], shift_carry_out))

            rs = ReilRegisterOperand(shift_amount.name, shift_amount.size)
            rs_7_0 = tb._and_regs(rs, tb.immediate(0xFF, rs.size))

            end_label = tb.label('end_label')
            rs_greater_32_label = tb.label('rs_greater_32_label')

            # Rs[7:0] == 0: shift_carry_out already holds C, so skip ahead.
            tb._jump_if_zero(rs_7_0, end_label)

            # Rs[7:0] >= 33: jump to the branch that clears the carry.
            above_32 = tb._greater_than_or_equal(rs_7_0, tb.immediate(33, rs_7_0.size))
            tb.add(self._builder.gen_jcc(above_32, rs_greater_32_label))

            # 0 < Rs[7:0] <= 32: carry_out = Rm[32 - Rs[7:0]]
            extract_bit_number = tb.temporal(rs_7_0.size)
            tb.add(self._builder.gen_sub(tb.immediate(32, rs_7_0.size), rs_7_0, extract_bit_number))

            picked_bit = tb._extract_bit_with_register(base, extract_bit_number)
            tb.add(self._builder.gen_str(picked_bit, shift_carry_out))
            tb._jump_to(end_label)

            # Rs[7:0] > 32: carry_out = 0
            tb.add(rs_greater_32_label)
            tb.add(self._builder.gen_str(tb.immediate(0, 1), shift_carry_out))

            tb.add(end_label)
        else:
            raise Exception("carry_out: Unknown shift amount type.")

        tb.add(self._builder.gen_str(shift_carry_out, self._flags["cf"]))

    def _update_flags_data_proc_add(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after an addition."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_from_uf(tb, oprnd0, oprnd1, result)
        self._overflow_from_add_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_sub(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C and V after a subtraction."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._borrow_from_uf(tb, oprnd0, oprnd1, result)

        # C Flag = NOT BorrowFrom (to be used by subsequent instructions
        # like SBC and RSC)
        cf = self._flags["cf"]
        inverted_borrow = tb._negate_reg(cf)
        tb.add(self._builder.gen_str(inverted_borrow, self._flags["cf"]))

        self._overflow_from_sub_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_other(self, tb, second_operand, oprnd0, oprnd1, result):
        """Update Z and N, and C from the shifter; V is unaffected."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_out(tb, second_operand, oprnd0, oprnd1, result)

    def _update_flags_other(self, tb, oprnd0, oprnd1, result):
        """Update Z and N only; C and V are unaffected."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)

    def _undefine_flag(self, tb, flag):
        """Write 0 to *flag*.

        NOTE: In every test I've made, each time a flag is left
        undefined it is always set to 0.
        """
        tb.add(self._builder.gen_str(tb.immediate(0, flag.size), flag))

    def _clear_flag(self, tb, flag):
        """Write 0 to *flag*."""
        tb.add(self._builder.gen_str(tb.immediate(0, flag.size), flag))

    def _set_flag(self, tb, flag):
        """Write 1 to *flag*."""
        tb.add(self._builder.gen_str(tb.immediate(1, flag.size), flag))

    # Helpers.
    # ======================================================================== #
    def _evaluate_eq(self, tb):
        """EQ: Z set."""
        zf = self._flags["zf"]

        return zf

    def _evaluate_ne(self, tb):
        """NE: Z clear."""
        zf = self._flags["zf"]

        return tb._negate_reg(zf)

    def _evaluate_cs(self, tb):
        """CS: C set."""
        cf = self._flags["cf"]

        return cf

    def _evaluate_cc(self, tb):
        """CC: C clear."""
        cf = self._flags["cf"]

        return tb._negate_reg(cf)

    def _evaluate_mi(self, tb):
        """MI: N set."""
        nf = self._flags["nf"]

        return nf

    def _evaluate_pl(self, tb):
        """PL: N clear."""
        nf = self._flags["nf"]

        return tb._negate_reg(nf)

    def _evaluate_vs(self, tb):
        """VS: V set."""
        vf = self._flags["vf"]

        return vf

    def _evaluate_vc(self, tb):
        """VC: V clear."""
        vf = self._flags["vf"]

        return tb._negate_reg(vf)

    def _evaluate_hi(self, tb):
        """HI: C set and Z clear."""
        z_clear = tb._negate_reg(self._flags["zf"])

        return tb._and_regs(self._flags["cf"], z_clear)

    def _evaluate_ls(self, tb):
        """LS: C clear or Z set."""
        c_clear = tb._negate_reg(self._flags["cf"])

        return tb._or_regs(c_clear, self._flags["zf"])

    def _evaluate_ge(self, tb):
        """GE: N == V."""
        nf = self._flags["nf"]
        vf = self._flags["vf"]

        return tb._equal_regs(nf, vf)

    def _evaluate_lt(self, tb):
        """LT: N != V."""
        ge = self._evaluate_ge(tb)

        return tb._negate_reg(ge)

    def _evaluate_gt(self, tb):
        """GT: (Z == 0) and (N == V)."""
        z_clear = tb._negate_reg(self._flags["zf"])
        ge = self._evaluate_ge(tb)

        return tb._and_regs(z_clear, ge)

    def _evaluate_le(self, tb):
        """LE: (Z == 1) or (N != V)."""
        lt = self._evaluate_lt(tb)

        return tb._or_regs(self._flags["zf"], lt)

    def _evaluate_condition_code(self, tb, instruction):
        """Emit a jump past *instruction* taken when its condition fails.

        Nothing is emitted for the AL (always) condition code.
        """
        code = instruction.condition_code

        if code == ARM_COND_CODE_AL:
            return

        evaluators = {
            ARM_COND_CODE_EQ: self._evaluate_eq,
            ARM_COND_CODE_NE: self._evaluate_ne,
            ARM_COND_CODE_CS: self._evaluate_cs,
            ARM_COND_CODE_HS: self._evaluate_cs,
            ARM_COND_CODE_CC: self._evaluate_cc,
            ARM_COND_CODE_LO: self._evaluate_cc,
            ARM_COND_CODE_MI: self._evaluate_mi,
            ARM_COND_CODE_PL: self._evaluate_pl,
            ARM_COND_CODE_VS: self._evaluate_vs,
            ARM_COND_CODE_VC: self._evaluate_vc,
            ARM_COND_CODE_HI: self._evaluate_hi,
            ARM_COND_CODE_LS: self._evaluate_ls,
            ARM_COND_CODE_GE: self._evaluate_ge,
            ARM_COND_CODE_LT: self._evaluate_lt,
            ARM_COND_CODE_GT: self._evaluate_gt,
            ARM_COND_CODE_LE: self._evaluate_le,
        }

        evaluate = evaluators[instruction.condition_code]
        condition = evaluate(tb)
        neg_cond = tb._negate_reg(condition)

        # Address of the instruction after this one, shifted left by 8
        # and 8 bits wider than an address.
        next_address = (instruction.address + instruction.size) << 8
        end_addr = ReilImmediateOperand(next_address, self._arch_info.address_size + 8)

        tb.add(self._builder.gen_jcc(neg_cond, end_addr))

        return
class X86Translator(Translator):

    """x86 to IR Translator."""

    def __init__(self, architecture_mode):
        super(X86Translator, self).__init__()

        # Set *Architecture Mode*. The translation of each instruction
        # into the REIL language is based on this.
        self._arch_mode = architecture_mode

        # An instance of *ArchitectureInformation*.
        self._arch_info = X86ArchitectureInformation(architecture_mode)

        # An instance of a *VariableNamer*. This is used so all the
        # temporary REIL registers are unique.
        self._ir_name_generator = VariableNamer("t", separator="")

        self._builder = ReilBuilder()

        # One-bit REIL registers for the status flags.
        self._flags = {
            "af": ReilRegisterOperand("af", 1),
            "cf": ReilRegisterOperand("cf", 1),
            "df": ReilRegisterOperand("df", 1),
            "of": ReilRegisterOperand("of", 1),
            "pf": ReilRegisterOperand("pf", 1),
            "sf": ReilRegisterOperand("sf", 1),
            "zf": ReilRegisterOperand("zf", 1),
        }

        # Stack, base and instruction pointers plus the word size; these
        # are only set for the two modes handled below.
        if self._arch_mode == ARCH_X86_MODE_32:
            self._sp = ReilRegisterOperand("esp", 32)
            self._bp = ReilRegisterOperand("ebp", 32)
            self._ip = ReilRegisterOperand("eip", 32)

            self._ws = ReilImmediateOperand(4, 32)  # word size
        elif self._arch_mode == ARCH_X86_MODE_64:
            self._sp = ReilRegisterOperand("rsp", 64)
            self._bp = ReilRegisterOperand("rbp", 64)
            self._ip = ReilRegisterOperand("rip", 64)

            self._ws = ReilImmediateOperand(8, 64)  # word size

    def translate(self, instruction):
        """Return IR representation of an instruction.

        An instruction whose translation raises NotImplementedError is
        logged and replaced by a single UNKN instruction. Any other
        Exception is logged and re-raised.
        """
        try:
            trans_instrs = self.__translate(instruction)
        except NotImplementedError:
            unkn_instr = self._builder.gen_unkn()
            # Instruction address in the upper bits, sub-index 0 in the
            # low byte.
            unkn_instr.address = instruction.address << 8 | (0x0 & 0xff)
            trans_instrs = [unkn_instr]

            self.__log_not_supported_instruction(instruction)
        except Exception:
            # Only genuine errors are reported as translation failures.
            # KeyboardInterrupt and SystemExit propagate untouched and
            # are not logged as such.
            self.__log_translation_exception(instruction)

            raise

        return trans_instrs

    def reset(self):
        """Restart IR register name generator.
        """
        self._ir_name_generator.reset()

    def __translate(self, instruction):
        """Translate a x86 instruction into REIL language.

        :param instruction: a x86 instruction
        :type instruction: X86Instruction

        Raises NotImplementedError when the dispatcher has no entry for
        the mnemonic.
        """
        # Retrieve translation function.
        mnemonic = instruction.mnemonic

        # Check whether it refers to the strings instruction or the sse
        # instruction.
        # NOTE(review): the comparison assumes instruction.bytes indexes
        # to 1-character strings - confirm on Python 3.
        if instruction.mnemonic in ["movsd"]:
            if instruction.bytes[0] not in ["\xa4", "\xa5"]:
                mnemonic += "_sse"

        # Translate instruction.
        if mnemonic in translators.dispatcher:
            tb = X86TranslationBuilder(self._ir_name_generator, self._arch_mode)

            translators.dispatcher[mnemonic](self, tb, instruction)
        else:
            raise NotImplementedError("Instruction Not Implemented")

        return tb.instanciate(instruction.address)

    def __log_not_supported_instruction(self, instruction):
        """Log, at INFO level, an instruction that has no translation."""
        # NOTE(review): ord() assumes iterating instruction.bytes yields
        # 1-character strings - confirm on Python 3.
        bytes_str = " ".join("%02x" % ord(b) for b in instruction.bytes)

        logger.info("Instruction not supported: %s (%s [%s])", instruction.mnemonic, instruction, bytes_str)

    def __log_translation_exception(self, instruction):
        """Log, at ERROR level with traceback, a failed translation."""
        bytes_str = " ".join("%02x" % ord(b) for b in instruction.bytes)

        logger.error("Failed to translate x86 to REIL: %s (%s)", instruction, bytes_str, exc_info=True)

    # Flag translation.
    # ======================================================================== #
    def _update_af(self, tb, oprnd0, oprnd1, result):
        """Set AF from the sum of the operands' low 4 bits.

        Both operands must have the same size.
        """
        assert oprnd0.size == oprnd1.size

        tmp0 = tb.temporal(8)
        tmp1 = tb.temporal(8)
        tmp2 = tb.temporal(8)
        tmp3 = tb.temporal(8)
        tmp4 = tb.temporal(8)
        tmp5 = tb.temporal(8)
        tmp6 = tb.temporal(8)

        imm4 = tb.immediate(4, 8)
        immn4 = tb.immediate(-4, 8)

        af = self._flags["af"]

        # Extract lower byte.
        tb.add(self._builder.gen_str(oprnd0, tmp0))
        tb.add(self._builder.gen_str(oprnd1, tmp1))

        # Zero-extend lower 4 bits (shift left by 4, then back right).
        tb.add(self._builder.gen_bsh(tmp0, imm4, tmp2))
        tb.add(self._builder.gen_bsh(tmp2, immn4, tmp4))

        tb.add(self._builder.gen_bsh(tmp1, imm4, tmp3))
        tb.add(self._builder.gen_bsh(tmp3, immn4, tmp5))

        # Add up.
        tb.add(self._builder.gen_add(tmp4, tmp5, tmp6))

        # Move bit 4 to AF flag.
        tb.add(self._builder.gen_bsh(tmp6, immn4, af))

    def _update_af_sub(self, tb, oprnd0, oprnd1, result):
        """Set AF from the difference of the operands' low 4 bits.

        Both operands must have the same size.
        """
        assert oprnd0.size == oprnd1.size

        tmp0 = tb.temporal(8)
        tmp1 = tb.temporal(8)
        tmp2 = tb.temporal(8)
        tmp3 = tb.temporal(8)
        tmp4 = tb.temporal(8)
        tmp5 = tb.temporal(8)
        tmp6 = tb.temporal(8)

        imm4 = tb.immediate(4, 8)
        immn4 = tb.immediate(-4, 8)

        af = self._flags["af"]

        # Extract lower byte.
        tb.add(self._builder.gen_str(oprnd0, tmp0))
        tb.add(self._builder.gen_str(oprnd1, tmp1))

        # Zero-extend lower 4 bits (shift left by 4, then back right).
        tb.add(self._builder.gen_bsh(tmp0, imm4, tmp2))
        tb.add(self._builder.gen_bsh(tmp2, immn4, tmp4))

        tb.add(self._builder.gen_bsh(tmp1, imm4, tmp3))
        tb.add(self._builder.gen_bsh(tmp3, immn4, tmp5))

        # Subtract
        tb.add(self._builder.gen_sub(tmp4, tmp5, tmp6))

        # Move bit 4 to AF flag.
        tb.add(self._builder.gen_bsh(tmp6, immn4, af))

    def _update_pf(self, tb, oprnd0, oprnd1, result):
        """Set PF by XOR-folding *result* and inverting the low bit."""
        tmp0 = tb.temporal(result.size)
        tmp1 = tb.temporal(result.size)
        tmp2 = tb.temporal(result.size)
        tmp3 = tb.temporal(result.size)
        tmp4 = tb.temporal(result.size)
        tmp5 = tb.temporal(result.size)

        imm1 = tb.immediate(1, result.size)
        immn1 = tb.immediate(-1, result.size)
        immn2 = tb.immediate(-2, result.size)
        immn4 = tb.immediate(-4, result.size)

        pf = self._flags["pf"]

        # tmp1 = result ^ (result >> 4)
        tb.add(self._builder.gen_bsh(result, immn4, tmp0))
        tb.add(self._builder.gen_xor(result, tmp0, tmp1))

        # tmp3 = tmp1 ^ (tmp1 >> 2)
        tb.add(self._builder.gen_bsh(tmp1, immn2, tmp2))
        tb.add(self._builder.gen_xor(tmp2, tmp1, tmp3))

        # tmp5 = tmp3 ^ (tmp3 >> 1)
        tb.add(self._builder.gen_bsh(tmp3, immn1, tmp4))
        tb.add(self._builder.gen_xor(tmp4, tmp3, tmp5))

        # Invert and save result.
        tb.add(self._builder.gen_xor(tmp5, imm1, pf))

    def _update_sf(self, tb, oprnd0, oprnd1, result):
        """Set SF to the bit of *result* at index ``oprnd0.size - 1``."""
        # Create temporal variables.
        tmp0 = tb.temporal(result.size)

        mask0 = tb.immediate(2**(oprnd0.size - 1), result.size)
        shift0 = tb.immediate(-(oprnd0.size - 1), result.size)

        sf = self._flags["sf"]

        tb.add(self._builder.gen_and(result, mask0, tmp0))  # filter sign bit
        tb.add(self._builder.gen_bsh(tmp0, shift0, sf))     # extract sign bit

    def _update_of(self, tb, oprnd0, oprnd1, result):
        """Set OF for an addition.

        OF is set when both operands share a sign bit that differs from
        the sign bit of *result*. Both operands must have the same size.
        """
        assert oprnd0.size == oprnd1.size

        of = self._flags["of"]

        imm0 = tb.immediate(1, 1)

        tmp0 = tb.temporal(1)
        tmp1 = tb.temporal(1)
        tmp2 = tb.temporal(1)
        tmp3 = tb.temporal(1)

        # Extract sign bit.
        oprnd0_sign = self._extract_sign_bit(tb, oprnd0)
        oprnd1_sign = self._extract_sign_bit(tb, oprnd1)
        result_sign = self._extract_bit(tb, result, oprnd0.size - 1)

        # Compute OF.
        # (sign bit oprnd0 ^ sign bit oprnd1)
        tb.add(self._builder.gen_xor(oprnd0_sign, oprnd1_sign, tmp0))
        # (sign bit oprnd0 ^ sign bit oprnd1 ^ 1)
        tb.add(self._builder.gen_xor(tmp0, imm0, tmp1))
        # (sign bit oprnd0 ^ sign bit result)
        tb.add(self._builder.gen_xor(oprnd0_sign, result_sign, tmp2))
        # (sign bit oprnd0 ^ sign bit oprnd1 ^ 1) & (sign bit oprnd0 ^ sign bit result)
        tb.add(self._builder.gen_and(tmp1, tmp2, tmp3))

        # Save result.
        tb.add(self._builder.gen_str(tmp3, of))

    def _update_of_sub(self, tb, oprnd0, oprnd1, result):
        """Set OF for a subtraction.

        Same computation as for addition, but with the sign bit of the
        second operand inverted first. Both operands must have the same
        size.
        """
        assert oprnd0.size == oprnd1.size

        of = self._flags["of"]

        imm0 = tb.immediate(1, 1)

        tmp0 = tb.temporal(1)
        tmp1 = tb.temporal(1)
        tmp2 = tb.temporal(1)
        tmp3 = tb.temporal(1)

        oprnd1_sign = tb.temporal(1)

        # Extract sign bit.
        oprnd0_sign = self._extract_sign_bit(tb, oprnd0)
        oprnd1_sign_tmp = self._extract_sign_bit(tb, oprnd1)
        result_sign = self._extract_bit(tb, result, oprnd0.size - 1)

        # Invert sign bit of the second operand (oprnd1).
        tb.add(self._builder.gen_xor(oprnd1_sign_tmp, imm0, oprnd1_sign))

        # Compute OF.
        # (sign bit oprnd0 ^ sign bit oprnd1)
        tb.add(self._builder.gen_xor(oprnd0_sign, oprnd1_sign, tmp0))
        # (sign bit oprnd0 ^ sign bit oprnd1 ^ 1)
        tb.add(self._builder.gen_xor(tmp0, imm0, tmp1))
        # (sign bit oprnd0 ^ sign bit result)
        tb.add(self._builder.gen_xor(oprnd0_sign, result_sign, tmp2))
        # (sign bit oprnd0 ^ sign bit oprnd1 ^ 1) & (sign bit oprnd0 ^ sign bit result)
        tb.add(self._builder.gen_and(tmp1, tmp2, tmp3))

        # Save result.
        tb.add(self._builder.gen_str(tmp3, of))

    def _update_cf(self, tb, oprnd0, oprnd1, result):
        """Set CF to the bit of *result* at index ``oprnd0.size``."""
        cf = self._flags["cf"]

        imm0 = tb.immediate(2**oprnd0.size, result.size)
        imm1 = tb.immediate(-oprnd0.size, result.size)

        tmp0 = tb.temporal(result.size)

        tb.add(self._builder.gen_and(result, imm0, tmp0))   # filter carry bit
        tb.add(self._builder.gen_bsh(tmp0, imm1, cf))

    def _update_zf(self, tb, oprnd0, oprnd1, result):
        """Set ZF from the low ``oprnd0.size`` bits of *result* via BISZ."""
        zf = self._flags["zf"]

        imm0 = tb.immediate((2**oprnd0.size) - 1, result.size)

        tmp0 = tb.temporal(oprnd0.size)

        tb.add(self._builder.gen_and(result, imm0, tmp0))  # filter low part of result
        tb.add(self._builder.gen_bisz(tmp0, zf))

    def _undefine_flag(self, tb, flag):
        """Write 0 to *flag*."""
        # NOTE: In every test I've made, each time a flag is left
        # undefined it is always set to 0.

        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _clear_flag(self, tb, flag):
        """Write 0 to *flag*."""
        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _set_flag(self, tb, flag):
        """Write 1 to *flag*."""
        imm = tb.immediate(1, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    # Helpers.
    # ======================================================================== #
    def _evaluate_a(self, tb):
        """Above (CF=0 and ZF=0)."""
        return tb._and_regs(tb._negate_reg(self._flags["cf"]), tb._negate_reg(self._flags["zf"]))

    def _evaluate_ae(self, tb):
        """Above or equal (CF=0)."""
        return tb._negate_reg(self._flags["cf"])

    def _evaluate_b(self, tb):
        """Below (CF=1)."""
        return self._flags["cf"]

    def _evaluate_be(self, tb):
        """Below or equal (CF=1 or ZF=1)."""
        return tb._or_regs(self._flags["cf"], self._flags["zf"])

    def _evaluate_c(self, tb):
        """Carry (CF=1)."""
        return self._flags["cf"]

    def _evaluate_e(self, tb):
        """Equal (ZF=1)."""
        return self._flags["zf"]

    def _evaluate_g(self, tb):
        """Greater (ZF=0 and SF=OF)."""
        return tb._and_regs(
            tb._negate_reg(self._flags["zf"]),
            tb._equal_regs(self._flags["sf"], self._flags["of"]))

    def _evaluate_ge(self, tb):
        """Greater or equal (SF=OF)."""
        return tb._equal_regs(self._flags["sf"], self._flags["of"])

    def _evaluate_l(self, tb):
        """Less (SF != OF)."""
        return tb._unequal_regs(self._flags["sf"], self._flags["of"])

    def _evaluate_le(self, tb):
        """Less or equal (ZF=1 or SF != OF)."""
        return tb._or_regs(
            self._flags["zf"],
            tb._unequal_regs(self._flags["sf"], self._flags["of"]))

    def _evaluate_na(self, tb):
        """Not above (CF=1 or ZF=1)."""
        return tb._or_regs(self._flags["cf"], self._flags["zf"])

    def _evaluate_nae(self, tb):
        """Not above or equal (CF=1)."""
        return self._flags["cf"]

    def _evaluate_nb(self, tb):
        """Not below (CF=0)."""
        return tb._negate_reg(self._flags["cf"])

    def _evaluate_nbe(self, tb):
        """Not below or equal (CF=0 and ZF=0)."""
        return tb._and_regs(tb._negate_reg(self._flags["cf"]), tb._negate_reg(self._flags["zf"]))

    def _evaluate_nc(self, tb):
        """Not carry (CF=0)."""
        return tb._negate_reg(self._flags["cf"])

    def _evaluate_ne(self, tb):
        """Not equal (ZF=0)."""
        return tb._negate_reg(self._flags["zf"])

    def _evaluate_ng(self, tb):
        """Not greater (ZF=1 or SF != OF)."""
        return tb._or_regs(
            self._flags["zf"],
            tb._unequal_regs(self._flags["sf"], self._flags["of"]))

    def _evaluate_nge(self, tb):
        """Not greater or equal (SF != OF)."""
        return tb._unequal_regs(self._flags["sf"], self._flags["of"])

    def _evaluate_nl(self, tb):
        """Not less (SF=OF)."""
        return tb._equal_regs(self._flags["sf"], self._flags["of"])

    def _evaluate_nle(self, tb):
        """Not less or equal (ZF=0 and SF=OF)."""
        return tb._and_regs(
            tb._negate_reg(self._flags["zf"]),
            tb._equal_regs(self._flags["sf"], self._flags["of"]))

    def _evaluate_no(self, tb):
        """Not overflow (OF=0)."""
        return tb._negate_reg(self._flags["of"])

    def _evaluate_np(self, tb):
        """Not parity (PF=0)."""
        return tb._negate_reg(self._flags["pf"])

    def _evaluate_ns(self, tb):
        """Not sign (SF=0)."""
        return tb._negate_reg(self._flags["sf"])

    def _evaluate_nz(self, tb):
        """Not zero (ZF=0)."""
        return tb._negate_reg(self._flags["zf"])

    def _evaluate_o(self, tb):
        """Overflow (OF=1)."""
        return self._flags["of"]

    def _evaluate_p(self, tb):
        """Parity (PF=1)."""
        return self._flags["pf"]

    def _evaluate_pe(self, tb):
        """Parity even (PF=1)."""
        return self._flags["pf"]

    def _evaluate_po(self, tb):
        """Parity odd (PF=0)."""
        return tb._negate_reg(self._flags["pf"])

    def _evaluate_s(self, tb):
        """Sign (SF=1)."""
        return self._flags["sf"]

    def _evaluate_z(self, tb):
        """Zero (ZF=1)."""
        return self._flags["zf"]

    # Helpers.
    # ======================================================================== #
    def _extract_bit(self, tb, reg, bit):
        """Return a 1-bit temporary holding bit number *bit* of *reg*.

        *bit* must satisfy ``0 <= bit < reg.size``.
        """
        assert (0 <= bit < reg.size)

        tmp = tb.temporal(reg.size)
        ret = tb.temporal(1)

        tb.add(self._builder.gen_bsh(reg, tb.immediate(-bit, reg.size), tmp))  # shift to LSB
        tb.add(self._builder.gen_and(tmp, tb.immediate(1, reg.size), ret))     # filter LSB

        return ret

    def _extract_msb(self, tb, reg):
        """Return a 1-bit temporary holding the top bit of *reg*."""
        return self._extract_bit(tb, reg, reg.size - 1)

    def _extract_sign_bit(self, tb, reg):
        """Return a 1-bit temporary holding the sign (top) bit of *reg*."""
        return self._extract_msb(tb, reg)
class ArmTranslator(object): """ARM to IR Translator.""" def __init__(self, architecture_mode=ARCH_ARM_MODE_32, translation_mode=FULL_TRANSLATION): # Set *Architecture Mode*. The translation of each instruction # into the REIL language is based on this. self._arch_mode = architecture_mode # An instance of *ArchitectureInformation*. self._arch_info = ArmArchitectureInformation(architecture_mode) # Set *Translation Mode*. self._translation_mode = translation_mode # An instance of a *VariableNamer*. This is used so all the # temporary REIL registers are unique. self._ir_name_generator = VariableNamer("t", separator="") self._builder = ReilInstructionBuilder() self._flags = { "nf": ReilRegisterOperand("nf", 1), "zf": ReilRegisterOperand("zf", 1), "cf": ReilRegisterOperand("cf", 1), "vf": ReilRegisterOperand("vf", 1), } if self._arch_mode == ARCH_ARM_MODE_32: self._sp = ReilRegisterOperand("r13", 32) # TODO: Implement alias self._pc = ReilRegisterOperand("r15", 32) self._lr = ReilRegisterOperand("r14", 32) self._ws = ReilImmediateOperand(4, 32) # word size elif self._arch_mode == ARCH_ARM_MODE_64: self._sp = ReilRegisterOperand("r13", 64) self._pc = ReilRegisterOperand("r15", 64) self._lr = ReilRegisterOperand("r14", 64) self._ws = ReilImmediateOperand(8, 64) # word size def translate(self, instruction): """Return IR representation of an instruction. """ try: trans_instrs = self._translate(instruction) except NotImplementedError as e: unkn_instr = self._builder.gen_unkn() unkn_instr.address = instruction.address << 8 | (0x0 & 0xff) trans_instrs = [unkn_instr] self._log_not_supported_instruction(instruction) except Exception as e: self._log_translation_exception(instruction) raise # Some sanity check.... 
for instr in trans_instrs: try: check_operands_size(instr, self._arch_info.architecture_size) except: logger.error("Invalid operand size: %s (%s)", instr, instruction) raise return trans_instrs def _translate(self, instruction): """Translate a arm instruction into REIL language. :param instruction: a arm instruction :type instruction: ArmInstruction """ # Retrieve translation function. translator_name = "_translate_" + instruction.mnemonic translator_fn = getattr(self, translator_name, self._not_implemented) # Translate instruction. tb = ArmTranslationBuilder(self._ir_name_generator, self._arch_mode) # Pre-processing: evaluate flags nop_cc_lbl = tb.label('condition_code_not_met') if (instruction.condition_code is not None): self._evaluate_condition_code(tb, instruction, nop_cc_lbl) translator_fn(tb, instruction) tb.add(nop_cc_lbl) tb.add(self._builder.gen_nop() ) # Added NOP so there is a REIL instruction to jump to return tb.instanciate(instruction.address) def reset(self): """Restart IR register name generator. """ self._ir_name_generator.reset() @property def translation_mode(self): """Get translation mode. """ return self._translation_mode @translation_mode.setter def translation_mode(self, value): """Set translation mode. 
""" self._translation_mode = value def _log_not_supported_instruction(self, instruction): bytes_str = " ".join("%02x" % ord(b) for b in instruction.bytes) logger.info("Instruction not supported: %s (%s [%s])", instruction.mnemonic, instruction, bytes_str) def _log_translation_exception(self, instruction): bytes_str = " ".join("%02x" % ord(b) for b in instruction.bytes) logger.error("Failed to translate arm to REIL: %s (%s)", instruction, bytes_str, exc_info=True) # ============================================================================ # def _not_implemented(self, tb, instruction): raise NotImplementedError("Instruction Not Implemented") # Translators # ============================================================================ # # ============================================================================ # # "Flags" # ============================================================================ # def _update_nf(self, tb, oprnd0, oprnd1, result): sign = tb._extract_bit(result, oprnd0.size - 1) tb.add(self._builder.gen_str(sign, self._flags["nf"])) def _carry_from_uf(self, tb, oprnd0, oprnd1, result): assert (result.size == oprnd0.size * 2) carry = tb._extract_bit(result, oprnd0.size) tb.add(self._builder.gen_str(carry, self._flags["cf"])) def _borrow_from_uf(self, tb, oprnd0, oprnd1, result): # BorrowFrom as defined in the ARM Reference Manual has the same implementation as CarryFrom self._carry_from_uf(tb, oprnd0, oprnd1, result) def _overflow_from_add_uf(self, tb, oprnd0, oprnd1, result): op1_sign = tb._extract_bit(oprnd0, oprnd0.size - 1) op2_sign = tb._extract_bit(oprnd1, oprnd0.size - 1) res_sign = tb._extract_bit(result, oprnd0.size - 1) overflow = tb._and_regs(tb._equal_regs(op1_sign, op2_sign), tb._unequal_regs(op1_sign, res_sign)) tb.add(self._builder.gen_str(overflow, self._flags["vf"])) # Evaluate overflow and update the flag def _overflow_from_sub_uf(self, tb, oprnd0, oprnd1, result): tb.add( self._builder.gen_str( tb._overflow_from_sub(oprnd0, 
oprnd1, result), self._flags["vf"])) def _update_zf(self, tb, oprnd0, oprnd1, result): zf = self._flags["zf"] imm0 = tb.immediate((2**oprnd0.size) - 1, result.size) tmp0 = tb.temporal(oprnd0.size) tb.add(self._builder.gen_and(result, imm0, tmp0)) # filter low part of result tb.add(self._builder.gen_bisz(tmp0, zf)) def _carry_out(self, tb, carry_operand, oprnd0, oprnd1, result): if isinstance(carry_operand, ArmImmediateOperand): return elif isinstance(carry_operand, ArmRegisterOperand): return elif isinstance(carry_operand, ArmShiftedRegisterOperand): base = ReilRegisterOperand(carry_operand.base_reg.name, carry_operand.size) shift_type = carry_operand.shift_type shift_amount = carry_operand.shift_amount if (shift_type == 'lsl'): if isinstance(shift_amount, ArmImmediateOperand): if shift_amount.immediate == 0: return else: # carry_out = Rm[32 - shift_imm] shift_carry_out = tb._extract_bit( base, 32 - shift_amount.immediate) elif isinstance(shift_amount, ArmRegisterOperand): # Rs: register with shift amount # if Rs[7:0] == 0 then # carry_out = C Flag # else if Rs[7:0] <= 32 then # carry_out = Rm[32 - Rs[7:0]] # else /* Rs[7:0] > 32 */ # carry_out = 0 shift_carry_out = tb.temporal(1) tb.add( self._builder.gen_str(self._flags["cf"], shift_carry_out)) rs = ReilRegisterOperand(shift_amount.name, shift_amount.size) rs_7_0 = tb._and_regs(rs, tb.immediate(0xFF, rs.size)) end_label = tb.label('end_label') rs_greater_32_label = tb.label('rs_greater_32_label') # if Rs[7:0] == 0 then # carry_out = C Flag tb._jump_if_zero( rs_7_0, end_label ) # shift_carry_out already has the C flag set, so do nothing tb.add( self._builder.gen_jcc( tb._greater_than_or_equal( rs_7_0, tb.immediate(33, rs_7_0.size)), rs_greater_32_label)) # Rs > 0 and Rs <= 32 # carry_out = Rm[32 - Rs[7:0]] extract_bit_number = tb.temporal(rs_7_0.size) tb.add( self._builder.gen_sub(tb.immediate(32, rs_7_0.size), rs_7_0, extract_bit_number)) tb.add( self._builder.gen_str( tb._extract_bit_with_register( base, 
extract_bit_number), shift_carry_out)) tb._jump_to(end_label) # else /* Rs[7:0] > 32 */ # carry_out = 0 tb.add(rs_greater_32_label) tb.add( self._builder.gen_str(tb.immediate(0, 1), shift_carry_out)) # tb._jump_to(end_label) tb.add(end_label) else: raise Exception("carry_out: Unknown shift amount type.") else: # TODO: Implement other shift types raise NotImplementedError( "Instruction Not Implemented: carry_out: shift type " + carry_operand.shift_type) else: raise Exception("carry_out: Unknown operand type.") tb.add(self._builder.gen_str(shift_carry_out, self._flags["cf"])) def _update_flags_data_proc_add(self, tb, oprnd0, oprnd1, result): self._update_zf(tb, oprnd0, oprnd1, result) self._update_nf(tb, oprnd0, oprnd1, result) self._carry_from_uf(tb, oprnd0, oprnd1, result) self._overflow_from_add_uf(tb, oprnd0, oprnd1, result) def _update_flags_data_proc_sub(self, tb, oprnd0, oprnd1, result): self._update_zf(tb, oprnd0, oprnd1, result) self._update_nf(tb, oprnd0, oprnd1, result) self._borrow_from_uf(tb, oprnd0, oprnd1, result) # C Flag = NOT BorrowFrom (to be used by subsequent instructions like SBC and RSC) tb.add( self._builder.gen_str(tb._negate_reg(self._flags["cf"]), self._flags["cf"])) self._overflow_from_sub_uf(tb, oprnd0, oprnd1, result) def _update_flags_data_proc_other(self, tb, second_operand, oprnd0, oprnd1, result): self._update_zf(tb, oprnd0, oprnd1, result) self._update_nf(tb, oprnd0, oprnd1, result) self._carry_out(tb, second_operand, oprnd0, oprnd1, result) # Overflow Flag (V) unaffected def _update_flags_other(self, tb, oprnd0, oprnd1, result): self._update_zf(tb, oprnd0, oprnd1, result) self._update_nf(tb, oprnd0, oprnd1, result) # Carry Flag (C) unaffected # Overflow Flag (V) unaffected def _undefine_flag(self, tb, flag): # NOTE: In every test I've made, each time a flag is leave # undefined it is always set to 0. 
# NOTE(review): the next two statements are the tail of a definition whose
# header precedes this chunk; they are reproduced unchanged.
imm = tb.immediate(0, flag.size)
tb.add(self._builder.gen_str(imm, flag))


def _clear_flag(self, tb, flag):
    """Add to *tb* a ``str`` of an immediate 0 (sized like *flag*) into *flag*."""
    imm = tb.immediate(0, flag.size)
    tb.add(self._builder.gen_str(imm, flag))


def _set_flag(self, tb, flag):
    """Add to *tb* a ``str`` of an immediate 1 (sized like *flag*) into *flag*."""
    imm = tb.immediate(1, flag.size)
    tb.add(self._builder.gen_str(imm, flag))


# Condition evaluators.
#
# Each one returns an operand representing its condition, built from the
# entries of ``self._flags``. Those that need to combine or negate flags do
# so through the ``tb`` helpers (``_negate_reg``, ``_and_regs``, ...).

# EQ: Z set
def _evaluate_eq(self, tb):
    """Return the operand for EQ: the ``zf`` flag itself."""
    return self._flags["zf"]


# NE: Z clear
def _evaluate_ne(self, tb):
    """Return the operand for NE: the negation of ``zf``."""
    return tb._negate_reg(self._flags["zf"])


# CS: C set
def _evaluate_cs(self, tb):
    """Return the operand for CS: the ``cf`` flag itself."""
    return self._flags["cf"]


# CC: C clear
def _evaluate_cc(self, tb):
    """Return the operand for CC: the negation of ``cf``."""
    return tb._negate_reg(self._flags["cf"])


# MI: N set
def _evaluate_mi(self, tb):
    """Return the operand for MI: the ``nf`` flag itself."""
    return self._flags["nf"]


# PL: N clear
def _evaluate_pl(self, tb):
    """Return the operand for PL: the negation of ``nf``."""
    return tb._negate_reg(self._flags["nf"])


# VS: V set
def _evaluate_vs(self, tb):
    """Return the operand for VS: the ``vf`` flag itself."""
    return self._flags["vf"]


# VC: V clear
def _evaluate_vc(self, tb):
    """Return the operand for VC: the negation of ``vf``."""
    return tb._negate_reg(self._flags["vf"])


# HI: C set and Z clear
def _evaluate_hi(self, tb):
    """Return the operand for HI: ``cf`` AND (NOT ``zf``)."""
    return tb._and_regs(self._flags["cf"],
                        tb._negate_reg(self._flags["zf"]))


# LS: C clear or Z set
def _evaluate_ls(self, tb):
    """Return the operand for LS: (NOT ``cf``) OR ``zf``."""
    return tb._or_regs(tb._negate_reg(self._flags["cf"]),
                       self._flags["zf"])


# GE: N == V
def _evaluate_ge(self, tb):
    """Return the operand for GE: ``nf`` equals ``vf``."""
    return tb._equal_regs(self._flags["nf"], self._flags["vf"])


# LT: N != V
def _evaluate_lt(self, tb):
    """Return the operand for LT: the negation of the GE condition."""
    return tb._negate_reg(self._evaluate_ge(tb))


# GT: (Z == 0) and (N == V)
def _evaluate_gt(self, tb):
    """Return the operand for GT: (NOT ``zf``) AND the GE condition."""
    return tb._and_regs(tb._negate_reg(self._flags["zf"]),
                        self._evaluate_ge(tb))


# LE: (Z == 1) or (N != V)
def _evaluate_le(self, tb):
    """Return the operand for LE: ``zf`` OR the LT condition."""
    return tb._or_regs(self._flags["zf"], self._evaluate_lt(tb))


def _evaluate_condition_code(self, tb, instruction, nop_label):
    """Add a conditional jump to *nop_label* taken when the condition fails.

    Nothing is added when the instruction's condition code is
    ``ARM_COND_CODE_AL``. Otherwise the matching ``_evaluate_*`` method
    is looked up, its result is negated, and a ``jcc`` on that negated
    condition targeting *nop_label* is added to *tb*.

    Raises:
        KeyError: if ``instruction.condition_code`` is not in the table.
    """
    if instruction.condition_code == ARM_COND_CODE_AL:
        return

    eval_cc_fn = {
        ARM_COND_CODE_EQ: self._evaluate_eq,
        ARM_COND_CODE_NE: self._evaluate_ne,
        ARM_COND_CODE_CS: self._evaluate_cs,
        ARM_COND_CODE_HS: self._evaluate_cs,  # HS shares the CS evaluator
        ARM_COND_CODE_CC: self._evaluate_cc,
        ARM_COND_CODE_LO: self._evaluate_cc,  # LO shares the CC evaluator
        ARM_COND_CODE_MI: self._evaluate_mi,
        ARM_COND_CODE_PL: self._evaluate_pl,
        ARM_COND_CODE_VS: self._evaluate_vs,
        ARM_COND_CODE_VC: self._evaluate_vc,
        ARM_COND_CODE_HI: self._evaluate_hi,
        ARM_COND_CODE_LS: self._evaluate_ls,
        ARM_COND_CODE_GE: self._evaluate_ge,
        ARM_COND_CODE_LT: self._evaluate_lt,
        ARM_COND_CODE_GT: self._evaluate_gt,
        ARM_COND_CODE_LE: self._evaluate_le,
    }

    # Jump over the instruction when its condition does NOT hold.
    neg_cond = tb._negate_reg(eval_cc_fn[instruction.condition_code](tb))

    tb.add(self._builder.gen_jcc(neg_cond, nop_label))


# "Data-processing Instructions"
# ============================================================================ #
def _translate_mov(self, tb, instruction):
    """Translate MOV: write the value read from operand 1 into operand 0.

    When ``instruction.update_flags`` is set, the flags are refreshed
    through ``_update_flags_data_proc_other``.
    """
    oprnd1 = tb.read(instruction.operands[1])

    tb.write(instruction.operands[0], oprnd1)

    if instruction.update_flags:
        self._update_flags_data_proc_other(
            tb, instruction.operands[1], oprnd1, None, oprnd1)


def _translate_and(self, tb, instruction):
    """Translate AND: operand 0 <- operand 1 & operand 2.

    When ``instruction.update_flags`` is set, the flags are refreshed
    through ``_update_flags_data_proc_other``.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size)

    tb.add(self._builder.gen_and(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_flags_data_proc_other(
            tb, instruction.operands[2], oprnd1, oprnd2, result)


def _translate_orr(self, tb, instruction):
    """Translate ORR: operand 0 <- operand 1 | operand 2.

    When ``instruction.update_flags`` is set, the flags are refreshed
    through ``_update_flags_data_proc_other``.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size)

    tb.add(self._builder.gen_or(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_flags_data_proc_other(
            tb, instruction.operands[2], oprnd1, oprnd2, result)


def _translate_eor(self, tb, instruction):
    """Translate EOR: operand 0 <- operand 1 ^ operand 2.

    When ``instruction.update_flags`` is set, the flags are refreshed
    through ``_update_flags_data_proc_other``.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size)

    tb.add(self._builder.gen_xor(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_flags_data_proc_other(
            tb, instruction.operands[2], oprnd1, oprnd2, result)


def _translate_add(self, tb, instruction):
    """Translate ADD: operand 0 <- operand 1 + operand 2.

    The sum is computed into a temporary twice as wide as operand 1.
    When ``instruction.update_flags`` is set, the flags are refreshed
    through ``_update_flags_data_proc_add``.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size * 2)

    tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_flags_data_proc_add(tb, oprnd1, oprnd2, result)


def _translate_sub(self, tb, instruction):
    """Translate SUB: operand 0 <- operand 1 - operand 2.

    The difference is computed into a temporary twice as wide as
    operand 1. When ``instruction.update_flags`` is set, the flags are
    refreshed through ``_update_flags_data_proc_sub``.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size * 2)

    tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_flags_data_proc_sub(tb, oprnd1, oprnd2, result)


def _translate_mul(self, tb, instruction):
    """Translate MUL: operand 0 <- operand 1 * operand 2.

    The product is computed into a temporary twice as wide as operand 1.
    When ``instruction.update_flags`` is set, only ``_update_zf`` and
    ``_update_nf`` are invoked.
    """
    oprnd1 = tb.read(instruction.operands[1])
    oprnd2 = tb.read(instruction.operands[2])

    result = tb.temporal(oprnd1.size * 2)

    tb.add(self._builder.gen_mul(oprnd1, oprnd2, result))

    tb.write(instruction.operands[0], result)

    if instruction.update_flags:
        self._update_zf(tb, oprnd1, oprnd2, result)
        self._update_nf(tb, oprnd1, oprnd2, result)


def _translate_cmn(self, tb, instruction):
    """Translate CMN: add operands 0 and 1 and update flags only.

    The sum goes to a temporary that is never written back to an
    operand.
    """
    oprnd1 = tb.read(instruction.operands[0])
    oprnd2 = tb.read(instruction.operands[1])

    result = tb.temporal(oprnd1.size * 2)

    tb.add(self._builder.gen_add(oprnd1, oprnd2, result))

    # S = 1 (implied in the instruction)
    self._update_flags_data_proc_add(tb, oprnd1, oprnd2, result)


def _translate_cmp(self, tb, instruction):
    """Translate CMP: subtract operand 1 from operand 0, update flags only.

    The difference goes to a temporary that is never written back to an
    operand.
    """
    oprnd1 = tb.read(instruction.operands[0])
    oprnd2 = tb.read(instruction.operands[1])

    result = tb.temporal(oprnd1.size * 2)

    tb.add(self._builder.gen_sub(oprnd1, oprnd2, result))

    # S = 1 (implied in the instruction)
    self._update_flags_data_proc_sub(tb, oprnd1, oprnd2, result)


# "Load/store word and unsigned byte Instructions"
# ============================================================================ #
def _translate_ldr(self, tb, instruction):
    """Translate LDR: write the value read from operand 1 into operand 0."""
    oprnd1 = tb.read(instruction.operands[1])

    tb.write(instruction.operands[0], oprnd1)


def _translate_str(self, tb, instruction):
    """Translate STR: write the value read from operand 0 into operand 1."""
    oprnd0 = tb.read(instruction.operands[0])

    tb.write(instruction.operands[1], oprnd0)


# "Load/store multiple Instructions"
# ============================================================================ #
def _translate_ldm(self, tb, instruction):
    """Translate LDM by delegating to ``_translate_ldm_stm`` in load mode."""
    self._translate_ldm_stm(tb, instruction, True)


def _translate_stm(self, tb, instruction):
    """Translate STM by delegating to ``_translate_ldm_stm`` in store mode."""
    self._translate_ldm_stm(tb, instruction, False)


# LDM and STM have exactly the same logic except one loads and the other
# stores. It is assumed that the disassembler (for example Capstone) writes
# the register list in increasing order.
def _translate_ldm_stm(self, tb, instruction, load=True):
    """Translate a load/store-multiple instruction.

    Operand 0 is the base, operand 1 the register list. A temporary
    pointer starts as a copy of the base and is stepped by ``self._ws``
    before or after each transfer, upwards or downwards, depending on
    the addressing mode (IA, IB, DA, DB). Stack addressing modes are
    first mapped to those four through ``ldm_stack_am_to_non_stack_am``
    or ``stm_stack_am_to_non_stack_am``; a missing mode defaults to IA.
    If operand 0 has ``wb`` set, the base is finally moved by the total
    size of the register list.

    As before, the resolved addressing mode is stored back into
    ``instruction.ldm_stm_addr_mode``.

    Args:
        tb: translation builder receiving the generated instructions.
        instruction: the LDM/STM instruction being translated.
        load: True to load registers from memory, False to store them.

    Raises:
        ValueError: if the resolved addressing mode is not IA/IB/DA/DB.
    """
    base = tb.read(instruction.operands[0])
    reg_list = tb.read(instruction.operands[1])

    addr_mode = instruction.ldm_stm_addr_mode

    if addr_mode is None:
        addr_mode = ARM_LDM_STM_IA  # default mode for load and store

    if load:
        load_store_fn = self._load_value
        stack_am_to_non_stack_am = ldm_stack_am_to_non_stack_am
    else:  # Store
        load_store_fn = self._store_value
        stack_am_to_non_stack_am = stm_stack_am_to_non_stack_am

    # Convert stack addressing modes to non-stack addressing modes
    if addr_mode in stack_am_to_non_stack_am:
        addr_mode = stack_am_to_non_stack_am[addr_mode]

    # Keep the historical side effect: callers may read the resolved mode.
    instruction.ldm_stm_addr_mode = addr_mode

    pointer = tb.temporal(base.size)

    tb.add(self._builder.gen_str(base, pointer))

    reg_list_size_bytes = ReilImmediateOperand(
        self._ws.immediate * len(reg_list), base.size)

    if addr_mode == ARM_LDM_STM_IA:
        for reg in reg_list:
            load_store_fn(tb, pointer, reg)
            pointer = tb._add_to_reg(pointer, self._ws)
    elif addr_mode == ARM_LDM_STM_IB:
        for reg in reg_list:
            pointer = tb._add_to_reg(pointer, self._ws)
            load_store_fn(tb, pointer, reg)
    elif addr_mode == ARM_LDM_STM_DA:
        # Assuming the register list was in increasing register number.
        # Iterate backwards without mutating the list tb.read() returned.
        for reg in reversed(reg_list):
            load_store_fn(tb, pointer, reg)
            pointer = tb._sub_to_reg(pointer, self._ws)
    elif addr_mode == ARM_LDM_STM_DB:
        for reg in reversed(reg_list):
            pointer = tb._sub_to_reg(pointer, self._ws)
            load_store_fn(tb, pointer, reg)
    else:
        raise ValueError("Unknown addressing mode.")

    # Write-back
    if instruction.operands[0].wb:
        if addr_mode in (ARM_LDM_STM_IA, ARM_LDM_STM_IB):
            tmp = tb._add_to_reg(base, reg_list_size_bytes)
        else:  # DA or DB: every other mode was rejected above
            tmp = tb._sub_to_reg(base, reg_list_size_bytes)

        tb.add(self._builder.gen_str(tmp, base))


def _load_value(self, tb, mem_dir, value):
    """Add to *tb* an ``ldm`` built from address *mem_dir* and *value*."""
    tb.add(self._builder.gen_ldm(mem_dir, value))


def _store_value(self, tb, mem_dir, value):
    """Add to *tb* an ``stm`` built from *value* and address *mem_dir*."""
    tb.add(self._builder.gen_stm(value, mem_dir))


# PUSH and POP are equivalent to STM and LDM in FD mode with the SP (and
# write-back). Instructions are modified to adapt it to the LDM/STM interface.
def _translate_push_pop(self, tb, instruction, translate_fn):
    """Rewrite a PUSH/POP as an FD-mode STM/LDM on r13 and translate it.

    The instruction is modified in place: its operands become
    ``[sp_reg, <original operand 0>]`` with write-back enabled on the
    SP operand, and its addressing mode is set to ``ARM_LDM_STM_FD``.

    NOTE(review): because of that in-place rewrite, translating the same
    instruction object a second time would find the SP operand at index
    0 instead of the register list - confirm callers never do so.
    """
    sp_name = "r13"  # TODO: Use self._sp
    # Infer it from the registers list
    sp_size = instruction.operands[0].reg_list[0][0].size
    sp_reg = ArmRegisterOperand(sp_name, sp_size)
    sp_reg.wb = True
    instruction.operands = [sp_reg, instruction.operands[0]]
    instruction.ldm_stm_addr_mode = ARM_LDM_STM_FD
    translate_fn(tb, instruction)


def _translate_push(self, tb, instruction):
    """Translate PUSH through ``_translate_push_pop`` using the STM path."""
    self._translate_push_pop(tb, instruction, self._translate_stm)


def _translate_pop(self, tb, instruction):
    """Translate POP through ``_translate_push_pop`` using the LDM path."""
    self._translate_push_pop(tb, instruction, self._translate_ldm)


# "Branch Instructions"
# ============================================================================ #
def _translate_b(self, tb, instruction):
    """Translate B: branch without link."""
    self._translate_branch(tb, instruction, link=False)


def _translate_bl(self, tb, instruction):
    """Translate BL: branch with link."""
    self._translate_branch(tb, instruction, link=True)


# TODO: Thumb
def _translate_bx(self, tb, instruction):
    """Translate BX: handled as a branch without link."""
    self._translate_branch(tb, instruction, link=False)


def _translate_blx(self, tb, instruction):
    """Translate BLX: handled as a branch with link."""
    self._translate_branch(tb, instruction, link=True)


def _translate_branch(self, tb, instruction, link):
    """Translate a branch to the immediate or register in operand 0.

    The target is widened by 8 bits and shifted left by 8.
    NOTE(review): presumably the low 8 bits address a position inside
    the REIL expansion of one native instruction - confirm.

    For a register operand, bit 0 is cleared first (mask ``0xFFFFFFFE``).
    With *link* set, ``instruction.address + instruction.size`` is
    stored into ``self._lr`` before jumping.

    Raises:
        NotImplementedError: if operand 0 is neither an immediate nor a
            register operand.
    """
    arm_operand = instruction.operands[0]

    if isinstance(arm_operand, ArmImmediateOperand):
        target = ReilImmediateOperand(arm_operand.immediate << 8,
                                      self._pc.size + 8)
    elif isinstance(arm_operand, ArmRegisterOperand):
        target = ReilRegisterOperand(arm_operand.name, arm_operand.size)
        target = tb._and_regs(
            target, ReilImmediateOperand(0xFFFFFFFE, target.size))

        tmp0 = tb.temporal(target.size + 8)
        tmp1 = tb.temporal(target.size + 8)

        # Widen the masked register, then shift it left by 8.
        tb.add(self._builder.gen_str(target, tmp0))
        tb.add(
            self._builder.gen_bsh(
                tmp0, ReilImmediateOperand(8, target.size + 8), tmp1))

        target = tmp1
    else:
        raise NotImplementedError(
            "Instruction Not Implemented: Unknown operand for branch operation."
        )

    if link:
        tb.add(
            self._builder.gen_str(
                ReilImmediateOperand(
                    instruction.address + instruction.size,
                    self._pc.size),
                self._lr))

    tb._jump_to(target)