def parse_instruction(string, location, tokens):
    """Build an ArmInstruction from the parsed tokens of an ARM instruction.

    ``string`` is forwarded unchanged as the first argument of
    ArmInstruction. ``location`` is not used by this function. ``tokens``
    must provide a "mnemonic" entry (holding "ins" and, optionally, "cc",
    "uf" and "ldm_stm_addr_mode") and may provide an "operands" entry.

    Returns the populated ArmInstruction.
    """
    parsed_mnemonic = tokens.get("mnemonic")
    operand_list = list(tokens.get("operands", []))

    instruction = ArmInstruction(
        string,
        parsed_mnemonic["ins"],
        operand_list,
        arch_info.architecture_mode,
    )

    # Optional condition code, translated through cc_mapper.
    if "cc" in parsed_mnemonic:
        instruction.condition_code = cc_mapper[parsed_mnemonic["cc"]]

    # The mere presence of "uf" turns the update-flags attribute on.
    if "uf" in parsed_mnemonic:
        instruction.update_flags = True

    # Optional LDM/STM addressing mode, translated through ldm_stm_am_mapper.
    if "ldm_stm_addr_mode" in parsed_mnemonic:
        addr_mode_key = parsed_mnemonic["ldm_stm_addr_mode"]
        instruction.ldm_stm_addr_mode = ldm_stm_am_mapper[addr_mode_key]

    return instruction
def parse_instruction(string, location, tokens):
    """Build an ArmInstruction from the parsed tokens of an ARM instruction.

    ``string`` is forwarded unchanged as the first argument of
    ArmInstruction. ``location`` is not used by this function. ``tokens``
    must provide a "mnemonic" entry (holding "ins" and, optionally, "cc",
    "uf" and "ldm_stm_addr_mode") and may provide an "operands" entry.

    Returns the populated ArmInstruction.
    """
    mnemonic_fields = tokens.get("mnemonic")
    parsed_operands = list(tokens.get("operands", []))

    result = ArmInstruction(
        string,
        mnemonic_fields["ins"],
        parsed_operands,
        arch_info.architecture_mode,
    )

    # Optional condition code, translated through cc_mapper.
    if "cc" in mnemonic_fields:
        result.condition_code = cc_mapper[mnemonic_fields["cc"]]

    # The mere presence of "uf" turns the update-flags attribute on.
    if "uf" in mnemonic_fields:
        result.update_flags = True

    # Optional LDM/STM addressing mode, translated through ldm_stm_am_mapper.
    if "ldm_stm_addr_mode" in mnemonic_fields:
        mode_key = mnemonic_fields["ldm_stm_addr_mode"]
        result.ldm_stm_addr_mode = ldm_stm_am_mapper[mode_key]

    return result
def _cs_translate_insn(self, cs_insn):
    """Translate a Capstone instruction into an ArmInstruction.

    Every operand of ``cs_insn`` is translated individually, register
    lists are folded into a single ArmRegisterListOperand, and the
    mnemonic is stripped of its suffixes (width qualifier, condition
    code, update-flags marker, LDM/STM addressing mode). The stripped
    information is stored on the returned instruction's attributes.
    """
    translated = [
        self.__cs_translate_operand(cs_op, cs_insn)
        for cs_op in cs_insn.operands
    ]
    name = cs_insn.mnemonic
    op_str = cs_insn.op_str

    # Special case: a register list "{rX - rX}" arrives as a series of
    # separate registers and has to become one ArmRegisterListOperand.
    if "{" in op_str:
        if "push" in name or "pop" in name:
            # push/pop omit the base register (sp), so every operand
            # belongs to the register list.
            leading, list_members = [], translated
        else:
            # Otherwise the first operand is the base register.
            leading, list_members = [translated[0]], translated[1:]

        reg_entries = [[reg] for reg in list_members]
        translated = leading + [
            ArmRegisterListOperand(reg_entries, reg_entries[0][0].size)
        ]

    # Drop the narrow/wide qualifiers (.w/.n); they are of no interest
    # for translation purposes.
    if name.endswith((".w", ".n")):
        name = name[:-2]

    # Drop the condition code. This has to happen before the update-flags
    # suffix is removed because, in UAL syntax, the condition code comes
    # after the update-flags suffix in the mnemonic.
    cc = cs_insn.cc
    if cc not in (ARM_CC_INVALID, ARM_CC_AL):
        cc_suffix = cc_inverse_mapper[cc_capstone_barf_mapper[cc]]
        if name[-2:] == cc_suffix:
            name = name[:-2]

    # Drop the update-flags suffix (s).
    sets_flags = cs_insn.update_flags
    if sets_flags and name[-1] == "s":
        name = name[:-1]

    # Drop the LDM/STM addressing mode from the mnemonic; it is stored
    # on the ArmInstruction further down.
    addr_mode = None
    if name.startswith(("ldm", "stm")):
        mode_suffix = name[-2:]
        if mode_suffix in ldm_stm_am_mapper:
            addr_mode = ldm_stm_am_mapper[mode_suffix]
            name = name[:-2]

    # TODO: Temporary hack to accommodate THUMB short notation:
    # "add r0, r1" -> "add r0, r0, r1"
    if len(translated) == 2 and name in ("add", "eor", "orr", "sub"):
        dst, src = translated
        translated = [dst, dst, src]

    instr = ArmInstruction(
        name + " " + op_str,
        name,
        translated,
        self._arch_mode,
    )

    if cc != ARM_CC_INVALID:
        instr.condition_code = cc_capstone_barf_mapper[cc]

    if sets_flags:
        instr.update_flags = True

    if name.startswith(("ldm", "stm")):
        instr.ldm_stm_addr_mode = addr_mode
        # A "!" in the operand string sets the wb attribute on the first
        # operand.
        if "!" in op_str:
            instr.operands[0].wb = True

    # TODO: LOAD/STORE MODE (it may be necessary to parse the mnemonic).

    return instr
def _cs_translate_insn(self, cs_insn):
    """Translate a Capstone instruction into an ArmInstruction.

    Every operand of ``cs_insn`` is translated individually, register
    lists are folded into a single ArmRegisterListOperand, and the
    mnemonic is stripped of its suffixes (width qualifier, condition
    code, update-flags marker, LDM/STM addressing mode). The stripped
    information is stored on the returned instruction's attributes.
    """
    ops = [
        self.__cs_translate_operand(raw_op, cs_insn)
        for raw_op in cs_insn.operands
    ]
    base_name = cs_insn.mnemonic
    operand_text = cs_insn.op_str

    # Special case: a register list "{rX - rX}" arrives as a series of
    # separate registers and has to become one ArmRegisterListOperand.
    if "{" in operand_text:
        if "push" in base_name or "pop" in base_name:
            # push/pop omit the base register (sp), so every operand
            # belongs to the register list.
            prefix_ops, members = [], ops
        else:
            # Otherwise the first operand is the base register.
            prefix_ops, members = [ops[0]], ops[1:]

        wrapped = [[member] for member in members]
        ops = prefix_ops + [
            ArmRegisterListOperand(wrapped, wrapped[0][0].size)
        ]

    # Drop the narrow/wide qualifiers (.w/.n); they are of no interest
    # for translation purposes.
    if base_name.endswith((".w", ".n")):
        base_name = base_name[:-2]

    # Drop the condition code. This has to happen before the update-flags
    # suffix is removed because, in UAL syntax, the condition code comes
    # after the update-flags suffix in the mnemonic.
    cond = cs_insn.cc
    if cond not in (ARM_CC_INVALID, ARM_CC_AL):
        cond_suffix = cc_inverse_mapper[cc_capstone_barf_mapper[cond]]
        if base_name[-2:] == cond_suffix:
            base_name = base_name[:-2]

    # Drop the update-flags suffix (s).
    updates_flags = cs_insn.update_flags
    if updates_flags and base_name[-1] == "s":
        base_name = base_name[:-1]

    # Drop the LDM/STM addressing mode from the mnemonic; it is stored
    # on the ArmInstruction further down.
    ldm_stm_mode = None
    if base_name.startswith(("ldm", "stm")):
        tail = base_name[-2:]
        if tail in ldm_stm_am_mapper:
            ldm_stm_mode = ldm_stm_am_mapper[tail]
            base_name = base_name[:-2]

    # TODO: Temporary hack to accommodate THUMB short notation:
    # "add r0, r1" -> "add r0, r0, r1"
    if len(ops) == 2 and base_name in ("add", "eor", "orr", "sub"):
        first, second = ops
        ops = [first, first, second]

    instr = ArmInstruction(
        base_name + " " + operand_text,
        base_name,
        ops,
        self._arch_mode,
    )

    if cond != ARM_CC_INVALID:
        instr.condition_code = cc_capstone_barf_mapper[cond]

    if updates_flags:
        instr.update_flags = True

    if base_name.startswith(("ldm", "stm")):
        instr.ldm_stm_addr_mode = ldm_stm_mode
        # A "!" in the operand string sets the wb attribute on the first
        # operand.
        if "!" in operand_text:
            instr.operands[0].wb = True

    # TODO: LOAD/STORE MODE (it may be necessary to parse the mnemonic).

    return instr