def populate(self, known_instrs):
    """Decode instructions linearly from ``self.entry`` to fill this block.

    Every visited address is registered in ``known_instrs`` as owned by
    this block, each decoded instruction is appended to ``self.instrs``
    and formatted, and successor addresses are collected in ``self.exits``.

    Decoding stops when:
      * the executable reports an architecture other than "x86",
        "x86_64", "ppc" or "arm";
      * an instruction reports itself as invalid;
      * an instruction ends the block (its targets become exits);
      * the next address already has an entry in ``known_instrs`` (that
        address is recorded as an exit).

    known_instrs -- mapping of address to owning block, updated in place.
    """
    # The architecture string is requested once rather than per instruction.
    architecture = self.exe.architecture()

    addr = self.entry
    while True:
        known_instrs[addr] = self

        if architecture == "x86":
            # Read a 15-byte window, then trim it to the decoded length.
            opcode = self.exe.read(addr, 15)
            result = X86.disassemble32(opcode, addr)
            opcode = opcode[0:result.length]
            instr = X86Instruction(opcode, addr, result, 4)
        elif architecture == "x86_64":
            opcode = self.exe.read(addr, 15)
            result = X86.disassemble64(opcode, addr)
            opcode = opcode[0:result.length]
            instr = X86Instruction(opcode, addr, result, 8)
        elif architecture == "ppc":
            opcode = self.exe.read(addr, 4)
            if len(opcode) == 4:
                # The word is unpacked big-endian before decoding.
                result = PPC.disassemble(struct.unpack(">I", opcode)[0], addr)
                instr = PPCInstruction(opcode, addr, result)
            else:
                # Short read: record an empty placeholder instruction.
                instr = PPCInstruction("", addr, PPC.Instruction())
        elif architecture == "arm":
            # Bit 0 is masked off for the read; the decoder still gets addr.
            opcode = self.exe.read(addr & (~1), 4)
            if len(opcode) == 4:
                # The word is unpacked little-endian before decoding.
                result = Arm.disassemble(struct.unpack("<I", opcode)[0], addr)
                instr = ArmInstruction(opcode, addr, result)
            else:
                instr = ArmInstruction("", addr, Arm.Instruction())
        else:
            # Unsupported architecture: nothing can be decoded.
            break

        self.instrs.append(instr)
        instr.format_text(self, self.analysis.options)

        if not instr.isValid():
            break

        if instr.isBlockEnding():
            if instr.isConditionalBranch():
                # Taken target plus the fall-through address.
                self.true_path = instr.target
                self.false_path = addr + instr.length()
                self.exits.extend([self.true_path, self.false_path])
            elif instr.target is not None:
                self.exits.append(instr.target)
            break

        addr += instr.length()
        if addr in known_instrs:
            # The next address is already claimed; link to it and stop.
            self.exits.append(addr)
            break
def populate(self, known_instrs):
    """Fill this block by disassembling forward from ``self.entry``.

    Each address visited is stored in ``known_instrs`` with this block as
    its value.  Decoded instructions are appended to ``self.instrs`` and
    formatted with ``self.analysis.options``; successor addresses are
    appended to ``self.exits``.  The walk ends on an unsupported
    architecture, an invalid instruction, a block-ending instruction, or
    when the following address is already a key of ``known_instrs``.

    known_instrs -- mapping of address to owning block, updated in place.
    """

    def decode_at(addr):
        # Build the instruction object at addr, or return None when the
        # executable's architecture is not one of the handled names.
        arch_name = self.exe.architecture()

        if arch_name == "x86" or arch_name == "x86_64":
            if arch_name == "x86":
                disassemble, addr_size = X86.disassemble32, 4
            else:
                disassemble, addr_size = X86.disassemble64, 8
            # Read a 15-byte window, then trim it to the decoded length.
            window = self.exe.read(addr, 15)
            decoded = disassemble(window, addr)
            return X86Instruction(window[0:decoded.length], addr, decoded,
                                  addr_size)

        if arch_name == "ppc":
            word = self.exe.read(addr, 4)
            if len(word) != 4:
                # Short read: record an empty placeholder instruction.
                return PPCInstruction("", addr, PPC.Instruction())
            decoded = PPC.disassemble(struct.unpack(">I", word)[0], addr)
            return PPCInstruction(word, addr, decoded)

        if arch_name == "arm":
            # Bit 0 is masked off for the read; the decoder still gets addr.
            word = self.exe.read(addr & (~1), 4)
            if len(word) != 4:
                return ArmInstruction("", addr, Arm.Instruction())
            decoded = Arm.disassemble(struct.unpack("<I", word)[0], addr)
            return ArmInstruction(word, addr, decoded)

        return None

    addr = self.entry
    while True:
        known_instrs[addr] = self

        instr = decode_at(addr)
        if instr is None:
            break

        self.instrs.append(instr)
        instr.format_text(self, self.analysis.options)

        if not instr.isValid():
            break

        if instr.isBlockEnding():
            if instr.isConditionalBranch():
                # Taken target plus the fall-through address.
                self.true_path = instr.target
                self.false_path = addr + instr.length()
                self.exits.extend([self.true_path, self.false_path])
            elif instr.target is not None:
                self.exits.append(instr.target)
            break

        addr += instr.length()
        if addr in known_instrs:
            # The next address is already claimed; link to it and stop.
            self.exits.append(addr)
            break
def p_instr_pseudo(self,p):
    'instr : pseudo'
    # NOTE: the string above is a grammar rule, not prose documentation;
    # presumably the parser generator reads it from __doc__, so it must stay
    # verbatim.
    #
    # Resolve a parsed pseudo-instruction to a concrete X86.Instruction:
    # every operand is expanded to its list of candidate concrete forms, the
    # combinations are tried in product order, and the first combination that
    # type-checks against any encoding of the mnemonic is stored in p[0].
    # Raises ValueError when no combination type-checks.
    pseudo = p[1]

    def mk_op_list(op):
        # Expand one operand into the list of concrete candidates to try.
        if op is None:
            return [None]
        if isinstance(op, X86UnknownSizeImmediate):
            v = op.value
            candidates = [X86.Id(v)]
            # Same acceptance ranges as before with the subsumed clauses
            # removed (v <= 0x7F is already covered by v <= 0xFF).
            if v <= 0xFF or v >= 0xFFFFFF80 or 0xFF80 <= v <= 0xFFFF:
                candidates.append(X86.Ib(v))
            if v <= 0xFFFF or v >= 0xFFFF8000:
                candidates.append(X86.Iw(v))
            return candidates
        if isinstance(op, X86UnknownSizeMem16):
            # One candidate per entry of memsizes; a real list, like the
            # other branches return.
            return [X86.Mem16(op.Seg, s, op.BaseReg, op.IndexReg, op.Disp)
                    for s in memsizes]
        if isinstance(op, X86UnknownSizeMem32):
            return [X86.Mem32(op.Seg, s, op.BaseReg, op.IndexReg,
                              op.ScaleFac, op.Disp)
                    for s in memsizes]
        return [op]

    operand_choices = [mk_op_list(op)
                       for op in (pseudo.op1, pseudo.op2, pseudo.op3)]
    tc = X86TypeChecker.X86TypeChecker()
    for operands in itertools.product(*operand_choices):
        i = X86.Instruction(pseudo.pf, pseudo.mnem, *operands)
        for enc in X86EncodeTable.mnem_to_encodings[i.mnem.IntValue()]:
            # See if the encoding matches, i.e. if the operands type-check.
            if tc.TypeCheckInstruction_opt(i, enc.ops) is not None:
                p[0] = i
                return
    raise ValueError("%s: bad instruction" % pseudo)
def mk_op_list(op):
    """Expand one operand into the list of concrete candidates to try.

    * ``None`` yields ``[None]``.
    * An ``X86UnknownSizeImmediate`` yields ``X86.Id`` first, followed by
      ``X86.Ib`` and/or ``X86.Iw`` when the value falls in the ranges
      tested below.
    * An unknown-size 16- or 32-bit memory operand yields one concrete
      memory operand per entry of ``memsizes``.
    * Anything else is returned unchanged as a one-element list.

    A list is returned in every case.
    """
    if op is None:
        return [None]

    if isinstance(op, X86UnknownSizeImmediate):
        v = op.value
        candidates = [X86.Id(v)]
        # Same acceptance ranges as before with the subsumed clauses removed
        # (v <= 0x7F is already covered by v <= 0xFF).
        if v <= 0xFF or v >= 0xFFFFFF80 or 0xFF80 <= v <= 0xFFFF:
            candidates.append(X86.Ib(v))
        if v <= 0xFFFF or v >= 0xFFFF8000:
            candidates.append(X86.Iw(v))
        return candidates

    if isinstance(op, X86UnknownSizeMem16):
        return [X86.Mem16(op.Seg, s, op.BaseReg, op.IndexReg, op.Disp)
                for s in memsizes]

    if isinstance(op, X86UnknownSizeMem32):
        return [X86.Mem32(op.Seg, s, op.BaseReg, op.IndexReg, op.ScaleFac,
                          op.Disp)
                for s in memsizes]

    return [op]
def validate_meminner(mi,unk=False):
    # Normalise a parsed 16-bit memory expression by encoding its parts with
    # X86ModRM.ModRM16 and rebuilding the operand from what Interpret()
    # reports.
    #
    # mi  -- the memory expression to validate.
    # unk -- when true, rebuild as X86UnknownSizeMem16 instead of X86.Mem16.
    #
    # Raises ValueError when the encode/interpret step raises IndexError.
    #
    # NOTE(review): in the lines visible here `me` is built but never
    # returned, and only the X86.Mem16 case is handled -- this definition
    # looks truncated; confirm against the complete source.
    me = mi
    if isinstance(mi,X86.Mem16):
        m = X86ModRM.ModRM16()
        try:
            m.EncodeFromParts(mi.BaseReg,mi.IndexReg,mi.Disp)
            # The fourth value returned by Interpret() is discarded.
            br,ir,disp,_ = m.Interpret()
            if unk: me = X86UnknownSizeMem16(XM.CS,XM.Mb,br,ir,disp)
            else: me = X86.Mem16(XM.CS,XM.Mb,br,ir,disp)
        except IndexError, e:
            raise ValueError("%s: invalid ModRM/16 expression" % mi)
def p_op_dbg(self, p):
    'op : DBG'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The DBG symbol's value becomes a debug-register
    # operand.
    dbg_value = p[1]
    p[0] = X86.DebugReg(dbg_value)
def p_op_cnt(self, p):
    'op : CNT'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The CNT symbol's value becomes a
    # control-register operand.
    cnt_value = p[1]
    p[0] = X86.ControlReg(cnt_value)
def p_op_seg(self, p):
    'op : Seg'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The Seg symbol's value becomes a
    # segment-register operand.
    seg_value = p[1]
    p[0] = X86.SegReg(seg_value)
def p_meminner_Gd_times_num(self, p):
    'meminner : Gd TIMES NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Scaled index only: no base register and no
    # displacement are supplied.
    index_reg = p[1]
    scale = validate_scale(p[3])
    p[0] = X86.Mem32(XM.CS, XM.Mb, None, index_reg, scale, None)
def p_meminner_Gd(self, p):
    'meminner : Gd'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base register only: no index, scale argument 0,
    # no displacement.
    base_reg = p[1]
    p[0] = X86.Mem32(XM.CS, XM.Mb, base_reg, None, 0, None)
def p_meminner_Gd_plus_Gd_plus_num(self, p):
    'meminner : Gd PLUS Gd PLUS NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base + index + displacement, with 0 passed as
    # the scale argument.
    base_reg = p[1]
    index_reg = p[3]
    displacement = p[5]
    p[0] = X86.Mem32(XM.CS, XM.Mb, base_reg, index_reg, 0, displacement)
def p_op_xmm(self, p):
    'op : XMM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The XMM symbol's value becomes an XMM-register
    # operand.
    xmm_value = p[1]
    p[0] = X86.XMMReg(xmm_value)
def p_op_ap32(self, p):
    'op : NUM COLON NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The two numbers around the colon are passed to
    # X86.AP32 in source order.
    left_num = p[1]
    right_num = p[3]
    p[0] = X86.AP32(left_num, right_num)
def p_meminner_Gw(self, p):
    'meminner : Gw'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base register only: no index and no
    # displacement.
    base_reg = p[1]
    p[0] = X86.Mem16(XM.CS, XM.Mb, base_reg, None, None)
def p_meminner_Gw_plus_num(self, p):
    'meminner : Gw PLUS NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base register plus displacement, no index.
    base_reg = p[1]
    displacement = p[3]
    p[0] = X86.Mem16(XM.CS, XM.Mb, base_reg, None, displacement)
def p_meminner_Gw_plus_Gw(self, p):
    'meminner : Gw PLUS Gw'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base register plus index register, no
    # displacement.
    base_reg = p[1]
    index_reg = p[3]
    p[0] = X86.Mem16(XM.CS, XM.Mb, base_reg, index_reg, None)
def p_meminner_num(self, p):
    'meminner : NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Displacement only: no base or index register,
    # scale argument 0.
    displacement = p[1]
    p[0] = X86.Mem32(XM.CS, XM.Mb, None, None, 0, displacement)
def p_op_fpu(self, p):
    'op : FPU'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The FPU symbol's value becomes an FPU-register
    # operand.
    fpu_value = p[1]
    p[0] = X86.FPUReg(fpu_value)
def p_op_mmx(self, p):
    'op : MMX'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The MMX symbol's value becomes an MMX-register
    # operand.
    mmx_value = p[1]
    p[0] = X86.MMXReg(mmx_value)
def format_text(self, block, options):
    """Rebuild the display text and token list for this instruction.

    ``self.text.lines`` and ``self.text.tokens`` are reset and refilled with
    exactly one line.  A line is a list of ``[text, color]`` fragments; the
    token list holds ``[column, length, "ptr", value, text]`` entries for
    pointer-like operands and ``[column, length, "reg", name]`` entries for
    registers.

    block   -- the containing block; ``block.analysis.functions``,
               ``block.exe.start()/end()``, ``block.exe.symbols_by_addr``
               and (when present) ``block.exe.plt`` are consulted to turn
               numbers into names.
    options -- container tested with ``in``; when it contains "address" the
               line is prefixed with the instruction address.

    Side effect: ``self.plt`` is set when a memory displacement matches an
    entry of ``block.exe.plt``.

    Returns the result of comparing the new lines/tokens with ``old_lines``
    and ``old_tokens``.
    """
    # NOTE(review): these are always empty lists, never the previous text,
    # so the comparison at each return looks like it is always true --
    # confirm whether the old text was meant to be captured here.
    old_lines = []
    old_tokens = []
    self.text.lines = []
    self.text.tokens = []

    line = []
    tokens = []
    # x is the column at which the pending `result` text will start.
    x = 0
    instr = self.disasm

    if "address" in options:
        string = "%.8x " % self.addr
        line += [[string, QColor(0, 0, 128)]]
        x += len(string)

    if instr.operation == None:
        # No decoded operation: show a placeholder and stop.
        line += [["??", Qt.black]]
        self.text.lines += [line]
        self.text.tokens += [tokens]
        return (old_lines != self.text.lines) or (old_tokens != self.text.tokens)

    # `result` accumulates plain black text; it is flushed into `line`
    # whenever a differently coloured fragment has to be emitted.
    result = ""
    operation = ""
    if instr.flags & X86.FLAG_LOCK:
        operation += "lock "
    if instr.flags & X86.FLAG_ANY_REP:
        operation += "rep"
        if instr.flags & X86.FLAG_REPNE:
            operation += "ne"
        elif instr.flags & X86.FLAG_REPE:
            operation += "e"
        operation += " "
    operation += instr.operation
    # Pad the mnemonic (with prefixes) to at least 7 characters.
    if len(operation) < 7:
        operation += " " * (7 - len(operation))
    result += operation + " "
    for j in range(0, len(instr.operands)):
        if j != 0:
            result += ", "
        if instr.operands[j].operand == "imm":
            # Mask the immediate to the operand size and print it with
            # two hex digits per byte.
            value = instr.operands[j].immediate & ((1 << (instr.operands[j].size * 8)) - 1)
            numfmt = "0x%%.%dx" % (instr.operands[j].size * 2)
            string = numfmt % value
            if (instr.operands[j].size == self.addr_size) and (value in block.analysis.functions):
                # Pointer to existing function
                func = block.analysis.functions[value]
                string = func.name
                if func.plt:
                    color = QColor(192, 0, 192)
                else:
                    color = QColor(0, 0, 192)
                if len(result) > 0:
                    line += [[result, Qt.black]]
                    x += len(result)
                    result = ""
                line += [[string, color]]
                tokens += [[x, len(string), "ptr", value, string]]
                x += len(string)
            elif (instr.operands[j].size == self.addr_size) and (value >= block.exe.start()) and (value < block.exe.end()) and (not self.isLocalJump()):
                # Pointer within module
                if len(result) > 0:
                    line += [[result, Qt.black]]
                    x += len(result)
                    result = ""
                # Use the symbol name when one is known, else the number.
                if value in block.exe.symbols_by_addr:
                    string = block.exe.symbols_by_addr[value]
                line += [[string, QColor(0, 0, 192)]]
                tokens += [[x, len(string), "ptr", value, string]]
                x += len(string)
            else:
                result += string
        elif instr.operands[j].operand == "mem":
            # `plus` records whether a register has already been written
            # inside the brackets.
            plus = False
            result += X86.get_size_string(instr.operands[j].size)
            if (instr.segment != None) or (instr.operands[j].segment == "es"):
                result += instr.operands[j].segment + ":"
            result += '['
            if instr.operands[j].components[0] != None:
                tokens += [[x + len(result), len(instr.operands[j].components[0]), "reg", instr.operands[j].components[0]]]
                result += instr.operands[j].components[0]
                plus = True
            if instr.operands[j].components[1] != None:
                if plus:
                    # One extra column, presumably for the '+' that
                    # get_operand_string emits -- confirm.
                    tokens += [[x + len(result) + 1, len(instr.operands[j].components[1]), "reg", instr.operands[j].components[1]]]
                else:
                    tokens += [[x + len(result), len(instr.operands[j].components[1]), "reg", instr.operands[j].components[1]]]
                result += X86.get_operand_string(instr.operands[j].components[1], instr.operands[j].scale, plus)
                plus = True
            # The displacement is printed when it is non-zero or when it is
            # the only component.
            if (instr.operands[j].immediate != 0) or ((instr.operands[j].components[0] == None) and (instr.operands[j].components[1] == None)):
                if plus and (instr.operands[j].immediate >= -0x80) and (instr.operands[j].immediate < 0):
                    result += '-'
                    result += "0x%.2x" % (-instr.operands[j].immediate)
                elif plus and (instr.operands[j].immediate > 0) and (instr.operands[j].immediate <= 0x7f):
                    result += '+'
                    result += "0x%.2x" % instr.operands[j].immediate
                elif plus and (instr.operands[j].immediate >= -0x8000) and (instr.operands[j].immediate < 0):
                    result += '-'
                    result += "0x%.8x" % (-instr.operands[j].immediate)
                elif instr.flags & X86.FLAG_64BIT_ADDRESS:
                    if plus:
                        result += '+'
                    value = instr.operands[j].immediate
                    string = "0x%.16x" % instr.operands[j].immediate
                    if hasattr(block.exe, "plt") and value in block.exe.plt:
                        # Pointer to PLT entry
                        self.plt = block.exe.plt[value]
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        string = self.plt + "@PLT"
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    elif (value >= block.exe.start()) and (value < block.exe.end()):
                        # Pointer within module
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        if value in block.exe.symbols_by_addr:
                            string = block.exe.symbols_by_addr[value]
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    else:
                        result += string
                else:
                    if plus:
                        result += '+'
                    # Displacement masked to 32 bits for display.
                    value = instr.operands[j].immediate & 0xffffffff
                    string = "0x%.8x" % value
                    if (self.addr_size == 4) and hasattr(block.exe, "plt") and value in block.exe.plt:
                        # Pointer to PLT entry
                        self.plt = block.exe.plt[value]
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        string = block.exe.decorate_plt_name(self.plt)
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    elif (self.addr_size == 4) and (value >= block.exe.start()) and (value < block.exe.end()):
                        # Pointer within module
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        if value in block.exe.symbols_by_addr:
                            string = block.exe.symbols_by_addr[value]
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    else:
                        result += string
            result += ']'
        else:
            # Any other operand kind is printed by name as a register token.
            tokens += [[x + len(result), len(instr.operands[j].operand), "reg", instr.operands[j].operand]]
            result += instr.operands[j].operand

    # Flush whatever plain text is still pending.
    if len(result) > 0:
        line += [[result, Qt.black]]
    self.text.lines += [line]
    self.text.tokens += [tokens]

    return (old_lines != self.text.lines) or (old_tokens != self.text.tokens)
def p_meminner_Gd_plus_Gd_times_num_plus_num(self, p):
    'meminner : Gd PLUS Gd TIMES NUM PLUS NUM'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Full form: base + index * scale + displacement,
    # with the scale passed through validate_scale.
    base_reg = p[1]
    index_reg = p[3]
    scale = validate_scale(p[5])
    displacement = p[7]
    p[0] = X86.Mem32(XM.CS, XM.Mb, base_reg, index_reg, scale, displacement)
def format_text(self, block, options):
    """Rebuild the display text and token list for this instruction.

    ``self.text.lines`` and ``self.text.tokens`` are reset and refilled with
    exactly one line.  A line is a list of ``[text, color]`` fragments; the
    token list holds ``[column, length, "ptr", value, text]`` entries for
    pointer-like operands and ``[column, length, "reg", name]`` entries for
    registers.

    block   -- the containing block; ``block.analysis.functions``,
               ``block.exe.start()/end()``, ``block.exe.symbols_by_addr``
               and (when present) ``block.exe.plt`` are consulted to turn
               numbers into names.
    options -- container tested with ``in``; when it contains "address" the
               line is prefixed with the instruction address.

    Side effect: ``self.plt`` is set when a memory displacement matches an
    entry of ``block.exe.plt``.

    Returns the result of comparing the new lines/tokens with ``old_lines``
    and ``old_tokens``.
    """
    # NOTE(review): these are always empty lists, never the previous text,
    # so the comparison at each return looks like it is always true --
    # confirm whether the old text was meant to be captured here.
    old_lines = []
    old_tokens = []
    self.text.lines = []
    self.text.tokens = []

    line = []
    tokens = []
    # x is the column at which the pending `result` text will start.
    x = 0
    instr = self.disasm

    if "address" in options:
        string = "%.8x " % self.addr
        line += [[string, QColor(0, 0, 128)]]
        x += len(string)

    if instr.operation is None:
        # No decoded operation: show a placeholder and stop.
        line += [["??", Qt.black]]
        self.text.lines += [line]
        self.text.tokens += [tokens]
        return (old_lines != self.text.lines) or (old_tokens != self.text.tokens)

    # `result` accumulates plain black text; it is flushed into `line`
    # whenever a differently coloured fragment has to be emitted.
    result = ""
    operation = ""
    if instr.flags & X86.FLAG_LOCK:
        operation += "lock "
    if instr.flags & X86.FLAG_ANY_REP:
        operation += "rep"
        if instr.flags & X86.FLAG_REPNE:
            operation += "ne"
        elif instr.flags & X86.FLAG_REPE:
            operation += "e"
        operation += " "
    operation += instr.operation
    # Pad the mnemonic (with prefixes) to at least 7 characters.
    if len(operation) < 7:
        operation += " " * (7 - len(operation))
    result += operation + " "

    for j in range(0, len(instr.operands)):
        op = instr.operands[j]
        if j != 0:
            result += ", "

        if op.operand == "imm":
            # Mask the immediate to the operand size and print it with two
            # hex digits per byte.
            value = op.immediate & ((1 << (op.size * 8)) - 1)
            numfmt = "0x%%.%dx" % (op.size * 2)
            string = numfmt % value
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3.
            if (op.size == self.addr_size) and (value in block.analysis.functions):
                # Pointer to existing function
                func = block.analysis.functions[value]
                string = func.name
                if func.plt:
                    color = QColor(192, 0, 192)
                else:
                    color = QColor(0, 0, 192)
                if len(result) > 0:
                    line += [[result, Qt.black]]
                    x += len(result)
                    result = ""
                line += [[string, color]]
                tokens += [[x, len(string), "ptr", value, string]]
                x += len(string)
            elif (op.size == self.addr_size) and (value >= block.exe.start()) and (
                    value < block.exe.end()) and (not self.isLocalJump()):
                # Pointer within module
                if len(result) > 0:
                    line += [[result, Qt.black]]
                    x += len(result)
                    result = ""
                # Use the symbol name when one is known, else the number.
                if value in block.exe.symbols_by_addr:
                    string = block.exe.symbols_by_addr[value]
                line += [[string, QColor(0, 0, 192)]]
                tokens += [[x, len(string), "ptr", value, string]]
                x += len(string)
            else:
                result += string

        elif op.operand == "mem":
            # `plus` records whether a register has already been written
            # inside the brackets.
            plus = False
            result += X86.get_size_string(op.size)
            if (instr.segment is not None) or (op.segment == "es"):
                result += op.segment + ":"
            result += '['
            if op.components[0] is not None:
                tokens += [[x + len(result), len(op.components[0]), "reg",
                            op.components[0]]]
                result += op.components[0]
                plus = True
            if op.components[1] is not None:
                if plus:
                    # One extra column, presumably for the '+' that
                    # get_operand_string emits -- confirm.
                    tokens += [[x + len(result) + 1, len(op.components[1]),
                                "reg", op.components[1]]]
                else:
                    tokens += [[x + len(result), len(op.components[1]),
                                "reg", op.components[1]]]
                result += X86.get_operand_string(op.components[1], op.scale, plus)
                plus = True
            # The displacement is printed when it is non-zero or when it is
            # the only component.
            if (op.immediate != 0) or (
                    (op.components[0] is None) and (op.components[1] is None)):
                if plus and (op.immediate >= -0x80) and (op.immediate < 0):
                    result += '-'
                    result += "0x%.2x" % (-op.immediate)
                elif plus and (op.immediate > 0) and (op.immediate <= 0x7f):
                    result += '+'
                    result += "0x%.2x" % op.immediate
                elif plus and (op.immediate >= -0x8000) and (op.immediate < 0):
                    result += '-'
                    result += "0x%.8x" % (-op.immediate)
                elif instr.flags & X86.FLAG_64BIT_ADDRESS:
                    if plus:
                        result += '+'
                    value = op.immediate
                    string = "0x%.16x" % op.immediate
                    if hasattr(block.exe, "plt") and value in block.exe.plt:
                        # Pointer to PLT entry
                        self.plt = block.exe.plt[value]
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        string = self.plt + "@PLT"
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    elif (value >= block.exe.start()) and (value < block.exe.end()):
                        # Pointer within module
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        if value in block.exe.symbols_by_addr:
                            string = block.exe.symbols_by_addr[value]
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    else:
                        result += string
                else:
                    if plus:
                        result += '+'
                    # Displacement masked to 32 bits for display.
                    value = op.immediate & 0xffffffff
                    string = "0x%.8x" % value
                    if (self.addr_size == 4) and hasattr(block.exe, "plt") and (
                            value in block.exe.plt):
                        # Pointer to PLT entry
                        self.plt = block.exe.plt[value]
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        string = block.exe.decorate_plt_name(self.plt)
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    elif (self.addr_size == 4) and (value >= block.exe.start()) and (
                            value < block.exe.end()):
                        # Pointer within module
                        if len(result) > 0:
                            line += [[result, Qt.black]]
                            x += len(result)
                            result = ""
                        if value in block.exe.symbols_by_addr:
                            string = block.exe.symbols_by_addr[value]
                        line += [[string, QColor(0, 0, 192)]]
                        tokens += [[x, len(string), "ptr", value, string]]
                        x += len(string)
                    else:
                        result += string
            result += ']'

        else:
            # Any other operand kind is printed by name as a register token.
            tokens += [[x + len(result), len(op.operand), "reg", op.operand]]
            result += op.operand

    # Flush whatever plain text is still pending.
    if len(result) > 0:
        line += [[result, Qt.black]]
    self.text.lines += [line]
    self.text.tokens += [tokens]

    return (old_lines != self.text.lines) or (old_tokens != self.text.tokens)
def p_op_gb(self, p):
    'op : Gb'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The Gb symbol's value is wrapped in X86.Gb.
    gb_value = p[1]
    p[0] = X86.Gb(gb_value)
def p_meminner_Gd_plus_Gd(self, p):
    'meminner : Gd PLUS Gd'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  Base register plus index register, scale
    # argument 0, no displacement.
    base_reg = p[1]
    index_reg = p[3]
    p[0] = X86.Mem32(XM.CS, XM.Mb, base_reg, index_reg, 0, None)
""" Program entry point """

import argparse, os, sys
import X86, Arm, Power, Linker, Globals
from Arch import Arch
from Globals import er

# Instantiate one object representing each architecture and put them
# in a dictionary indexed by the Arch "enum" (see Arch.py)
x86_obj = X86.X86()
arm_obj = Arm.Arm()
power_obj = Power.Power()
archs = {Arch.X86: x86_obj, Arch.ARM: arm_obj, Arch.POWER: power_obj}

considered_archs = [] # filled by setConsideredArchs
# Section names taken into account; presumably consulted by code outside
# this view -- confirm.
considered_sections = [".text", ".data", ".bss", ".rodata", ".tdata", ".tbss"]

def buildArgParser():
    """ Construct the command line argument parser object """
    # NOTE(review): no return statement is visible in this view, so the
    # definition may be truncated here -- confirm against the full file.
    res = argparse.ArgumentParser(
        description="Align symbols in binaries from" + " multiple ISAs")
    # --compiler-inst is the only option marked as required below.
    res.add_argument("--compiler-inst", help="Path to the compiler installation", required=True)
    res.add_argument("--x86-bin", help="Path to the input x86 executable")
    res.add_argument("--arm-bin", help="Path to the input ARM executable")
def p_op_gw(self, p):
    'op : Gw'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The Gw symbol's value is wrapped in X86.Gw.
    gw_value = p[1]
    p[0] = X86.Gw(gw_value)
def p_op_gd(self, p):
    'op : Gd'
    # The rule string above is kept verbatim (presumably read by the parser
    # generator -- confirm).  The Gd symbol's value is wrapped in X86.Gd.
    gd_value = p[1]
    p[0] = X86.Gd(gd_value)
m.EncodeFromParts(mi.BaseReg,mi.IndexReg,mi.Disp) br,ir,disp,_ = m.Interpret() if unk: me = X86UnknownSizeMem16(XM.CS,XM.Mb,br,ir,disp) else: me = X86.Mem16(XM.CS,XM.Mb,br,ir,disp) except IndexError, e: raise ValueError("%s: invalid ModRM/16 expression" % mi) elif isinstance(mi,X86.Mem32): m = X86ModRM.ModRM32() m.EncodeFromParts(mi.BaseReg,mi.IndexReg,mi.ScaleFac,mi.Disp) br,ir,sf,disp,_ = m.Interpret() if unk: me = X86UnknownSizeMem32(XM.CS,XM.Mb,br,ir,sf,disp) else: me = X86.Mem32(XM.CS,XM.Mb,br,ir,sf,disp) else: raise ValueError("WTF is this memory expression %s" % mi) me.Seg = me.DefaultSeg() return me class X86Yacc(object): start = 'instr' def p_op_gb(self,p): 'op : Gb' p[0] = X86.Gb(p[1]) def p_op_gw(self,p): 'op : Gw' p[0] = X86.Gw(p[1])