def get_returnenc(curr_instr, popcookie=False):
    """
    Build the return address encryption block for an ARM return instruction

    Returns that load ``pc`` directly (``pop {..., pc}`` and ``ldr pc, ...``)
    are split in two: the same instruction with ``lr`` in place of ``pc`` is
    put in front of the inserted block, and the return itself becomes
    ``bx lr``. Any other instruction is kept as the final element unchanged.

    :param curr_instr: return instruction
    :param popcookie: True if also frame cookie must be removed
    :return: return encryption instruction list
    """
    prologue = list(framecookietail) if popcookie else []
    loc = get_loc(curr_instr)
    opcode = curr_instr[0].upper()
    restore_lr = None
    if opcode.startswith('POP'):
        # pop {...,pc}: same register list with 'lr' substituted for 'pc'
        regs = curr_instr[1]
        pc_index = [str.upper(reg) for reg in regs].index('PC')
        rlist = regs[:pc_index] + ('lr', ) + regs[pc_index + 1:]
        restore_lr = Types.DoubleInstr(
            (curr_instr[0], Types.RegList(rlist), None, False))
    elif opcode.startswith('LDR') and curr_instr[1].upper() == 'PC':
        # ldr pc, [sp], #4: same load with 'lr' as destination
        restore_lr = type(curr_instr)(
            (curr_instr[0], Types.RegClass('lr')) + curr_instr[2:])
    if restore_lr is not None:
        prologue.insert(0, restore_lr)
        curr_instr = Types.DoubleInstr(
            ('bx', Types.RegClass('lr'), None, False))
    block = set_inlineblocklocation(loc, prologue + returnenc)
    # The final instruction keeps address and visibility but drops its label
    final = set_loc(curr_instr, Types.Loc('', loc.loc_addr, loc.loc_visible))
    return block + [final]
def add_func_label(self, ufuncs, instrs):
    """
    Insert function labels

    Both lists are walked in lockstep. When an instruction address equals
    the start address of the current function and the function name is not
    already part of the label, a ``<name> : `` declaration is prepended
    (with ``.thumb_func`` when ``ELF_utils.elf_arm()`` is true).

    :param ufuncs: function list
    :param instrs: instruction list
    :return: instruction list with function declarations
    """
    func_idx = 0
    pos = 0
    while func_idx != len(ufuncs) and pos != len(instrs):
        func = ufuncs[func_idx]
        ins = instrs[pos]
        iloc = get_loc(ins)
        if func.func_begin_addr == iloc.loc_addr \
                and func.func_name not in iloc.loc_label:
            declaration = '\n' + func.func_name + ' : '
            if ELF_utils.elf_arm():
                declaration = '\n.thumb_func' + declaration
            iloc.loc_label = declaration + iloc.loc_label
            instrs[pos] = set_loc(ins, iloc)
            func_idx += 1
            # Stay on this instruction so the next function is compared
            # against the same address
            continue
        if func.func_begin_addr < iloc.loc_addr:
            func_idx += 1
        pos += 1
    return instrs
def scan(self):
    """
    Scan instruction list and modify PC relative code with labels

    ``self.curr_regs`` holds the upper-cased names of the registers currently
    tracked as base registers. While that set is non-empty, operands of
    double, triple and four-operand instructions are rewritten through
    ``self.v_exp``. The set is cleared whenever the scan crosses the end
    address stored in ``self.funcs[self.curr_func][1]``.

    NOTE(review): the loop bound is ``len(self.instrs) - 1``, so the last
    instruction is never visited - confirm this is intended.
    """
    i = 0
    inlen = len(self.instrs) - 1
    while i < inlen:
        h1 = self.instrs[i]
        if get_loc(h1).loc_addr >= self.funcs[self.curr_func][1]:
            # It can be assumed that the base register is set only inside a single function
            self.curr_func += 1
            self.curr_regs.clear()
        # A register becomes tracked when it is the target of the PC thunk
        # pattern, or of a register-to-register MOV from a tracked register
        if isinstance(h1, Types.TripleInstr) and (self.match_get_pc_thunk(h1) \
            or (h1[0].upper() == 'MOV' and isinstance(h1[2], Types.RegClass) \
            and h1[2].upper() in self.curr_regs and isinstance(h1[1], Types.RegClass))):
            # .got.plt address can also be copied to more than one register
            self.curr_regs.add(h1[1].upper())
        elif len(self.curr_regs) > 0:
            if isinstance(h1, Types.DoubleInstr):
                self.instrs[i] = Types.DoubleInstr((h1[0], self.v_exp(h1[1]), h1[2], h1[3]))
            elif not isinstance(h1, Types.SingleInstr):
                if isinstance(h1, Types.TripleInstr):
                    # Both operands are rewritten
                    self.instrs[i] = Types.TripleInstr((h1[0], self.v_exp(h1[1]), self.v_exp(h1[2]), h1[3], h1[4]))
                elif isinstance(h1, Types.FourInstr):
                    # Only the operand at index 2 is rewritten
                    self.instrs[i] = Types.FourInstr((h1[0], h1[1], self.v_exp(h1[2]), h1[3], h1[4], h1[5]))
                # NOTE(review): nesting level of this check was reconstructed
                # from flattened source - confirm it belongs to this branch
                if isinstance(h1[1], Types.RegClass) and h1[1].upper() in self.curr_regs:
                    # Remove if overridden
                    self.curr_regs.remove(h1[1].upper())
        i += 1
def get_returnenc(curr_instr, popcookie=False):
    """
    Build the return address encryption block for a return instruction

    The inserted block is ``returnenc``, preceded by ``framecookietail`` when
    ``popcookie`` is set. The original instruction is appended as the last
    element, keeping its address and visibility but with an empty label.

    :param curr_instr: return instruction
    :param popcookie: True if also frame cookie must be removed
    :return: return encryption instruction list
    """
    loc = get_loc(curr_instr)
    if popcookie:
        inserted = framecookietail + returnenc
    else:
        inserted = returnenc
    block = set_inlineblocklocation(loc, inserted)
    final = set_loc(curr_instr, Types.Loc('', loc.loc_addr, loc.loc_visible))
    return block + [final]
def unify_loc(self, instrs):
    """
    Drop labels that repeat the label tracked from the previous instruction

    An instruction whose non-empty label equals the tracked label gets a
    location with an empty label (same address, visibility forced to True).
    Any other instruction, including one with an empty label, becomes the
    new tracked label.

    :param instrs: instruction list, modified in place
    :return: the same instruction list
    """
    tracked = ''
    for idx, ins in enumerate(instrs):
        loc = get_loc(ins)
        label = loc.loc_label
        if label == '' or label != tracked:
            tracked = label
            continue
        instrs[idx] = set_loc(ins, Types.Loc('', loc.loc_addr, True))
    return instrs
def pcreloffARM(self, instrs):
    """
    Fold a later ``add rX, pc`` into the literal word loaded into rX

        ldr r7, label          ->  ldr r7, label
        ...                    ->  ...
        add r7, pc             ->  add r7, #0
        ...                    ->  ...
        ldr r1, [r7]           ->  ldr r1, [r7]
        ...                    ->  ...
        label: .word 0xoffset  ->  label: .word (0xoffset + pc)

    For each ``ldr`` with a ``Types.Point`` source, the following
    instructions (bounded by ``config.ARM_maxAddPcDist``) are searched for
    an ``add`` of ``pc`` to the same register. The add's address plus 4 is
    remembered under the ldr's source operand, and the add's ``pc`` operand
    is replaced by ``Types.Normal(0)``. When the inline data instruction at
    the remembered address is reached, the stored value is added to its
    word, truncated to 32 bits.

    :param instrs: instruction list, modified in place
    """
    pending = {}
    total = len(instrs)
    for i, cur in enumerate(instrs):
        if isinstance(cur, Types.TripleInstr) \
                and cur[0].lower().startswith('ldr') \
                and isinstance(cur[2], Types.Point):
            j = i + 1
            while j - i < config.ARM_maxAddPcDist and j < total:
                cand = instrs[j]
                if isinstance(cand, Types.TripleInstr) \
                        and cand[0].lower().startswith('add') \
                        and cand[1] == cur[1] \
                        and isinstance(cand[2], Types.RegClass) \
                        and cand[2].lower() == 'pc':
                    pending[cur[2]] = get_loc(cand).loc_addr + 4
                    instrs[j] = Types.TripleInstr(
                        (cand[0], cand[1], Types.Normal(0), cand[3], cand[4]))
                    break
                j += 1
        elif isinstance(cur, Types.DoubleInstr) \
                and isinstance(cur[0], Types.InlineData):
            pc_value = pending.pop(get_loc(cur).loc_addr, None)
            if pc_value:
                patched = Types.Point((cur[1] + pc_value) & 0xFFFFFFFF)
                instrs[i] = Types.DoubleInstr(
                    (cur[0], patched, cur[2], cur[3]))
def visit(self, instrs):
    """
    Visit every instruction and rebuild the sorted basic block summary

    After all instructions went through ``self.vinst``, the block list is
    updated and ``self.bl_sort`` is set to one ``tb`` entry per block in
    ``self.bl`` (name, begin address, end address), ordered by begin address.

    :param instrs: instruction list
    :return: list of the values returned by ``self.vinst``
    """
    self.end_loc = get_loc(instrs[-1])
    visited = [self.vinst(ins) for ins in instrs]
    self.update_bl()
    self.fb_list(self.bl)
    by_address = sorted(self.bl,
                        key=lambda b: b.bblock_begin_loc.loc_addr)
    self.bl_sort = [
        tb(b.bblock_name, b.bblock_begin_loc.loc_addr,
           b.bblock_end_loc.loc_addr) for b in by_address
    ]
    return visited
def help_entry(self, i):
    """
    Record instruction ``i`` as the entry of a new basic block

    If an entry is already pending, the block collected so far is stored at
    the front of ``self.bb_list`` first, named from ``cfg.counter``.

    :param i: entry instruction
    :return: the instruction, unchanged
    """
    if self.found_entry:
        name = 'BB_' + str(cfg.counter)
        cfg.counter += 1
        pending = Types.Bblock('', name, self.entry_loc, self.last_loc,
                               self.entry_instr)
        self.bb_list.insert(0, pending)
    self.found_entry = True
    self.entry_instr = i
    # A fresh block starts and (so far) ends at its entry instruction
    self.entry_loc = self.last_loc = get_loc(i)
    return i
def build_func_info(self):
    """
    Evaluate function boundaries

    The start addresses are deduplicated and filtered by section. Each pair
    of consecutive start addresses is then published through ``self.baddr``
    and ``self.eaddr`` before calling ``self.update()``. The last function
    ends at the address of ``self.instrs[0]``.
    NOTE(review): presumably the instruction list is stored in reverse
    address order here - confirm against the caller.
    """
    self.func_begins = unify_int_list(self.func_begins)
    self.func_begins = self.filter_addr_by_secs(self.func_begins)
    pair_count = len(self.func_begins) - 1
    for idx in range(pair_count):
        self.baddr, self.eaddr = (self.func_begins[idx],
                                  self.func_begins[idx + 1])
        self.update()
    final_begin = self.func_begins[-1]
    self.baddr = final_begin
    self.eaddr = get_loc(self.instrs[0]).loc_addr
    self.update()
def get_framecookiecheck(curr_instr, funcID):
    """
    Get code block for frame cookie check

    The block is built from the ``framecookiecheck`` template, whose entry at
    index 3 receives the function ID as its operand at index 2. The original
    instruction is appended as the last element, keeping its address and
    visibility but with an empty label.

    :param curr_instr: indirect branch instruction
    :param funcID: generated function ID
    :return: frame cookie check instruction list
    """
    loc = get_loc(curr_instr)
    block = list(framecookiecheck)
    # list() above is only a shallow copy: patch a private copy of the entry
    # so the function ID is not written into the shared template
    patched = list(block[3])
    patched[2] = Types.Normal(funcID)
    block[3] = Types.TripleInstr(patched)
    return set_inlineblocklocation(loc, block) + \
        [set_loc(curr_instr, Types.Loc('', loc.loc_addr, loc.loc_visible))]
def get_framecookiehead(curr_instr, funcID):
    """
    Get function header code block for frame cookie insertion

    The block is ``returnenc`` followed by the ``framecookiehead`` template,
    whose entry at index 5 receives the function ID as its operand at
    index 2. The original instruction is appended as the last element,
    keeping its address and visibility but with an empty label.

    :param curr_instr: function entry instruction
    :param funcID: generated function ID
    :return: frame cookie insertion instruction list
    """
    loc = get_loc(curr_instr)
    block = list(framecookiehead)
    # list() above is only a shallow copy: patch a private copy of the entry
    # so the function ID is not written into the shared template
    patched = list(block[5])
    patched[2] = Types.Normal(funcID)
    block[5] = Types.TripleInstr(patched)
    return set_inlineblocklocation(loc, returnenc + block) + \
        [set_loc(curr_instr, Types.Loc('', loc.loc_addr, loc.loc_visible))]
def remove_its(self):
    """
    Remove IT blocks from ARM code

    Instructions are scanned until one lies beyond ``self.itremlimit``. Each
    instruction whose opcode starts with ``IT`` is replaced, together with
    the instructions after it, by ``inlining.translate_it_block``. The
    replaced slice is one element longer than the stripped opcode.
    """
    pos = 0
    while pos < len(self.instrs):
        ins = self.instrs[pos]
        if get_loc(ins).loc_addr > self.itremlimit:
            break
        opcode = ins[0]
        if opcode.upper().startswith('IT'):
            end = pos + len(opcode.strip()) + 1
            self.instrs[pos:end] = inlining.translate_it_block(
                self.instrs[pos:end])
        pos += 1
def rewrite_instr(self):
    """
    Rewrite x86 instructions whose encoding contains bytes encoding ret

    ``movnti`` is turned into a plain ``mov`` with the same operands, and
    ``bswap`` on EDX/EBX/RDX/RBX is replaced by the sequence returned from
    ``inlining.bswapsub``. All other instructions are left untouched.
    """
    unsafe_bswap_regs = {'EDX', 'EBX', 'RDX', 'RBX'}
    pos = 0
    while pos < len(self.instrs):
        ins = self.instrs[pos]
        opcode = ins[0].upper()
        if opcode == 'MOVNTI':
            # Same instruction type, only the opcode changes
            self.instrs[pos] = type(ins)(('mov', ) + ins[1:])
        elif opcode == 'BSWAP' and ins[1].upper() in unsafe_bswap_regs:
            self.instrs[pos:pos + 1] = inlining.bswapsub(ins[1], get_loc(ins))
        pos += 1
def addxorcanary(self, i, func):
    """
    Apply return address encryption

    A block from ``inlining.get_returnenc`` replaces the function's first
    instruction (only when no indirect calls are recorded for the function)
    and every instruction whose address is listed in ``self.rets`` for the
    function. Functions whose start address is in ``self.avoid`` are skipped.

    :param i: starting instruction index
    :param func: current function
    :return: instruction index after last inserted block
    """
    if func.func_begin_addr in self.avoid:
        return i + 1
    if len(self.indcalls[func.func_begin_addr]) == 0:
        header = inlining.get_returnenc(self.instrs[i])
        self.instrs[i:i + 1] = header
        # Continue from the last element of the inserted header
        i += len(header) - 1
        popcookie = False
    else:
        # No header is inserted here; the return blocks are built with
        # popcookie=True instead
        popcookie = True
    for t in self.rets[func.func_begin_addr]:
        # Advance to the instruction located at return address t
        while get_loc(self.instrs[i]).loc_addr != t:
            i += 1
        if ELF_utils.elf_arm(
        ) and self.instrs[i][0][-2:] in Types.CondSuff:
            # Handle somehow IT blocks
            # Look back at most four instructions for the opening IT opcode
            itlen = 0
            while not self.instrs[i - itlen][0].upper().startswith(
                    'IT') and itlen < 5:
                itlen += 1
            if itlen < 5:
                i -= itlen
                # Same slice length rule as remove_its: opcode length + 1
                j = len(self.instrs[i][0].strip()) + 1
                self.instrs[i:i + j] = inlining.translate_it_block(
                    self.instrs[i:i + j])
                # The list changed: find the return instruction again
                while get_loc(self.instrs[i]).loc_addr != t:
                    i += 1
        footer = inlining.get_returnenc(self.instrs[i], popcookie)
        self.instrs[i:i + 1] = footer
        i += len(footer)
    return i
def visit(self, instrlist):
    """
    Check disassembled code

    Addresses of instructions flagged by ``self.invalid_opcode`` or
    ``self.invalid_transfer`` are collected in ``self.locs``. If any exist
    they are printed: as a warning for ``config.ARCH_ARMT``, otherwise
    followed by ``exit()``. Validation then runs on the whole list.

    :param instrlist: list of instruction objects
    """
    self.text_sec_collect()
    flagged = [
        ins for ins in instrlist
        if self.invalid_opcode(ins) or self.invalid_transfer(ins)
    ]
    self.locs = [get_loc(ins).loc_addr for ins in flagged]
    if self.locs:
        hex_locs = [hex(addr) for addr in self.locs]
        if config.arch == config.ARCH_ARMT:
            print(colored(' Warning:', 'yellow') + ' ' +
                  'instructions at this locations were probably misinterpreted:')
            print(' ' + str(hex_locs))
        else:
            print(hex_locs)
            exit()
    self.validate(instrlist)
def visit_heuristic_analysis(self, instrs):
    """
    Reconstruct symbolic information

    On ARM the list goes through ``pcreloffARM``, ``vinst2ARM`` (called with
    ``(index, instruction)`` pairs) and ``doublemovARM``. Otherwise each
    instruction goes through ``vinst2`` with a text-section check callback.
    The addresses parsed from ``self.deslist`` are then put in front of
    ``self.symbol_list``.

    :param instrs: instruction list
    :return: instruction list with labels
    """

    def in_text(ins):
        return self.check_text(get_loc(ins).loc_addr)

    self.instr_list = instrs
    if ELF_utils.elf_arm():
        self.pcreloffARM(instrs)
        instrs = [self.vinst2ARM(indexed) for indexed in enumerate(instrs)]
        self.doublemovARM(instrs)
    else:
        instrs = [self.vinst2(in_text, ins) for ins in instrs]
    # Labels carry the address in hex after the 'x'
    recovered = [int(label.split('x')[1], 16) for label in self.deslist]
    self.symbol_list = recovered + self.symbol_list
    return instrs
def addinlining(self, locations, instrumenter):
    """
    Apply inlining

    The instruction list is walked while tracking the function that contains
    the current address. At the first instruction of a function with a
    non-empty entry in ``locations``, ``instrumenter`` is called and its
    return value becomes the next index to examine.

    :param locations: list of code locations to modify
    :param instrumenter: function applying inlining
    """
    pos = 0
    func_idx = 0
    while pos < len(self.instrs):
        addr = get_loc(self.instrs[pos]).loc_addr
        if addr >= self.funcs[func_idx].func_end_addr \
                and func_idx < len(self.funcs) - 1:
            func_idx += 1
        func = self.funcs[func_idx]
        if addr == func.func_begin_addr and len(locations[addr]) > 0:
            pos = instrumenter(pos, func)
        else:
            pos += 1
def help_exit(self, i):
    """
    Handle instruction ``i`` as the exit of a basic block

    With a pending entry, the block from the entry up to ``i`` is stored at
    the front of ``self.bb_list`` and the entry state is reset. Without one,
    an instruction at the address of ``self.end_loc`` becomes a block of its
    own. Any other instruction only updates ``self.last_loc``.

    :param i: exit instruction
    :return: the instruction, unchanged
    """
    loc = get_loc(i)
    if self.found_entry:
        self.last_loc = loc
        name = 'BB_' + str(cfg.counter)
        cfg.counter += 1
        closed = Types.Bblock('', name, self.entry_loc, self.last_loc,
                              self.entry_instr)
        self.bb_list.insert(0, closed)
        self.found_entry = False
        self.skip_entry = True
    elif loc.loc_addr == self.end_loc.loc_addr:
        name = 'BB_' + str(cfg.counter)
        cfg.counter += 1
        single = Types.Bblock('', name, loc, loc, i)
        self.bb_list.insert(0, single)
    else:
        self.last_loc = loc
    return i
def addframecookie(self, i, func):
    """
    Apply frame cookie

    Functions with no recorded return are skipped. Otherwise the entry
    instruction is replaced by the block from
    ``inlining.get_framecookiehead``, and each address recorded in
    ``self.indcalls`` for the function is replaced by the block from
    ``inlining.get_framecookiecheck``, both using one generated function ID.

    :param i: starting instruction index
    :param func: current function
    :return: instruction index after last inserted block
    """
    entry_addr = func.func_begin_addr
    if len(self.rets[entry_addr]) == 0:
        return i + 1
    func_id = self.generatefuncID()
    pos = i
    header = inlining.get_framecookiehead(self.instrs[pos], func_id)
    self.instrs[pos:pos + 1] = header
    # Continue from the last element of the inserted header
    pos += len(header) - 1
    for target in self.indcalls[entry_addr]:
        while get_loc(self.instrs[pos]).loc_addr != target:
            pos += 1
        check = inlining.get_framecookiecheck(self.instrs[pos], func_id)
        self.instrs[pos:pos + 1] = check
        pos += len(check)
    return pos
def alignvldrARM(self, instrs):
    """
    Insert alignment compiler instruction for vldr targets (ARM)

    ``self.ARMvldrtargets`` is deduplicated and sorted, then walked together
    with the instruction list. An instruction whose address equals the
    current target gets ``.align 2`` prepended to its label. A target that
    has already been passed is dropped, and the walk steps back one
    instruction before continuing.

    The step back is clamped at index 0. Previously a target lower than the
    first instruction address produced index -1, which made the walk read
    the last instruction and skip every remaining target.

    :param instrs: instruction list
    :return: instruction list with alignment directives added
    """
    self.ARMvldrtargets = sorted(set(self.ARMvldrtargets))
    targets = self.ARMvldrtargets
    i = 0
    j = 0
    while i < len(instrs) and j < len(targets):
        loc = get_loc(instrs[i])
        if loc.loc_addr == targets[j]:
            loc.loc_label = '\n.align 2' + loc.loc_label
            instrs[i] = set_loc(instrs[i], loc)
            j += 1
        elif loc.loc_addr > targets[j]:
            # No instruction starts at this target: drop it and step back
            # one instruction, never before the start of the list
            j += 1
            if i > 0:
                i -= 1
            continue
        i += 1
    return instrs
def add_bblock_label(self, bbl, instrs):
    """
    Prefix the first instruction of every basic block with the block name

    Blocks are processed in order of begin address while the instruction
    list is walked once. The label of a matching instruction becomes
    ``<block name>: `` followed by its previous label.

    :param bbl: basic block list
    :param instrs: instruction list, modified in place
    :return: the same instruction list
    :raises Exception: if the instructions run out before every block was
        matched
    """
    blocks = sorted(bbl, key=lambda b: b.bblock_begin_loc.loc_addr)
    pos = 0
    block_idx = 0
    while block_idx != len(blocks):
        if pos == len(instrs):
            raise Exception('failed to add block label')
        ins = instrs[pos]
        block = blocks[block_idx]
        iloc = get_loc(ins)
        if block.bblock_begin_loc.loc_addr == iloc.loc_addr:
            labelled = Types.Loc(block.bblock_name + ': ' + iloc.loc_label,
                                 iloc.loc_addr, iloc.loc_visible)
            instrs[pos] = set_loc(instrs[pos], labelled)
            block_idx += 1
        pos += 1
    return instrs
def validate(self, instrlist):
    """
    Walk the instruction list and update the control flow tracking state

    Instructions whose address is in ``self.locs`` pop an entry from the
    shared ``dis_validator.icf_stack`` into ``self.up_bound`` and set
    ``self.looking_for_cfd``. All other instructions feed ``update_cfd``,
    ``update_cft_stack`` and ``update_cft_track``.

    NOTE(review): block nesting was reconstructed from flattened source -
    confirm against the original layout.

    :param instrlist: list of instruction objects
    """
    self.five_q = simple_queue()
    for (index, i) in enumerate(instrlist):
        loc = get_loc(i)
        if loc.loc_addr in self.locs:
            self.up_bound = dis_validator.icf_stack.pop()
            self.looking_for_cfd = True
        else:
            if len(loc.loc_label) > 1:
                # Instruction carries a label
                self.update_cfd(index, instrlist)
                self.update_cft_track(i)
            else:
                p = i[0]
                # Only double instructions provide an operand to is_icf
                e = i[1] if isinstance(i, Types.DoubleInstr) else None
                if Opcode_utils.call_patt.match(p):  # @UndefinedVariable
                    print "detected call instruction in disassembly validator: " + str(i)
                    # For calls, update_cfd receives the following index
                    self.update_cfd(index + 1, instrlist)
                    if self.is_icf(p, e):
                        self.update_cft_stack(i)
                elif self.is_icf(p, e):
                    self.update_cft_stack(i)
                self.update_cft_track(i)
def findfreebranches(self):
    """
    Locate free branches in program's code for each function

    For every instruction the containing function is tracked by address.
    Instructions with an indirect operand are recorded in ``self.indcalls``.
    Returns are recorded in ``self.rets``, together with jumps whose operand
    is a function, a label not starting with ``S_0``, or a label whose
    address is a key of ``self.rets``.
    """
    func_idx = 0
    curr_func = self.funcs[0]
    for ins in self.instrs:
        addr = get_loc(ins).loc_addr
        if addr >= self.funcs[func_idx].func_end_addr \
                and func_idx < len(self.funcs) - 1:
            func_idx += 1
            curr_func = self.funcs[func_idx]
        if Opcode_utils.is_indirect(ins[1]):
            self.indcalls[curr_func.func_begin_addr].append(addr)
            continue
        if Opcode_utils.is_ret(ins):
            leaves_function = True
        elif Opcode_utils.is_any_jump(ins[0]):
            target = ins[1]
            leaves_function = (
                isinstance(target, Types.Label)
                and (not target.startswith('S_0')
                     or int(target.lstrip('S_'), 16) in self.rets)
            ) or Opcode_utils.is_func(target)
        else:
            leaves_function = False
        if leaves_function:
            self.rets[curr_func.func_begin_addr].append(addr)
def pp_print_instr(i):
    """
    Get instruction string in assembler syntax

    An instruction whose location is not visible is rendered as its location
    only. Otherwise the location is followed by the prefix (last element of
    the instruction) and the operands formatted according to the
    instruction type.

    :param i: instruction
    :return: instruction string
    """
    loc = get_loc(i)
    text = p_location(loc)
    if not loc.loc_visible:
        return text
    text += p_prefix(i[-1])
    if isinstance(i, Types.SingleInstr):
        return text + p_single(i[0])
    if isinstance(i, Types.DoubleInstr):
        return text + p_double(i[0], i[1])
    if isinstance(i, Types.TripleInstr):
        return text + p_triple(i[0], i[1], i[2])
    if isinstance(i, Types.FourInstr):
        return text + p_four(i[0], i[1], i[2], i[3])
    if isinstance(i, Types.FiveInstr):
        return text + p_five(i[0], i[1], i[2], i[3], i[4])
    if ELF_utils.elf_arm() and isinstance(i, Types.CoproInstr):
        return text + p_copro(i)
    return text
def aux(bnl, acc, i):
    # Add the control flow edges leaving instruction i to the front of acc.
    # Each edge is (block name, (edge kind, destination)); the kind is
    # Types.J() or Types.F(), the destination a block name or one of the
    # markers 'RET', 'T', 'INTER'.
    # bnl is passed to self.next_bb to find the block following a given one.
    # Returns acc.
    # NOTE(review): the nesting of the trailing else branch was reconstructed
    # from flattened source - confirm whether it belongs to the outer chain.
    if isinstance(i, Types.SingleInstr) and Opcode_utils.is_ret(
            (i[0], i[1])):
        bn = self.bbn_byloc(get_loc(i).loc_addr)
        acc.insert(0, (bn, (Types.J(), 'RET')))
    elif isinstance(i, Types.DoubleInstr):
        if Opcode_utils.is_indirect(i[1]):
            # Indirect operand: destination is recorded as 'T'
            bn = self.bbn_byloc(get_loc(i).loc_addr)
            acc.insert(0, (bn, (Types.J(), 'T')))
        elif Opcode_utils.is_call(i[0]):
            # Call: one edge to the following block, one 'INTER' edge
            bn = self.bbn_byloc(get_loc(i).loc_addr)
            bn1 = self.next_bb(bnl, bn)
            acc.insert(0, (bn, (Types.J(), bn1)))
            acc.insert(0, (bn, (Types.J(), 'INTER')))
        elif Opcode_utils.is_jmp(i[0]):
            bn = self.bbn_byloc(get_loc(i).loc_addr)
            if Opcode_utils.is_func(i[1]):
                acc.insert(0, (bn, (Types.J(), 'INTER')))
            else:
                en = recover_addr_from_label(p_exp(i[1]))
                if en == -1:
                    # No address could be recovered from the label
                    acc.insert(0, (bn, (Types.J(), 'T')))
                else:
                    dn = self.bbn_byloc(en)
                    acc.insert(0, (bn, (Types.J(), dn)))
        elif Opcode_utils.is_cond_jmp(i[0]):
            if not Opcode_utils.is_func(i[1]):
                # Only the edge to the following block is added here
                bn = self.bbn_byloc(get_loc(i).loc_addr)
                sn = self.next_bb(bnl, bn)
                acc.insert(0, (bn, (Types.F(), sn)))
            else:
                assert (False)
    else:
        # Any other instruction: edge to the following block
        bn = self.bbn_byloc(get_loc(i).loc_addr)
        dn = self.next_bb(bnl, bn)
        acc.insert(0, (bn, (Types.F(), dn)))
    return acc
def update_cft_stack(self, instr):
    """
    Push the location of ``instr`` on the shared ``icf_stack``

    Nothing is pushed while ``self.five_q`` holds an instruction whose
    address is in ``self.locs``.

    :param instr: instruction whose location is pushed
    """

    def is_flagged(queued):
        return get_loc(queued).loc_addr in self.locs

    if self.five_q.exists(is_flagged):
        return
    dis_validator.icf_stack.push(get_loc(instr))
def bb_entry(self, i):
    """
    Tell whether the label of instruction ``i`` contains a colon

    :param i: instruction
    :return: True if ``':'`` occurs in the location label
    """
    label = get_loc(i).loc_label
    return ':' in label