def test_logical_operands(self):
    """Logical and/or/xor over boolean constants simplify to a constant."""
    true_sym = symath.symbolic(True)
    false_sym = symath.symbolic(False)

    # (operator name, left operand, right operand, expected simplified value)
    # The rows are checked in order; expected values are given both as
    # symbolic constants and as plain Python booleans.
    cases = (
        ('LogicalAnd', true_sym, false_sym, false_sym),
        ('LogicalAnd', true_sym, false_sym, False),
        ('LogicalOr', true_sym, false_sym, true_sym),
        ('LogicalOr', true_sym, false_sym, True),
        ('LogicalXor', true_sym, true_sym, False),
        ('LogicalXor', false_sym, true_sym, True),
        ('LogicalXor', false_sym, false_sym, False),
    )
    for op_name, lhs, rhs, expected in cases:
        operator = getattr(symath.stdops, op_name)
        self.assertEqual(operator(lhs, rhs).simplify(), expected)
def reg_size(reg):
    """Return the size of ``reg`` as a symbolic number.

    Named registers map to a fixed width (2, 1 or 4).  If ``reg`` matches
    ``DEREF(a, b)`` the value bound to the first DEREF argument is returned.

    Raises:
        BaseException: if ``reg`` is neither a listed register nor a DEREF.
    """
    size_wild, addr_wild = wilds('a b')
    matched = WildResults()

    # Groups are tested in this order: width 2, width 1, then width 4.
    width_groups = (
        (2, (AX, BX, CX, DX, DI, SI, BP, SP)),
        (1, (AL, AH, BL, BH, CL, CH, DL, DH)),
        (4, (EAX, EBX, ECX, EDX, EDI, ESI, EBP, ESP, EFLAGS)),
    )
    for width, group in width_groups:
        if reg in group:
            return symbolic(width)

    if reg.match(DEREF(size_wild, addr_wild), matched):
        return matched.a

    raise BaseException('Unknown Register %s' % reg)
def resolve_op(ist, opnum):
    """Translate operand ``opnum`` of instruction ``ist`` into a symbolic value.

    Args:
        ist: decoded instruction exposing ``operands``, ``mnemonic`` and
            ``address``; each operand is expected to carry ``type`` and,
            depending on the type, ``index``/``base``/``scale``/``disp``/
            ``value``/``op_size``.
        opnum: index into ``ist.operands``.

    Returns:
        * 'AbsoluteMemory': ``DEREF(op_size, index*scale + base + disp)``,
          or just the address expression when the mnemonic is ``lea``.
        * 'Register': the symbol named after the lower-cased register.
        * 'Immediate': the symbolic form of the immediate value.
        * 'AbsoluteMemoryAddress': ``DEREF(op_size, disp)``.

    Raises:
        BaseException: for any other operand type.
    """
    op = ist.operands[opnum]

    if op.type == 'AbsoluteMemory':
        # Build the effective address piece by piece; absent parts are None.
        rv = 0
        if op.index is not None:
            rv += symbolic.symbols(distorm3.Registers[op.index].lower()) * op.scale
        if op.base is not None:
            rv += symbolic.symbols(distorm3.Registers[op.base].lower())
        if op.disp is not None:
            rv += op.disp

        # lea only computes the address, so it is returned without a DEREF
        # (and, as before, without simplification).
        if ist.mnemonic.lower() == 'lea':
            return rv
        return DEREF(op.op_size, rv.simplify())

    if op.type == 'Register':
        return symbolic.symbols(distorm3.Registers[op.index].lower())

    if op.type == 'Immediate':
        return symbolic.symbolic(op.value)

    if op.type == 'AbsoluteMemoryAddress':
        return DEREF(op.op_size, op.disp)

    raise BaseException("Unknown Operand Type %s" % (op.type))
def _get_operand_sym(op):
    """Convert a decoded operand into its symbolic representation.

    Immediates become symbolic constants, registers become symbols named
    after the upper-cased register, and both memory forms are wrapped in
    ``DEREF(op.op_size, address)``.

    Raises:
        BaseException: if ``op.type`` is not one of the four handled types.
    """
    if op.type == 'Immediate':
        return symath.symbolic(op.value)

    if op.type == 'AbsoluteMemoryAddress':
        return DEREF(op.op_size, op.disp)

    if op.type == 'Register':
        return symath.symbols(distorm3.Registers[op.index].upper())

    if op.type != 'AbsoluteMemory':
        raise BaseException("Unknown operand type %s (%s)" % (op.type, op))

    # Effective address: index * scale + base + displacement, skipping any
    # component that is None.
    address = 0
    if op.index is not None:
        index_reg = symath.symbols(distorm3.Registers[op.index].upper())
        address += index_reg * op.scale
    if op.base is not None:
        address += symath.symbols(distorm3.Registers[op.base].upper())
    if op.disp is not None:
        address += symath.symbolic(op.disp)
    return DEREF(op.op_size, address)
def _get_operand_sym(op):
    """Return the symbolic form of a single decoded operand.

    Handles the operand types 'Immediate', 'AbsoluteMemoryAddress',
    'Register' and 'AbsoluteMemory'; anything else raises BaseException.
    """
    def register_symbol(reg_index):
        # Symbol named after the upper-cased register name.
        return symath.symbols(distorm3.Registers[reg_index].upper())

    operand_type = op.type
    if operand_type == 'Immediate':
        result = symath.symbolic(op.value)
    elif operand_type == 'AbsoluteMemoryAddress':
        result = DEREF(op.op_size, op.disp)
    elif operand_type == 'Register':
        result = register_symbol(op.index)
    elif operand_type == 'AbsoluteMemory':
        # Sum whichever of index*scale, base and displacement are present.
        effective = 0
        if op.index is not None:
            effective += register_symbol(op.index) * op.scale
        if op.base is not None:
            effective += register_symbol(op.base)
        if op.disp is not None:
            effective += symath.symbolic(op.disp)
        result = DEREF(op.op_size, effective)
    else:
        raise BaseException("Unknown operand type %s (%s)" % (op.type, op))
    return result
def decode(ea=None):
    """Decode the instruction at ``ea`` into a symbolic expression.

    Args:
        ea: address to decode; defaults to ``idc.ScreenEA()``.

    Returns:
        None if IDA cannot decode an instruction at ``ea``.  For a ``call``
        a ``Call(target, spdiff, address)`` object, where ``spdiff`` is
        ``idc.GetSpDiff`` just past the instruction (0 when that is None).
        Otherwise the lower-cased mnemonic applied, as a symbolic function,
        to the symbolic operands.

    Raises:
        BaseException: if an operand has an unknown type.
        Exception: whatever ``Call`` raises is re-raised after a message is
            printed.
    """
    if ea is None:
        ea = idc.ScreenEA()

    ida_ist = idautils.DecodeInstruction(ea)
    if ida_ist is None:
        return None

    raw_bytes = ''.join(chr(idc.Byte(ea + offset)) for offset in range(ida_ist.size))
    ist = distorm3.Decompose(ea, raw_bytes)[0]

    # distorm doesn't decode the operand logical size ie.. byte ptr, so use IDA for that.
    # The IDA decode from above is reused instead of decoding the same
    # address again for every operand.
    for i, operand in enumerate(ist.operands):
        operand.op_size = op_size(ida_ist[i])

    def _get_operand_sym(op):
        # Map one distorm operand to its symbolic representation.
        if op.type == 'Immediate':
            return symath.symbolic(op.value)
        elif op.type == 'AbsoluteMemoryAddress':
            return DEREF(op.op_size, op.disp)
        elif op.type == 'Register':
            return symath.symbols(distorm3.Registers[op.index].upper())
        elif op.type == 'AbsoluteMemory':
            # index * scale + base + displacement, skipping absent parts
            rv = 0
            if op.index is not None:
                rv += symath.symbols(distorm3.Registers[op.index].upper()) * op.scale
            if op.base is not None:
                rv += symath.symbols(distorm3.Registers[op.base].upper())
            if op.disp is not None:
                rv += symath.symbolic(op.disp)
            return DEREF(op.op_size, rv)
        else:
            raise BaseException("Unknown operand type %s (%s)" % (op.type, op))

    args = [_get_operand_sym(operand) for operand in ist.operands]

    if ist.mnemonic.lower() == 'call':
        spdiff = idc.GetSpDiff(ist.address + ist.size)
        if spdiff is None:
            spdiff = 0

        try:
            return Call(args[0], spdiff, ist.address)
        except Exception:
            print('failed to wrap call @%x' % (ist.address))
            # bare raise keeps the original traceback
            raise
    else:
        return symath.symbolic(ist.mnemonic.lower())(*args)
def decode(ea=None):
    """Turn the instruction at ``ea`` into a symbolic expression.

    Args:
        ea: instruction address; ``idc.ScreenEA()`` is used when omitted.

    Returns:
        None when ``idautils.DecodeInstruction`` yields nothing for ``ea``;
        a ``Call`` wrapper for ``call`` instructions; otherwise
        ``symbolic(mnemonic)(*operands)`` with a lower-cased mnemonic.

    Raises:
        BaseException: on an operand type that is not handled.
        Exception: failures from ``Call`` are reported on stdout and then
            propagated unchanged.
    """
    if ea is None:
        ea = idc.ScreenEA()

    ida_insn = idautils.DecodeInstruction(ea)
    if ida_insn is None:
        return None

    _bytes = ''.join(chr(idc.Byte(ea + x)) for x in range(ida_insn.size))
    ist = distorm3.Decompose(ea, _bytes)[0]

    # distorm doesn't decode the operand logical size ie.. byte ptr, so use IDA for that.
    # ida_insn already holds IDA's view of this address, so it is indexed
    # directly rather than being decoded once more per operand.
    for position, distorm_op in enumerate(ist.operands):
        distorm_op.op_size = op_size(ida_insn[position])

    def _get_operand_sym(op):
        # Symbolic representation of a single distorm operand.
        if op.type == 'Immediate':
            return symath.symbolic(op.value)
        if op.type == 'AbsoluteMemoryAddress':
            return DEREF(op.op_size, op.disp)
        if op.type == 'Register':
            return symath.symbols(distorm3.Registers[op.index].upper())
        if op.type == 'AbsoluteMemory':
            rv = 0
            if op.index is not None:
                rv += symath.symbols(distorm3.Registers[op.index].upper()) * op.scale
            if op.base is not None:
                rv += symath.symbols(distorm3.Registers[op.base].upper())
            if op.disp is not None:
                rv += symath.symbolic(op.disp)
            return DEREF(op.op_size, rv)
        raise BaseException("Unknown operand type %s (%s)" % (op.type, op))

    args = [_get_operand_sym(op) for op in ist.operands]

    if ist.mnemonic.lower() != 'call':
        return symath.symbolic(ist.mnemonic.lower())(*args)

    # Stack delta recorded by IDA right after the call; None is treated as 0.
    spdiff = idc.GetSpDiff(ist.address + ist.size)
    if spdiff is None:
        spdiff = 0

    try:
        return Call(args[0], spdiff, ist.address)
    except Exception:
        print('failed to wrap call @%x' % (ist.address))
        # re-raise without rebinding so the traceback is preserved
        raise
def get_coefficient(y, x):
    '''
    Divide y by x and return the quotient.

    Only valid when x is a factor of y (asserted via is_factor) and x is
    not 1.  Products are handled by dividing whichever operand contains x;
    any other two-argument function is handled by dividing both arguments.
    Returns None when y has neither shape.
    '''
    assert is_factor(x, y)
    assert x != 1

    if y == x:
        return symath.symbolic(1)

    a, b, c = symath.wilds('a b c')
    val = symath.WildResults()

    if y.match(a * b, val):
        # Divide the operand that carries x and keep the other one as is.
        if is_factor(x, val.a):
            return get_coefficient(val.a, x) * val.b
        return get_coefficient(val.b, x) * val.a

    if y.match(c(a, b), val):
        return val.c(get_coefficient(val.a, x), get_coefficient(val.b, x))

    return None
def get_coefficient(y, x):
    '''
    Return y / x, assuming x is a factor of y.

    The preconditions (is_factor(x, y) and x != 1) are enforced with
    asserts.  When y matches neither a product nor a two-argument function
    the result is None.
    '''
    assert is_factor(x, y)
    assert x != 1

    a, b, c = symath.wilds('a b c')
    val = symath.WildResults()

    if y == x:
        quotient = symath.symbolic(1)
    elif y.match(a * b, val):
        left, right = val.a, val.b
        # Recurse into the operand that contains x; multiply by the other.
        if is_factor(x, left):
            quotient = get_coefficient(left, x) * right
        else:
            quotient = get_coefficient(right, x) * left
    elif y.match(c(a, b), val):
        # Distribute the division over both arguments of the function.
        first = get_coefficient(val.a, x)
        second = get_coefficient(val.b, x)
        quotient = val.c(first, second)
    else:
        quotient = None
    return quotient
def _cleanup_derefs(exp):
    """Strip redundant masks from dereferences and substitute known values.

    For ``mask & DEREF(size, addr)``, the mask is dropped when it equals the
    full-width mask for that size (0xff/1, 0xffff/2, 0xffffffff/4).  A
    resulting DEREF that is a key of ``known`` is replaced by its value.
    ``known`` is not a parameter; it is taken from the surrounding scope.
    """
    wild_a, wild_b, wild_c = symbolic.wilds('a b c')
    matched = symbolic.WildResults()

    if exp.match(wild_a & DEREF(wild_b, wild_c), matched):
        # (full-width mask, DEREF size) pairs, tried in this order.  Each
        # test looks at the current value of exp, like consecutive ifs.
        for full_mask, width in ((0xff, 0x1), (0xffff, 0x2), (0xffffffff, 0x4)):
            mask_is_full = (exp[1] & full_mask) == symbolic.symbolic(full_mask)
            if mask_is_full and exp[2][1] == symbolic.symbolic(width):
                exp = exp[2]

    if exp[0] == DEREF and exp in known:
        exp = known[exp]

    return exp
def tag_aggregate_complexity():
    """Tag every function with its aggregate complexity and return the totals.

    For each address from ``idautils.Functions()`` the call graph is walked
    in the 'outgoing' direction and the positive complexities
    (``sigs.complexity(graph)[1]``) of the visited functions are summed.
    The simplified sum is stored under the 'aggregate complexity' tag.
    Before the walk, ``strip_edges_to`` is called on the call graph for
    every function that carries a 'reversed' tag.

    Returns:
        dict mapping function address to its aggregate complexity.
    """
    import function
    import idc
    import idautils
    import callgraph
    import symath.graph.signatures as sigs

    call_graph = callgraph.CallGraph(includeImports=False)
    function_graphs = {}
    is_reversed = {}
    totals = {}
    all_functions = set(idautils.Functions())
    complexity_cache = {}

    for ea in all_functions:
        function_graphs[ea] = FunctionGraph(ea)
        is_reversed[ea] = FunctionGraph._tag_val(ea, 'reversed') != None
        if is_reversed[ea]:
            call_graph.strip_edges_to(idc.GetTrueName(ea))

    for ea in all_functions:
        total = symbolic(0)

        for callee_name, _ignored in call_graph.walk(idc.GetTrueName(ea), direction='outgoing'):
            callee = idc.LocByName(callee_name)
            if callee not in function_graphs:
                continue

            # complexity is computed at most once per callee
            if callee not in complexity_cache:
                complexity_cache[callee] = sigs.complexity(function_graphs[callee])[1]

            if complexity_cache[callee] > 0:
                total += complexity_cache[callee]
            else:
                total += 0

        total = total.simplify()
        function.tag(ea, 'aggregate complexity', total)
        totals[ea] = total

    return totals
def _(exp):
    """Map a Wild to ``symath.symbolic(exp.name)``; return anything else as is."""
    return symath.symbolic(exp.name) if isinstance(exp, symath.core.Wild) else exp
def test_symath_imports_symbolic(self):
    """symath.symbolic(3) yields a symath.Number that compares equal to 3."""
    sn = symath.symbolic(3)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(sn, symath.Number)
    self.assertEqual(sn, 3)
def test_subtraction_no_lidentity(self):
    """0 - 1 simplifies to -1: zero must not be dropped as a left identity."""
    difference = symath.symbolic(0) - 1
    simplified = difference.simplify()
    self.assertEqual(simplified, -1)
def test_desymbolic(self):
    """desymbolic passes plain floats through and unwraps symbolic floats."""
    # A plain Python float comes back equal to itself.
    plain = symath.desymbolic(3.0)
    self.assertEqual(plain, 3.0)

    # A symbolic float comes back as exactly the builtin float type.
    wrapped = symath.symbolic(4.0)
    unwrapped = symath.desymbolic(wrapped)
    self.assertEqual(type(unwrapped), float)
def calc(addr=None, graph=None, _loop_headers=None, target=None):
    '''
    calc known values at addr, assuming a blank slate at the top of the loop or function

    Args:
        addr: instruction address; defaults to idc.ScreenEA().
        graph: function graph containing addr; defaults to FunctionGraph(addr).
        _loop_headers: cache of loop headers keyed by graph.start_addr, passed
            along to the recursive calls.
        target: optional register symbol.  When the instruction at addr is
            "mov <target>, <immediate>" the result is just {target: immediate}.

    Returns:
        dict mapping symbolic expressions (registers, DEREFs, eflags) to the
        symbolic values known after the instruction at addr.

    Unless addr is a loop header, the results of all incoming nodes are
    computed recursively and merged with _combine_dicts before the
    instruction at addr is applied.
    '''
    if _loop_headers is None:
        _loop_headers = {}

    if addr is None:
        addr = idc.ScreenEA()

    if graph is None:
        graph = FunctionGraph(addr)

    def _resolve_ops(ist, n):
        # First n operands, resolved: a tuple when n > 1, else a single value.
        rv = [resolve_op(ist, i) for i in range(n)]
        return tuple(rv) if n > 1 else rv[0]

    with memoize.m(algorithms, 'dominate_sets'):
        ds = algorithms.dominate_sets(graph, graph.start_addr)
        if graph.start_addr not in _loop_headers:
            _loop_headers[graph.start_addr] = algorithms.loop_headers(graph, ds, graph.start_addr)
        loop_headers = _loop_headers[graph.start_addr]

    known = {}
    ist = decode(addr)

    # shortcut!!
    if target is not None and \
            ist.mnemonic.lower() == 'mov' and \
            ist.operands[1].type == 'Immediate' and \
            ist.operands[0].type == 'Register' and \
            distorm3.Registers[ist.operands[0].index].lower() == target.name:
        known[target] = symbolic.symbolic(ist.operands[1].value)
        return known

    if addr not in loop_headers:
        for i in graph.nodes[addr].incoming:
            results = calc(i, graph, _loop_headers=_loop_headers, target=target)
            known = _combine_dicts(known, results)

    def _cleanup_derefs(exp):
        # Drop a mask that covers the full width of the DEREF it is applied
        # to, then replace a DEREF that is already known by its value.
        a, b, c = symbolic.wilds('a b c')
        val = symbolic.WildResults()

        if exp.match(a & DEREF(b, c), val):
            if (exp[1] & 0xff) == symbolic.symbolic(0xff) and exp[2][1] == symbolic.symbolic(0x1):
                exp = exp[2]
            if (exp[1] & 0xffff) == symbolic.symbolic(0xffff) and exp[2][1] == symbolic.symbolic(0x2):
                exp = exp[2]
            if (exp[1] & 0xffffffff) == symbolic.symbolic(0xffffffff) and exp[2][1] == symbolic.symbolic(0x4):
                exp = exp[2]

        if exp[0] == DEREF:
            if exp in known:
                exp = known[exp]

        return exp

    def _set(dst, src, extend=False, iscmp=False, ismov=False):
        # Record "dst = src" in known.  The computed value is also stored
        # as eflags; iscmp skips the store to dst, ismov restores eflags.
        dst = dst.simplify()
        src = src.simplify()
        oldflags = known[eflags] if eflags in known else eflags

        if dst[0] == DEREF:
            dst = dst.substitute(known).simplify()

        if src in regmasks:
            mask = regmasks[src][1]
            src = regmasks[src][2]
            src = (src & mask).substitute(known).simplify()
        else:
            src = src.substitute(regmasks).substitute(known).simplify()

        if dst in regmasks:
            mask = regmasks[dst][1]
            dst = regmasks[dst][2]
            mask = _invert_mask(mask)
            known[eflags] = (((dst & mask) | src).walk(_cleanup_derefs)).simplify()
        else:
            known[eflags] = src.walk(_cleanup_derefs).simplify()

        if not iscmp:
            known[dst] = known[eflags]

        if ismov:
            # Restore the flags saved above.  This previously read the
            # undefined name 'oldeflags' and raised NameError.
            known[eflags] = oldflags

    def _dstsrc(istn, fnc, extend=False, iscmp=False):
        # Apply a two-operand instruction when the mnemonic matches.
        if ist.mnemonic.lower() == istn:
            dst, src = _resolve_ops(ist, 2)
            _set(dst, fnc(dst, src), extend=extend, iscmp=iscmp)

    def _oneop(istn, fnc, iscmp=False):
        # Apply a one-operand instruction when the mnemonic matches.
        if ist.mnemonic.lower() == istn:
            x = _resolve_ops(ist, 1)
            _set(x, fnc(x), iscmp=iscmp)

    # arithmetic
    _dstsrc('add', lambda dst, src: dst + src)
    _dstsrc('sub', lambda dst, src: dst - src)
    _dstsrc('cmp', lambda dst, src: dst - src, iscmp=True)
    _dstsrc('mul', lambda dst, src: dst * src)
    _dstsrc('div', lambda dst, src: dst / src)
    _dstsrc('xor', lambda dst, src: dst ^ src)
    _dstsrc('or', lambda dst, src: dst | src)
    _dstsrc('and', lambda dst, src: dst & src)
    _dstsrc('test', lambda dst, src: dst & src, iscmp=True)
    _dstsrc('sar', lambda dst, src: dst >> src)
    _dstsrc('shr', lambda dst, src: dst >> src)
    _dstsrc('sal', lambda dst, src: dst << src)
    _dstsrc('shl', lambda dst, src: dst << src)
    _oneop('inc', lambda x: x + 1)
    _oneop('dec', lambda x: x - 1)

    # mov instructions
    _dstsrc('lea', lambda dst, src: src)  # resolve_op is smart enough to not DEREF lea's
    _dstsrc('mov', lambda dst, src: src)
    _dstsrc('movsx', lambda dst, src: src, extend=True)
    _dstsrc('movzx', lambda dst, src: src, extend=True)

    # stack manipulations instructions
    def _stack(istn, offset, dst=None, src=None):
        # src/dst are zero-argument callables so that operands are only
        # resolved when the mnemonic actually matches.
        if ist.mnemonic.lower() == istn:
            pesp = esp if esp not in known else known[esp]
            if src is not None:
                known[DEREF(ist.operands[0].op_size, pesp+offset).simplify()] = src().substitute(known).walk(_cleanup_derefs).simplify()
            if dst is not None:
                known[dst().simplify()] = DEREF(ist.operands[0].op_size, pesp).walk(_cleanup_derefs).simplify()
            known[esp] = (pesp+offset).simplify()

    _stack('push', -4, src=lambda: _resolve_ops(ist, 1))
    _stack('pop', 4, dst=lambda: _resolve_ops(ist, 1))

    # function calls
    if ist.mnemonic.lower() == 'call':
        fn = _resolve_ops(ist, 1)
        if isinstance(fn, symbolic.Number):
            fn_name = idc.GetFunctionName(int(fn.n))
            if fn_name != '':
                fn = fn_name

        known[eax] = LOOKUP(AT(CALL(fn), ist.address), eax)
        #known[ecx] = LOOKUP(AT(CALL(fn), ist.address), ecx)
        #known[edx] = LOOKUP(AT(CALL(fn), ist.address), edx)

        # GetSpDiff may return None; treat that as "no stack change".
        spdiff = idc.GetSpDiff(ist.address+ist.size)
        if spdiff is None:
            spdiff = 0
        current_esp = known[esp] if esp in known else esp
        known[esp] = (current_esp + spdiff).simplify()

    return known
def __getitem__(self, idx):
    """Look up ``idx`` in ``self.model``.

    The index is first passed through ``symath.symbolic`` and then
    ``_convert``; the converted value is used as the key.
    """
    key = _convert(symath.symbolic(idx))
    return self.model[key]