def extract_function_calls_from(f, bb, insn):
    """
    extract the target of the given call instruction.

    yields "calls from" with the resolved target for the supported operand kinds,
    followed by "recursive call" when that target is the address of `f` itself.
    """
    if insn.mnem != "call":
        return

    oper = insn.opers[0]

    if isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper):
        # traditional call via IAT, x32
        target = oper.getOperAddr(insn)
    elif isinstance(oper, envi.archs.i386.disasm.i386PcRelOper):
        # call via thunk on x86,
        # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
        #
        # call to internal function on x64
        # see Lab21-01.exe_:0x140001178
        target = oper.getOperValue(insn)
    elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
        # call via IAT, x64
        target = oper.getOperAddr(insn)
    else:
        # any other operand kind (e.g. a register) has no target we can resolve here
        return

    yield Characteristic("calls from"), target

    if target and target == f.va:
        # the call goes back to the start of the containing function:
        # mark as recursive
        yield Characteristic("recursive call"), target
def extract_insn_segment_access_features(f, bb, insn):
    """
    parse the instruction for access to fs or gs.

    each comma-separated operand is inspected on its own and contributes at most
    one feature ("fs access" takes precedence over "gs access").
    """
    for raw_operand in insn.operands.split(","):
        operand = raw_operand.strip()
        if "fs:" in operand:
            yield Characteristic("fs access"), insn.offset
        elif "gs:" in operand:
            yield Characteristic("gs access"), insn.offset
def extract_insn_segment_access_features(f, bb, insn):
    """
    parse the instruction for access to fs or gs.

    the decision is made from the instruction's prefix name, which must be
    exactly "fs" or "gs" to produce a feature.
    """
    segment = insn.getPrefixName()

    for name, feature in (("fs", "fs access"), ("gs", "gs access")):
        if segment == name:
            yield Characteristic(feature), insn.va
def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """
    parse peb access from the given instruction.

    fs:[0x30] on x86, gs:[0x60] on x64.
    only `push` and `mov` instructions are considered; each operand that matches
    yields one "peb access" feature.
    """
    if insn.mnemonic not in ("push", "mov"):
        return

    for raw_operand in insn.operands.split(","):
        operand = raw_operand.strip()

        x86_peb = "fs:" in operand and "0x30" in operand
        x64_peb = "gs:" in operand and "0x60" in operand

        if x86_peb or x64_peb:
            yield Characteristic("peb access"), insn.offset
def extract_function_calls_from(f, bb, insn):
    """
    extract the targets of the given call instruction.

    every outgoing reference recorded for the instruction yields "calls from";
    a reference back to the function's own offset additionally yields "recursive call".
    instructions recorded as API references yield "calls from" with the
    instruction's own offset.
    """
    if insn.mnemonic != "call":
        return

    if insn.offset in f.outrefs:
        for callee in f.outrefs[insn.offset]:
            yield Characteristic("calls from"), callee

            if callee == f.offset:
                # the call goes back to the containing function:
                # mark as recursive
                yield Characteristic("recursive call"), callee

    if insn.offset in f.apirefs:
        yield Characteristic("calls from"), insn.offset
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
    """
    parse call $+5 instruction from the given instruction.

    the call target is compared against the instruction address plus five.
    """
    if insn.mnem != "call":
        return

    oper = insn.opers[0]

    # pc-relative call: the target is the operand value
    if isinstance(oper, envi.archs.i386.disasm.i386PcRelOper):
        if insn.va + 5 == oper.getOperValue(insn):
            yield Characteristic("call $+5"), insn.va

    # call through memory: the comparison uses the operand address
    memory_opers = (
        envi.archs.i386.disasm.i386ImmMemOper,
        envi.archs.amd64.disasm.Amd64RipRelOper,
    )
    if isinstance(oper, memory_opers):
        if insn.va + 5 == oper.getOperAddr(insn):
            yield Characteristic("call $+5"), insn.va
def extract_insn_cross_section_cflow(f, bb, insn):
    """
    inspect the instruction for a CALL or JMP that crosses section boundaries.

    API references are ignored. targets come from the function's recorded
    outgoing references when present, otherwise from a literal `0x...` operand.
    """
    if insn.mnemonic not in ("call", "jmp"):
        return

    if insn.offset in f.apirefs:
        # ignore API calls
        return

    smda_report = insn.smda_function.smda_report

    if insn.offset in f.outrefs:
        for destination in f.outrefs[insn.offset]:
            if smda_report.getSection(insn.offset) != smda_report.getSection(destination):
                yield Characteristic("cross section flow"), insn.offset
    elif insn.operands.startswith("0x"):
        destination = int(insn.operands, 16)
        if smda_report.getSection(insn.offset) != smda_report.getSection(destination):
            yield Characteristic("cross section flow"), insn.offset
def extract_recursive_call(f):
    """extract recursive function call indicator

    yields "recursive call" at the function start when the IDA helper reports
    the function as recursive

    args:
        f (IDA func_t)
    """
    if not capa.features.extractors.ida.helpers.is_function_recursive(f):
        return

    yield Characteristic("recursive call"), f.start_ea
def extract_function_calls_to(f):
    """extract callers to a function

    yields one "calls to" feature per code reference to the function start

    args:
        f (IDA func_t)
    """
    callers = idautils.CodeRefsTo(f.start_ea, True)
    for caller_ea in callers:
        yield Characteristic("calls to"), caller_ea
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """
    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974
    """
    if insn.mnem != "call":
        return

    # Checks below work for x86 and x64
    indirect_opers = (
        # call edx
        envi.archs.i386.disasm.i386RegOper,
        # call dword ptr [eax+50h]
        envi.archs.i386.disasm.i386RegMemOper,
        # call qword ptr [rsp+78h]
        envi.archs.i386.disasm.i386SibOper,
    )

    if isinstance(insn.opers[0], indirect_opers):
        yield Characteristic("indirect call"), insn.va
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
    """parse call $+5 instruction from the given instruction

    a call whose first operand value equals the instruction address plus five
    yields "call $+5"

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    call_target = idc.get_operand_value(insn.ea, 0)
    if call_target == insn.ea + 5:
        yield Characteristic("call $+5"), insn.ea
def extract_bb_stackstring(f, bb):
    """extract stackstring indicators from basic block

    yields "stack string" at the block start when `bb_contains_stackstring`
    reports a match

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
    """
    if not bb_contains_stackstring(f, bb):
        return

    yield Characteristic("stack string"), bb.start_ea
def extract_bb_tight_loop(f, bb):
    """extract tight loop indicators from a basic block

    yields "tight loop" at the block start when the IDA helper reports the
    block as a tight loop

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
    """
    if not capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bb):
        return

    yield Characteristic("tight loop"), bb.start_ea
def extract_insn_segment_access_features(f, bb, insn):
    """parse instruction fs or gs access

    TODO: IDA should be able to do this...

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    # try to optimize for only memory references:
    # skip the disassembly text lookup unless some operand is a memory operand
    if not any(op.type == idaapi.o_mem for op in insn.ops):
        return

    disasm = idc.GetDisasm(insn.ea)

    # TODO: replace the text matching below with proper IDA
    for needle, feature in ((" fs:", "fs access"), (" gs:", "gs access")):
        if needle in disasm:
            yield Characteristic(feature), insn.ea
def extract_file_embedded_pe():
    """extract embedded PE features

    walks every non-header segment and yields "embedded pe" for each hit
    reported by `check_segment_for_pe`

    IDA must load resource sections for this to be complete
        - '-R' from console
        - Check 'Load resource sections' when opening binary in IDA manually
    """
    segments = capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True)

    for segment in segments:
        for pe_ea, _ in check_segment_for_pe(segment):
            yield Characteristic("embedded pe"), pe_ea
def extract_function_loop(f):
    """
    parse if a function has a loop

    the function's block references are flattened into (from, to) edges, which
    are then handed to the loop detector.
    """
    edges = [(source, destination) for source, destinations in f.blockrefs.items() for destination in destinations]

    # no edges, no loop: skip the loop detector entirely
    if not edges:
        return

    if loops.has_loop(edges):
        yield Characteristic("loop"), f.offset
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
    """
    parse call $+5 instruction from the given instruction.

    only calls whose operand text is a literal `0x...` address are considered;
    the address must equal the instruction offset plus five.
    """
    if insn.mnemonic != "call" or not insn.operands.startswith("0x"):
        return

    call_target = int(insn.operands, 16)
    if call_target == insn.offset + 5:
        yield Characteristic("call $+5"), insn.offset
def extract_function_calls_from(f, bb, insn):
    """extract functions calls from features

    most relevant at the function scope, however, its most efficient to extract at the instruction scope

    yields one "calls from" feature per code reference leaving a call instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    for callee_ea in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), callee_ea
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])

    does not include calls like => call ds:dword_ABD4974

    most relevant at the function or basic block scope;
    however, its most efficient to extract at the instruction scope

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    # operand types of the first operand that count as an indirect call
    indirect_operand_types = (idc.o_reg, idc.o_phrase, idc.o_displ)

    if idc.get_operand_type(insn.ea, 0) in indirect_operand_types:
        yield Characteristic("indirect call"), insn.ea
def extract_insn_nzxor_characteristic_features(f, bb, insn):
    """
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.
    """
    xor_mnemonics = ("xor", "xorpd", "xorps", "pxor")
    if insn.mnem not in xor_mnemonics:
        return

    # identical operands: the xor just zeroes its destination
    if insn.opers[0] == insn.opers[1] or is_security_cookie(f, bb, insn):
        return

    yield Characteristic("nzxor"), insn.va
def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """
    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64

    only `push` and `mov` instructions are considered; each matching operand
    yields one "peb access" feature.
    """
    # TODO handle where fs/gs are loaded into a register or onto the stack and used later

    if insn.mnem not in ("push", "mov"):
        return

    prefix = insn.getPrefixName()

    if "fs" in prefix:
        for oper in insn.opers:
            # examples
            #
            #     IDA: mov     eax, large fs:30h
            #     viv: fs: mov eax,dword [0x00000030]  ; i386ImmMemOper
            #     IDA: push    large dword ptr fs:30h
            #     viv: fs: push dword [0x00000030]
            #          fs: push dword [eax + 0x30]  ; i386RegMemOper, with eax = 0
            via_register = isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30
            via_immediate = isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30

            if via_register or via_immediate:
                yield Characteristic("peb access"), insn.va

    elif "gs" in prefix:
        for oper in insn.opers:
            via_register = isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60
            via_sib = isinstance(oper, envi.archs.amd64.disasm.i386SibOper) and oper.imm == 0x60
            via_immediate = isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60

            if via_register or via_sib or via_immediate:
                yield Characteristic("peb access"), insn.va
def extract_function_loop(f):
    """extract loop indicators from a function

    builds the control flow graph as (block start, successor start) edges and
    yields "loop" at the function start when the loop detector finds one

    args:
        f (IDA func_t)
    """
    # construct control flow graph
    edges = [(block.start_ea, successor.start_ea) for block in idaapi.FlowChart(f) for successor in block.succs()]

    if loops.has_loop(edges):
        yield Characteristic("loop"), f.start_ea
def extract_function_loop(f):
    """
    parse if a function has a loop

    edges are collected from the branches of the last instruction of each
    non-empty basic block and handed to the loop detector.
    """
    edges = []

    for block in f.basic_blocks:
        if len(block.instructions) == 0:
            continue

        last_insn = block.instructions[-1]
        for branch_va, branch_flags in last_insn.getBranches():
            # vivisect does not set branch flags for non-conditional jmp so add explicit check
            if branch_flags & (envi.BR_COND | envi.BR_FALL | envi.BR_TABLE) or last_insn.mnem == "jmp":
                edges.append((block.va, branch_va))

    # no edges, no loop: skip the loop detector entirely
    if not edges:
        return

    if loops.has_loop(edges):
        yield Characteristic("loop"), f.va
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """
    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

    this is a generator: it yields at most one ("indirect call", offset) pair and
    otherwise finishes without a value.
    """
    if insn.mnemonic != "call":
        return

    operands = insn.operands

    # direct call to a literal address, or call through a fixed 32-bit memory slot:
    # neither is an indirect call
    if operands.startswith(("0x", "dword ptr [0x")):
        return

    # rip-relative call through a 64-bit memory slot is not an indirect call either
    if "qword ptr" in operands and "rip" in operands:
        return

    # call edx
    # call dword ptr [eax+50h]
    # call qword ptr [rsp+78h]
    yield Characteristic("indirect call"), insn.offset
def extract_insn_nzxor_characteristic_features(f, bb, insn):
    """parse instruction non-zeroing XOR instruction

    ignore expected non-zeroing XORs, e.g. security cookies

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    xor_itypes = (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor)
    if insn.itype not in xor_itypes:
        return

    # identical operands: the xor just zeroes its destination
    zeroing = capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2)
    if zeroing or is_nzxor_stack_cookie(f, bb, insn):
        return

    yield Characteristic("nzxor"), insn.ea
def extract_insn_nzxor_characteristic_features(f, bb, insn):
    """
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.
    """
    xor_mnemonics = ("xor", "xorpd", "xorps", "pxor")
    if insn.mnemonic not in xor_mnemonics:
        return

    pieces = [piece.strip() for piece in insn.operands.split(",")]
    destination, source = pieces[0], pieces[1]

    # identical operand text: the xor just zeroes its destination
    if destination == source or is_security_cookie(f, bb, insn):
        return

    yield Characteristic("nzxor"), insn.offset
def extract_insn_cross_section_cflow(f, bb, insn):
    """inspect the instruction for a CALL or JMP that crosses section boundaries

    every code reference leaving the instruction is checked; references to
    imports and references without a segment are skipped

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    for target_ea in idautils.CodeRefsFrom(insn.ea, False):
        if target_ea in get_imports(f.ctx).keys():
            # ignore API calls
            continue

        target_segment = idaapi.getseg(target_ea)
        if not target_segment:
            # handle IDA API bug
            continue

        if target_segment == idaapi.getseg(insn.ea):
            # source and target share a segment: nothing to report
            continue

        yield Characteristic("cross section flow"), insn.ea
def extract_insn_peb_access_characteristic_features(f, bb, insn):
    """parse instruction peb access

    fs:[0x30] on x86, gs:[0x60] on x64

    TODO:
        IDA should be able to do this..

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
        return

    # try to optimize for only memory references:
    # skip the disassembly text lookup unless some operand is a memory operand
    if not any(op.type == idaapi.o_mem for op in insn.ops):
        return

    disasm = idc.GetDisasm(insn.ea)

    # TODO: replace the text matching below with proper IDA
    if any(needle in disasm for needle in (" fs:30h", " gs:60h")):
        yield Characteristic("peb access"), insn.ea
def extract_insn_cross_section_cflow(f, bb, insn):
    """
    inspect the instruction for a CALL or JMP that crosses section boundaries.

    every branch target of the instruction is compared against the section of
    the instruction itself; fallthrough branches, unresolved targets and calls
    to imports are skipped.
    """
    for branch_va, branch_flags in insn.getBranches():
        # va may be none for dynamic branches that haven't been resolved, such as `jmp eax`.
        if branch_va is None or branch_flags & envi.BR_FALL:
            continue

        try:
            # skip calls to imports:
            #   32-bit: call through an immediate memory operand
            #   64-bit: call through a rip-relative operand
            import_call_opers = (
                envi.archs.i386.disasm.i386ImmMemOper,
                envi.archs.amd64.disasm.Amd64RipRelOper,
            )
            if insn.mnem == "call" and isinstance(insn.opers[0], import_call_opers):
                slot = insn.opers[0].getOperAddr(insn)
                if slot in get_imports(f.vw):
                    continue

            if get_section(f.vw, insn.va) != get_section(f.vw, branch_va):
                yield Characteristic("cross section flow"), insn.va

        except KeyError:
            # a KeyError raised by any of the lookups above: skip this branch
            continue
def extract_file_embedded_pe(pe, file_path):
    """
    extract embedded PE features from the file at `file_path`.

    the whole file is read into memory and handed to the PE carver; each carved
    hit yields "embedded pe" with the offset the carver reported.
    """
    with open(file_path, "rb") as handle:
        contents = handle.read()

    # NOTE(review): the second argument is passed through unchanged; presumably a start offset - confirm against carve_pe
    for pe_offset, _ in capa.features.extractors.helpers.carve_pe(contents, 1):
        yield Characteristic("embedded pe"), pe_offset