def extract_insn_bytes_features(f, bb, insn):
    """parse referenced byte sequences

    Every entry of `insn.dr` is converted with `str()` and used as a lookup key
    into two tables hanging off the function object:

      - `f.unit.obj.bin.strings`: a hit is encoded as UTF-16LE and emitted.
      - `f.unit.obj.bin.data`: a hit is base64-decoded and emitted.

    args:
      f: function object; only `f.unit.obj.bin` is consulted.
      bb: basic block containing the instruction (unused).
      insn: instruction; `insn.dr` is iterated and `insn.ea` is reported.

    yields:
      (Bytes, int): the byte sequence feature and the instruction address.

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    for data_ref in insn.dr:
        key = str(data_ref)
        image = f.unit.obj.bin

        # the string table takes priority (ghidra/ida put referenced strings there)
        if key in image.strings:
            text = image.strings[key]
            yield Bytes(text.encode("utf-16le")), insn.ea
            if text:
                # NOTE(review): this ends the whole generator, so any remaining
                # entries of `insn.dr` are never examined - confirm this is intended.
                return

        # otherwise (or when the string was empty) fall back to the referenced data
        if key not in image.data:
            continue

        blob = base64.b64decode(image.data[key])
        yield Bytes(blob), insn.ea
def extract_insn_bytes_features(f, bb, insn):
    """
    parse byte sequence features from the given instruction.

    For each operand of a recognized kind, a candidate address is derived and
    handed to `derefs`; the bytes readable at every address it produces are
    emitted unless they are all zero.

    args:
      f: function object; only `f.vw` is used (passed to `derefs`/`read_bytes`).
      bb: basic block containing the instruction (unused).
      insn: instruction; `insn.mnem`, `insn.opers` and `insn.va` are read.

    yields:
      (Bytes, int): the byte sequence feature and the instruction's `va`.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    if insn.mnem == "call":
        # ignore call instructions.
        # the mnemonic belongs to the instruction, not to an operand,
        # so this is decided once instead of once per operand.
        return

    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            # NOTE(review): the operand itself is passed as the argument here, while
            # the RIP-relative branch below passes `insn` - confirm which is intended.
            target = oper.getOperValue(oper)
        elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
            # handle case like:
            #     movzx   ecx, ds:byte_423258[eax]
            target = oper.disp
        elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
            # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
            target = oper.imm
        elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
            # see: Lab21-01.exe_:0x1400010D3
            target = oper.getOperAddr(insn)
        else:
            # any other operand kind carries no address we know how to follow
            continue

        # use a distinct name for the loop variable so that the operand's
        # candidate address is not overwritten while iterating.
        for va in derefs(f.vw, target):
            try:
                buf = read_bytes(f.vw, va)
            except envi.SegmentationViolation:
                # address is not readable; try the next one
                continue

            if capa.features.extractors.helpers.all_zeros(buf):
                continue

            yield Bytes(buf), insn.va
def extract_insn_bytes_features(f, bb, insn):
    """
    parse byte sequence features from the given instruction.

    Each data reference reported by `insn.getDataRefs()` is expanded through
    `derefs`, and the bytes read at every resulting address are emitted.

    args:
      f: function object; only `f.smda_report` is used (passed to `derefs`/`read_bytes`).
      bb: basic block containing the instruction (unused).
      insn: instruction; `insn.getDataRefs()` and `insn.offset` are read.

    yields:
      (Bytes, int): the byte sequence feature and the instruction's `offset`.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    for data_ref in insn.getDataRefs():
        for address in derefs(f.smda_report, data_ref):
            blob = read_bytes(f.smda_report, address)
            # emit only when the read produced something (not None) that is not all zeros
            if blob is not None and not capa.features.extractors.helpers.all_zeros(blob):
                yield Bytes(blob), insn.offset
def extract_insn_bytes_features(f, bb, insn):
    """
    parse referenced byte sequences

    Reads up to MAX_BYTES_FEATURE_SIZE bytes at every data reference that
    originates from the instruction and emits each non-empty, non-zero result.

    args:
        f (IDA func_t): unused here
        bb (IDA BasicBlock): unused here
        insn (IDA insn_t): instruction to inspect; its `ea` is reported

    yields:
        (Bytes, int): the byte sequence feature and the instruction address.

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    # call instructions are skipped entirely
    if idaapi.is_call_insn(insn):
        return

    for target_ea in idautils.DataRefsFrom(insn.ea):
        data = capa.features.extractors.ida.helpers.read_bytes_at(target_ea, MAX_BYTES_FEATURE_SIZE)

        # nothing could be read at the referenced address
        if not data:
            continue

        # zero-filled regions are not reported
        if capa.features.extractors.helpers.all_zeros(data):
            continue

        yield Bytes(data), insn.ea
def extract_insn_bytes_features(f, bb, insn):
    """parse referenced byte sequences

    Resolves a single data reference for the instruction and, when it differs
    from the instruction's own address, reads up to MAX_BYTES_FEATURE_SIZE
    bytes there and emits them if they are non-empty and not all zeros.

    args:
        f (IDA func_t): unused here
        bb (IDA BasicBlock): unused here
        insn (IDA insn_t): instruction to inspect; its `ea` is reported

    yields:
        (Bytes, int): the byte sequence feature and the instruction address.

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    # call instructions are skipped entirely
    if idaapi.is_call_insn(insn):
        return

    target_ea = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
    if target_ea == insn.ea:
        # presumably the helper hands back the instruction's own address when it
        # finds no data reference - TODO confirm against the helper
        return

    data = capa.features.extractors.ida.helpers.read_bytes_at(target_ea, MAX_BYTES_FEATURE_SIZE)
    if not data:
        return

    if capa.features.extractors.helpers.all_zeros(data):
        return

    yield Bytes(data), insn.ea
def extract_insn_bytes_features(f, bb, insn):
    """
    parse byte sequence features from the given instruction.

    For each operand of a recognized kind, a candidate address is derived and
    up to MAX_BYTES_FEATURE_SIZE bytes are read from it, never reading past the
    end of the segment that contains the address. Non-zero results are emitted.

    args:
      f: function object; only `f.vw` is used (`getSegment` / `readMemory`).
      bb: basic block containing the instruction (unused).
      insn: instruction; `insn.mnem`, `insn.opers` and `insn.va` are read.

    yields:
      (Bytes, int): the byte sequence feature and the instruction's `va`.

    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
    if insn.mnem == "call":
        # ignore call instructions.
        # the mnemonic belongs to the instruction, not to an operand,
        # so this is decided once instead of once per operand.
        return

    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            # NOTE(review): the operand itself is passed as the argument here, while
            # the RIP-relative branch below passes `insn` - confirm which is intended.
            addr = oper.getOperValue(oper)
        elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
            # handle case like:
            #     movzx   ecx, ds:byte_423258[eax]
            addr = oper.disp
        elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
            # see: Lab21-01.exe_:0x1400010D3
            addr = oper.getOperAddr(insn)
        else:
            # any other operand kind carries no address we know how to follow
            continue

        segm = f.vw.getSegment(addr)
        if not segm:
            # the candidate address does not fall inside any segment
            continue

        # do not read beyond the end of the segment:
        # segm[0] + segm[1] is treated as the first address past it.
        segm_end = segm[0] + segm[1]
        size = min(MAX_BYTES_FEATURE_SIZE, segm_end - addr)

        try:
            extracted_bytes = f.vw.readMemory(addr, size)
        except envi.SegmentationViolation:
            # address is not readable; move on to the next operand
            continue

        if not capa.features.extractors.helpers.all_zeros(extracted_bytes):
            yield Bytes(extracted_bytes), insn.va