def __init__(self, *args, **kwargs):
    """Initialise a register node owned by ``kwargs['function']``.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.  The ``function`` keyword is required; the owning
    binary is taken from it.

    ``var_type`` is chosen from the binary's ``MACHINE_ARCH``:

    * ``'x86'``: always ``LOC_VAR``.
    * ``'x64'`` / ``'ARM'``: ``FUN_ARG`` when ``base_register`` is one of
      the architecture's argument registers and ``index`` is 0, otherwise
      ``LOC_VAR``.
    * any other value: ``var_type`` is not assigned here.

    ``base_register`` and ``index`` are read but not assigned in this
    method (presumably they are set by the parent initialiser).
    """
    super().__init__(*args, **kwargs)

    owner = kwargs['function']
    self.function = owner
    self.binary = owner.binary

    # Labels start out unknown; the pc range is empty until pcs are added.
    self.train_name = self.test_name = UNKNOWN_LABEL
    self.low_pc = self.high_pc = None

    self.ttype = Ttype(owner=self)
    self.n2p_type = self.binary.config.INF
    self.features = set()
    self.blks = set()

    arch = self.binary.config.MACHINE_ARCH
    if arch == 'x86':
        self.var_type = LOC_VAR
        return

    if arch == 'x64':
        arg_regs = X64_FUN_ARG_REGS
    elif arch == 'ARM':
        arg_regs = ARM_FUN_ARG_REGS
    else:
        # Unrecognised architecture: leave ``var_type`` unset.
        return

    holds_argument = self.base_register in arg_regs and self.index == 0
    self.var_type = FUN_ARG if holds_argument else LOC_VAR
def __init__(self, *args, **kwargs):
    """Initialise an offset node addressed as ``base_pointer`` + ``offset``.

    All arguments are forwarded unchanged to the parent initialiser.  The
    keywords ``function``, ``base_pointer``, ``offset`` and ``index`` are
    required.  The node's ``name`` is built from the base pointer and the
    offset.

    ``var_type`` is chosen from the binary's ``MACHINE_ARCH``:

    * ``'x86'``: ``FUN_ARG`` when the base pointer is ``'EBP'`` and the
      offset is non-negative, otherwise ``LOC_VAR``.
    * ``'x64'``: same rule with ``'RBP'``.
    * ``'ARM'``: always ``LOC_VAR``.
    * any other value: ``var_type`` is not assigned here.
    """
    super().__init__(*args, **kwargs)

    self.function = kwargs['function']
    self.binary = self.function.binary

    base_pointer = kwargs['base_pointer']
    self.base_pointer = base_pointer
    offset = kwargs['offset']
    self.offset = offset
    self.index = kwargs['index']
    self.name = '{}:S:{}'.format(base_pointer, offset)

    self.ttype = Ttype(owner=self)
    self.n2p_type = self.binary.config.INF

    # Labels start out unknown; the pc range is empty until pcs are added.
    self.train_name = self.test_name = UNKNOWN_LABEL
    self.low_pc = self.high_pc = None
    self.pcs = set()
    self.blks = set()
    self.features = set()

    arch = self.binary.config.MACHINE_ARCH
    frame_pointer = {'x86': 'EBP', 'x64': 'RBP'}.get(arch)
    if frame_pointer is not None:
        # The offset is only compared when the base pointer matches.
        if self.base_pointer == frame_pointer and self.offset >= 0:
            self.var_type = FUN_ARG
        else:
            self.var_type = LOC_VAR
    elif arch == 'ARM':
        self.var_type = LOC_VAR
def __init__(self, *args, **kwargs):
    """Initialise a direct-offset node.

    All arguments are forwarded unchanged to the parent initialiser.  The
    keywords ``binary``, ``offset`` and ``access`` are required and are
    stored on the instance as-is.

    The node starts with the placeholder name ``'@DO'``, no given name,
    unknown train/test labels, the binary's ``INF`` value as ``n2p_type``
    and ``LOC_VAR`` as ``var_type``.
    """
    super().__init__(*args, **kwargs)

    binary = kwargs['binary']
    self.binary = binary
    self.offset = kwargs['offset']
    self.access = kwargs['access']

    self.name = '@DO'
    self.is_name_given = False

    self.ttype = Ttype(owner=self)
    self.n2p_type = binary.config.INF
    self.train_name = self.test_name = UNKNOWN_LABEL
    self.var_type = LOC_VAR
def initialize(self):
    """Build this function's blocks and derived data, in a fixed order.

    Steps, in order:

    1. Create a fresh ``Ttype`` owned by this object.
    2. Wrap every entry of ``self.bap.blks`` in a ``Blk`` and register it
       in ``self.blks`` under its ``tid``.
    3. Pass every virtual expression (a single one or each element of a
       list) through ``EXP_TRANSFORMER.visit`` and store the result back.
    4. Call ``initialize()`` on every block, then ``init_features()`` on
       every block (two separate passes).
    5. For every entry of ``self.bap.cfg`` whose first two items are both
       known block ids, link the two blocks via ``add_callee`` /
       ``add_caller``.
    6. If ``INDIRECT_OFFSET_WITH_INDEX`` is enabled, copy the pcs of each
       matching register onto the corresponding indirect offset.
    7. On x86, call ``self.find_fun_args()``.
    """
    from elements.blk import Blk

    self.ttype = Ttype(owner=self)

    for bap_blk in self.bap.blks:
        wrapped = Blk(function=self, bap=bap_blk)
        self.blks[wrapped.tid] = wrapped

    def rewrite(expression, holder):
        # The block and pc of the owning virtual expression give context.
        return EXP_TRANSFORMER.visit(expression,
                                     blk=holder.blk,
                                     pc=holder.pc)

    for holder in self.virtual_exps.values():
        if isinstance(holder.exp, list):
            holder.exp = [rewrite(item, holder) for item in holder.exp]
        else:
            holder.exp = rewrite(holder.exp, holder)

    # Every block is initialised before any block computes its features.
    for block in self.blks.values():
        block.initialize()
    for block in self.blks.values():
        block.init_features()

    for edge in self.bap.cfg:
        src, dst = edge[0], edge[1]
        if src not in self.blks or dst not in self.blks:
            continue
        self.blks[src].add_callee(self.blks[dst])
        self.blks[dst].add_caller(self.blks[src])

    if self.binary.config.INDIRECT_OFFSET_WITH_INDEX:
        for key in self.indirect_offsets:
            base_pointer, _offset = key
            for index in self.indirect_offsets[key]:
                reg_key = (base_pointer, index)
                if reg_key not in self.regs:
                    continue
                source_reg = self.regs[reg_key]
                target = self.indirect_offsets[key][index]
                for pc in source_reg.pcs:
                    target.add_pc(pc)

    if self.binary.config.MACHINE_ARCH == 'x86':
        self.find_fun_args()
class Reg(RegBase):
    """Register node belonging to one function of a binary.

    Each instance carries a training label (``train_name``), a predicted
    label (``test_name``), a ``Ttype`` for its type, the range of program
    counters it has been seen at, and a set of feature strings.
    """

    # Class-wide statistics counters.  ``stat`` updates the name-related
    # ones below; ``correct`` and the ``ttype_*`` counters are declared
    # here but not modified anywhere in this class body.
    total = 0
    known = 0
    unknown = 0
    inf = 0
    tp_1p = 0
    fp_1p = 0
    tn_1p = 0
    fn_1p = 0
    correct = 0
    ttype_total = 0
    ttype_known = 0
    ttype_unknown = 0
    ttype_inf = 0
    ttype_tp_1p = 0
    ttype_fp_1p = 0
    ttype_tn_1p = 0
    ttype_fn_1p = 0
    ttype_correct = 0

    def __init__(self, *args, **kwargs):
        """Initialise the node; ``kwargs['function']`` is required.

        All arguments are forwarded unchanged to ``RegBase``.  ``var_type``
        is derived from the binary's ``MACHINE_ARCH``: always ``LOC_VAR``
        on x86; on x64/ARM it is ``FUN_ARG`` when ``base_register`` is an
        argument register for that architecture and ``index`` is 0, else
        ``LOC_VAR``.  For any other architecture it is left unassigned.
        """
        super().__init__(*args, **kwargs)
        self.function = kwargs['function']
        self.binary = self.function.binary
        self.train_name = UNKNOWN_LABEL
        self.test_name = UNKNOWN_LABEL
        self.low_pc = None
        self.high_pc = None
        self.ttype = Ttype(owner=self)
        self.n2p_type = self.binary.config.INF
        self.features = set()
        self.blks = set()

        # ``base_register`` and ``index`` are not assigned in this method;
        # presumably RegBase sets them.
        arch = self.binary.config.MACHINE_ARCH
        if arch == 'x86':
            self.var_type = LOC_VAR
        elif arch == 'x64':
            if self.base_register in X64_FUN_ARG_REGS and self.index == 0:
                self.var_type = FUN_ARG
            else:
                self.var_type = LOC_VAR
        elif arch == 'ARM':
            if self.base_register in ARM_FUN_ARG_REGS and self.index == 0:
                self.var_type = FUN_ARG
            else:
                self.var_type = LOC_VAR

    def __repr__(self):
        """Return ``(Reg <base_register>.<index>)``."""
        return '(Reg {}.{})'.format(self.base_register, self.index)

    def __str__(self):
        """Return a label summary, flagging mismatched predictions.

        When the predicted and training names differ, the output is tagged
        ``WRONGU`` if the training name is unknown and ``WRONGK`` otherwise.
        """
        if self.test_name == self.train_name:
            return '(Reg {} {})'.format(self.train_name, str(self.ttype))
        if self.train_name == UNKNOWN_LABEL:
            return '(Reg (WRONGU {} {}) {})'.format(
                self.train_name, self.test_name, str(self.ttype))
        return '(Reg (WRONGK {} {}) {})'.format(
            self.train_name, self.test_name, str(self.ttype))

    def init_features(self):
        """Populate ``self.features`` from the ``depgraph.infos`` helpers.

        Always adds the coarse description and a block-count feature; the
        fine description is added only for function arguments on x64/ARM.
        """
        coarse = depgraph.infos.coarse
        fine = depgraph.infos.fine
        self.features.add(coarse(self))
        if self.binary.config.MACHINE_ARCH in ('x64', 'ARM') \
                and self.var_type == FUN_ARG:
            self.features.add(fine(self))
        self.features.add('blk[{}][{}]'.format(len(self.blks), coarse(self)))

    def add_pc(self, pc):
        """Record ``pc`` and widen ``[low_pc, high_pc]`` to include it."""
        # ``self.pcs`` is not created in this class's __init__; presumably
        # RegBase provides it.
        self.pcs.add(pc)
        self.low_pc = pc if self.low_pc is None else min(pc, self.low_pc)
        self.high_pc = pc if self.high_pc is None else max(pc, self.high_pc)

    def train_info(self, die, ttype):
        """Merge the name/type found for ``die`` into the training labels.

        The name is read from the ``DW_AT_name`` attribute of the DIE
        returned by ``debug_info.get_name_origin(die)``; nothing happens
        when that attribute is absent.  Otherwise:

        * no training name yet: adopt both the name and ``ttype``;
        * current type is unknown/void and ``ttype`` is known: adopt both;
        * otherwise: adopt both only when the new name sorts before the
          current one.
        """
        origin = self.binary.debug_info.get_name_origin(die)
        name_attr = origin.attributes.get('DW_AT_name', None)
        if name_attr is None:
            return
        name = name_attr.value.decode('ascii')

        if self.train_name == UNKNOWN_LABEL:
            self.ttype.train_info(ttype)
            self.train_name = name
        elif self.ttype.train_name in (UNKNOWN_LABEL, VOID) \
                and ttype != UNKNOWN_LABEL:
            self.ttype.train_info(ttype)
            # Fixed: this used to be ``self.train_name == name``, a
            # comparison with no effect, which left the old name paired
            # with the new type.
            self.train_name = name
        elif self.train_name > name:
            self.train_name = name
            self.ttype.train_info(ttype)

    def stat(self):
        """Update the class-wide counters on ``Reg`` and ``RegBase``.

        Counts known/unknown training names, and for nodes whose
        ``n2p_type`` is ``INF`` or ``GIV`` the corresponding
        tp/fp or tn/fn counter depending on whether the training name is
        known.
        """
        super().stat()
        Reg.total += 1
        name_unknown = self.train_name == UNKNOWN_LABEL

        if not name_unknown:
            Reg.known += 1
            RegBase.known += 1
        else:
            Reg.unknown += 1
            RegBase.unknown += 1

        if self.n2p_type == self.binary.config.INF:
            Reg.inf += 1
            RegBase.inf += 1
            if name_unknown:
                Reg.fp_1p += 1
                RegBase.fp_1p += 1
            else:
                Reg.tp_1p += 1
                RegBase.tp_1p += 1
        elif self.n2p_type == self.binary.config.GIV:
            if name_unknown:
                Reg.tn_1p += 1
                RegBase.tn_1p += 1
            else:
                Reg.fn_1p += 1
                RegBase.fn_1p += 1

    def debug_info(self):
        """Serialise this node into a ``bytearray`` and return it.

        Layout, in order: an abbreviation code chosen by ``var_type``
        (omitted when it is neither ``FUN_ARG`` nor ``LOC_VAR``), the
        predicted name followed by a zero byte, the byte ``0x01``, one
        byte combining the register mapping with ``DW_OP_reg0``, and a
        4-byte encoded type offset.  A predicted name that is neither a
        known type name nor the unknown label is also added to
        ``self.binary.predicted``.
        """
        bs = bytearray()
        if self.var_type == FUN_ARG:
            bs.append(ENUM_ABBREV_CODE['FUN_ARG'])
        elif self.var_type == LOC_VAR:
            bs.append(ENUM_ABBREV_CODE['VARIABLE'])

        # Name, one byte per character, zero-terminated.
        bs.extend(map(ord, self.test_name))
        bs.append(0x0)
        if self.test_name not in TTYPES and self.test_name != UNKNOWN_LABEL:
            self.binary.predicted.add(self.test_name)

        # 0x01 is presumably the length of the one-byte location
        # expression that follows -- TODO confirm.
        bs.append(0x01)
        bs.append(self.binary.config.REG_MAPPING[self.base_register]
                  + ENUM_DW_FORM_exprloc['DW_OP_reg0'])

        # Fall back to INT when no usable predicted type is available.
        type_name = self.ttype.test_name
        if type_name is None \
                or type_name in (UNKNOWN_LABEL, VOID) \
                or type_name not in TTYPES:
            bs += utils.encode_kbytes(self.binary.types.get_offset(INT), 4)
        else:
            bs += utils.encode_kbytes(
                self.binary.types.get_offset(type_name), 4)
        return bs
class DirectOffset(Offset):
    """Offset node identified by an address (``offset``) in a binary.

    Each instance carries a training label (``train_name``), a predicted
    label (``test_name``), a ``Ttype`` for its type and a flag telling
    whether its name was given rather than inferred.
    """

    # Class-wide statistics counters.  ``stat`` updates ``total``, ``giv``,
    # ``inf``, ``known`` and ``unknown``; ``correct`` and the ``ttype_*``
    # counters are declared here but not modified in this class body.
    total = 0
    known = 0
    unknown = 0
    inf = 0
    giv = 0
    correct = 0
    ttype_total = 0
    ttype_known = 0
    ttype_unknown = 0
    ttype_inf = 0
    ttype_tp_1p = 0
    ttype_fp_1p = 0
    ttype_tn_1p = 0
    ttype_fn_1p = 0
    ttype_correct = 0

    def __init__(self, *args, **kwargs):
        """Initialise the node from ``binary``, ``offset`` and ``access``.

        All arguments are forwarded unchanged to ``Offset``; the three
        keywords above are required.  The node starts with the placeholder
        name ``'@DO'``, unknown labels and ``LOC_VAR`` as ``var_type``.
        """
        super().__init__(*args, **kwargs)
        self.binary = kwargs['binary']
        self.offset = kwargs['offset']
        self.access = kwargs['access']
        self.name = '@DO'
        self.is_name_given = False
        self.ttype = Ttype(owner=self)
        self.n2p_type = self.binary.config.INF
        self.train_name = UNKNOWN_LABEL
        self.test_name = UNKNOWN_LABEL
        self.var_type = LOC_VAR

    def __repr__(self):
        """Return ``[DirectOffset <hex offset> <repr(access)>]``."""
        return '[DirectOffset {} {}]'.format(format(self.offset, '02x'),
                                             repr(self.access))

    def __str__(self):
        """Return a label summary, flagging mismatched predictions.

        A node whose name was given, or whose predicted name equals the
        training name, prints plainly.  Otherwise the output is tagged
        ``WRONGU`` if the training name is unknown and ``WRONGK`` if not.
        """
        if self.test_name == self.train_name or self.is_name_given:
            return '[DirectOffset {} {}]'.format(self.train_name,
                                                 str(self.ttype))
        if self.train_name == UNKNOWN_LABEL:
            return '[DirectOffset (WRONGU {} {}) {}]'.format(
                self.train_name, self.test_name, str(self.ttype))
        return '[DirectOffset (WRONGK {} {}) {}]'.format(
            self.train_name, self.test_name, str(self.ttype))

    def train_info(self, die, ttype):
        """Merge the name/type found for ``die`` into the training labels.

        The name is read from the ``DW_AT_name`` attribute of the DIE
        returned by ``debug_info.get_name_origin(die)``; nothing happens
        when that attribute is absent.  Otherwise:

        * no training name yet: adopt both the name and ``ttype``;
        * current type is unknown/void and ``ttype`` is known: adopt both;
        * otherwise: adopt both only when the new name sorts before the
          current one.
        """
        origin = self.binary.debug_info.get_name_origin(die)
        name_attr = origin.attributes.get('DW_AT_name', None)
        if name_attr is None:
            return
        name = name_attr.value.decode('ascii')

        if self.train_name == UNKNOWN_LABEL:
            self.ttype.train_info(ttype)
            self.train_name = name
        elif self.ttype.train_name in (UNKNOWN_LABEL, VOID) \
                and ttype != UNKNOWN_LABEL:
            self.ttype.train_info(ttype)
            # Fixed: this used to be ``self.train_name == name``, a
            # comparison with no effect, which left the old name paired
            # with the new type.
            self.train_name = name
        elif self.train_name > name:
            self.train_name = name
            self.ttype.train_info(ttype)

    def stat(self):
        """Update the class-wide counters on ``DirectOffset`` and ``Offset``.

        A node with a given name only bumps the ``giv`` counters.  Any
        other node bumps ``inf`` and then either the known counters (plus
        ``Offset.tp_1p``) or the unknown counters (plus ``Offset.fp_1p``),
        depending on whether its training name is known.
        """
        super().stat()
        DirectOffset.total += 1

        if self.is_name_given:
            DirectOffset.giv += 1
            Offset.giv += 1
            return

        DirectOffset.inf += 1
        Offset.inf += 1
        if self.train_name != UNKNOWN_LABEL:
            DirectOffset.known += 1
            Offset.known += 1
            Offset.tp_1p += 1
        else:
            DirectOffset.unknown += 1
            Offset.unknown += 1
            Offset.fp_1p += 1

    def debug_info(self):
        """Serialise this node into a ``bytearray`` and return it.

        Layout, in order: the ``VARIABLE`` abbreviation code, the predicted
        name followed by a zero byte, a byte holding
        ``ADDRESS_BYTE_SIZE + 1``, the ``DW_OP_addr`` opcode, the encoded
        address and a 4-byte encoded type offset.  A predicted name that is
        not a known type name, not the unknown label and not an existing
        symbol name is also added to ``self.binary.predicted``.
        """
        bs = bytearray()
        bs.append(ENUM_ABBREV_CODE['VARIABLE'])

        # Name, one byte per character, zero-terminated.
        bs.extend(map(ord, self.test_name))
        bs.append(0x00)
        if self.test_name not in TTYPES \
                and self.test_name != UNKNOWN_LABEL \
                and self.test_name not in self.binary.sections.symbol_names:
            self.binary.predicted.add(self.test_name)

        # Presumably the location-expression length: one opcode byte plus
        # the address bytes -- TODO confirm.
        bs.append(self.binary.config.ADDRESS_BYTE_SIZE + 1)
        bs.append(ENUM_DW_FORM_exprloc['DW_OP_addr'])
        bs += utils.encode_address(self.offset, self.binary)

        # Fall back to INT when no usable predicted type is available.
        type_name = self.ttype.test_name
        if type_name is None \
                or type_name in (UNKNOWN_LABEL, VOID) \
                or type_name not in TTYPES:
            bs += utils.encode_kbytes(self.binary.types.get_offset(INT), 4)
        else:
            bs += utils.encode_kbytes(
                self.binary.types.get_offset(type_name), 4)
        return bs