def process_msvc_func(func):
    """Apply a function type recovered from an MSVC-mangled symbol name.

    The raw symbol name is demangled with ``demangle_ms``. When the result is
    a function type, a new function type is built from its return value, its
    non-void parameters and the calling-convention token found before the
    name, and is assigned to ``func.function_type``. For ``__thiscall``
    functions that are not marked ``static`` and whose name has at least two
    components, a leading pointer to the enclosing type is added.

    Args:
        func: Function object exposing ``arch`` and ``symbol``; ``platform``
            is consulted only when the architecture does not provide the
            selected calling convention.

    Returns:
        None. ``func`` is left untouched when the symbol is skipped or does
        not demangle to a function type.
    """
    arch = func.arch
    mangled_name = func.symbol.raw_name

    # Skip buggy vtables
    if mangled_name.startswith('??_7') and not mangled_name.endswith('@@6B@'):
        return

    sym_type, sym_parts = demangle_ms(arch, mangled_name)
    if (sym_type is None) or (sym_type.type_class != TypeClass.FunctionTypeClass):
        return
    if isinstance(sym_parts, str):
        return

    params = [
        v.type for v in sym_type.parameters
        if v.type.type_class != TypeClass.VoidTypeClass
    ]
    return_type = sym_type.return_value
    tokens_before = [str(v) for v in sym_type.get_tokens_before_name()]

    # First matching token wins, in this priority order; cdecl is the default.
    convention = 'cdecl'
    for candidate in ('cdecl', 'stdcall', 'thiscall'):
        if '__' + candidate in tokens_before:
            convention = candidate
            break

    if (convention == 'thiscall' and len(sym_parts) >= 2
            and 'static' not in tokens_before):
        # Everything but the last name component names the enclosing type.
        type_name = '::'.join(sym_parts[:-1])
        this_type = Type.pointer(
            arch,
            Type.named_type(
                NamedTypeReference(
                    NamedTypeReferenceClass.StructNamedTypeClass,
                    name=type_name)))
        params.insert(0, this_type)

    try:
        calling_convention = arch.calling_conventions[convention]
    except KeyError:
        # The architecture does not register this convention by name;
        # use the platform default rather than aborting.
        calling_convention = func.platform.default_calling_convention

    func.function_type = Type.function(return_type, params,
                                       calling_convention,
                                       sym_type.has_variable_arguments)
def process_msvc_func(func):
    """Rebuild ``func.function_type`` from its MSVC-mangled symbol name.

    The symbol is demangled with ``demangle_ms``; nothing happens unless the
    result is a function type with a non-string name-part list. The calling
    convention comes from the ``__cdecl`` / ``__stdcall`` / ``__fastcall`` /
    ``__thiscall`` token before the name, defaulting to the platform default.
    A class/struct/union return value is turned into a pointer and also
    passed as a leading ``retptr`` parameter. Non-static member or virtual
    functions get a leading ``this`` parameter, and a void-returning function
    whose last two name parts are equal returns the ``this`` pointer type.

    Args:
        func: Function object exposing ``arch``, ``platform`` and ``symbol``.
    """
    arch = func.arch
    platform = func.platform

    demangled_type, name_parts = demangle_ms(arch, func.symbol.raw_name)
    if (demangled_type is None
            or demangled_type.type_class != TypeClass.FunctionTypeClass
            or isinstance(name_parts, str)):
        return

    # Drop "void" placeholders from the parameter list.
    parameters = []
    for parameter in demangled_type.parameters:
        if parameter.type.type_class != TypeClass.VoidTypeClass:
            parameters.append(parameter.type)
    result_type = demangled_type.return_value

    prefix_tokens = [str(token)
                     for token in demangled_type.get_tokens_before_name()]
    has_access_specifier = any(
        access in prefix_tokens
        for access in ('public:', 'protected:', 'private:'))
    declared_static = 'static' in prefix_tokens
    declared_virtual = 'virtual' in prefix_tokens

    # First matching keyword wins; __thiscall is looked up on the arch.
    calling_convention = platform.default_calling_convention
    platform_conventions = (
        ('__cdecl', 'cdecl_calling_convention'),
        ('__stdcall', 'stdcall_calling_convention'),
        ('__fastcall', 'fastcall_calling_convention'),
    )
    for keyword, attribute in platform_conventions:
        if keyword in prefix_tokens:
            calling_convention = getattr(platform, attribute)
            break
    else:
        if '__thiscall' in prefix_tokens:
            calling_convention = arch.calling_conventions['thiscall']

    if result_type.type_class == TypeClass.NamedTypeReferenceClass:
        referenced_class = result_type.named_type_reference.type_class
        if referenced_class in {
                NamedTypeReferenceClass.ClassNamedTypeClass,
                NamedTypeReferenceClass.StructNamedTypeClass,
                NamedTypeReferenceClass.UnionNamedTypeClass
        }:
            # TODO: This should only added for large/non trivial types
            result_type = Type.pointer(arch, result_type)
            parameters.insert(
                0, FunctionParameter(result_type, name="retptr"))

    needs_this = (len(name_parts) >= 2
                  and (has_access_specifier or declared_virtual)
                  and not declared_static)
    if needs_this:
        owner_name = '::'.join(name_parts[:-1])
        owner_ref = NamedTypeReference(
            NamedTypeReferenceClass.StructNamedTypeClass, name=owner_name)
        this_pointer = Type.pointer(arch, Type.named_type(owner_ref))
        parameters.insert(0, FunctionParameter(this_pointer, name="this"))

        # Last two name parts equal and a void return: return `this` instead.
        if (name_parts[-1] == name_parts[-2]
                and result_type.type_class == TypeClass.VoidTypeClass):
            result_type = this_pointer

    func.function_type = Type.function(
        result_type, parameters, calling_convention,
        demangled_type.has_variable_arguments)
# ----------------------------------------------------------------------------- # attempt #2: use the named_type_reference() function in TypeBuilder # ----------------------------------------------------------------------------- builder = TypeBuilder.named_type_reference( NamedTypeReferenceClass.UnionNamedTypeClass, # type_class QualifiedName('pthread_attr_t'), # name 'libc.so.6:["pthread_attr_t"]', # type_id 1, # alignment 56 # width ) builder.const = True assert builder.const == True foo = Type.named_type(builder) if foo.const: print("SUCCEEDED") else: try: foo.const = True print("SUCCEEDED") except AttributeError as e: print("FAILED") # ----------------------------------------------------------------------------- # attempt #3: ask the core to set const in the builder # ----------------------------------------------------------------------------- builder = TypeBuilder.named_type_reference( NamedTypeReferenceClass.UnionNamedTypeClass, # type_class
def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
    """Convert a demangler AST node into a type object.

    Args:
        node: Demangler AST node; dispatch is on ``node.kind``.
        cv_qual: Qualifier names (``'const'``, ``'volatile'``) applied to a
            pointer or reference node; supplied by an enclosing ``cv_qual``
            node.
        arg_count_hint: Parameter count of the already-analyzed function, or
            None. Passing it marks a top-level call and is used to decide
            whether an implicit ``this`` parameter should be added.

    Returns:
        The converted type, or None when the node (or any component of it)
        cannot be converted. For ``func`` nodes with ``arg_count_hint``
        given, a ``(this_arg, type)`` tuple where ``this_arg`` tells whether
        a ``this`` parameter was inferred.
    """
    if node.kind == 'builtin':
        if node.value in ty_for_cxx_builtin:
            return ty_for_cxx_builtin[node.value]
        return None

    if node.kind in ['name', 'qual_name']:
        named_ty_ref = NamedTypeReference(name=str(node))
        return Type.named_type(named_ty_ref)

    if node.kind in ['pointer', 'lvalue', 'rvalue']:
        pointee_ty = ty_from_demangler_node(node.value)
        if pointee_ty is None:
            return None
        is_const = ('const' in cv_qual)
        is_volatile = ('volatile' in cv_qual)
        if node.kind == 'pointer':
            return Type.pointer(arch, pointee_ty, is_const, is_volatile)
        if node.kind == 'lvalue':
            return Type.pointer(
                arch, pointee_ty, is_const, is_volatile,
                ref_type=ReferenceType.ReferenceReferenceType)
        return Type.pointer(
            arch, pointee_ty, is_const, is_volatile,
            ref_type=ReferenceType.RValueReferenceType)

    if node.kind == 'cv_qual':
        return ty_from_demangler_node(node.value, cv_qual=node.qual)

    if node.kind == 'func':
        has_qual_name = bool(node.name and node.name.kind == 'qual_name')
        qual_name = node.name.value if has_qual_name else None
        is_ctor_dtor = bool(
            has_qual_name and qual_name[-1].kind in ['ctor', 'dtor'])

        if is_ctor_dtor:
            ret_ty = Type.void()
        elif node.ret_ty is not None:
            ret_ty = ty_from_demangler_node(node.ret_ty)
            if ret_ty is None:
                return None
        else:
            # No return type in the mangled name: zero-confidence int guess.
            ret_ty = Type.int(arch.default_int_size).with_confidence(0)

        arg_nodes = list(node.arg_tys or ())
        arg_tys = []

        var_arg = False
        # An empty argument list has neither a trailing "..." nor a lone
        # "void" to strip; indexing it would raise IndexError.
        if arg_nodes:
            last, first = arg_nodes[-1], arg_nodes[0]
            if last.kind == 'builtin' and last.value == '...':
                arg_nodes.pop()
                var_arg = True
            elif first.kind == 'builtin' and first.value == 'void':
                arg_nodes = arg_nodes[1:]

        this_arg = False
        if has_qual_name:
            # A ctor/dtor always takes `this`; otherwise infer it when the
            # analyzed function has exactly one more parameter than the
            # mangled name lists.
            if is_ctor_dtor or (arg_count_hint is not None
                                and len(arg_nodes) == arg_count_hint - 1):
                this_arg = True
                this_node = Node('qual_name', qual_name[:-1])
                this_ty = ty_from_demangler_node(this_node)
                if this_ty is None:
                    return None
                arg_tys.append(Type.pointer(arch, this_ty))

        for arg_node in arg_nodes:
            arg_ty = ty_from_demangler_node(arg_node)
            if arg_ty is None:
                return None
            arg_tys.append(arg_ty)

        ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
        if arg_count_hint is not None:
            # toplevel invocation, so return whether we inferred a this argument
            return this_arg, ty
        return ty

    log.log_warn("Cannot convert demangled AST {} to a type".format(
        repr(node)))
    return None
def analyze_cxx_abi(view, start=None, length=None, task=None):
    """Annotate Itanium-mangled (``_Z`` / ``__Z``) symbols in a binary view.

    For every matching symbol returned by ``view.get_symbols(start, length)``
    the function defines an auto symbol carrying the demangled name (or the
    raw name when demangling fails) and then, depending on what the symbol
    demangles to:

    * ``typeinfo_name`` data: defines a char array over the string there;
    * ``typeinfo`` data: defines a type_info structure, with a ``base_type``
      field when the third word points at another ``_ZTI`` symbol;
    * ``vtable`` data: scans function pointers, creating functions for
      pointers into executable segments, defines the vtable structure and
      follows secondary vtables;
    * ``func`` code: applies the function type derived from the mangled name.

    ``view.update_analysis()`` is called at the end.

    Args:
        view: Binary view to analyze.
        start: Forwarded to ``view.get_symbols``.
        length: Forwarded to ``view.get_symbols``.
        task: Optional progress object; ``set_total`` is called once and the
            loop stops as soon as ``advance()`` returns a falsy value.
    """
    platform = view.platform
    arch = platform.arch

    void_p_ty = Type.pointer(arch, Type.void())
    char_p_ty = Type.pointer(arch, Type.int(1))
    signed_int_ty = Type.int(arch.default_int_size, True)

    base_type_info_ty = Type.named_type(
        NamedTypeReference(name='std::type_info'))
    base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty)

    def char_array_ty(char_count):
        # Array of `char_count` one-byte integers.
        return Type.array(Type.int(1), char_count)

    def type_info_ty(kind=None):
        # vtable pointer + name pointer, plus base pointer for 'si_class'.
        type_info_struct = Structure()
        type_info_struct.append(void_p_ty, 'vtable')
        type_info_struct.append(char_p_ty, 'name')
        if kind == 'si_class':
            type_info_struct.append(base_type_info_ptr_ty, 'base_type')
        return Type.structure_type(type_info_struct)

    def vtable_ty(vfunc_count):
        # top offset, typeinfo pointer, then `vfunc_count` function pointers.
        vtable_struct = Structure()
        vtable_struct.append(signed_int_ty, 'top_offset')
        vtable_struct.append(base_type_info_ptr_ty, 'typeinfo')
        vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions')
        return Type.structure_type(vtable_struct)

    if platform.name.startswith("windows-"):
        long_size = arch.default_int_size
    else:
        long_size = arch.address_size

    char_signed = arch.name.startswith('x86')  # heuristic; not always true

    short_size = 2  # not always true
    long_long_size = 8  # not always true

    ty_for_cxx_builtin = {
        'void': Type.void(),
        'wchar_t': Type.int(2, sign=char_signed, altname='wchar_t'),
        'bool': Type.bool(),
        'char': Type.int(1, sign=char_signed),
        'signed char': Type.int(1, sign=True),
        'unsigned char': Type.int(1, sign=False),
        'short': Type.int(short_size, sign=True),
        'unsigned short': Type.int(short_size, sign=False),
        'int': Type.int(arch.default_int_size, sign=True),
        'unsigned int': Type.int(arch.default_int_size, sign=False),
        'long': Type.int(long_size, sign=True),
        'unsigned long': Type.int(long_size, sign=False),
        'long long': Type.int(long_long_size, sign=True),
        'unsigned long long': Type.int(long_long_size, sign=False),
        '__int128': Type.int(16, sign=True),
        'unsigned __int128': Type.int(16, sign=False),
        'float': Type.float(4),
        'double': Type.float(8),
        '__float80': Type.float(10),
        '__float128': Type.float(16),
        'char32_t': Type.int(4, sign=char_signed, altname='char32_t'),
        'char16_t': Type.int(2, sign=char_signed, altname='char16_t'),
    }

    def ty_from_demangler_node(node, cv_qual=frozenset(),
                               arg_count_hint=None):
        # Convert a demangler AST node to a type. Returns None on failure;
        # for 'func' nodes with arg_count_hint set, returns (this_arg, type).
        if node.kind == 'builtin':
            if node.value in ty_for_cxx_builtin:
                return ty_for_cxx_builtin[node.value]
            return None

        if node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)

        if node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            if node.kind == 'lvalue':
                return Type.pointer(
                    arch, pointee_ty, is_const, is_volatile,
                    ref_type=ReferenceType.ReferenceReferenceType)
            return Type.pointer(
                arch, pointee_ty, is_const, is_volatile,
                ref_type=ReferenceType.RValueReferenceType)

        if node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)

        if node.kind == 'func':
            has_qual_name = bool(node.name and node.name.kind == 'qual_name')
            qual_name = node.name.value if has_qual_name else None
            is_ctor_dtor = bool(
                has_qual_name and qual_name[-1].kind in ['ctor', 'dtor'])

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                # No return type in the mangled name: zero-confidence int.
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys or ())
            arg_tys = []

            var_arg = False
            # Guard against an empty argument list before indexing it.
            if arg_nodes:
                last, first = arg_nodes[-1], arg_nodes[0]
                if last.kind == 'builtin' and last.value == '...':
                    arg_nodes.pop()
                    var_arg = True
                elif first.kind == 'builtin' and first.value == 'void':
                    arg_nodes = arg_nodes[1:]

            this_arg = False
            if has_qual_name:
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this
                # argument
                return this_arg, ty
            return ty

        log.log_warn("Cannot convert demangled AST {} to a type".format(
            repr(node)))
        return None

    reader = BinaryReader(view)

    def read(size):
        # Read one `size`-byte integer at the reader's current offset.
        if size == 4:
            return reader.read32()
        if size == 8:
            return reader.read64()
        raise ValueError('unsupported read size: {}'.format(size))

    symbols = view.get_symbols(start, length)
    if task:
        task.set_total(len(symbols))

    mangled_re = re.compile('_?_Z')
    # Splits "name@suffix" into the mangled name and its "@..." suffix.
    suffix_re = re.compile(r'^(.+?)(@.+)$')

    demangler_failures = 0
    for symbol in symbols:
        if task and not task.advance():
            break

        if not mangled_re.match(symbol.raw_name):
            continue

        is_data = (symbol.type == SymbolType.DataSymbol)
        is_code = (symbol.type in [
            SymbolType.FunctionSymbol, SymbolType.ImportedFunctionSymbol
        ])

        raw_name, suffix = symbol.raw_name, ''
        if '@' in raw_name:
            match = suffix_re.match(raw_name)
            # No match when '@' is the final character; keep the name as is.
            if match:
                raw_name, suffix = match.group(1), match.group(2)

        # Reset per symbol so a demangler exception cannot leave the
        # previous symbol's AST (or no binding at all) behind.
        name_ast = None
        try:
            name_ast = parse_mangled(raw_name)
            if name_ast is None:
                log.log_warn(
                    "Demangler failed to recognize {}".format(raw_name))
                demangler_failures += 1
        except NotImplementedError as e:
            log.log_warn("Demangler feature missing on {}: {}".format(
                raw_name, str(e)))
            demangler_failures += 1

        if name_ast:
            if name_ast.kind == 'func':
                short_name = str(name_ast.name)
            else:
                short_name = str(name_ast)
            symbol = Symbol(symbol.type, symbol.address,
                            short_name=short_name + suffix,
                            full_name=str(name_ast) + suffix,
                            raw_name=symbol.raw_name)
        else:
            symbol = Symbol(symbol.type, symbol.address,
                            short_name=symbol.raw_name,
                            full_name=None,
                            raw_name=symbol.raw_name)
        view.define_auto_symbol(symbol)

        if name_ast is None:
            continue

        if is_data and name_ast.kind == 'typeinfo_name':
            strings = view.get_strings(symbol.address, 1)
            if not strings:
                continue

            view.define_data_var(symbol.address,
                                 char_array_ty(strings[0].length))

        elif is_data and name_ast.kind == 'typeinfo':
            # Skip the two leading pointer-sized fields (vtable, name).
            reader.offset = symbol.address + arch.address_size * 2

            kind = None

            # heuristic: is this an abi::__si_class_type_info?
            # NOTE(review): this reads default_int_size bytes, not
            # address_size, before treating the value as an address --
            # confirm that is intended on 64-bit targets.
            base_or_flags = read(arch.default_int_size)
            base_symbol = view.get_symbol_at(base_or_flags)
            if base_symbol and base_symbol.raw_name.startswith('_ZTI'):
                kind = 'si_class'

            view.define_data_var(symbol.address, type_info_ty(kind))

        elif is_data and name_ast.kind == 'vtable':
            vtable_addr = symbol.address

            # Function pointers start after two pointer-sized header fields.
            reader.offset = vtable_addr + arch.address_size * 2
            while True:
                vfunc_count = 0
                check_next = True
                while True:
                    vfunc_ptr_symbol = view.get_symbol_at(reader.offset)
                    if (vfunc_ptr_symbol
                            and vfunc_ptr_symbol.raw_name.startswith('_Z')):
                        # any C++ symbol definitely terminates the vtable
                        check_next = False
                        break

                    # heuristic: existing function
                    vfunc_addr = read(arch.address_size)
                    if view.get_function_at(vfunc_addr):
                        vfunc_count += 1
                        continue

                    # explicitly reject null pointers; in position-independent
                    # code address zero can belong to the executable segment
                    if vfunc_addr == 0:
                        check_next = False
                        break

                    # heuristic: pointer to executable memory
                    vfunc_segment = view.get_segment_at(vfunc_addr)
                    if vfunc_segment and vfunc_segment.executable:
                        view.add_function(vfunc_addr)
                        vfunc_count += 1

                        log.log_info(
                            'Discovered function at {:#x} via {}'.format(
                                vfunc_addr,
                                symbol.full_name or symbol.short_name))
                        continue

                    # we've fallen off the end of the vtable
                    break

                view.define_data_var(vtable_addr, vtable_ty(vfunc_count))

                if check_next:
                    # heuristic: can another vtable follow this one? let's see
                    # if it has typeinfo, since that should be always true for
                    # when we have a virtual base
                    typeinfo_ptr = read(arch.address_size)
                    typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr)
                    if (typeinfo_ptr_symbol
                            and typeinfo_ptr_symbol.raw_name.startswith(
                                '_ZTI')):
                        vtable_addr = reader.offset - 2 * arch.address_size

                        # document it with a symbol
                        secondary_symbol_name = '{}_secondary_{:x}'.format(
                            symbol.short_name, vtable_addr - symbol.address)
                        secondary_symbol = Symbol(
                            SymbolType.DataSymbol, vtable_addr,
                            short_name=secondary_symbol_name)
                        view.define_auto_symbol(secondary_symbol)
                        continue

                break

        elif is_code and name_ast.kind == 'func':
            func = view.get_function_at(symbol.address)
            if func is None:
                # No function object at this address, so there is nothing
                # to apply a type to.
                continue
            demangled = ty_from_demangler_node(
                name_ast, arg_count_hint=len(func.function_type.parameters))
            if demangled is not None:
                _, ty = demangled
                func.apply_auto_discovered_type(ty)

    view.update_analysis()

    if demangler_failures:
        log.log_warn('{} demangler failures'.format(demangler_failures))