Exemplo n.º 1
0
    def run(self):
        """Apply type-library signatures to the view's imported functions.

        For every imported-function symbol of ``self.bv``, each type library
        of the view's platform is queried for a named object with the
        symbol's name. Every match is applied as the user type of the
        function located at the symbol's address. When all symbols have been
        processed, analysis is updated and waited for.

        Logs a warning and returns early when the platform has no type
        libraries loaded.
        """
        typelibs = self.bv.platform.type_libraries
        if not typelibs:
            log_warn(f"No type libraries loaded for: {self.bv.platform.name}")
            return

        for sym in self.bv.get_symbols_of_type(SymbolType.ImportedFunctionSymbol):
            # The function lookup only depends on the symbol, so resolve it
            # once here rather than once per (symbol, type library) pair.
            func = self.bv.get_function_at(sym.address)
            if func is None:
                continue

            for typelib in typelibs:
                sym_type = typelib.get_named_object(sym.name)
                if sym_type is None:
                    continue

                func.set_user_type(sym_type)
                log_debug("Updated sym %s at 0x%02X" % (sym.name, sym.address))

        self.bv.update_analysis_and_wait()
Exemplo n.º 2
0
def get_bininfo(bv):
    """Build a Markdown summary of a binary view.

    Args:
        bv: The binary view to describe. When ``None``, the file name is
            taken from ``sys.argv[1]`` or, failing that, from an open-file
            prompt; the process exits with status 1 if no file is chosen.

    Returns:
        A Markdown string holding the file name, start address, entry point,
        architecture name, and tables of the first ten functions and the
        first ten strings of the view.
    """
    if bv is None:
        if len(sys.argv) > 1:
            filename = sys.argv[1]
        else:
            filename = interaction.get_open_filename_input("Filename:")
            if filename is None:
                log.log_warn("No file specified")
                sys.exit(1)

        bv = BinaryViewType.get_view_of_file(filename)
        log.redirect_output_to_log()
        log.log_to_stdout(True)

    contents = [
        "## %s ##\n" % bv.file.filename,
        "- START: 0x%x\n\n" % bv.start,
        "- ENTRY: 0x%x\n\n" % bv.entry_point,
        "- ARCH: %s\n\n" % bv.arch.name,
        "### First 10 Functions ###\n",
        "| Start | Name   |\n",
        "|------:|:-------|\n",
    ]

    # Read each collection once instead of on every index access.
    functions = bv.functions
    for i in range(min(10, len(functions))):
        func = functions[i]
        contents.append("| 0x%x | %s |\n" % (func.start, func.symbol.full_name))

    contents.append("### First 10 Strings ###\n")
    contents.append("| Start | Length | String |\n")
    contents.append("|------:|-------:|:-------|\n")

    strings = bv.strings
    for i in range(min(10, len(strings))):
        start = strings[i].start
        length = strings[i].length
        string = bv.read(start, length)
        contents.append("| 0x%x |%d | %s |\n" % (start, length, string))

    return "".join(contents)
Exemplo n.º 3
0
    def import_selected(self):
        """Import everything currently selected in the two tables.

        Each distinct selected row of the types table is defined in the view
        as a user type. Each distinct selected row of the objects table is
        matched by name against the view's import-address / imported-data
        symbols; on a match, a data variable typed as a pointer to the
        object's type is defined at the symbol's address, otherwise a
        warning is logged. An analysis update is requested at the end.
        """
        type_rows = {index.row() for index in self.types_table.selectedIndexes()}
        for row in type_rows:
            type_name, type_obj = self.types_table.model().types[row]
            self.view.define_user_type(type_name, type_obj)

        object_rows = {
            index.row() for index in self.objects_table.selectedIndexes()
        }
        for row in object_rows:
            name, type_ = self.objects_table.model().types[row]

            # Take the first symbol with this name that is an import.
            symbol = None
            for candidate in self.view.get_symbols_by_name(str(name)):
                if candidate.type in (SymbolType.ImportAddressSymbol,
                                      SymbolType.ImportedDataSymbol):
                    symbol = candidate
                    break

            if symbol is None:
                log.log_warn(f"Could not find symbol `{name}` in the binary!")
                continue

            self.view.define_user_data_var(symbol.address,
                                           Type.pointer(self.view.arch, type_))

        self.view.update_analysis()
def find_ecall_table_heuristic(bv):
    """Scan read-only data for candidate ECALL tables.

    The first of the ``.rdata`` / ``.rodata`` sections present in the view
    is read in full. Every pointer-aligned (relative to the section start)
    run of pointer-size zero bytes, searched from offset ``5 * ptr_size``,
    is treated as the third word of a five-word little-endian record
    ``nr, e0, 0, e1, 0``. A record is accepted when ``2 <= nr < 300``,
    ``e0`` and ``e1`` are non-zero and the third and fifth words are zero.

    Args:
        bv: Binary view providing ``arch.address_size``, ``sections`` and
            ``read``.

    Returns:
        List of addresses (section start + record offset) of the accepted
        records. Empty when no section is found or nothing matches.
        Unexpected errors are logged as a warning and the results collected
        so far are returned.
    """
    ptr_s = bv.arch.address_size
    ptr = 'I' if ptr_s == 4 else 'Q'
    record_fmt = f"<{ptr * 5}"
    zero_word = b"\0" * ptr_s

    found = []
    try:
        sections = bv.sections
        sectionname = next(
            (s for s in ('.rdata', '.rodata') if s in sections), None)
        if sectionname is None:
            log_warn("no .rdata or .rodata section found")
            return found

        start = sections[sectionname].start
        length = sections[sectionname].end - start
        data = bv.read(start, length)

        i = 5 * ptr_s
        while i < len(data):
            # find() returns -1 at the end of the scan instead of raising,
            # so a normal run no longer ends in a logged exception.
            nu = data.find(zero_word, i)
            if nu < 0:
                break
            # A full record must fit; later hits are even closer to the end.
            if nu + 3 * ptr_s > len(data):
                break
            i = nu + 1
            if (nu % ptr_s) == 0:
                nr, *ec = unpack(record_fmt,
                                 data[nu - 2 * ptr_s:nu + 3 * ptr_s])
                if (2 <= nr < 300 and ec[1] == ec[3] == 0
                        and ec[0] != 0 != ec[2]):
                    found.append(start + nu - 2 * ptr_s)
    except Exception as e:
        # Best effort: report the problem and return what was found so far.
        log_warn(f"exception thrown: {e}")
    return found
Exemplo n.º 5
0
def get_bininfo(bv):
    """Return a Markdown report describing a binary view.

    When ``bv`` is ``None`` the file to open comes from ``sys.argv[1]`` or,
    if absent, from an open-file prompt (exiting with status 1 when nothing
    is chosen). The report lists the base name of the file, the start
    address, the entry point, the architecture name, and up to ten
    functions and ten strings.
    """
    if bv is None:
        if len(sys.argv) > 1:
            filename = sys.argv[1]
        else:
            filename = interaction.get_open_filename_input("Filename:")
            if filename is None:
                log.log_warn("No file specified")
                sys.exit(1)

        bv = BinaryViewType.get_view_of_file(filename)
        log.log_to_stdout(True)

    report = [
        "## %s ##\n" % os.path.basename(bv.file.filename),
        "- START: 0x%x\n\n" % bv.start,
        "- ENTRY: 0x%x\n\n" % bv.entry_point,
        "- ARCH: %s\n\n" % bv.arch.name,
        "### First 10 Functions ###\n",
        "| Start | Name   |\n",
        "|------:|:-------|\n",
    ]

    function_list = bv.functions
    report.extend(
        "| 0x%x | %s |\n" % (function_list[idx].start,
                             function_list[idx].symbol.full_name)
        for idx in range(min(10, len(function_list))))

    report += [
        "### First 10 Strings ###\n",
        "| Start | Length | String |\n",
        "|------:|-------:|:-------|\n",
    ]

    string_list = bv.strings
    for idx in range(min(10, len(string_list))):
        ref = string_list[idx]
        offset, size = ref.start, ref.length
        report.append("| 0x%x |%d | %s |\n" %
                      (offset, size, bv.read(offset, size)))

    return "".join(report)
Exemplo n.º 6
0
def get_matching_method(bv):
    """Resolve the effective matching method for a view.

    The configured ``depanalyzer.matching_method`` setting is read first.
    Address matching on a relocatable view, or ordinal matching on a view
    for which ``supports_ordinals`` is false, is rejected: a warning is
    logged, the setting is rewritten to "auto" for this view, and
    resolution continues as auto. Auto resolves to ordinal matching when
    the view supports ordinals and to name matching otherwise.

    Returns:
        The resolved ``MatchingMethod``.
    """

    def _reset_to_auto(reason):
        # Warn, persist the "auto" choice for this view, and hand it back.
        log_warn(reason)
        auto = MatchingMethod.Auto
        settings.set_string('depanalyzer.matching_method',
                            auto.value,
                            view=bv,
                            scope=SettingsScope.SettingsContextScope)
        return auto

    method = MatchingMethod(settings.get_string('depanalyzer.matching_method'))

    if method == MatchingMethod.Address and bv.relocatable:
        method = _reset_to_auto(
            'Attempted address-based matching on relocatable file: resetting to auto'
        )
    elif method == MatchingMethod.Ordinal and not supports_ordinals(bv):
        method = _reset_to_auto(
            'Attempted ordinal-based matching on non-supported file type: resetting to auto'
        )

    if method == MatchingMethod.Auto:
        return (MatchingMethod.Ordinal
                if supports_ordinals(bv) else MatchingMethod.Name)

    return method
Exemplo n.º 7
0
def node_token(node):
    """Return a text token for a graph node.

    Args:
        node: A ``MediumLevelILInstruction`` (or subclass instance), an
            ``SSAVariable``, or any other object.

    Returns:
        For an MLIL instruction, its ``str()`` form, decoded from UTF-8 when
        ``str()`` yields bytes (Python 2). For an SSA variable,
        ``"<name>#<version>"``. For anything else, ``str(node)`` after
        logging a warning.
    """
    if isinstance(node, MediumLevelILInstruction):
        text = str(node)
        # Python 2 str() gives bytes; Python 3 str has no .decode().
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text
    if isinstance(node, SSAVariable):
        return "{}#{}".format(node.var.name, node.version)

    log.log_warn("No way to stringify node of type %s" % type(node))
    return str(node)
Exemplo n.º 8
0
def resolve_imports_for_library(bv, lib):
    """Name and type the IAT entries that ``bv`` imports by ordinal from ``lib``.

    The exporting library's view is looked up in ``peutils.files`` by the
    lower-cased library name. For each import, the export with the same
    ordinal supplies the name; the IAT entry gets an ``<name>@IAT`` import
    address symbol in the library's namespace, and a data variable whose
    type is derived from the exported function's type tokens.

    Imports that cannot be resolved (no export with that ordinal, no
    function at the export, or an unparsable type string) are logged and
    skipped instead of aborting the whole pass.

    Args:
        bv: The importing binary view.
        lib: Library descriptor providing ``name`` and ``imports``.
    """
    source_bv = peutils.files[lib.name.lower()]
    exports = pe_parsing.get_exports(source_bv)
    namespace = lib.name.split(".")[0]

    for import_ in lib.imports:
        # Find the name. Reset per import so a failed lookup can never
        # reuse the export matched for a previous import.
        name = None
        export_symbol = None
        for export in exports:
            if export.ord == import_.ordinal:
                name = export.name
                export_symbol = export.symbol

        if not name:
            log_warn("Unable to find name for %r" % import_)
            continue

        # Redefine the IAT thunk symbol
        original_symbol = bv.get_symbol_at(import_.datavar_addr)

        # Delete any existing auto symbols
        if original_symbol:
            log_info("Renaming %s to %s:%s" %
                     (original_symbol.name, lib.name, name))
            bv.undefine_auto_symbol(original_symbol)
        else:
            log_info("Creating IAT symbol %s:%s @ %08x" %
                     (namespace, name, import_.datavar_addr))

        # Create the new symbol
        bv.define_auto_symbol(
            Symbol(
                SymbolType.ImportAddressSymbol,
                import_.datavar_addr,
                name + "@IAT",
                namespace=namespace,
            ))

        # Transplant type info
        export_func = source_bv.get_function_at(export_symbol.address)
        if export_func is None:
            log_warn("No function at export %s, skipping type" % name)
            continue

        type_tokens = [token.text for token in export_func.type_tokens]
        i = type_tokens.index(export_symbol.name)
        # Turn the function declaration into a const function pointer.
        type_tokens[i] = "(*const func_name)"

        type_string = "".join(type_tokens)
        log_info("Setting type for %s to %r" % (name, type_string))

        try:
            type_, _ = bv.parse_type_string(type_string)
        except Exception:
            # Without a parsed type there is nothing to apply here.
            log_error("Invalid type, skipping")
            continue

        bv.define_data_var(import_.datavar_addr, type_)

        # FIXME: Apply params to ImportedFunctionSymbols -- check xref on
        # datavar and filter by associated symbols
        # This doesn't actually seem to help and apparently I didn't have to do
        # this before? Maybe I just didn't handle jump
        """
Exemplo n.º 9
0
    def rename_functions(self):
        """Rename functions using the names stored in ``.gopclntab``.

        The table is located via the ``.gopclntab`` section or, when that
        section is missing, by searching the view for the byte pattern
        ``fb ff ff ff 00 00``. The pointer-sized value at offset 8 is read
        as the table size, after which (function address, entry offset)
        pointer pairs are walked. For every pair the 4-byte name offset
        stored at ``base + entry_offset + ptr_size`` is followed to a C
        string. A function is created at the address if none exists, and
        names longer than two characters are applied as a user symbol
        prefixed with ``GOFUNC_PREFIX``.

        Shows an alert and returns early when the table cannot be found.
        """
        renamed = 0
        log_info("renaming functions based on .gopclntab section")

        gopclntab = self.get_section_by_name(".gopclntab")

        if gopclntab is None:
            # Must be bytes: a str pattern is text, not raw data, on Python 3.
            pattern = b"\xfb\xff\xff\xff\x00\x00"
            base_addr = self.bv.find_next_data(0, pattern)

            if base_addr is None:
                log_alert("Failed to find section '.gopclntab'")
                return
        else:
            base_addr = gopclntab.start

        size_addr = base_addr + 8
        size = self.get_pointer_at(size_addr)

        # Floor division keeps the reported count an integer on Python 3.
        log_info("found .gopclntab section at 0x{:x} with {} entries".format(
            base_addr, size // (self.ptr_size * 2)))

        start_addr = size_addr + self.ptr_size
        end_addr = base_addr + (size * self.ptr_size * 2)

        for addr in range(start_addr, end_addr, (2 * self.ptr_size)):
            log_debug("analyzing at 0x{:x}".format(addr))
            func_addr = self.get_pointer_at(addr)
            entry_offset = self.get_pointer_at(addr + self.ptr_size)

            log_debug("func_addr 0x{:x}, entry offset 0x{:x}".format(
                func_addr, entry_offset))

            name_str_offset = self.get_pointer_at(
                base_addr + entry_offset + self.ptr_size, 4)
            name_addr = base_addr + name_str_offset

            name = self.read_cstring(name_addr)
            log_debug("found name '{}' for address 0x{:x}".format(
                name, func_addr))

            # Make sure a function exists at the address before naming it.
            if not self.bv.get_function_at(func_addr):
                self.bv.create_user_function(func_addr)

            if name and len(name) > 2:
                name = GOFUNC_PREFIX + santize_gofunc_name(name)
                sym = bn.types.Symbol('FunctionSymbol', func_addr, name, name)
                self.bv.define_user_symbol(sym)
                renamed += 1
            else:
                log_warn(
                    ("not using function name {!r} for function at 0x{:x}"
                     " in .gopclntab addr 0x{:x} name addr 0x{:x}").format(
                         name, func_addr, addr, name_addr))

        log_info("renamed {} go functions".format(renamed))
Exemplo n.º 10
0
def decode(data, addr):
    """Decode one instruction from ``data`` located at ``addr``.

    Returns:
        The decoded ``Instruction``, or ``None`` when decoding raises
        ``KeyError`` (logged as an unknown encoding) or
        ``coding.BufferTooShort`` (ignored silently).
    """
    decoder = Decoder(data)
    try:
        instr = Instruction(decoder)
        instr.decode(decoder, addr)
    except KeyError:
        message = 'At address {:05x}: unknown encoding {}'.format(
            addr, data.hex())
        log.log_warn(message)
    except coding.BufferTooShort:
        # Too few bytes for a complete instruction: report nothing.
        pass
    else:
        return instr
    return None
Exemplo n.º 11
0
def get_bin_view(bv):
    """Return a binary view, opening one when none is supplied.

    Args:
        bv: An existing binary view, or ``None``.

    Returns:
        ``bv`` itself when it is not ``None``. Otherwise a view opened from
        ``sys.argv[1]`` or, if that is absent, from an open-file prompt;
        stdout logging is enabled in that case. The process exits with
        status 1 when no file is chosen.
    """
    # Previously a supplied view fell through and the function returned None.
    if bv is not None:
        return bv

    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = interaction.get_open_filename_input("Filename:")
        if filename is None:
            log.log_warn("No file specified")
            sys.exit(1)

    bv = BinaryViewType.get_view_of_file(filename)
    log.log_to_stdout(True)
    return bv
Exemplo n.º 12
0
 def set_comment_at_xref(self, xref, comment):
     """Comment a cross-reference if it sits on a control-transfer instruction.

     The lifted IL operation at ``xref.address`` is looked up in
     ``xref.function``. An ``IndexError`` during that lookup is logged as a
     warning and nothing else happens. The comment is only set, and the
     (function, address) pair only recorded in ``self.cov_comments``, when
     the operation is a call, jump, jump-to, syscall or goto.
     """
     try:
         op = xref.function.get_lifted_il_at(xref.address).operation
     except IndexError:
         log.log_warn("ManticoreTrace: Could not lookup " +
                      hex(xref.address) + " address for function " +
                      str(xref.function))
         return

     llil_ops = enums.LowLevelILOperation
     transfer_ops = (
         llil_ops.LLIL_CALL,
         llil_ops.LLIL_JUMP,
         llil_ops.LLIL_JUMP_TO,
         llil_ops.LLIL_SYSCALL,
         llil_ops.LLIL_GOTO,
     )
     if op not in transfer_ops:
         return

     self.cov_comments.add((xref.function, xref.address))
     xref.function.set_comment_at(xref.address, comment)
Exemplo n.º 13
0
def r(kind, il, v=0):
    """
    Build the IL expression that reads the operand described by `kind`.

    kind: operand kind string (e.g. '@R0', '#data', 'code addr',
          'data addr', 'bit addr', 'DPTR', 'R3', 'A', 'C')
    il: IL builder the expressions are created on
    v: constant operand; a (byte, bit) pair for bit addresses

    Never called with MOVX, handles IRAM/SFRs addresses only. The MOVX
    instruction is distinct from the others.

    Never called on 16-bit immediates. No way to distinguish from 8-bit.

    An unknown kind is logged and then trips the trailing assert.
    """
    # Register-indirect read, offset by the IRAM base.
    if kind.startswith('@'):
        pointer_reg = il.reg(1, kind[1:])
        return il.load(1, il.add(6, pointer_reg, il.const(6, mem.IRAM)))

    if kind == '#data':
        return il.const(1, v)

    if kind == 'code addr':
        return il.const_pointer(6, v)

    if kind == 'data addr':
        if v in mem.regs:
            return il.reg(1, mem.regs[v])
        # TODO: overlay PSW as register? how to compute from flags?
        return il.load(1, il.const_pointer(6, v))

    if kind.endswith('bit addr'):  # cosmetic / prefix, optional
        byte, bit = v
        if byte == mem.PSW and bit in mem.flags:
            return il.flag(mem.flags[bit])
        if byte in mem.regs:
            reg_name = mem.regs[byte]
            if reg_name == 'A' and bit == 7:  # a47e
                # TODO TODO TODO how will setting this be tracked??
                return il.flag('s')
            # Strangely, based on LLIL pretty-printing, test_bit takes a
            # *mask* not a bit index.
            return il.test_bit(1, il.reg(1, reg_name),
                               il.const(0, 1 << bit))
        return il.test_bit(1, il.load(1, il.const_pointer(6, byte)),
                           il.const(0, 1 << bit))

    # Any other kind ending in 'addr' falls through to the checks below.
    if kind == 'DPTR':
        return il.reg(2, kind)
    if kind.startswith('R') or kind in ['A', 'B'] or kind in mem.regs:
        return il.reg(1, kind)
    if kind == 'C':
        return il.flag('c')

    # @A+DPTR and @A+PC can be special-cased in their instructions

    log_warn('r ' + repr((kind, il, v)))
    assert not "reachable"
Exemplo n.º 14
0
def w(kind, il, val, v=0):
    """
    Append the IL that writes `val` to the operand described by `kind`.

    kind: type of write
    il: LowLevelILFunction
    val: symbolic source
    v: constant source; a (byte, bit) pair for bit addresses

    Returns whatever il.append returns. An unknown kind is logged and then
    trips the trailing assert.
    """
    # Register-indirect store, offset by the IRAM base.
    if kind.startswith('@'):
        pointer_reg = il.reg(1, kind[1:])
        target = il.add(6, pointer_reg, il.const(6, mem.IRAM))
        return il.append(il.store(1, target, val))

    if kind == 'data addr':
        if v in mem.regs:
            # aa5b good test aa68
            return il.append(il.set_reg(1, mem.regs[v], val))
        # TODO: overlay PSW as register? how to compute from flags?
        return il.append(il.store(1, il.const_pointer(6, v), val))

    if kind.endswith('bit addr'):  # cosmetic / prefix, optional
        byte, bit = v
        if byte == mem.PSW and bit in mem.flags:
            return il.append(il.set_flag(mem.flags[bit], val))
        if byte in mem.regs:  # a465
            current = il.reg(1, mem.regs[byte])
            bit_mask = il.shift_left(1, il.const(1, 1), il.const(1, bit))
            # TODO: endianness, also need to clear bit not just set it...
            updated = il.or_expr(1, current, bit_mask)
            return il.append(il.set_reg(1, mem.regs[byte], updated))
        # TODO sketchy bit-write endianness
        target = il.const_pointer(6, byte)  # should be properly mapped by ana
        bit_mask = il.shift_left(1, il.const(1, 1), il.const(1, bit))
        # <- also only sets, never clears :|
        merged = il.or_expr(1, il.load(1, target), bit_mask)
        return il.append(il.store(1, target, merged))

    # Any other kind ending in 'addr' falls through to the checks below.
    if kind.startswith('R') or kind in ['A', 'B']:
        return il.append(il.set_reg(1, kind, val))
    if kind == 'DPTR':
        return il.append(il.set_reg(2, kind, val))
    if kind == 'C':
        return il.append(il.set_flag('c', val))

    log_warn('w ' + repr((kind, il, val, v)))
    assert not "reachable"
Exemplo n.º 15
0
def safe_psx_set_type(view, f, calladdr, callnr):
    """Name and type function ``f`` after the PSX call it implements.

    ``psx_get_type(calladdr, callnr)`` is expected to return a
    ``(name, type_string)`` pair. The function is renamed to
    ``PSX_<name>``; if the rename does not stick, numbered aliases
    ``PSX_<name>_copy_<i>`` are tried in turn. The parsed type string is
    then applied as the function's user type.

    A failed lookup is logged as an error and an empty lookup result as a
    warning; in both cases ``f`` is left untouched.

    Args:
        view: Binary view used to parse the type string.
        f: Function to rename and retype.
        calladdr: Call table address, or a falsy value for a syscall.
        callnr: Call number within the table.
    """
    try:
        call_type = psx_get_type(calladdr, callnr)
    except Exception:
        if calladdr:
            log_error("failed to lookup %s call %s" %
                      (format(calladdr, '#5x'), format(callnr, '#05x')))
        else:
            log_error("failed to lookup syscall %s" % callnr)
        # Nothing to apply; the lookup result would otherwise be unbound.
        return

    if not call_type:
        if calladdr:
            log_warn("Unknown %s call: %s" %
                     (format(calladdr, '#5x'), format(callnr, '#05x')))
        else:
            log_warn("Unknown syscall %s" % callnr)
        return

    base_name = "PSX_" + call_type[0]
    new_name = base_name
    f.name = new_name
    i = 0
    # TODO: Avoid touching user defined functions
    while f.name != new_name:
        log_warn(
            "Unable to change name to %r, duplicate? Trying numbered alias"
            % new_name)
        new_name = base_name + "_copy_" + str(i)
        # Advance the counter so each retry uses a fresh alias.
        i += 1
        f.name = new_name

    # Apply the type whether or not an alias was needed.
    f.set_user_type(view.parse_type_string(call_type[1])[0])
Exemplo n.º 16
0
def main(args):
    """Build a type library from a C source/header file and write it out.

    Args:
        args: Parsed command-line arguments. Reads ``input_file``,
            ``output``, ``platform``, ``name`` and the optional
            ``definitions`` (comma-separated macro names, each defined
            as 1), ``alt_names`` (comma-separated) and ``guid``.

    Returns:
        0 on success; 1 when the input file or the output directory does
        not exist, or when the platform name is not known.
    """
    log_to_stdout(LogLevel.InfoLog)

    if not os.path.exists(args.input_file):
        log_warn(f"input file: {args.input_file} does not exist")
        return 1

    dir_path = os.path.dirname(os.path.realpath(args.output))
    if not os.path.exists(dir_path):
        log_warn(f"Output path directory {dir_path} does not exist")
        return 1

    try:
        platform: Platform = Platform[args.platform]
    except KeyError:
        log_warn(f"'{args.platform}' is not supported binja platform")
        return 1

    with open(args.input_file) as fd:
        type_data = fd.read()

    if args.definitions:
        # Prepend one "#define NAME 1" line per requested definition.
        prepend_str = "".join(f"#define {definition} 1\n"
                              for definition in args.definitions.split(","))
        type_data = prepend_str + type_data

    # Includes are resolved relative to the input file's directory.
    types_path = [os.path.dirname(os.path.realpath(args.input_file))]

    type_res = platform.parse_types_from_source(type_data,
                                                filename=args.input_file,
                                                include_dirs=types_path)

    cur_typelib: TypeLibrary = TypeLibrary.new(
        Architecture[platform.arch.name], args.name)

    for name, type_obj in type_res.functions.items():
        cur_typelib.add_named_object(name, type_obj)

    for name, type_obj in type_res.types.items():
        cur_typelib.add_named_type(name, type_obj)

    cur_typelib.add_platform(platform)

    if args.alt_names:
        for name in args.alt_names.split(","):
            cur_typelib.add_alternate_name(name)

    if args.guid:
        cur_typelib.guid = args.guid

    cur_typelib.finalize()

    # Report success only after the write has been attempted.
    cur_typelib.write_to_file(args.output)
    log_info(f"Wrote type library to {args.output}")

    return 0
Exemplo n.º 17
0
 def __setitem__(self, k, v):
     """Store ``v`` under ``k``, warning first if ``k`` is blacklisted.

     The warning is only emitted while ``self.blacklist_enabled`` is true
     and ``k`` is in the blacklist; the assignment itself always happens.
     """
     is_blacklisted = self.blacklist_enabled and k in self.__blacklist
     if is_blacklisted:
         message = 'Setting variable "{}" will have no affect as it is automatically controlled by the ScriptingProvider.'.format(
             k)
         log.log_warn(message)
     super(BlacklistedDict, self).__setitem__(k, v)
def fix_printfs(view: BinaryView):
    """Adjust the call type of every ``printf`` call from its format string.

    Symbols named ``_printf`` (or ``printf`` when there are none) are
    resolved to functions, and every code reference to them is inspected.
    If the first call parameter is a constant pointing at an ASCII string,
    the string is split on ``%`` and each specifier starting with ``d``,
    ``s`` or ``p`` is mapped to a 4-byte signed int, ``char*`` or ``void*``
    parameter. Other specifiers are logged and treated as ``void*``. The
    call site then gets a call type adjustment of
    ``int(char* fmt, arg1, arg2, ...)``.

    A first parameter that is a variable is reported as a potential format
    string bug. Call sites without IL, without parameters, or with any
    other kind of first parameter are skipped.

    Args:
        view: The binary view to analyse.
    """
    printf = (view.get_symbols_by_name('_printf')
              or view.get_symbols_by_name('printf'))
    if not printf:
        return

    for sym in printf:
        function = view.get_function_at(sym.address)
        if not function:
            continue

        for xref in view.get_code_refs(function.start):
            caller = xref.function

            # The LLIL lookup may yield nothing for this address.
            call_llil = caller.get_low_level_il_at(xref.address)
            call_mlil = call_llil.mlil if call_llil is not None else None
            if call_mlil is None:
                continue

            # Not every referencing instruction is a call with parameters.
            call_params = getattr(call_mlil, 'params', None)
            if not call_params:
                continue

            fmt_operand = call_params[0]
            if fmt_operand.operation == MediumLevelILOperation.MLIL_VAR:
                log.log_warn(
                    f"Potential format string bug: {fmt_operand.address:x}")
                continue

            if fmt_operand.operation not in (
                    MediumLevelILOperation.MLIL_CONST_PTR,
                    MediumLevelILOperation.MLIL_CONST):
                continue

            fmt = view.get_ascii_string_at(fmt_operand.constant, 2)
            if fmt is None:
                continue

            param_types = []
            # Text before the first '%' holds no specifier.
            for specifier in fmt.value.split('%')[1:]:
                if not specifier:
                    continue

                if specifier.startswith('d'):
                    param_types.append(Type.int(4, sign=True))
                elif specifier.startswith('s'):
                    param_types.append(Type.pointer(view.arch, Type.char()))
                elif specifier.startswith('p'):
                    param_types.append(Type.pointer(view.arch, Type.void()))
                else:
                    log.log_warn(
                        f'Unknown format specifier: {specifier}; skipping')
                    param_types.append(Type.pointer(view.arch, Type.void()))

            params = [
                FunctionParameter(Type.pointer(view.arch, Type.char()), 'fmt')
            ]
            for param_idx, param in enumerate(param_types, start=1):
                params.append(FunctionParameter(param, f'arg{param_idx}'))

            caller.set_call_type_adjustment(xref.address,
                                            Type.function(Type.int(4), params))
Exemplo n.º 19
0
    def init(self):
        """Parse the PS-X EXE header and set up the view's memory layout.

        Reads the first 0x800 bytes of the parent view and unpacks the
        initial PC, text start, text size and initial SP as little-endian
        32-bit values from header offsets 0x10, 0x18, 0x1c and 0x30. It then
        adds segments and sections for the scratchpad, IO ports, expansion
        regions, BIOS ROM, the executable itself, and the RAM before and
        after it. Finally it defines the ``_start`` and ``_stack`` symbols
        plus one data symbol per entry of ``psx_memmap_constants``, adds the
        entry point, and registers a completion event that runs
        ``find_bios_calls_run``.

        Returns:
            True on success. False if any step raised; the traceback is
            logged as an error.
        """
        try:
            hdr = self.parent_view.read(0, 0x800)
            # Slice bounds are half-open: hdr[a:b] covers bytes a..b-1.
            self.init_pc = struct.unpack("<L", hdr[0x10:0x14])[0]
            self.text_start = struct.unpack("<L", hdr[0x18:0x1c])[0]
            self.text_size = struct.unpack("<L", hdr[0x1c:0x20])[0]
            self.init_sp = struct.unpack("<L", hdr[0x30:0x34])[0]
            self.info = hdr[0x4c:self.HDR_SIZE]
            log_debug("/info size: %s" % format(len(self.info), '#010x'))

            log_info("PC:   %s" % format(self.init_pc, '#010x'))
            log_info("TEXT: %s" % format(self.text_start, '#010x'))
            log_info("SIZE: %s" % format(self.text_size, '#010x'))
            log_info("SP:   %s" % format(self.init_sp, '#010x'))
            log_info("info: %s" % self.info)

            # PSX came with 2M, but the BIOS supports 8
            # for dev machines. Supposed be multiple if
            # 2048, but that is not required for the
            # method used to sideload homebrew. (FIXME: Is
            # it enforced by the BIOS? Can't remember...)

            # FIXME: this is just a sanity check. Make it
            # check if text_start + text_size would run
            # outside address space.
            if self.text_size > 0x800000:
                # Format the value into the message; the log call takes
                # the message text, not printf-style arguments.
                log_warn("size > 8M: %d" % self.text_size)
            if self.text_size % 2048 != 0:
                log_warn("size not divisable by 2k")

            text = self.parent_view.read(self.HDR_SIZE, self.text_size)
            log_info("Actual size of aquired TEXT: %s" %
                     format(len(text), '#010x'))
            if len(text) != self.text_size:
                log_error(
                    "Size of aquired data is not same as header-prescribed TEXT size. Truncated file?"
                )

            # add_auto_segment(start, length,
            #                  data_offset, data_length, flags)
            rwx = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable
                   | SegmentFlag.SegmentExecutable)
            r_x = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable)
            r_xc = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable
                    | SegmentFlag.SegmentContainsCode)

            # Scratchpad RAM 1k
            self.add_auto_segment(0x9F800000, 1024, 0, 0, rwx)
            self.add_auto_section("Scratchpad", 0x9F800000, 1024)

            # FIXME: I seem to remember most IO access as
            # in the KSEG1 region. This wont cover that.

            # IO Ports 8k
            self.add_auto_segment(0x9F801000, 8 * 1024, 0, 0, rwx)
            self.add_auto_section("IO Ports", 0x9F801000, 8 * 1024)
            # Expansion 2 (IO Ports) 8k
            self.add_auto_segment(0x9F802000, 8 * 1024, 0, 0, rwx)
            self.add_auto_section("Expansion region 2 (IO Ports)", 0x9F802000,
                                  8 * 1024)
            # Expansion 3 2M
            self.add_auto_segment(0x9FA00000, 0x200000, 0, 0, rwx)
            self.add_auto_section("Expansion region 3", 0x9FA00000, 0x200000)
            # BIOS ROM 512k
            self.add_auto_segment(0x9FC00000, 512 * 1024, 0, 0, r_x)
            self.add_auto_section("BIOS", 0x9FC00000, 512 * 1024)

            # RAM (cached address space) 2M
            # Dividing this into pre-EXE and post-EXE
            # space since it's the only way I've found to
            # not have the exe zeroed out

            # FIXME: The areas definitions overlap by one
            # byte: Getting one missing byte in the
            # viewer if I don't. Is BN using the wierd
            # python semantics of ranges?
            ramsize = 0x200000
            prestart = 0x80000000
            presize = self.text_start - prestart
            if presize > 0:
                log_info("pre-RAM: %s - %s, size: %s" %
                         (format(prestart, '#010x'),
                          format(prestart + presize, '#010x'),
                          format(presize, '#010x')))
                self.add_auto_segment(prestart, presize, 0, 0, rwx)
                self.add_auto_section("RAM (pre EXE)", prestart, presize)

            # Area for the actual executable. Will overlap
            # with RAM if it's a correct PSX-EXE
            self.add_auto_segment(self.text_start, self.text_size,
                                  self.HDR_SIZE, self.text_size, r_xc)
            self.add_auto_section("PS-X EXE", self.text_start, self.text_size)

            # RAM (cached address space) 2M
            poststart = self.text_start + self.text_size
            postsize = (prestart + ramsize) - poststart
            if postsize > 0:
                log_info("post-RAM: %s - %s, size: %s" %
                         (format(poststart, '#010x'),
                          format(poststart + postsize, '#010x'),
                          format(postsize, '#010x')))
                self.add_auto_segment(poststart, postsize, 0, 0, rwx)
                self.add_auto_section("RAM (post EXE)", poststart, postsize)

            self.define_auto_symbol(
                Symbol(SymbolType.FunctionSymbol, self.init_pc, "_start"))
            self.define_auto_symbol(
                Symbol(SymbolType.FunctionSymbol, self.init_sp,
                       "_stack"))  # default: 0x801ffff0
            self.add_entry_point(self.init_pc)

            # The iomapped control of devices in a PSX is
            # high up that every time those values are
            # used you can be pretty sure we are talking
            # about the control registers and not some
            # random loop counter.
            #
            # FIXME: With the exception of a0, b0 and
            # c0-calls, those should be lifted in some
            # other manner but are useful enough that they
            # are hardcoded right now.
            #
            # items() works on Python 2 and 3; iteritems() is Python 2 only.
            for addr, symbol in psx_memmap_constants.items():
                self.define_auto_symbol(
                    Symbol(SymbolType.DataSymbol, addr, symbol))

            # To run the BIOS call finder manually instead, remove the
            # completion hook below and register it as a plugin command:
            #
            # PluginCommand.register('Find PSX BIOS calls',
            #                        'Find PSX BIOS calls and rename them.',
            #                        find_bios_calls_run)

            # Run PSX BIOS function finder after analysis
            self.add_analysis_completion_event(
                lambda _: find_bios_calls_run(self))

            return True
        except Exception:
            log_error(traceback.format_exc())
            return False
Exemplo n.º 20
0
def analyze_cxx_abi(view, start=None, length=None, task=None):
    """Demangle C++ symbols in *view* and annotate the objects they name.

    Every symbol returned by ``view.get_symbols(start, length)`` whose raw
    name starts with ``_Z`` or ``__Z`` is parsed with ``parse_mangled``.
    The symbol is then redefined as an auto symbol with the demangled
    short/full names (or the raw name if demangling failed), and, depending
    on the kind of the demangled AST:

    * ``typeinfo_name`` data: a char array data variable is defined;
    * ``typeinfo`` data: a type_info structure data variable is defined;
    * ``vtable`` data: the table is walked, functions are created for
      pointers into executable segments, and a vtable structure is defined
      (secondary vtables that follow get their own symbol);
    * ``func`` code: the function type derived from the AST is applied.

    :param view: binary view to analyze.
    :param start: forwarded to ``view.get_symbols``.
    :param length: forwarded to ``view.get_symbols``.
    :param task: optional progress object; ``set_total`` is called once and
        processing stops as soon as ``advance()`` returns a falsy value.
    """
    platform = view.platform
    arch = platform.arch

    void_p_ty = Type.pointer(arch, Type.void())
    char_p_ty = Type.pointer(arch, Type.int(1))
    signed_int_ty = Type.int(arch.default_int_size, True)

    base_type_info_ty = Type.named_type(
        NamedTypeReference(name='std::type_info'))
    base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty)

    def char_array_ty(char_count):
        # Array of `char_count` one-byte integers.
        return Type.array(Type.int(1), char_count)

    def type_info_ty(kind=None):
        # Structure with vtable and name pointers; 'si_class' adds a pointer
        # to the base class type_info.
        type_info_struct = Structure()
        type_info_struct.append(void_p_ty, 'vtable')
        type_info_struct.append(char_p_ty, 'name')
        if kind == 'si_class':
            type_info_struct.append(base_type_info_ptr_ty, 'base_type')
        return Type.structure_type(type_info_struct)

    def vtable_ty(vfunc_count):
        # Structure with the offset-to-top, the typeinfo pointer and an
        # array of `vfunc_count` function pointers.
        vtable_struct = Structure()
        vtable_struct.append(signed_int_ty, 'top_offset')
        vtable_struct.append(base_type_info_ptr_ty, 'typeinfo')
        vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions')
        return Type.structure_type(vtable_struct)

    if platform.name.startswith("windows-"):
        long_size = arch.default_int_size
    else:
        long_size = arch.address_size

    if arch.name.startswith('x86'):
        char_signed = True
    else:
        char_signed = False  # not always true

    short_size = 2  # not always true
    long_long_size = 8  # not always true

    ty_for_cxx_builtin = {
        'void': Type.void(),
        'wchar_t': Type.int(2, sign=char_signed, altname='wchar_t'),
        'bool': Type.bool(),
        'char': Type.int(1, sign=char_signed),
        'signed char': Type.int(1, sign=True),
        'unsigned char': Type.int(1, sign=False),
        'short': Type.int(short_size, sign=True),
        'unsigned short': Type.int(short_size, sign=False),
        'int': Type.int(arch.default_int_size, sign=True),
        'unsigned int': Type.int(arch.default_int_size, sign=False),
        'long': Type.int(long_size, sign=True),
        'unsigned long': Type.int(long_size, sign=False),
        'long long': Type.int(long_long_size, sign=True),
        'unsigned long long': Type.int(long_long_size, sign=False),
        '__int128': Type.int(16, sign=True),
        'unsigned __int128': Type.int(16, sign=False),
        'float': Type.float(4),
        'double': Type.float(8),
        '__float80': Type.float(10),
        '__float128': Type.float(16),
        'char32_t': Type.int(4, sign=char_signed, altname='char32_t'),
        'char16_t': Type.int(2, sign=char_signed, altname='char16_t'),
    }

    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        """Convert a demangler AST node to a type.

        Returns None when the node (or any node it contains) cannot be
        converted. For a 'func' node called with `arg_count_hint` set, the
        result is a `(this_arg, type)` tuple instead of a bare type, where
        `this_arg` tells whether an implicit `this` parameter was inferred.
        """
        if node.kind == 'builtin':
            if node.value in ty_for_cxx_builtin:
                return ty_for_cxx_builtin[node.value]
            else:
                return None
        elif node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)
        elif node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            elif node.kind == 'lvalue':
                return Type.pointer(
                    arch,
                    pointee_ty,
                    is_const,
                    is_volatile,
                    ref_type=ReferenceType.ReferenceReferenceType)
            elif node.kind == 'rvalue':
                return Type.pointer(arch,
                                    pointee_ty,
                                    is_const,
                                    is_volatile,
                                    ref_type=ReferenceType.RValueReferenceType)
        elif node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)
        elif node.kind == 'func':
            is_ctor_dtor = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if qual_name[-1].kind in ['ctor', 'dtor']:
                    is_ctor_dtor = True

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                # The mangled name carries no return type; use a
                # zero-confidence int as a placeholder.
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            # Guard against an empty argument list before indexing into it.
            var_arg = False
            if (arg_nodes and arg_nodes[-1].kind == 'builtin'
                    and arg_nodes[-1].value == '...'):
                arg_nodes.pop()
                var_arg = True
            elif (arg_nodes and arg_nodes[0].kind == 'builtin'
                  and arg_nodes[0].value == 'void'):
                arg_nodes = arg_nodes[1:]

            this_arg = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                # A member function is assumed when this is a ctor/dtor or
                # when the hint shows exactly one more parameter than the
                # mangled name declares.
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty
            else:
                return ty
        else:
            log.log_warn("Cannot convert demangled AST {} to a type".format(
                repr(node)))
        return None

    reader = BinaryReader(view)

    def read(size):
        # Read a 4- or 8-byte integer at the reader's current offset.
        if size == 4:
            return reader.read32()
        elif size == 8:
            return reader.read64()
        else:
            assert False

    symbols = view.get_symbols(start, length)
    if task:
        task.set_total(len(symbols))

    mangled_re = re.compile('_?_Z')

    demangler_failures = 0
    for symbol in symbols:
        if task and not task.advance():
            break

        if not mangled_re.match(symbol.raw_name):
            continue

        is_data = (symbol.type == SymbolType.DataSymbol)
        is_code = (symbol.type in [
            SymbolType.FunctionSymbol, SymbolType.ImportedFunctionSymbol
        ])

        # Split off everything from the first '@' as a suffix, which is
        # re-attached to the demangled names below.
        raw_name, at_sign, after_at = symbol.raw_name.partition('@')
        suffix = at_sign + after_at

        # Reset for every symbol so that a demangler exception can neither
        # leave the name unbound nor reuse the previous symbol's AST.
        name_ast = None
        try:
            name_ast = parse_mangled(raw_name)
            if name_ast is None:
                log.log_warn(
                    "Demangler failed to recognize {}".format(raw_name))
                demangler_failures += 1
        except NotImplementedError as e:
            log.log_warn("Demangler feature missing on {}: {}".format(
                raw_name, str(e)))
            demangler_failures += 1

        if name_ast:
            if name_ast.kind == 'func':
                short_name = str(name_ast.name)
            else:
                short_name = str(name_ast)
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=short_name + suffix,
                            full_name=str(name_ast) + suffix,
                            raw_name=symbol.raw_name)
        else:
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=symbol.raw_name,
                            full_name=None,
                            raw_name=symbol.raw_name)
        view.define_auto_symbol(symbol)

        if name_ast is None:
            continue

        if is_data and name_ast.kind == 'typeinfo_name':
            strings = view.get_strings(symbol.address, 1)
            if not strings:
                continue

            view.define_data_var(symbol.address,
                                 char_array_ty(strings[0].length))

        elif is_data and name_ast.kind == 'typeinfo':
            # Skip the first two pointer-sized fields (declared as 'vtable'
            # and 'name' in type_info_ty).
            reader.offset = symbol.address + arch.address_size * 2

            kind = None

            # heuristic: is this an abi::__si_class_type_info?
            # NOTE(review): this reads default_int_size bytes, although a
            # base-class pointer would be address_size wide -- confirm this
            # is intended for targets where the two sizes differ.
            base_or_flags = read(arch.default_int_size)
            base_symbol = view.get_symbol_at(base_or_flags)
            if base_symbol and base_symbol.raw_name.startswith('_ZTI'):
                kind = 'si_class'

            view.define_data_var(symbol.address, type_info_ty(kind))

        elif is_data and name_ast.kind == 'vtable':
            vtable_addr = symbol.address

            # Skip the first two pointer-sized fields (declared as
            # 'top_offset' and 'typeinfo' in vtable_ty).
            reader.offset = vtable_addr + arch.address_size * 2
            while True:
                vfunc_count = 0
                check_next = True
                while True:
                    vfunc_ptr_symbol = view.get_symbol_at(reader.offset)
                    if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith(
                            '_Z'):
                        # any C++ symbol definitely terminates the vtable
                        check_next = False
                        break

                    # heuristic: existing function
                    vfunc_addr = read(arch.address_size)
                    if view.get_function_at(vfunc_addr):
                        vfunc_count += 1
                        continue

                    # explicitly reject null pointers; in position-independent code
                    # address zero can belong to the executable segment
                    if vfunc_addr == 0:
                        check_next = False
                        break

                    # heuristic: pointer to executable memory
                    vfunc_segment = view.get_segment_at(vfunc_addr)
                    if vfunc_segment and vfunc_segment.executable:
                        view.add_function(vfunc_addr)
                        vfunc_count += 1

                        log.log_info(
                            'Discovered function at {:#x} via {}'.format(
                                vfunc_addr, symbol.full_name
                                or symbol.short_name))
                        continue

                    # we've fallen off the end of the vtable
                    break

                view.define_data_var(vtable_addr, vtable_ty(vfunc_count))

                if check_next:
                    # heuristic: can another vtable follow this one? let's see if it has typeinfo,
                    # since that should be always true for when we have a virtual base
                    typeinfo_ptr = read(arch.address_size)
                    typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr)
                    if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith(
                            '_ZTI'):
                        vtable_addr = reader.offset - 2 * arch.address_size

                        # document it with a symbol
                        secondary_symbol_name = '{}_secondary_{:x}'.format(
                            symbol.short_name, vtable_addr - symbol.address)
                        secondary_symbol = Symbol(
                            SymbolType.DataSymbol,
                            vtable_addr,
                            short_name=secondary_symbol_name)
                        view.define_auto_symbol(secondary_symbol)
                        continue

                break

        elif is_code and name_ast.kind == 'func':
            func = view.get_function_at(symbol.address)
            if func is None:
                # No function object exists at this address, so there is
                # nothing to apply a type to.
                continue
            demangled = ty_from_demangler_node(
                name_ast, arg_count_hint=len(func.function_type.parameters))
            if demangled is not None:
                _this_arg, ty = demangled
                func.apply_auto_discovered_type(ty)

    view.update_analysis()

    if demangler_failures:
        log.log_warn('{} demangler failures'.format(demangler_failures))
Exemplo n.º 21
0
    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        """Convert a demangler AST node to a type.

        :param node: AST node; dispatch is on ``node.kind``.
        :param cv_qual: qualifier names carried down from an enclosing
            'cv_qual' node; only 'const' and 'volatile' are checked, and
            only when building pointer/reference types.
        :param arg_count_hint: for a 'func' node, the number of parameters
            the function is known to take; used to infer an implicit
            ``this`` parameter.
        :return: None when the node (or any node it contains) cannot be
            converted. For a 'func' node with ``arg_count_hint`` set, a
            ``(this_arg, type)`` tuple; otherwise the type itself.
        """
        if node.kind == 'builtin':
            if node.value in ty_for_cxx_builtin:
                return ty_for_cxx_builtin[node.value]
            else:
                return None
        elif node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)
        elif node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            elif node.kind == 'lvalue':
                return Type.pointer(
                    arch,
                    pointee_ty,
                    is_const,
                    is_volatile,
                    ref_type=ReferenceType.ReferenceReferenceType)
            elif node.kind == 'rvalue':
                return Type.pointer(arch,
                                    pointee_ty,
                                    is_const,
                                    is_volatile,
                                    ref_type=ReferenceType.RValueReferenceType)
        elif node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)
        elif node.kind == 'func':
            is_ctor_dtor = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if qual_name[-1].kind in ['ctor', 'dtor']:
                    is_ctor_dtor = True

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                # The mangled name carries no return type; use a
                # zero-confidence int as a placeholder.
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            # Guard against an empty argument list before indexing into it.
            var_arg = False
            if (arg_nodes and arg_nodes[-1].kind == 'builtin'
                    and arg_nodes[-1].value == '...'):
                arg_nodes.pop()
                var_arg = True
            elif (arg_nodes and arg_nodes[0].kind == 'builtin'
                  and arg_nodes[0].value == 'void'):
                arg_nodes = arg_nodes[1:]

            this_arg = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                # A member function is assumed when this is a ctor/dtor or
                # when the hint shows exactly one more parameter than the
                # mangled name declares.
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty
            else:
                return ty
        else:
            log.log_warn("Cannot convert demangled AST {} to a type".format(
                repr(node)))
        return None
def annotate_operations_ending_at_mlil_instruction(
    bv: BinaryView, instruction: MediumLevelILInstruction, function: Function
):
    """
    Annotate divisions and modulos that end at the specified MLIL instruction

    The SSA form of the instruction is executed through
    MLILInstructionExecutor with a BacktrackingState. The last potential
    input and the instruction's destination variable are then probed with a
    solver: if a modulus is found, a "modulo by N" comment is set at the
    instruction address; otherwise, if a divisor is found, a "divide by N"
    comment is set.

    :bv: Current binaryview
    :instruction: Instruction to examine
    :function: Current function
    :return: Always None
    :raises Exception: Any error other than NotImplementedError raised while
        executing the instruction is logged and re-raised
    """
    ssa_instruction = instruction.ssa_form

    # TODO: There is probably an easy way to know more instructions that can be skipped.

    # Only SSA variable assignments are considered.
    if ssa_instruction.operation != MediumLevelILOperation.MLIL_SET_VAR_SSA:
        log.log_debug("Deoptimizer: Skipping Instruction")
        return None

    # 15 found experimentally. There may be longer modulo optimizations.
    backtracking_state = BacktrackingState(bv, function, depth=15)
    start = MLILInstructionExecutor(bv, ssa_instruction)

    try:
        start.execute(backtracking_state)
    except NotImplementedError as e:
        # The exception may have been raised without arguments; fall back to
        # its type name instead of failing inside the handler.
        unsupported = e.args[0] if e.args else type(e).__name__
        log.log_debug(
            "Unsupported Instruction: {}. If this instruction is necessary to deoptimize your code, please report this to the github: https://github.com/jmprdi/binja-division-deoptimization".format(
                unsupported
            )
        )
        return None
    except Exception as e:
        log.log_warn(
            "Deoptimizer Error: {} Please report this to the github: https://github.com/jmprdi/binja-division-deoptimization".format(
                repr(e)
            )
        )
        # Re-raise the active exception unchanged.
        raise

    if len(backtracking_state.potential_inputs) == 0:
        log.log_debug("Deoptimizer: No potential inputs")
        return None

    input_bv = backtracking_state.potential_inputs[-1]
    output_bv = backtracking_state.variables[ssa_instruction.dest]

    def do_operation(dividend):
        # Pin the input to `dividend` and return the resulting output as an
        # int, or None if the solver does not report sat or the evaluated
        # output has no concrete integer value.
        s = Solver()
        s.set("timeout", 10)
        s.add(input_bv == dividend)
        r = s.check()
        if r != sat:
            return None
        m = s.model()
        solved = m.eval(output_bv)
        try:
            return solved.as_long()
        except AttributeError:
            return None

    # Search bound is 2 ** (bit width of the input).
    modulo = modulo_binary_search(do_operation, 2 ** input_bv.size())
    if modulo is not None:
        bv.set_comment_at(ssa_instruction.address, "modulo by {}".format(modulo))
        return None

    divisor = integer_division_binary_search(do_operation, 2 ** input_bv.size())
    if divisor is not None:
        bv.set_comment_at(ssa_instruction.address, "divide by {}".format(divisor))
        return None