# Exemplo n.º 1
# 0
    def __init__(self, binary_name: str, function_addr: int):
        """Try to remove OLLVM control-flow flattening from one function.

        :param binary_name: path of the binary to analyze
        :param function_addr: address of the obfuscated target function
        """
        # 1. Load the program (no shared libraries, we only care about this binary).
        self.p = angr.Project(binary_name, load_options={'auto_load_libs': False})

        # 2. Build the whole-program CFG, then flatten the target function's
        #    transition graph into an IDA-style supergraph.
        self.whole_cfg = self.p.analyses.CFG()
        self.func = self.whole_cfg.functions[function_addr]
        self.func.normalize()
        self.cfg = am_graph.to_supergraph(self.func.transition_graph)

        # 3. Node roles, discovered later:
        #    prologue_node        - the block with in-degree 0
        #    ret_node             - the block with out-degree 0
        #    main_dispatcher_node - successor of the prologue
        #    pre_dispatcher_node  - predecessor of the main dispatcher (other
        #                           than the prologue)
        self.prologue_node = None
        self.ret_node = None
        self.main_dispatcher_node = None
        self.pre_dispatcher_node = None

        # Predecessors of the pre-dispatcher are the "real" blocks; the rest
        # of the dispatcher machinery is junk; everything that is not junk
        # counts as useful.
        self.relevant_nodes = []
        self.nop_nodes = []
        self.useful_nodes = []
        self.useful_nodes_except_ret = []

        # Per-node metadata collected during analysis.
        self.nodes_info = {}
        # Recovered control-flow edges.
        self.flows = []

        # Initial state to use for a given block while recovering flow.
        # Normally unnecessary; needed under -O3 obfuscation.
        self.next_state = None

        # Engine configuration flags.
        self.use_unicorn = False
# Exemplo n.º 2
# 0
        'po': '\x8B',
        's': '\x88',
        'nop': '\x90',
        'jmp': '\xE9',
        'j': '\x0F'
    }

    filename = sys.argv[1]
    start = int(sys.argv[2], 16)  # target function address, given in hex

    # Load the binary without its shared libraries.
    project = angr.Project(filename, load_options={'auto_load_libs': False})
    cfg = project.analyses.CFGFast(
        normalize=True)  # do normalize to avoid overlapping blocks
    target_function = cfg.functions.get(start)
    # A super transition graph is a graph that looks like IDA Pro's CFG
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # Page-align the mapped base so `addr - base_addr` yields a file offset.
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    # get prologue_node and retn_node
    # prologue: the unique block with no predecessors;
    # retn: a block with no successors.
    prologue_node = None
    for node in supergraph.nodes():
        if supergraph.in_degree(node) == 0:
            prologue_node = node
        if supergraph.out_degree(node) == 0:
            retn_node = node
    # NOTE(review): retn_node is never initialized — if no zero-out-degree
    # block exists, later use raises NameError; consider `retn_node = None`.

    if prologue_node is None or prologue_node.addr != start:
        print("Something must be wrong...")
        sys.exit(-1)
# Exemplo n.º 3
# 0
def main():
    """Recover the real control flow of an OLLVM-flattened function and
    patch the binary on disk.

    Pipeline:
      1. Build a normalized CFG and an IDA-style supergraph of the target.
      2. Classify blocks: prologue, main dispatcher, pre-dispatcher,
         relevant (real) blocks, junk (nop) blocks.
      3. Symbolically execute every relevant block to find its true
         successor(s); blocks containing a conditional move (cmovxx /
         movxx / cset) are executed once per branch outcome.
      4. NOP out the dispatcher machinery and rewrite each relevant
         block's tail into direct (conditional) jumps, writing the
         result to `<file>_recovered`.
    """
    parser = argparse.ArgumentParser(description="deflat control flow script")
    parser.add_argument("-f", "--file", help="binary to analyze")
    parser.add_argument(
        "--addr", help="address of target function in hex format")
    args = parser.parse_args()

    if args.file is None or args.addr is None:
        parser.print_help()
        sys.exit(0)

    filename = args.file
    start = int(args.addr, 16)

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    # do normalize to avoid overlapping blocks, disable force_complete_scan to avoid possible "wrong" blocks
    cfg = project.analyses.CFGFast(normalize=True, force_complete_scan=False)
    target_function = cfg.functions.get(start)
    # A super transition graph is a graph that looks like IDA Pro's CFG
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # Page-align the mapped base so `addr - base_addr` yields a file offset.
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    # get prologue_node and retn_node
    prologue_node = None
    retn_node = None  # fix: was unbound when no return block was found
    for node in supergraph.nodes():
        if supergraph.in_degree(node) == 0:
            prologue_node = node
        if supergraph.out_degree(node) == 0 and len(node.out_branches) == 0:
            retn_node = node

    # Fail early with the friendly message instead of a NameError later.
    if prologue_node is None or prologue_node.addr != start or retn_node is None:
        print("Something must be wrong...")
        sys.exit(-1)

    # The prologue's single successor is the main dispatcher; its other
    # predecessor (besides the prologue) is the pre-dispatcher.
    main_dispatcher_node = list(supergraph.successors(prologue_node))[0]
    pre_dispatcher_node = None  # fix: was unbound when no predecessor matched
    for node in supergraph.predecessors(main_dispatcher_node):
        if node.addr != prologue_node.addr:
            pre_dispatcher_node = node
            break
    if pre_dispatcher_node is None:
        print("Something must be wrong...")
        sys.exit(-1)

    relevant_nodes, nop_nodes = get_relevant_nop_nodes(
        supergraph, pre_dispatcher_node, prologue_node, retn_node)
    print('*******************relevant blocks************************')
    print('prologue: %#x' % start)
    print('main_dispatcher: %#x' % main_dispatcher_node.addr)
    print('pre_dispatcher: %#x' % pre_dispatcher_node.addr)
    print('retn: %#x' % retn_node.addr)
    relevant_block_addrs = [node.addr for node in relevant_nodes]
    print('relevant_blocks:', [hex(addr) for addr in relevant_block_addrs])

    print('*******************symbolic execution*********************')
    # `relevants` aliases relevant_nodes; the prologue is executed too, but
    # the return block never is (nothing follows it).
    relevants = relevant_nodes
    relevants.append(prologue_node)
    relevants_without_retn = list(relevants)
    relevants.append(retn_node)
    relevant_block_addrs.extend([prologue_node.addr, retn_node.addr])

    flow = defaultdict(list)     # node -> list of true successor addresses
    patch_instrs = {}            # node -> first conditional-move instruction
    for relevant in relevants_without_retn:
        print('-------------------dse %#x---------------------' % relevant.addr)
        block = project.factory.block(relevant.addr, size=relevant.size)
        has_branches = False
        hook_addrs = set([])
        for ins in block.capstone.insns:
            if project.arch.name in ARCH_X86:
                if ins.insn.mnemonic.startswith('cmov'):
                    # only record the first one
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic.startswith('call'):
                    # calls are hooked (skipped) during symbolic execution
                    hook_addrs.add(ins.insn.address)
            elif project.arch.name in ARCH_ARM:
                # conditional mov (movne, moveq, ...) but not plain mov
                if ins.insn.mnemonic != 'mov' and ins.insn.mnemonic.startswith('mov'):
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic in {'bl', 'blx'}:
                    hook_addrs.add(ins.insn.address)
            elif project.arch.name in ARCH_ARM64:
                if ins.insn.mnemonic.startswith('cset'):
                    if relevant not in patch_instrs:
                        patch_instrs[relevant] = ins
                        has_branches = True
                elif ins.insn.mnemonic in {'bl', 'blr'}:
                    hook_addrs.add(ins.insn.address)

        if has_branches:
            # force the branch condition to 1 then 0 to discover both targets
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                                     relevant.addr, hook_addrs, claripy.BVV(1, 1), True)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                                     relevant.addr, hook_addrs, claripy.BVV(0, 1), True)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)
        else:
            tmp_addr = symbolic_execution(project, relevant_block_addrs,
                                                     relevant.addr, hook_addrs)
            if tmp_addr is not None:
                flow[relevant].append(tmp_addr)

    print('************************flow******************************')
    for k, v in flow.items():
        print('%#x: ' % k.addr, [hex(child) for child in v])

    print('%#x: ' % retn_node.addr, [])

    print('************************patch*****************************')
    with open(filename, 'rb') as origin:
        # Attention: can't transform to str by calling decode() directly. so use bytearray instead.
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    recovery_file = filename + '_recovered'

    # patch irrelevant blocks
    for nop_node in nop_nodes:
        fill_nop(origin_data, nop_node.addr-base_addr,
                 nop_node.size, project.arch)

    # remove unnecessary control flows
    for parent, childs in flow.items():
        if len(childs) == 1:
            # single successor: rewrite the last instruction as a direct jump
            parent_block = project.factory.block(parent.addr, size=parent.size)
            last_instr = parent_block.capstone.insns[-1]
            file_offset = last_instr.address - base_addr
            # patch the last instruction to jmp
            if project.arch.name in ARCH_X86:
                fill_nop(origin_data, file_offset,
                         last_instr.size, project.arch)
                patch_value = ins_j_jmp_hex_x86(last_instr.address, childs[0], 'jmp')
            elif project.arch.name in ARCH_ARM:
                patch_value = ins_b_jmp_hex_arm(last_instr.address, childs[0], 'b')
                if project.arch.memory_endness == "Iend_BE":
                    patch_value = patch_value[::-1]
            elif project.arch.name in ARCH_ARM64:
                # FIXME: For aarch64/arm64, the last instruction of prologue seems useful in some cases, so patch the next instruction instead.
                if parent.addr == start:
                    file_offset += 4
                    patch_value = ins_b_jmp_hex_arm64(last_instr.address+4, childs[0], 'b')
                else:
                    patch_value = ins_b_jmp_hex_arm64(last_instr.address, childs[0], 'b')
                if project.arch.memory_endness == "Iend_BE":
                    patch_value = patch_value[::-1]
            patch_instruction(origin_data, file_offset, patch_value)
        else:
            # two successors: turn the conditional move into a conditional
            # jump followed by an unconditional jump
            instr = patch_instrs[parent]
            file_offset = instr.address - base_addr
            # patch instructions starting from `cmovx` to the end of block
            fill_nop(origin_data, file_offset, parent.addr +
                     parent.size - base_addr - file_offset, project.arch)
            if project.arch.name in ARCH_X86:
                # patch the cmovx instruction to jx instruction
                patch_value = ins_j_jmp_hex_x86(instr.address, childs[0], instr.mnemonic[len('cmov'):])
                patch_instruction(origin_data, file_offset, patch_value)

                # jcc rel32 is 6 bytes; place the fall-through jmp right after
                file_offset += 6
                # patch the next instruction to jmp instrcution
                patch_value = ins_j_jmp_hex_x86(instr.address+6, childs[1], 'jmp')
                patch_instruction(origin_data, file_offset, patch_value)
            elif project.arch.name in ARCH_ARM:
                # patch the movx instruction to bx instruction
                bx_cond = 'b' + instr.mnemonic[len('mov'):]
                patch_value = ins_b_jmp_hex_arm(instr.address, childs[0], bx_cond)
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

                file_offset += 4
                # patch the next instruction to b instrcution
                patch_value = ins_b_jmp_hex_arm(instr.address+4, childs[1], 'b')
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)
            elif project.arch.name in ARCH_ARM64:
                # patch the cset.xx instruction to bx instruction
                bx_cond = instr.op_str.split(',')[-1].strip()
                patch_value = ins_b_jmp_hex_arm64(instr.address, childs[0], bx_cond)
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

                file_offset += 4
                # patch the next instruction to b instruction
                patch_value = ins_b_jmp_hex_arm64(instr.address+4, childs[1], 'b')
                if project.arch.memory_endness == 'Iend_BE':
                    patch_value = patch_value[::-1]
                patch_instruction(origin_data, file_offset, patch_value)

    assert len(origin_data) == origin_data_len, "Error: size of data changed!!!"
    # fix: use a context manager so the handle is closed even if the
    # assert above (or the write) raises
    with open(recovery_file, 'wb') as recovery:
        recovery.write(origin_data)
    print('Successful! The recovered file: %s' % recovery_file)
# Exemplo n.º 4
# 0
def main():
    """Remove bogus (never-executed) control flow from a function.

    Symbolically executes the target function recording every block
    address that is actually reachable, then NOPs out the unreachable
    blocks and rewrites conditional jumps whose bogus side is dead into
    direct jumps. Writes the result to `<filename>_recovered`.
    """
    if len(sys.argv) != 3:
        print('Usage: python debougs.py filename function_address(hex)')
        # fix: sys.exit instead of the site-injected exit() builtin,
        # which is not guaranteed under `python -S` or in frozen apps
        sys.exit(0)

    filename = sys.argv[1]
    start = int(sys.argv[2], 16)  # target function address, given in hex

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    # normalize to avoid overlapping basic blocks
    cfg = project.analyses.CFGFast(normalize=True)
    target_function = cfg.functions.get(start)
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # page-align the mapped base so `addr - base_addr` is a file offset
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    state = project.factory.blank_state(
        addr=target_function.addr,
        remove_options={angr.sim_options.LAZY_SOLVES})

    # addresses of every block reached during symbolic exploration
    flow = set()
    flow.add(target_function.addr)

    print('*******************symbolic execution*********************')
    sm = project.factory.simulation_manager(state)
    sm.step()
    while len(sm.active) > 0:
        for active in sm.active:
            flow.add(active.addr)
        sm.step()

    print('executed blocks: ', list(map(hex, flow)))

    print('************************patch******************************')

    with open(filename, 'rb') as origin:
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    patch_nodes = set()  # successor blocks already NOPed out
    for node in supergraph.nodes():
        if node.addr in patch_nodes:
            continue

        if node.addr not in flow:
            # never executed: overwrite the whole block with NOPs
            # NOTE(review): this fill_nop takes no arch argument, unlike
            # the sibling examples — presumably an x86-only helper; verify.
            file_offset = node.addr - base_addr
            fill_nop(origin_data, file_offset, node.size)
        else:
            suc_nodes = list(supergraph.successors(node))
            jmp_targets = []

            for suc_node in suc_nodes:
                if suc_node.addr in flow:
                    jmp_targets.append(suc_node.addr)
                else:
                    # dead successor: NOP it out as well
                    file_offset = suc_node.addr - base_addr
                    fill_nop(origin_data, file_offset, suc_node.size)
                    patch_nodes.add(suc_node.addr)

            # conditional branch with exactly one live side: rewrite the
            # trailing 6 bytes (jcc rel32) as nop + jmp rel32
            if len(suc_nodes) > 1 and len(jmp_targets) == 1:
                file_offset = node.addr + node.size - 6 - base_addr
                patch_byte(origin_data, file_offset, opcodes['nop'])
                patch_byte(origin_data, file_offset + 1, opcodes['jmp'])
                fill_jmp_offset(origin_data, file_offset + 2,
                                jmp_targets[0] - (node.addr + node.size))

    assert len(
        origin_data) == origin_data_len, "Error: size of data changed!!!"

    recovery_file = filename + '_recovered'
    with open(recovery_file, 'wb') as recovery:
        recovery.write(origin_data)

    print('Successful! The recovered file: %s' % recovery_file)
# Exemplo n.º 5
# 0
def main():
    """Strip bogus control flow: record the blocks a symbolic run actually
    reaches, NOP everything unreachable, and straighten conditional jumps
    whose dead side was removed."""
    parser = argparse.ArgumentParser(description="debogus control flow script")
    parser.add_argument("-f", "--file", help="binary to analyze")
    parser.add_argument("--addr",
                        help="address of target function in hex format")
    args = parser.parse_args()

    if args.file is None or args.addr is None:
        parser.print_help()
        sys.exit(0)

    binary_path = args.file
    func_addr = int(args.addr, 16)

    proj = angr.Project(binary_path, load_options={'auto_load_libs': False})
    recovered_cfg = proj.analyses.CFGFast(normalize=True, force_complete_scan=False)
    func = recovered_cfg.functions.get(func_addr)
    sg = am_graph.to_supergraph(func.transition_graph)

    # page-align the mapped base so virtual addr -> file offset works
    image_base = proj.loader.main_object.mapped_base >> 12 << 12

    entry_state = proj.factory.blank_state(
        addr=func.addr,
        remove_options={angr.sim_options.LAZY_SOLVES})

    # every block address the symbolic run visits
    executed = {func.addr}

    print('*******************symbolic execution*********************')
    simgr = proj.factory.simulation_manager(entry_state)
    simgr.step()
    while simgr.active:
        executed.update(s.addr for s in simgr.active)
        simgr.step()

    print('executed blocks: ', [hex(a) for a in executed])

    print('************************patch******************************')

    with open(binary_path, 'rb') as src:
        raw = bytearray(src.read())
        raw_len = len(raw)

    def _endian(code):
        # byte-reverse patches on big-endian targets
        return code[::-1] if proj.arch.memory_endness == 'Iend_BE' else code

    already_nopped = set()
    for blk in sg.nodes():
        if blk.addr in already_nopped:
            continue

        if blk.addr not in executed:
            # unreachable block: blank it out entirely
            fill_nop(raw, blk.addr - image_base, blk.size, proj.arch)
            continue

        successors = list(sg.successors(blk))
        live_targets = []
        for suc in successors:
            if suc.addr in executed:
                live_targets.append(suc.addr)
            else:
                # unreachable successor: blank it and remember it
                fill_nop(raw, suc.addr - image_base, suc.size, proj.arch)
                already_nopped.add(suc.addr)

        # conditional branch with exactly one surviving side -> direct jump
        if len(successors) > 1 and len(live_targets) == 1:
            end = blk.addr + blk.size
            if proj.arch.name in ARCH_X86:
                off = end - 6 - image_base
                # nop + jmp
                patch_bytes = OPCODES['x86']['nop'] + ins_j_jmp_hex_x86(
                    end - 5, live_targets[0], 'jmp')
                patch_instruction(raw, off, patch_bytes)
            elif proj.arch.name in ARCH_ARM:
                off = end - 4 - image_base
                patch_bytes = _endian(
                    ins_b_jmp_hex_arm(end - 4, live_targets[0], 'b'))
                patch_instruction(raw, off, patch_bytes)
            elif proj.arch.name in ARCH_ARM64:
                off = end - 4 - image_base
                patch_bytes = _endian(
                    ins_b_jmp_hex_arm64(end - 4, live_targets[0], 'b'))
                patch_instruction(raw, off, patch_bytes)

    assert len(raw) == raw_len, "Error: size of data changed!!!"

    recovery_file = binary_path + '_recovered'
    with open(recovery_file, 'wb') as out:
        out.write(raw)

    print('Successful! The recovered file: %s' % recovery_file)