Пример #1
0
class BARF(object):
    """Binary Analysis Framework.

    Facade that opens an executable and wires together the disassembler,
    the REIL translator and emulator, the SMT solver/translator and the
    analysis modules (basic blocks, code analyzer, gadget tools).
    """

    # Size handed to the emulator's memory writer for every context cell.
    # The original expression was ``32 / 8``, which is a float under Python 3
    # true division; the integer value is spelled out here instead.
    _CONTEXT_CELL_SIZE = 32 // 8

    def __init__(self, filename):
        """Create the framework and open `filename` when one is given.

        :param filename: name of an executable file; a falsy value leaves
            every module unset
        :type filename: str

        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self):
        """Build every module for the currently opened binary.

        The order matters: the core modules read ``arch_info`` and the
        analysis modules read the core modules.
        """
        # setup architecture
        self._setup_arch()

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self):
        """Set up architecture.

        Any binary that is not reported as x86 is handled as ARM.
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch()
        else:
            # TODO: add arch in the binary file class
            self._setup_arm_arch()

    def _setup_arm_arch(self):
        """Set up ARM architecture (always in Thumb mode).
        """
        arch_mode = arch.ARCH_ARM_MODE_THUMB

        self.arch_info = ArmArchitectureInformation(arch_mode)
        self.disassembler = ArmDisassembler(architecture_mode=arch_mode)
        self.ir_translator = ArmTranslator(architecture_mode=arch_mode)

    def _setup_x86_arch(self):
        """Set up x86 architecture using the mode reported by the binary.
        """
        arch_mode = self.binary.architecture_mode

        # Set up architecture information
        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules: REIL emulator, SMT solver and SMT translator.

        All three are reset to ``None`` first and stay that way when no
        architecture information is available. The SMT translator is only
        created when a solver was selected.

        :raises Exception: if ``SMT_SOLVER`` is set to an unknown name

        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            elif SMT_SOLVER is not None:
                raise Exception("Invalid SMT solver.")

            # Set SMT translator.
            if self.smt_solver:
                self.smt_translator = SmtTranslator(self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules on top of the architecture and core modules.
        """
        ## basic block
        self.bb_builder = BasicBlockBuilder(self.disassembler, self.text_section, self.ir_translator, self.arch_info)

        ## code analyzer
        self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator, self.arch_info)

        ## gadget
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section, self.ir_translator, self.binary.architecture, self.binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    def _resolve_range(self, ea_start, ea_end):
        """Return ``(start, end)`` with each ``None`` bound taken from the binary.

        Only ``None`` selects the default, so 0 is a valid explicit address.
        """
        start_addr = self.binary.ea_start if ea_start is None else ea_start
        end_addr = self.binary.ea_end if ea_end is None else ea_end

        return start_addr, end_addr

    def _load_context(self, context):
        """Copy the 'registers' and 'memory' entries of `context` into the emulator."""
        # load registers
        if 'registers' in context:
            for reg, val in context['registers'].items():
                self.ir_emulator.registers[reg] = val

        # load memory
        if 'memory' in context:
            for addr, val in context['memory'].items():
                self.ir_emulator.memory.write(addr, self._CONTEXT_CELL_SIZE, val)

    def _save_context(self):
        """Return the output context: a register snapshot and an empty memory map."""
        return {
            'registers': dict(self.ir_emulator.registers.items()),
            # Memory contents are not read back from the emulator.
            'memory': {},
        }

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when `filename` is falsy.

        :param filename: name of an executable file
        :type filename: str

        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def translate(self, ea_start=None, ea_end=None):
        """Translate to REIL instructions.

        The translator is reset before the first instruction is translated.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, translation of that instruction)

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start_addr, end_addr):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None):
        """Disassemble assembler instructions.

        Stops early, without raising, as soon as the disassembler returns
        nothing for an address.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, instruction size)
        :rtype: (int, Instruction, int)

        """
        curr_addr, end_addr = self._resolve_range(ea_start, ea_end)

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes, clipped to
            # the end of the binary (presumably enough for the longest
            # instruction -- TODO confirm for every supported architecture).
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)

            asm = self.disassembler.disassemble(self.text_section[start:end], curr_addr)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None, symbols=None):
        """Recover CFG

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param symbols: passed through to the basic block builder

        :returns: a graph where each node is a basic block
        :rtype: BasicBlockGraph

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        bb_list = self.bb_builder.build(start_addr, end_addr, symbols)

        return BasicBlockGraph(bb_list)

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a list of basic blocks
        :rtype: list

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        return self.bb_builder.build(start_addr, end_addr)

    def emulate_full(self, context, ea_start=None, ea_end=None):
        """Emulate REIL instructions.

        Disassembles and translates the address range into a REIL container,
        chains the per-instruction sequences together and runs the emulator
        over the container.

        :param context: processor context; the optional keys 'registers' and
            'memory' are loaded into the emulator
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context holding the emulator registers and an empty
            'memory' entry
        :rtype: dict

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        self._load_context(context)

        # Create ReilContainer
        # ==================================================================== #
        from core.reil.reil import ReilContainer
        from core.reil.reil import ReilSequence

        instr_container = ReilContainer()

        asm_instr_last = None
        instr_seq_prev = None

        for _, asm_instr, _ in self.disassemble(start_addr, end_addr):
            instr_seq = ReilSequence()

            for reil_instr in self.ir_translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            # Link the previous sequence to the one just built.
            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            instr_container.add(instr_seq)

            instr_seq_prev = instr_seq
            # Remember the native instruction so the last sequence can be
            # linked after the loop (this was never assigned before, which
            # left the final link below unreachable).
            asm_instr_last = asm_instr

        # The last sequence falls through to the address right after the last
        # native instruction, shifted like the other REIL addresses here.
        if instr_seq_prev and asm_instr_last:
            instr_seq_prev.next_sequence_address = (asm_instr_last.address + asm_instr_last.size) << 8
        # ==================================================================== #

        self.ir_emulator.execute(instr_container, start_addr << 8, end=end_addr << 8)

        return self._save_context()

    def emulate_full_ex(self, context, instr_container, ea_start=None, ea_end=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context; the optional keys 'registers' and
            'memory' are loaded into the emulator
        :type context: dict
        :param instr_container: container handed to the emulator as is
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context holding the emulator registers and an empty
            'memory' entry
        :rtype: dict

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        self._load_context(context)

        self.ir_emulator.execute(instr_container, start_addr << 8, end=end_addr << 8)

        return self._save_context()
Пример #2
0
class BARF(object):
    """Binary Analysis Framework.

    Facade that opens an executable and wires together the disassembler,
    the REIL translator and emulator, the SMT solver/translator and the
    analysis modules (basic blocks, code analyzer, gadget tools).
    """
    def __init__(self, filename):
        """Create the framework and open `filename` when one is given.

        :param filename: name of an executable file; a falsy value leaves
            every module unset
        :type filename: str

        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self):
        """Build every module for the currently opened binary.

        The order matters: the core modules read ``arch_info`` and the
        analysis modules read the core modules.
        """
        # setup architecture
        self._setup_arch()

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self):
        """Set up architecture.

        Any binary that is not reported as x86 is handled as ARM.
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch()
        else:
            # TODO: add arch in the binary file class
            self._setup_arm_arch()

    def _setup_arm_arch(self):
        """Set up ARM architecture (always with ``ARCH_ARM_MODE_32``).
        """
        self.arch_info = ArmArchitectureInformation(ARCH_ARM_MODE_32)
        self.disassembler = ArmDisassembler(architecture_mode=ARCH_ARM_MODE_32)
        self.ir_translator = ArmTranslator(architecture_mode=ARCH_ARM_MODE_32)

    def _setup_x86_arch(self):
        """Set up x86 architecture using the mode reported by the binary.
        """
        arch_mode = self.binary.architecture_mode

        # Set up architecture information
        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules: REIL emulator, SMT solver and SMT translator.

        All three are reset to ``None`` first and stay that way when no
        architecture information is available.

        :raises Exception: if ``SMT_SOLVER`` is neither "Z3" nor "CVC4"

        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            self.ir_emulator = ReilEmulator(self.arch_info.address_size)

            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            else:
                raise Exception("Invalid SMT solver.")

            self.smt_translator = SmtTranslator(self.smt_solver,
                                                self.arch_info.address_size)

            # Describe the register file to the emulator.
            self.ir_emulator.set_arch_registers(
                self.arch_info.registers_gp_all)
            self.ir_emulator.set_arch_registers_size(
                self.arch_info.registers_size)
            self.ir_emulator.set_reg_access_mapper(self.arch_info.alias_mapper)

            # Give the SMT translator the same register description.
            self.smt_translator.set_reg_access_mapper(
                self.arch_info.alias_mapper)
            self.smt_translator.set_arch_registers_size(
                self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules on top of the architecture and core modules.
        """
        ## basic block
        self.bb_builder = BasicBlockBuilder(self.disassembler,
                                            self.text_section,
                                            self.ir_translator)

        ## code analyzer
        self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator)

        ## gadget
        self.gadget_classifier = GadgetClassifier(self.ir_emulator,
                                                  self.arch_info)
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section,
                                          self.ir_translator,
                                          self.binary.architecture,
                                          self.binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer,
                                              self.arch_info)

    def _resolve_range(self, ea_start, ea_end):
        """Return ``(start, end)`` with each ``None`` bound taken from the binary.

        Only ``None`` selects the default, so 0 is a valid explicit address.
        """
        start_addr = self.binary.ea_start if ea_start is None else ea_start
        end_addr = self.binary.ea_end if ea_end is None else ea_end

        return start_addr, end_addr

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when `filename` is falsy.

        :param filename: name of an executable file
        :type filename: str

        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def translate(self, ea_start=None, ea_end=None):
        """Translate to REIL instructions.

        The translator is reset before the first instruction is translated.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, translation of that instruction)

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start_addr, end_addr):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None):
        """Disassemble assembler instructions.

        Stops early, without raising, as soon as the disassembler returns
        nothing for an address.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, instruction size)
        :rtype: (int, Instruction, int)

        """
        curr_addr, end_addr = self._resolve_range(ea_start, ea_end)

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes, clipped to
            # the end of the binary (presumably enough for the longest
            # instruction -- TODO confirm for every supported architecture).
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)

            asm = self.disassembler.disassemble(self.text_section[start:end],
                                                curr_addr)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None):
        """Recover CFG

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a graph where each node is a basic block
        :rtype: BasicBlockGraph

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        bb_list = self.bb_builder.build(start_addr, end_addr)

        return BasicBlockGraph(bb_list)

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a list of basic blocks
        :rtype: list

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        return self.bb_builder.build(start_addr, end_addr)

    def emulate_full(self, context, ea_start=None, ea_end=None):
        """Emulate REIL instructions.

        :param context: processor context; the optional keys 'registers' and
            'memory' are loaded into the emulator
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context holding the emulator registers and an empty
            'memory' entry
        :rtype: dict

        """
        start_addr, end_addr = self._resolve_range(ea_start, ea_end)

        # load registers
        if 'registers' in context:
            for reg, val in context['registers'].items():
                self.ir_emulator.registers[reg] = val

        # load memory
        if 'memory' in context:
            for addr, val in context['memory'].items():
                # NOTE(review): 32 is presumably a size in bits for this
                # emulator's memory API -- confirm against ReilEmulator.
                self.ir_emulator.memory.write(addr, 32, val)

        instrs = [reil for _, _, reil in self.translate(start_addr, end_addr)]

        self.ir_emulator.execute(instrs,
                                 start_addr << 8,
                                 end_address=end_addr << 8)

        # Memory contents are not read back from the emulator.
        return {
            'registers': dict(self.ir_emulator.registers.items()),
            'memory': {},
        }
Пример #3
0
class BARF(object):
    """Binary Analysis Framework."""
    def __init__(self, filename, load_bin=True):
        """Initialise every component slot and open `filename`.

        Args:
            filename (str): Name of an executable file; a falsy value leaves
                all components unset.
            load_bin (bool): Whether ``load_binary()`` is called at the end of
                every module (re)load.
        """
        logger.info("Initializing BARF")

        # Binary under analysis.
        self.binary = None
        self.text_section = None

        # Architecture description and per-architecture tools.
        self.name = None
        self.arch_info = None
        self._arch_mode = None
        self.disassembler = None
        self.ir_translator = None

        # Instruction pointer register, stack pointer register and word size.
        self.ip = None
        self.sp = None
        self.ws = None

        # Core modules.
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        # Analysis modules.
        self.bb_builder = None
        self.code_analyzer = None
        self.gadget_classifier = None
        self.gadget_finder = None
        self.gadget_verifier = None

        # Must be in place before open(), which triggers the first load.
        self._load_bin = load_bin

        self.open(filename)

    def _load(self, arch_mode=None):
        """(Re)build all modules, then optionally load the binary.

        Args:
            arch_mode (int): Architecture mode forwarded to the architecture setup.
        """
        # Architecture first, then the core modules, then the analysis
        # modules that are built from them.
        self._setup_arch(arch_mode=arch_mode)
        self._setup_core_modules()
        self._setup_analysis_modules()

        if not self._load_bin:
            return

        self.load_binary()

    def _setup_arch(self, arch_mode=None):
        """Reset the architecture information and run the matching setup.

        Any binary that is not reported as x86 is handled as ARM.

        Args:
            arch_mode (int): Architecture mode forwarded to the chosen setup.
        """
        self.arch_info = None

        is_x86 = self.binary.architecture == arch.ARCH_X86

        # TODO: add arch to the binary file class.
        setup = self._setup_x86_arch if is_x86 else self._setup_arm_arch
        setup(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up the ARM architecture objects and register names.

        Args:
            arch_mode (int): Architecture mode; Thumb is used when omitted.

        Raises:
            Exception: If the resulting architecture mode is neither Thumb nor ARM.
        """
        mode = arch.ARCH_ARM_MODE_THUMB if arch_mode is None else arch_mode

        self.name = "ARM"
        self.arch_info = ArmArchitectureInformation(mode)
        self.disassembler = ArmDisassembler(architecture_mode=mode)
        self.ir_translator = ArmTranslator(architecture_mode=mode)

        # Both modes share the same register names; only the word size differs.
        active_mode = self.arch_info.architecture_mode

        if active_mode == arch.ARCH_ARM_MODE_THUMB:
            word_size = 2  # TODO Check.
        elif active_mode == arch.ARCH_ARM_MODE_ARM:
            word_size = 4
        else:
            raise Exception("Invalid architecture mode.")

        self.ip, self.sp, self.ws = "r15", "r13", word_size

    def _setup_x86_arch(self, arch_mode=None):
        """Set up the x86 architecture objects and register names.

        Args:
            arch_mode (int): Architecture mode; the binary's own mode is used
                when omitted.

        Raises:
            Exception: If the resulting architecture mode is neither 32 nor 64 bit.
        """
        mode = self.binary.architecture_mode if arch_mode is None else arch_mode

        self.name = "x86"
        self.arch_info = X86ArchitectureInformation(mode)
        self.disassembler = X86Disassembler(architecture_mode=mode)
        self.ir_translator = X86Translator(architecture_mode=mode)

        # Pick (instruction pointer, stack pointer, word size) for the mode.
        active_mode = self.arch_info.architecture_mode

        if active_mode == arch.ARCH_X86_MODE_32:
            registers = ("eip", "esp", 4)
        elif active_mode == arch.ARCH_X86_MODE_64:
            registers = ("rip", "rsp", 8)
        else:
            raise Exception("Invalid architecture mode.")

        self.ip, self.sp, self.ws = registers

    def _setup_core_modules(self):
        """Set up core modules: REIL emulator, SMT solver and SMT translator.

        All three are reset to ``None`` first and stay that way when no
        architecture information is available. A configured solver that is
        not installed only produces a warning; the solver and the translator
        then remain ``None``.

        Raises:
            Exception: If ``SMT_SOLVER`` is set to an unknown solver name.
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if not self.arch_info:
            return

        # Set REIL emulator.
        self.ir_emulator = ReilEmulator(self.arch_info)

        # Set SMT Solver.
        # `logger.warning` is used because `warn` is a deprecated alias.
        if SMT_SOLVER == "Z3":
            if _check_solver_installation("z3"):
                self.smt_solver = Z3Solver()
            else:
                logger.warning(
                    "z3 solver is not installed. Run 'barf-install-solvers.sh' to install it."
                )
        elif SMT_SOLVER == "CVC4":
            if _check_solver_installation("cvc4"):
                self.smt_solver = CVC4Solver()
            else:
                logger.warning(
                    "cvc4 solver is not installed. Run 'barf-install-solvers.sh' to install it."
                )
        elif SMT_SOLVER is not None:
            raise Exception("Invalid SMT solver.")

        # Set SMT translator (only possible with a solver).
        if self.smt_solver:
            self.smt_translator = SmtTranslator(
                self.smt_solver, self.arch_info.address_size)

            self.smt_translator.set_arch_alias_mapper(
                self.arch_info.alias_mapper)
            self.smt_translator.set_arch_registers_size(
                self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Build the analysis modules from the current architecture and core modules.

        The code analyzer needs the SMT translator and the gadget verifier
        needs the code analyzer; each is left as ``None`` when its
        prerequisite is missing.
        """
        # CFG recovery, driven by a recursive descent disassembly strategy.
        recovery_strategy = RecursiveDescent(self.disassembler,
                                             self.text_section,
                                             self.ir_translator,
                                             self.arch_info)
        self.bb_builder = CFGRecoverer(recovery_strategy)

        # Code analyzer.
        self.code_analyzer = None
        if self.smt_translator:
            self.code_analyzer = CodeAnalyzer(
                self.smt_solver, self.smt_translator, self.arch_info)

        # Gadget classifier and finder.
        self.gadget_classifier = GadgetClassifier(
            self.ir_emulator, self.arch_info)
        self.gadget_finder = GadgetFinder(
            self.disassembler, self.text_section, self.ir_translator,
            self.binary.architecture, self.binary.architecture_mode)

        # Gadget verifier.
        self.gadget_verifier = None
        if self.code_analyzer:
            self.gadget_verifier = GadgetVerifier(
                self.code_analyzer, self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when `filename` is falsy. Otherwise the binary is
        parsed and every module is (re)loaded for its architecture mode.

        Args:
            filename (str): Name of an executable file.
        """
        if not filename:
            return

        self.binary = BinaryFile(filename)
        self.text_section = self.binary.text_section

        # Diagnostic output goes through the module logger rather than being
        # printed to stdout from library code.
        logger.debug("[open] self.binary.architecture_mode: %s",
                     self.binary.architecture_mode)

        self._load(arch_mode=self.binary.architecture_mode)

    def load_architecture(self, name, arch_info, disassembler, translator):
        """Install a caller-supplied architecture and rebuild the analysis modules.

        Args:
            name (str): Architecture's name.
            arch_info (ArchitectureInformation): Architecture information object.
            disassembler (Disassembler): Disassembler for the architecture.
            translator (Translator): Translator for the architecture.
        """
        # Swap in the architecture description and its tools.
        self.name, self.arch_info = name, arch_info
        self.disassembler, self.ir_translator = disassembler, translator

        # Only the analysis modules are rebuilt here.
        self._setup_analysis_modules()

    def translate(self, start=None, end=None, arch_mode=None):
        """Translate to REIL instructions.

        The translator is reset before the first instruction is translated.

        Args:
            start (int): Start address (defaults to the binary's start).
            end (int): End address (defaults to the binary's end).
            arch_mode (int): Architecture mode.

        Returns:
            (int, Instruction, list): A generator of tuples of the form (address, assembler instruction, REIL instructions).
        """
        # Only None selects the default, so address 0 is a valid bound.
        start_addr = self.binary.ea_start if start is None else start
        end_addr = self.binary.ea_end if end is None else end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start=start_addr,
                                             end=end_addr,
                                             arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, start=None, end=None, arch_mode=None):
        """Disassemble native instructions.

        Stops early, without raising, as soon as the disassembler returns
        nothing for an address.

        Args:
            start (int): Start address (defaults to the binary's start).
            end (int): End address (defaults to the binary's end).
            arch_mode (int): Architecture mode (defaults to the binary's mode).

        Returns:
            (int, Instruction, int): A generator of tuples of the form (address, assembler instruction, instruction size).
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Only None selects the default, so address 0 is a valid bound.
        curr_addr = self.binary.ea_start if start is None else start
        end_addr = self.binary.ea_end if end is None else end

        while curr_addr < end_addr:
            # Fetch the instruction.
            encoding = self.__fetch_instr(curr_addr)

            # Decode it.
            asm_instr = self.disassembler.disassemble(
                encoding, curr_addr, architecture_mode=arch_mode)

            if not asm_instr:
                return

            yield curr_addr, asm_instr, asm_instr.size

            # Update instruction pointer.
            curr_addr += asm_instr.size

    def recover_cfg(self,
                    start=None,
                    end=None,
                    symbols=None,
                    callback=None,
                    arch_mode=None):
        """Recover CFG.

        All modules are reloaded for `arch_mode` before the recovery starts.

        Args:
            start (int): Start address (defaults to the binary's entry point).
            end (int): End address.
            symbols (dict): Symbol table.
            callback (function): A callback function which is called after each successfully recovered CFG.
            arch_mode (int): Architecture mode (defaults to the binary's mode).

        Returns:
            ControlFlowGraph: A CFG.
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Only a missing start falls back to the entry point; an explicit
        # address 0 is honoured.
        if start is None:
            start = self.binary.entry_point

        cfg, _ = self._recover_cfg(start=start,
                                   end=end,
                                   symbols=symbols,
                                   callback=callback)

        return cfg

    def recover_cfg_all(self,
                        entries,
                        symbols=None,
                        callback=None,
                        arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        Starting from `entries`, every call target reported by a recovered
        CFG is scheduled in turn, so the result is in discovery order. Each
        address is recovered at most once, even if it is listed twice in
        `entries`. `entries` may be any iterable and is not modified.

        Args:
            entries (list): A list of function addresses' to start the CFG recovery process.
            symbols (dict): Symbol table.
            callback (function): A callback function which is called after each successfully recovered CFG.
            arch_mode (int): Architecture mode (defaults to the binary's mode).

        Returns:
            list: A list of recovered CFGs.
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = symbols if symbols else {}

        # Work list in discovery order plus the set of every address ever
        # scheduled (already processed or still waiting).
        pending = []
        scheduled = set()

        for addr in entries:
            if addr not in scheduled:
                scheduled.add(addr)
                pending.append(addr)

        # Recover the CFGs. The work list grows while it is consumed, so it
        # is walked by index instead of being re-sliced on every step.
        cfgs = []
        index = 0

        while index < len(pending):
            start = pending[index]
            index += 1

            cfg, calls_tmp = self._recover_cfg(start=start,
                                               symbols=symbols,
                                               callback=callback)

            cfgs.append(cfg)

            for addr in sorted(calls_tmp):
                if addr not in scheduled:
                    scheduled.add(addr)
                    pending.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None, callback=None):
        """Recover the CFG of a single function.

        Args:
            start (int): Start address (defaults to the binary's start).
            end (int): End address (defaults to the binary's end).
            symbols (dict): Symbol table; an entry is indexed as ``[0]`` for
                the name and ``[1]`` for the size.
            callback (function): Called with (address, name, size) before the
                basic blocks are recovered.

        Returns:
            tuple: The recovered ControlFlowGraph and the call targets
            reported by the basic block builder.
        """
        # Resolve the range first so the name lookup below always works on a
        # real address. Only None selects the default; 0 is a valid address.
        start_addr = self.binary.ea_start if start is None else start
        end_addr = self.binary.ea_end if end is None else end

        # Retrieve symbol name in case it is available.
        if symbols and start_addr in symbols:
            symbol = symbols[start_addr]
            name = symbol[0]
            size = symbol[1] - 1 if symbol[1] != 0 else 0
        else:
            name = "sub_{:x}".format(start_addr)
            size = 0

        # Set callback.
        if callback:
            callback(start_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def emulate(self,
                context=None,
                start=None,
                end=None,
                arch_mode=None,
                hooks=None,
                max_instrs=None):
        """Emulate native code.

        Args:
            context (dict): Processor context (register and/or memory).
            start (int): Start address.
            end (int): End address.
            arch_mode (int): Architecture mode.
            hooks (dict): Hooks by address. Each value is unpacked as
                ``(fn, param)`` and invoked as ``fn(ir_emulator, param)``.
            max_instrs (int): Maximum number of instructions to execute. A
                falsy value (None or 0) means no limit.

        Returns:
            dict: Processor context. Only the 'registers' entry is filled in;
                the 'memory' entry is returned empty.
        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        context = context if context else {}

        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        hooks = hooks if hooks else {}

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        # TODO Memory content should be encoded as hex strings so each
        # entry can be of different sizes.
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        # Execute the code.
        # Switch arch mode accordingly for ARM base on the start address
        # (bit 0 set selects Thumb and is cleared from both addresses).
        if self.binary.architecture == arch.ARCH_ARM:
            if start_addr & 0x1 == 0x1:
                start_addr = start_addr & ~0x1
                end_addr = end_addr & ~0x1
                self._arch_mode = arch.ARCH_ARM_MODE_THUMB
            else:
                self._arch_mode = arch.ARCH_ARM_MODE_ARM

        if self.binary.architecture == arch.ARCH_X86:
            self._arch_mode = self.binary.architecture_mode

        execution_cache = ExecutionCache()

        next_addr = start_addr
        instr_count = 0
        asm_instr = None
        while next_addr != end_addr:
            # Stop once exactly 'max_instrs' instructions were executed (the
            # previous '>' comparison let one extra instruction through).
            if max_instrs and instr_count >= max_instrs:
                break

            # Process hooks.
            if next_addr in hooks:
                fn, param = hooks[next_addr]

                fn(self.ir_emulator, param)

                # Compute next address after hook: resume right after the
                # last executed instruction. When the hook sits on the very
                # first address there is no previous instruction yet
                # (asm_instr is None), so execution proceeds at next_addr
                # instead of failing with an AttributeError.
                if asm_instr is not None and self.binary.architecture in (
                        arch.ARCH_X86, arch.ARCH_ARM):
                    next_addr = asm_instr.address + asm_instr.size

            try:
                # Retrieve next instruction from the execution cache.
                asm_instr, reil_container = execution_cache.retrieve(next_addr)
            except InvalidAddressError:
                # Fetch the instruction.
                encoding = self.__fetch_instr(next_addr)

                # Decode it.
                asm_instr = self.disassembler.disassemble(
                    encoding, next_addr, architecture_mode=self._arch_mode)

                # Translate it.
                reil_container = self.__build_reil_container(asm_instr)

                # Add it to the execution cache.
                execution_cache.add(next_addr, asm_instr, reil_container)

            # Update the instruction pointer.
            self.__update_ip(asm_instr)

            # Execute instruction.
            print("{:#x} {}".format(asm_instr.address, asm_instr))

            target_addr = self.__process_reil_container(
                reil_container, to_reil_address(next_addr))

            # Get next address to execute: the branch target when the REIL
            # code produced one, the fall-through address otherwise.
            next_addr = to_asm_address(
                target_addr
            ) if target_addr else asm_instr.address + asm_instr.size

            # Count instruction.
            instr_count += 1

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out

    def __process_reil_container(self, container, ip):
        next_addr = None

        while ip:
            # Fetch instruction.
            try:
                reil_instr = container.fetch(ip)
            except ReilContainerInvalidAddressError:
                next_addr = ip
                break

            next_ip = self.ir_emulator.single_step(reil_instr)

            # Update instruction pointer.
            ip = next_ip if next_ip else container.get_next_address(ip)

        # Delete temporal registers.
        regs = self.ir_emulator.registers.keys()

        for r in regs:
            if r.startswith("t"):
                del self.ir_emulator.registers[r]

        return next_addr

    def __build_reil_container(self, asm_instr):
        """Translate ``asm_instr`` and wrap the result in a ReilContainer.

        The REIL instructions produced by the IR translator are appended, in
        order, to a single ReilSequence which is then added to a fresh
        container.
        """
        result = ReilContainer()
        sequence = ReilSequence()

        for translated in self.ir_translator.translate(asm_instr):
            sequence.append(translated)

        result.add(sequence)
        return result

    def __fetch_instr(self, next_addr):
        start, end = next_addr, next_addr + self.arch_info.max_instruction_size

        encoding = ""
        for i in xrange(end - start):
            encoding += chr(self.ir_emulator.read_memory(start + i, 1))

        return encoding

    def __update_ip(self, asm_instr):
        """Store the instruction pointer value seen by ``asm_instr``.

        On x86 the register ``self.ip`` receives the address of the following
        instruction. On ARM it receives the instruction address plus 8 in ARM
        mode or plus 4 in Thumb mode; any other mode leaves it untouched.
        """
        architecture = self.binary.architecture
        registers = self.ir_emulator.registers

        if architecture == arch.ARCH_X86:
            fallthrough = asm_instr.address + asm_instr.size
            registers[self.ip] = fallthrough

        if architecture == arch.ARCH_ARM:
            mode = self._arch_mode

            if mode == arch.ARCH_ARM_MODE_ARM:
                registers[self.ip] = asm_instr.address + 8
            elif mode == arch.ARCH_ARM_MODE_THUMB:
                registers[self.ip] = asm_instr.address + 4

    def _load_binary_elf(self, filename):
        """Copy every segment of an ELF file into the emulator's memory.

        Args:
            filename (str): Path of the ELF file.
        """
        logger.info("Loading ELF image into memory")

        # The context manager guarantees the file is closed even when parsing
        # or a memory write raises.
        with open(filename, 'rb') as f:
            elffile = ELFFile(f)

            for index, segment in enumerate(elffile.iter_segments()):
                logger.info("Loading segment #{} ({:#x}-{:#x})".format(
                    index, segment.header.p_vaddr,
                    segment.header.p_vaddr + segment.header.p_filesz))

                # Write the segment data byte by byte, starting at the
                # segment's virtual address.
                for i, b in enumerate(bytearray(segment.data())):
                    self.ir_emulator.write_memory(
                        segment.header.p_vaddr + i, 1, b)

    def _load_binary_pe(self, filename):
        raise NotImplementedError()

    def load_binary(self):
        try:
            fd = open(self.binary.filename, 'rb')
            signature = fd.read(4)
            fd.close()
        except:
            raise Exception("Error loading file.")

        if signature[:4] == b"\x7f\x45\x4c\x46":
            self._load_binary_elf(self.binary.filename)
        elif signature[:2] == b"\x4d\x5a":
            self._load_binary_pe(self.binary.filename)
        else:
            raise Exception("Unknown file format.")
Пример #4
0
class BARF(object):
    """Binary Analysis Framework."""
    def __init__(self, filename, load_bin=True):
        """Create the framework object and open ``filename``.

        Args:
            filename (str): Name of an executable file.
            load_bin (bool): When true, the binary is handed to the emulator
                every time the modules are (re)loaded.
        """
        logger.info("Initializing BARF")

        # Binary being analysed.
        self.binary = None
        self.text_section = None
        self._load_bin = load_bin

        # Architecture description.
        self.name = None
        self.arch_info = None
        self._arch_mode = None
        self.disassembler = None
        self.ir_translator = None

        # Core modules.
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None
        self.emulator = None

        # Analysis modules.
        self.bb_builder = None
        self.code_analyzer = None
        self.gadget_finder = None
        self.gadget_classifier = None
        self.gadget_verifier = None

        self.open(filename)

    def _load(self, arch_mode=None):
        # setup architecture
        self._setup_arch(arch_mode=arch_mode)

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

        if self._load_bin:
            self.emulator.load_binary(self.binary)

    def _setup_arch(self, arch_mode=None):
        """Set up architecture.

        Clears the current architecture information and delegates to the x86
        set-up when the binary is x86, or to the ARM set-up otherwise.
        """
        self.arch_info = None

        is_x86 = self.binary.architecture == arch.ARCH_X86

        # TODO: add arch to the binary file class.
        setup = self._setup_x86_arch if is_x86 else self._setup_arm_arch
        setup(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up ARM architecture.

        Thumb mode is used when no architecture mode is given.
        """
        mode = arch.ARCH_ARM_MODE_THUMB if arch_mode is None else arch_mode

        self.name = "ARM"
        self.arch_info = ArmArchitectureInformation(mode)
        self.disassembler = ArmDisassembler(architecture_mode=mode)
        self.ir_translator = ArmTranslator(architecture_mode=mode)

    def _setup_x86_arch(self, arch_mode=None):
        """Set up x86 architecture.

        The binary's own architecture mode is used when none is given.
        """
        mode = arch_mode
        if mode is None:
            mode = self.binary.architecture_mode

        self.name = "x86"
        self.arch_info = X86ArchitectureInformation(mode)
        self.disassembler = X86Disassembler(architecture_mode=mode)
        self.ir_translator = X86Translator(architecture_mode=mode)

    def _setup_core_modules(self):
        """Set up core modules.
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            self.smt_solver = None

            if SMT_SOLVER not in ("Z3", "CVC4"):
                raise Exception(
                    "{} SMT solver not supported.".format(SMT_SOLVER))

            try:
                if SMT_SOLVER == "Z3":
                    self.smt_solver = Z3Solver()
                elif SMT_SOLVER == "CVC4":
                    self.smt_solver = CVC4Solver()
            except SmtSolverNotFound:
                logger.warn(
                    "{} Solver is not installed. Run 'barf-install-solvers.sh' to install it."
                    .format(SMT_SOLVER))

            # Set SMT translator.
            self.smt_translator = None

            if self.smt_solver:
                self.smt_translator = SmtTranslator(
                    self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(
                    self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(
                    self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules.

        Builds the CFG recoverer, the gadget classifier, the gadget finder and
        the emulator. The code analyzer is only created when an SMT translator
        exists, and the gadget verifier only when a code analyzer exists;
        otherwise they are left as None.
        """
        disassembler = self.disassembler
        translator = self.ir_translator
        arch_info = self.arch_info

        # Basic block.
        descent = RecursiveDescent(disassembler, self.text_section,
                                   translator, arch_info)
        self.bb_builder = CFGRecoverer(descent)

        # Code analyzer.
        self.code_analyzer = None

        if self.smt_translator:
            self.code_analyzer = CodeAnalyzer(
                self.smt_solver, self.smt_translator, arch_info)

        # Gadgets classifier.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, arch_info)

        # Gadgets finder.
        self.gadget_finder = GadgetFinder(
            disassembler, self.text_section, translator,
            self.binary.architecture, self.binary.architecture_mode)

        # Gadget verifier.
        self.gadget_verifier = None

        if self.code_analyzer:
            self.gadget_verifier = GadgetVerifier(self.code_analyzer,
                                                  arch_info)

        self.emulator = Emulator(arch_info, self.ir_emulator, translator,
                                 disassembler)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Args:
            filename (str): Name of an executable file.
        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load(arch_mode=self.binary.architecture_mode)

    def load_architecture(self, name, arch_info, disassembler, translator):
        """Translate to REIL instructions.

        Args:
            name (str): Architecture's name.
            arch_info (ArchitectureInformation): Architecture information object.
            disassembler (Disassembler): Disassembler for the architecture.
            translator (Translator): Translator for the architecture.
        """
        # Set up architecture information.
        self.name = name
        self.arch_info = arch_info
        self.disassembler = disassembler
        self.ir_translator = translator

        # Setup analysis modules.
        self._setup_analysis_modules()

    def translate(self, start=None, end=None, arch_mode=None):
        """Translate to REIL instructions.

        Args:
            start (int): Start address.
            end (int): End address.
            arch_mode (int): Architecture mode.

        Returns:
            (int, Instruction, list): A tuple of the form (address, assembler instruction, REIL instructions).
        """
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start=start_addr,
                                             end=end_addr,
                                             arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, start=None, end=None, arch_mode=None):
        """Disassemble native instructions.

        Args:
            start (int): Start address.
            end (int): End address.
            arch_mode (int): Architecture mode.

        Returns:
            (int, Instruction, int): A tuple of the form (address, assembler instruction, instruction size).
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        curr_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        while curr_addr < end_addr:
            # Fetch the instruction.
            encoding = self.__fetch_instr(curr_addr)

            # Decode it.
            asm_instr = self.disassembler.disassemble(
                encoding, curr_addr, architecture_mode=arch_mode)

            if not asm_instr:
                return

            yield curr_addr, asm_instr, asm_instr.size

            # update instruction pointer
            curr_addr += asm_instr.size

    def recover_cfg(self,
                    start=None,
                    end=None,
                    symbols=None,
                    callback=None,
                    arch_mode=None):
        """Recover CFG.

        Args:
            start (int): Start address.
            end (int): End address.
            symbols (dict): Symbol table.
            callback (function): A callback function which is called after each successfully recovered CFG.
            arch_mode (int): Architecture mode.

        Returns:
            ControlFlowGraph: A CFG.
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Check start address.
        start = start if start else self.binary.entry_point

        cfg, _ = self._recover_cfg(start=start,
                                   end=end,
                                   symbols=symbols,
                                   callback=callback)

        return cfg

    def recover_cfg_all(self,
                        entries,
                        symbols=None,
                        callback=None,
                        arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        Args:
            entries (list): A list of function addresses' to start the CFG recovery process.
            symbols (dict): Symbol table.
            callback (function): A callback function which is called after each successfully recovered CFG.
            arch_mode (int): Architecture mode.

        Returns:
            list: A list of recovered CFGs.
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = {} if not symbols else symbols

        # Recover the CFGs.
        cfgs = []
        addrs_processed = set()
        calls = entries

        while len(calls) > 0:
            start, calls = calls[0], calls[1:]

            cfg, calls_tmp = self._recover_cfg(start=start,
                                               symbols=symbols,
                                               callback=callback)

            addrs_processed.add(start)

            cfgs.append(cfg)

            for addr in sorted(calls_tmp):
                if addr not in addrs_processed and addr not in calls:
                    calls.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None, callback=None):
        """Recover the CFG of a single function.

        Args:
            start (int): Function start address (defaults to the binary's
                ea_start).
            end (int): End address (defaults to the binary's ea_end).
            symbols (dict): Symbol table indexed by address. Element 0 of an
                entry is used as the name and element 1 as the size.
            callback (function): Called as ``callback(address, name, size)``
                before the basic blocks are recovered.

        Returns:
            tuple: The recovered ControlFlowGraph and the second value
                returned by the basic block builder.
        """
        # Compute start and end address before anything else: the symbol
        # lookup and the "sub_..." fallback name need a concrete address, and
        # formatting None with '{:x}' raises.
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        # Retrieve symbol name in case it is available.
        if symbols and start_addr in symbols:
            entry = symbols[start_addr]
            name = entry[0]
            size = entry[1] - 1 if entry[1] != 0 else 0
        else:
            name = "sub_{:x}".format(start_addr)
            size = 0

        # Set callback.
        if callback:
            callback(start_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def emulate(self,
                context=None,
                start=None,
                end=None,
                arch_mode=None,
                hooks=None,
                max_instrs=None,
                print_asm=False):
        """Emulate native code.

        Args:
            context (dict): Processor context (register and/or memory).
            start (int): Start address.
            end (int): End address.
            arch_mode (int): Architecture mode.
            hooks (dict): Hooks by address.
            max_instrs (int): Maximum number of instructions to execute.
            print_asm (bool): Print asm.

        Returns:
            dict: Processor context.
        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        context = context if context else {}

        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        hooks = hooks if hooks else {}

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        # TODO Memory content should be encoded as hex strings so each
        # entry can be of different sizes.
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        # Execute the code.
        self.emulator.emulate(start_addr, end_addr, hooks, max_instrs,
                              print_asm)

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out

    def __fetch_instr(self, next_addr):
        start, end = next_addr, next_addr + self.arch_info.max_instruction_size

        encoding = ""
        for i in xrange(end - start):
            encoding += chr(self.ir_emulator.read_memory(start + i, 1))

        return encoding
Пример #5
0
class BARF(object):
    """Binary Analysis Framework."""
    def __init__(self, filename):
        """Create the framework object and open ``filename`` for analysis."""
        logger.info("[+] BARF: Initializing...")

        # Binary being analysed.
        self.binary = None
        self.text_section = None

        # Architecture dependent components.
        self.arch_info = None
        self.disassembler = None
        self.ir_translator = None

        # Core modules.
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        # Analysis modules.
        self.bb_builder = None
        self.code_analyzer = None
        self.gadget_finder = None
        self.gadget_classifier = None
        self.gadget_verifier = None

        self.open(filename)

    def _load(self, arch_mode=None):
        # setup architecture
        self._setup_arch(arch_mode=arch_mode)

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self, arch_mode=None):
        """Set up architecture.

        The x86 set-up is used for x86 binaries; every other architecture
        goes through the ARM set-up.
        """
        # Forget the previous architecture information.
        self.arch_info = None

        binary_is_x86 = self.binary.architecture == arch.ARCH_X86

        if binary_is_x86:
            self._setup_x86_arch(arch_mode)
            return

        # TODO: add arch to the binary file class.
        self._setup_arm_arch(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up ARM architecture (Thumb mode unless a mode is given)."""
        selected = arch.ARCH_ARM_MODE_THUMB if arch_mode is None else arch_mode

        self.arch_info = ArmArchitectureInformation(selected)
        self.disassembler = ArmDisassembler(architecture_mode=selected)
        self.ir_translator = ArmTranslator(architecture_mode=selected)

    def _setup_x86_arch(self, arch_mode=None):
        """Set up x86 architecture (the binary's mode unless one is given)."""
        selected = arch_mode
        if selected is None:
            selected = self.binary.architecture_mode

        self.arch_info = X86ArchitectureInformation(selected)
        self.disassembler = X86Disassembler(architecture_mode=selected)
        self.ir_translator = X86Translator(architecture_mode=selected)

    def _setup_core_modules(self):
        """Set up core modules.
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            elif SMT_SOLVER is not None:
                raise Exception("Invalid SMT solver.")

            # Set SMT translator.
            if self.smt_solver:
                self.smt_translator = SmtTranslator(
                    self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(
                    self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(
                    self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules.

        Builds, in this order: the CFG recoverer, the code analyzer, the
        gadget classifier, the gadget finder and the gadget verifier.
        """
        arch_info = self.arch_info
        disassembler = self.disassembler
        translator = self.ir_translator

        # Basic block.
        descent = RecursiveDescent(disassembler, self.text_section,
                                   translator, arch_info)
        self.bb_builder = CFGRecoverer(descent)

        # Code analyzer.
        self.code_analyzer = CodeAnalyzer(
            self.smt_solver, self.smt_translator, arch_info)

        # Gadgets.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, arch_info)
        self.gadget_finder = GadgetFinder(
            disassembler, self.text_section, translator,
            self.binary.architecture, self.binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer, arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        :param filename: name of an executable file
        :type filename: str

        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def load_architecture(self, name, arch_info, disassembler, translator):
        # Set up architecture information
        self.arch_info = arch_info
        self.disassembler = disassembler
        self.ir_translator = translator

        # setup analysis modules
        self._setup_analysis_modules()

    def translate(self, ea_start=None, ea_end=None, arch_mode=None):
        """Translate to REIL instructions.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a tuple of the form (address, assembler instruction, instruction size)
        :rtype: (int, Instruction, int)

        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(ea_start=start_addr,
                                             ea_end=end_addr,
                                             arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None, arch_mode=None):
        """Disassemble assembler instructions.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a tuple of the form (address, assembler instruction, instruction size)
        :rtype: (int, Instruction, int)

        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # disassemble instruction
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)

            asm = self.disassembler.disassemble(self.text_section[start:end],
                                                curr_addr,
                                                architecture_mode=arch_mode)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self,
                    ea_start=None,
                    ea_end=None,
                    symbols=None,
                    callback=None,
                    arch_mode=None):
        """Recover CFG

        :int start: Start address.
        :int end: End address.

        :returns: A CFG.

        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        cfg, _ = self._recover_cfg(start=ea_start,
                                   end=ea_end,
                                   symbols=symbols,
                                   callback=callback)

        return cfg

    def recover_cfg_all(self,
                        entries,
                        symbols=None,
                        callback=None,
                        arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        :int start: Start address.
        :returns: A list of CFGs.

        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = {} if not symbols else symbols

        # Recover the CFGs.
        cfgs = []
        addrs_processed = set()
        calls = entries

        while len(calls) > 0:
            start, calls = calls[0], calls[1:]

            cfg, calls_tmp = self._recover_cfg(start=start,
                                               symbols=symbols,
                                               callback=callback)

            addrs_processed.add(start)

            cfgs.append(cfg)

            for addr in sorted(calls_tmp):
                if addr not in addrs_processed and addr not in calls:
                    calls.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None, callback=None):
        """Recover the CFG of a single function.

        :param start: function start address (defaults to the binary's
            ea_start)
        :param end: end address (defaults to the binary's ea_end)
        :param symbols: symbol table indexed by address; element 0 of an
            entry is used as the name and element 1 as the size
        :param callback: called as ``callback(address, name, size)`` before
            the basic blocks are recovered

        :returns: the recovered ControlFlowGraph and the second value
            returned by the basic block builder

        """
        # Compute start and end address first. recover_cfg() forwards its
        # ea_start untouched, so 'start' may be None here, and formatting
        # None with '{:x}' raises.
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        # Retrieve symbol name in case it is available.
        if symbols and start_addr in symbols:
            record = symbols[start_addr]
            name = record[0]
            size = record[1] - 1 if record[1] != 0 else 0
        else:
            name = "sub_{:x}".format(start_addr)
            size = 0

        # Set callback.
        if callback:
            callback(start_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int

        :returns: a list of basic blocks
        :rtype: list

        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        bb_list = self.bb_builder.build(start_addr, end_addr)

        return bb_list

    def emulate_full(self,
                     context,
                     ea_start=None,
                     ea_end=None,
                     arch_mode=None):
        """Emulate REIL instructions.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a context
        :rtype: dict

        """
        def _translate_asm_instruction(asm_instr):
            reil_translator = self.ir_translator

            # Create ReilContainer
            instr_container = ReilContainer()
            instr_seq = ReilSequence()
            for reil_instr in reil_translator.translate(asm_instr):
                instr_seq.append(reil_instr)
            instr_container.add(instr_seq)

            return instr_container

        def _process_asm_instruction(reil_emulator, asm_instr):
            instr_container = _translate_asm_instruction(asm_instr)
            ip = asm_instr.address << 8 | 0x0
            next_addr = None

            while ip:
                # Fetch instruction.
                try:
                    reil_instr = instr_container.fetch(ip)
                except ReilContainerInvalidAddressError:
                    next_addr = split_address(ip)[0]
                    break

                next_ip = reil_emulator.single_step(reil_instr)

                # Update instruction pointer.
                ip = next_ip if next_ip else instr_container.get_next_address(
                    ip)

            if next_addr is None:
                next_addr = asm_instr.address + asm_instr.size

            return next_addr

        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        next_addr = start_addr
        while next_addr != end_addr:
            start, end = next_addr, next_addr + self.arch_info.max_instruction_size
            asm_instr = self.disassembler.disassemble(
                self.text_section[start:end], next_addr)
            next_addr = _process_asm_instruction(self.ir_emulator, asm_instr)

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out

    def emulate_full_ex(self,
                        context,
                        instr_container,
                        ea_start=None,
                        ea_end=None,
                        arch_mode=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param instr_container: instruction container
        :type instr_container: ReilContainer
        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a context
        :rtype: dict

        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        self.ir_emulator.execute(instr_container,
                                 start=start_addr << 8,
                                 end=end_addr << 8)

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out
Пример #6
0
class BARF(object):
    """Binary Analysis Framework.

    Facade that wires together, for one binary file, a disassembler, a REIL
    translator, a REIL emulator, an optional SMT solver/translator and the
    analysis modules built on top of them (CFG recovery, code analysis and
    gadget finding, classification and verification).
    """

    def __init__(self, filename):
        """Create the framework and open *filename* if one is given.

        :param filename: name of an executable file; a falsy value skips
            loading and leaves every component set to ``None``
        :type filename: str

        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self, arch_mode=None):
        """(Re)build every module, optionally forcing an architecture mode.

        :param arch_mode: architecture mode to use instead of the default
        :type arch_mode: int

        """
        # Order matters: core modules need the architecture information and
        # analysis modules need the core modules.
        self._setup_arch(arch_mode=arch_mode)
        self._setup_core_modules()
        self._setup_analysis_modules()

    def _setup_arch(self, arch_mode=None):
        """Set up architecture.

        Anything that is not reported as x86 by the binary is treated as ARM.
        """
        # Reset architecture information.
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch(arch_mode)
        else:
            # TODO: add arch to the binary file class.
            self._setup_arm_arch(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up ARM architecture (THUMB mode unless *arch_mode* is given).
        """
        if arch_mode is None:
            arch_mode = arch.ARCH_ARM_MODE_THUMB

        self.arch_info = ArmArchitectureInformation(arch_mode)
        self.disassembler = ArmDisassembler(architecture_mode=arch_mode)
        self.ir_translator = ArmTranslator(architecture_mode=arch_mode)

    def _setup_x86_arch(self, arch_mode=None):
        """Set up x86 architecture (the binary's mode unless *arch_mode* is given).
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules: REIL emulator, SMT solver and SMT translator.

        Without architecture information all three stay ``None``. The solver
        (and therefore the translator) also stays ``None`` when the configured
        solver is not installed or when ``SMT_SOLVER`` is ``None``.

        :raises Exception: if ``SMT_SOLVER`` names an unknown solver

        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if not self.arch_info:
            return

        # Set REIL emulator.
        self.ir_emulator = ReilEmulator(self.arch_info)

        # Set SMT solver.
        if SMT_SOLVER == "Z3":
            if _check_solver_installation("z3"):
                self.smt_solver = Z3Solver()
            else:
                logger.warning("z3 solver is not installed. Run 'barf-install-solvers.sh' to install it.")
        elif SMT_SOLVER == "CVC4":
            if _check_solver_installation("cvc4"):
                self.smt_solver = CVC4Solver()
            else:
                logger.warning("cvc4 solver is not installed. Run 'barf-install-solvers.sh' to install it.")
        elif SMT_SOLVER is not None:
            raise Exception("Invalid SMT solver.")

        # Set SMT translator (only possible with a working solver).
        if self.smt_solver:
            self.smt_translator = SmtTranslator(self.smt_solver, self.arch_info.address_size)

            self.smt_translator.set_arch_alias_mapper(self.arch_info.alias_mapper)
            self.smt_translator.set_arch_registers_size(self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules.

        The code analyzer and the gadget verifier are only created when an
        SMT translator is available; otherwise they are ``None``.
        """
        # Basic block.
        self.bb_builder = CFGRecoverer(RecursiveDescent(self.disassembler, self.text_section, self.ir_translator,
                                                        self.arch_info))

        # Code analyzer.
        self.code_analyzer = None

        if self.smt_translator:
            self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator, self.arch_info)

        # Gadgets classifier.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)

        # Gadgets finder.
        # NOTE(review): this always uses the binary's own architecture mode,
        # even when the modules were loaded with an explicit arch_mode --
        # confirm that this is intended.
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section, self.ir_translator,
                                          self.binary.architecture, self.binary.architecture_mode)

        # Gadget verifier.
        self.gadget_verifier = None

        if self.code_analyzer:
            self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    def _load_context(self, context):
        """Copy the registers and memory of *context* into the emulator."""
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Every memory entry is written as a 4-byte value.
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

    def _save_context(self):
        """Return a context with the emulator's registers; memory stays empty."""
        return {
            'registers': dict(self.ir_emulator.registers.items()),
            'memory': {}
        }

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Nothing happens when *filename* is falsy.

        :param filename: name of an executable file
        :type filename: str

        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def load_architecture(self, name, arch_info, disassembler, translator):
        """Install custom architecture components and rebuild analysis modules.

        :param name: architecture name (currently unused)
        :param arch_info: architecture information object
        :param disassembler: disassembler for the architecture
        :param translator: REIL translator for the architecture

        """
        # Set up architecture information.
        self.arch_info = arch_info
        self.disassembler = disassembler
        self.ir_translator = translator

        # NOTE(review): core modules (emulator, SMT solver/translator) are not
        # rebuilt here, so they keep whatever was set up before -- confirm
        # that this is intended.
        self._setup_analysis_modules()

    def translate(self, ea_start=None, ea_end=None, arch_mode=None):
        """Translate to REIL instructions.

        This is a generator; the translator is reset when iteration starts.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: tuples of the form (address, assembler instruction,
            translation of that instruction as returned by the REIL translator)

        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(ea_start=start_addr, ea_end=end_addr, arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None, arch_mode=None):
        """Disassemble assembler instructions.

        This is a generator; it stops early as soon as the disassembler
        returns a falsy result for an address.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a tuple of the form (address, assembler instruction, instruction size)
        :rtype: (int, Instruction, int)

        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes that never
            # reaches past the end of the binary.
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)

            asm = self.disassembler.disassemble(self.text_section[start:end], curr_addr, architecture_mode=arch_mode)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # Update instruction pointer.
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None, symbols=None, callback=None, arch_mode=None):
        """Recover CFG.

        All modules are reloaded for the selected architecture mode first.

        :param ea_start: start address (falsy: start of the binary)
        :type ea_start: int
        :param ea_end: end address (falsy: end of the binary)
        :type ea_end: int
        :param symbols: mapping address -> (name, size)
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            the CFG is built
        :param arch_mode: architecture mode (default: the binary's mode)
        :type arch_mode: int

        :returns: A CFG.

        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        cfg, _ = self._recover_cfg(start=ea_start, end=ea_end, symbols=symbols, callback=callback)

        return cfg

    def recover_cfg_all(self, entries, symbols=None, callback=None, arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        Starting from *entries*, every call target reported while recovering
        a CFG is queued and recovered in turn (first in, first out).

        :param entries: addresses to start from
        :param symbols: mapping address -> (name, size)
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            each CFG is built
        :param arch_mode: architecture mode (default: the binary's mode)
        :type arch_mode: int

        :returns: A list of CFGs.

        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = {} if not symbols else symbols

        # Recover the CFGs.
        cfgs = []
        addrs_processed = set()

        # Work on a private list so any sequence is accepted and the caller's
        # object is never touched.
        calls = list(entries)

        while calls:
            start, calls = calls[0], calls[1:]

            cfg, calls_tmp = self._recover_cfg(start=start, symbols=symbols, callback=callback)

            addrs_processed.add(start)

            cfgs.append(cfg)

            # Queue newly discovered call targets in ascending order.
            for addr in sorted(calls_tmp):
                if addr not in addrs_processed and addr not in calls:
                    calls.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None, callback=None):
        """Recover the CFG of one function.

        :param start: start address (falsy: start of the binary)
        :param end: end address (falsy: end of the binary)
        :param symbols: mapping address -> (name, size)
        :param callback: called as ``callback(address, name, size)``

        :returns: a tuple (CFG, call targets reported by the block builder)

        """
        # Compute start and end address.
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        # Address used for naming. Without an explicit start the resolved
        # default is used, so the "sub_<addr>" name can always be formatted.
        label_addr = start if start is not None else start_addr

        # Retrieve symbol name in case it is available.
        if symbols and label_addr in symbols:
            sym_name, sym_size = symbols[label_addr][0], symbols[label_addr][1]

            name = sym_name
            size = sym_size - 1 if sym_size != 0 else 0
        else:
            name = "sub_{:x}".format(label_addr)
            size = 0

        # Set callback.
        if callback:
            callback(label_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int

        :returns: whatever the basic block builder returns for the range
        :rtype: list

        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # NOTE(review): _recover_cfg unpacks the result of build() as
        # (basic blocks, calls) and passes a third argument; here the raw
        # result is returned -- confirm the return shape callers expect.
        bb_list = self.bb_builder.build(start_addr, end_addr)

        return bb_list

    def emulate_full(self, context, ea_start=None, ea_end=None, arch_mode=None):
        """Emulate REIL instructions.

        Native instructions are disassembled, translated and emulated one at
        a time, following the control flow computed by the emulator, until
        execution reaches exactly the end address.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a context (only 'registers' is filled in)
        :rtype: dict

        """
        def _translate_asm_instruction(asm_instr):
            # Wrap the translation of one native instruction in a container.
            instr_seq = ReilSequence()

            for reil_instr in self.ir_translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            instr_container = ReilContainer()
            instr_container.add(instr_seq)

            return instr_container

        def _process_asm_instruction(reil_emulator, asm_instr):
            # Emulate one native instruction and return the address of the
            # native instruction to execute next.
            instr_container = _translate_asm_instruction(asm_instr)

            # REIL address of the first instruction of the translation: the
            # native address shifted left by 8 bits.
            ip = asm_instr.address << 8 | 0x0
            next_addr = None

            while ip:
                # Fetch instruction. An address outside the container means
                # control left this native instruction.
                try:
                    reil_instr = instr_container.fetch(ip)
                except ReilContainerInvalidAddressError:
                    next_addr = split_address(ip)[0]
                    break

                next_ip = reil_emulator.single_step(reil_instr)

                # Update instruction pointer.
                ip = next_ip if next_ip else instr_container.get_next_address(ip)

            # No explicit target: fall through to the following instruction.
            if next_addr is None:
                next_addr = asm_instr.address + asm_instr.size

            return next_addr

        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers and memory.
        self._load_context(context)

        # NOTE(review): a failed disassembly (falsy result) is not handled
        # here, and the loop only ends when execution lands exactly on
        # end_addr.
        next_addr = start_addr
        while next_addr != end_addr:
            start, end = next_addr, next_addr + self.arch_info.max_instruction_size
            asm_instr = self.disassembler.disassemble(self.text_section[start:end], next_addr)
            next_addr = _process_asm_instruction(self.ir_emulator, asm_instr)

        # Save registers.
        return self._save_context()

    def emulate_full_ex(self, context, instr_container, ea_start=None, ea_end=None, arch_mode=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param instr_container: instruction container
        :type instr_container: ReilContainer
        :param ea_start: start address
        :type ea_start: int
        :param ea_end: end address
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a context (only 'registers' is filled in)
        :rtype: dict

        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers and memory.
        self._load_context(context)

        # The emulator is given the native addresses shifted left by 8 bits.
        self.ir_emulator.execute(instr_container, start=start_addr << 8, end=end_addr << 8)

        # Save registers.
        return self._save_context()