Exemplo n.º 1
0
    def run(self):
        # Architecture
        architecture = False
        if self.args.architecture:
            architecture = self.args.architecture
        else:
            with open(self.args.filename) as fdesc:
                architecture = ArchHeuristic(fdesc).guess()
            if not architecture:
                raise ValueError("Unable to recognize the architecture, please specify it")
            if self.args.verbose:
                print "Guessed architecture: %s" % architecture

        cont = Container.from_stream(open(self.args.filename))
        machine = Machine(architecture)
        addr_size = machine.ira().pc.size / 4
        fh = FuncHeuristic(cont, machine)

        # Enable / disable heuristics
        for name in self.args.enable_heuristic:
            heur = fh.name2heuristic(name)
            if heur not in fh.heuristics:
                fh.heuristics.append(heur)
        for name in self.args.disable_heuristic:
            heur = fh.name2heuristic(name)
            fh.heuristics.remove(heur)

        if self.args.verbose:
            print "Heuristics to run: %s" % ", ".join(fh.heuristic_names)


        # Launch guess
        fmt = "0x{:0%dx}" % addr_size
        for addr in fh.guess():
            print fmt.format(addr)
Exemplo n.º 2
0
    def run(self):
        # Architecture
        architecture = False
        if self.args.architecture:
            architecture = self.args.architecture
        else:
            with open(self.args.filename) as fdesc:
                architecture = ArchHeuristic(fdesc).guess()
            if not architecture:
                raise ValueError(
                    "Unable to recognize the architecture, please specify it")
            if self.args.verbose:
                print "Guessed architecture: %s" % architecture

        cont = Container.from_stream(open(self.args.filename))
        machine = Machine(architecture)
        addr_size = machine.ira().pc.size / 4
        fh = FuncHeuristic(cont, machine)

        # Enable / disable heuristics
        for name in self.args.enable_heuristic:
            heur = fh.name2heuristic(name)
            if heur not in fh.heuristics:
                fh.heuristics.append(heur)
        for name in self.args.disable_heuristic:
            heur = fh.name2heuristic(name)
            fh.heuristics.remove(heur)

        if self.args.verbose:
            print "Heuristics to run: %s" % ", ".join(fh.heuristic_names)

        # Launch guess
        fmt = "0x{:0%dx}" % addr_size
        for addr in fh.guess():
            print fmt.format(addr)
Exemplo n.º 3
0
def symexec(handler):
    inst_bytes = handler.bytes_without_jmp
    machine = Machine("x86_32")
    cont = Container.from_string(inst_bytes)
    bs = cont.bin_stream
    mdis = machine.dis_engine(bs, symbol_pool=cont.symbol_pool)

    end_offset = len(inst_bytes)

    mdis.dont_dis = [end_offset]

    asm_block = mdis.dis_block(0)
    # print asm_block
    ira = machine.ira(mdis.symbol_pool)
    ira.add_block(asm_block)

    symb = SymbolicExecutionEngine(ira, symbols_init)

    cur_addr = symb.emul_ir_block(0)
    count = 0
    while cur_addr != ExprInt(end_offset, 32):  # execute to end
        cur_addr = symb.emul_ir_block(cur_addr)

        count += 1
        if count > 1000:
            print '[!] to many loop at %s' % handler.name
            break

    return symb
def main():
    """Dump the CFG of '300.bin', run the target in a sandbox and dump the
    resulting VM graph.

    Writes "cfg_before.dot" (disassembly from 0x401550) and "vm_graph.dot"
    (the module-level `cfg`, read once `interpret()` has run). The file to
    sandbox is taken from the command line.
    """
    global cfg
    global block
    global data

    #Paint the cfg_before image from disassembly
    # Files are opened through context managers so they are closed (and
    # the .dot output flushed) deterministically
    with open('300.bin') as fdesc:
        cont = Container.from_stream(fdesc)
    bin_stream = cont.bin_stream
    adr = 0x401550
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(adr)
    with open("cfg_before.dot", "w") as fdesc:
        fdesc.write(blocks.dot())

    #Get filename
    parser = Sandbox_Linux_x86_64.parser(description="300.bin")
    parser.add_argument("filename", help="filename")
    options = parser.parse_args()
    options.mimic_env = True

    #Start Sandbox
    sb = Sandbox_Linux_x86_64(options.filename, options, globals())
    sb.jitter.init_run(sb.entry_point)
    sb.jitter.add_breakpoint(sb.entry_point, stop)
    sb.run()

    #Get bytecode
    interpret()

    #Paint cfg
    with open("vm_graph.dot", "w") as fdesc:
        fdesc.write(cfg.dot())
Exemplo n.º 5
0
def test(data):
    # Digest C informations
    text = """
    struct human {
            unsigned short age;
            unsigned int height;
            char name[50];
    };

    struct ll_human {
            struct ll_human* next;
            struct human human;
    };
    """

    my_types = CTypeAMD64_unk()
    types_mngr = CTypesManagerNotPacked(my_types.types)

    types_mngr.add_c_decl(text)

    # Analyze binary
    cont = Container.fallback_container(data, None, addr=0)

    machine = Machine("x86_64")
    dis_engine, ira = machine.dis_engine, machine.ira

    mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool)
    addr_head = 0
    blocks = mdis.dis_multibloc(addr_head)
    lbl_head = mdis.symbol_pool.getby_offset(addr_head)

    ir_arch_a = ira(mdis.symbol_pool)
    for block in blocks:
        ir_arch_a.add_bloc(block)

    open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot())

    # Main function's first argument's type is "struct ll_human*"
    void_ptr = types_mngr.void_ptr
    ll_human = types_mngr.get_type(('ll_human',))
    ptr_llhuman = ObjCPtr('noname', ll_human,
                          void_ptr.align, void_ptr.size)

    arg0 = ExprId('ptr', 64)
    ctx = {ir_arch_a.arch.regs.RDI: arg0}
    expr_types = {arg0.name: ptr_llhuman}

    mychandler = MyCHandler(types_mngr, expr_types)

    for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head):
        print "Access:", expr
        target_types = mychandler.expr_to_types(expr)
        for target_type in target_types:
            print '\tType:', target_type
        c_strs = mychandler.expr_to_c(expr)
        for c_str in c_strs:
            print "\tC access:", c_str
        print
def main():
    #Setup Machine for arm, get filename
    machine = Machine('armtl')
    parser = ArgumentParser("Description")
    parser.add_argument('filename', help='filename')
    args = parser.parse_args()

    #Setup disassembly stream in container, get blocks and draw the graph
    cont = Container.from_stream(open(args.filename))
    bin_stream = cont.bin_stream
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(0x614)
    open("cfg.dot", "w").write(blocks.dot())

    #Create a template for matching blocks in the control flow graph
    #Requirement 1) Don't get block 0xdf8, it can't disassemble
    #Requirement 2) Get ones that start with LDR
    #Requirement 3) Get ones where the second to last instruction is CMP
    #No restructions for in going and out going edges
    mblock = MatchGraphJoker(
        name='mblock',
        restrict_in=False,
        restrict_out=False,
        filt=lambda block: block.label.offset != 0xdf8 and "LDR" in block.
        lines[0].name and "CMP" in block.lines[-2].name)

    #Basic block matcher
    nblock = MatchGraphJoker(name="next",
                             restrict_in=False,
                             restrict_out=False)

    #Now it should match the blocks we want with the checks
    matcher = nblock >> mblock

    flag_storage = {}
    #Loop through matching template blocks
    for sol in matcher.match(blocks):
        try:
            #Grab position line
            pline = sol[mblock].lines[3]
            #Grab character check line
            cline = sol[mblock].lines[-2]
            #Transform character and position to integer
            pos = int(pline.arg2str(pline.args[1]), 16)
            c = int(cline.arg2str(cline.args[1]), 16)
            #If its NULL, ignore
            if c != 0:
                flag_storage.update({pos: c})
        except ValueError:
            #The F at the beginning is a NULL check
            pass
    #Print Flag
    flag = "".join(map(lambda x: chr(flag_storage[x]),
                       sorted(flag_storage))).replace("F", "I")
    print "F" + flag

    pass
Exemplo n.º 7
0
def container_guess(archinfo):
    """Vote for the architecture announced by the binary container, if any
    @archinfo: ArchHeuristic instance

    Return {architecture: 1} when the container is recognized and exposes
    an architecture, an empty dictionary otherwise.
    """
    container = Container.from_stream(archinfo.stream)

    # A recognized container exposing an architecture gets a single vote
    if not isinstance(container, ContainerUnknown) and container.arch:
        return {container.arch: 1}

    return {}
Exemplo n.º 8
0
def test_learn(args):

    machine = Machine("x86_64")

    # Compil tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names
    c_files = []

    for cur_dir, sub_dir, files in os.walk("."):
        c_files += [x[:-2] for x in files if x.endswith(".c")]

    for c_file in c_files:
        cont = Container.from_stream(open(c_file))

        func_name = c_file
        main_addr = cont.symbol_pool["main"].offset
        func_addr = cont.symbol_pool[func_name].offset

        log_info("Learning " + func_name + " over " + func_name + ".c")

        cmd = [
            "sibyl", "learn", "-t", "miasm", "-m",
            hex(main_addr), func_name, c_file
        ]
        sibyl = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        stdout, stderr = sibyl.communicate()
        assert sibyl.returncode == 0

        log_info("Testing generated class")

        mod = imp.new_module("testclass")
        exec stdout in mod.__dict__
        classTest = getattr(mod, "Test" + c_file)
        tl = TestLauncher(c_file, machine, ABI_AMD64_SYSTEMV, [classTest],
                          "gcc")

        possible_funcs = tl.run(func_addr)
        if tl.possible_funcs:
            log_success("Generated class recognize the function " + func_name)
        else:
            log_error("Generated class failed to recognize the function " +
                      func_name)

    log_info("Remove old files")
    os.system("make clean")
Exemplo n.º 9
0
    def __init__(self, filename, verbose=False):
        """Load binary file.

        Args:
            filename (str): path to a file to be analyzed
            verbose (bool): affects log verbosity
        """
        # Close the file once the container is built instead of leaking
        # the descriptor for the lifetime of the process
        with open(filename) as fdesc:
            self._container = Container.from_stream(fdesc)
        self.bin_stream = self._container.bin_stream
        self.entry_point = self._container.entry_point
        self.machine = Machine(self._container.arch)

        self.fn = {}
        self.interval = interval()
        self.deep = 0
        self.offset = 0

        self._set_logging(verbose)
        self._logger.info("PE loaded")
Exemplo n.º 10
0
    def run(self):
        # Architecture
	map_addr = int(self.args.mapping_base, 0)
        architecture = False
        if self.args.architecture:
            architecture = self.args.architecture
        else:
            with open(self.args.filename) as fdesc:
                architecture = ArchHeuristic(fdesc).guess()
            if not architecture:
                raise ValueError("Unable to recognize the architecture, please specify it")
            if self.args.verbose:
                print "Guessed architecture: %s" % architecture

        #cont = Container.from_stream(open(self.args.filename))
	cont = Container.from_stream(open(self.args.filename), addr=map_addr)
        machine = Machine(architecture)
        addr_size = machine.ira().pc.size / 4
        fh = FuncHeuristic(cont, machine, self.args.filename)

        # Default: force only IDA or GHIDRA if available
        if config.idaq64_path:
            fh.heuristics = [ida_funcs]
        elif config.ghidra_headless_path:
            fh.heuristics = [ghidra_funcs]

        # Enable / disable heuristics
        for name in self.args.enable_heuristic:
            heur = fh.name2heuristic(name)
            if heur not in fh.heuristics:
                fh.heuristics.append(heur)
        for name in self.args.disable_heuristic:
            heur = fh.name2heuristic(name)
            fh.heuristics.remove(heur)

        if self.args.verbose:
            print "Heuristics to run: %s" % ", ".join(fh.heuristic_names)


        # Launch guess
        fmt = "0x{:0%dx}" % addr_size
        for addr in fh.guess():
            print fmt.format(addr)
Exemplo n.º 11
0
    def run(self):
        # Currently only AMD64 SYSTEMV ABI is supported by the learning module
        abi = ABI_AMD64_SYSTEMV

        # Currently only x86_64 is supported by the learning module
        machine = "x86_64"

        if self.args.trace != "miasm" and self.args.main != None:
            raise ValueError("Main argument is only used by miasm tracer")

        main = int(self.args.main, 0) if self.args.main else None

        # If function address is not set then use the symbol address
        if self.args.address is None:
            cont = Container.from_stream(open(self.args.program))
            try:
                address = cont.symbol_pool[self.args.functionname].offset
            except KeyError:
                raise ValueError("Symbol %s does not exists in %s" %
                                 (self.args.functionname, self.args.program))
        else:
            address = int(self.args.address, 0)

        testcreator = TestCreator(self.args.functionname, address,
                                  self.args.program, self.args.headerfile,
                                  AVAILABLE_TRACER[self.args.trace],
                                  AVAILABLE_GENERATOR[self.args.generator],
                                  main, abi, machine, self.args.avoid_null)

        if self.args.verbose == 0:
            testcreator.logger.setLevel(logging.WARN)
        if self.args.verbose == 1:
            testcreator.logger.setLevel(logging.INFO)
        elif self.args.verbose == 2:
            testcreator.logger.setLevel(logging.DEBUG)

        createdTest = testcreator.create_test()

        if self.args.output:
            open(self.args.output, "w+").write(createdTest)
        else:
            print createdTest
Exemplo n.º 12
0
    def run(self):
        # Currently only AMD64 SYSTEMV ABI is supported by the learning module
        abi = ABI_AMD64_SYSTEMV

        # Currently only x86_64 is supported by the learning module
        machine = "x86_64"

        if self.args.trace != "miasm" and self.args.main != None:
            raise ValueError("Main argument is only used by miasm tracer")

        main = int(self.args.main, 0) if self.args.main else None

        # If function address is not set then use the symbol address
        if self.args.address is None:
            cont = Container.from_stream(open(self.args.program))
            try:
                address = cont.symbol_pool[self.args.functionname].offset
            except KeyError:
                raise ValueError("Symbol %s does not exists in %s" % (self.args.functionname, self.args.program))
        else:
            address = int(self.args.address, 0)


        testcreator = TestCreator(self.args.functionname, address,
                                  self.args.program, self.args.headerfile,
                                  AVAILABLE_TRACER[self.args.trace],
                                  AVAILABLE_GENERATOR[self.args.generator],
                                  main, abi, machine)

        if self.args.verbose == 0:
            testcreator.logger.setLevel(logging.WARN)
        if self.args.verbose == 1:
            testcreator.logger.setLevel(logging.INFO)
        elif self.args.verbose == 2:
            testcreator.logger.setLevel(logging.DEBUG)

        createdTest = testcreator.create_test()

        if self.args.output:
            open(self.args.output, "w+").write(createdTest)
        else:
            print createdTest
Exemplo n.º 13
0
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.core.locationdb import LocationDB

# Offset at which the assembled bytes are disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Only the first element returned by asm() is kept
# (presumably one encoding among several candidates -- TODO confirm)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db=loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# NOTE(review): lines_wd looks like a limit on the number of disassembled
# lines, set to the single MOV here -- confirm against the engine
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
# cur_addr receives the value returned by run_at
cur_addr = symb.run_at(ircfg, START_ADDR)
Exemplo n.º 14
0
import os
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker

# Close the dump once the container is built instead of leaking the file
with open('dump2.bin') as fdesc:
    container = Container.from_stream(fdesc)
bin_stream = container.bin_stream

#machine name = container.arch
machine = Machine(container.arch)

#fireup disasm engine
mdis = machine.dis_engine(bin_stream)

#Return an AsmCFG instance containing disassembled blocks
#https://github.com/cea-sec/miasm/pull/309
blocks = mdis.dis_multibloc(container.entry_point)


def filter_block(block):
    """Select the blocks made of exactly two lines: a PUSH, then a MOV."""
    return (len(block.lines) == 2 and
            block.lines[0].name == 'PUSH' and
            block.lines[1].name == 'MOV')

#parent joker node for the first block in MatchGraph / defining a filter for
Exemplo n.º 15
0
parser.add_argument("-p",
                    "--passthrough",
                    help="Reg-exp for passthrough files",
                    default="^$")
parser.add_argument("-f", "--flags", help="Flags")
parser.add_argument("-v",
                    "--verbose",
                    action="store_true",
                    help="Activate verbose syscalls")
args = parser.parse_args()

if args.verbose:
    syscall.log.setLevel(logging.DEBUG)

# Get corresponding interpreter and reloc address
# The target is only parsed here: close it once the container is built
with open(args.target) as fdesc:
    cont_target_tmp = Container.from_stream(fdesc)
# Interpreter path: content of the ".interp" section without its NUL bytes
ld_path = str(cont_target_tmp.executable.getsectionbyname(
    ".interp").content).strip("\x00")
if cont_target_tmp.executable.Ehdr.type in [elf_csts.ET_REL, elf_csts.ET_DYN]:
    elf_base_addr = 0x40000000
elif cont_target_tmp.executable.Ehdr.type == elf_csts.ET_EXEC:
    elf_base_addr = 0  # Not relocatable
else:
    raise ValueError("Unsuported type %d" %
                     cont_target_tmp.executable.Ehdr.type)

# Instantiate a jitter
machine = Machine(cont_target_tmp.arch)
jitter = machine.jitter(args.jitter)
jitter.init_stack()
Exemplo n.º 16
0
from argparse import ArgumentParser
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
from llvmlite import ir as llvm_ir
from miasm2.expression.simplifications import expr_simp_high_to_explicit

parser = ArgumentParser("LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()

# This part focus on obtaining an IRCFG to transform #
# The target is closed once the container is built (no leaked descriptor)
with open(args.target) as fdesc:
    cont = Container.from_stream(fdesc)
machine = Machine(args.architecture if args.architecture else cont.arch)
ir = machine.ir(cont.loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
# Base 0: the address may be given in decimal or with a "0x" prefix
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = ir.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################

# Instantiate a context and the function to fill
context = LLVMContext_IRCompilation()
context.ir_arch = ir

func = LLVMFunction_IRCompilation(context, name="test")
func.ret_type = llvm_ir.VoidType()
func.init_fc()
Exemplo n.º 17
0
                    "Use only with --propagexpr option. "
                    "WARNING: not reliable, may fail.")
parser.add_argument('-e', "--loadint", action="store_true",
                    help="Load integers from binary in fixed memory lookup.")
parser.add_argument('-j', "--calldontmodstack", action="store_true",
                    help="Consider stack high is not modified in subcalls")


args = parser.parse_args()

if args.verbose:
    log_asmblock.setLevel(logging.DEBUG)

log.info('Load binary')
if args.rawbinary:
    cont = Container.fallback_container(open(args.filename, "rb").read(),
                                        vm=None, addr=args.base_address)
else:
    with open(args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc, addr=args.base_address)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print "Architecture recognition fail. Please specify it in arguments"
    exit(-1)
Exemplo n.º 18
0
# First stage sample name (also its SHA-1)
first_stage_fn = "0413f832d8161187172aef7a769586515f969479"
# ChaCha decryption function address for this particular sample
decrypt_func_addr = 0x400830
# Memory address of the initialization vector
iv_addr = 0x614000
# Memory address of the key
key_addr = 0x614020
# Arbitrary memory address to map the encrypted file in memory
in_addr = 0x40000000

# Create new instance of x86_64 sandbox to emulate the decryption function
sb = Sandbox_Linux_x86_64(first_stage_fn, options, globals())
# The encrypted input is raw binary data: it is opened in binary mode, like
# the first stage, so no newline translation can alter its bytes or size
with open(first_stage_fn, "rb") as first_stage, \
        open(options.in_filename, "rb") as enc_bin:
    cont = Container.from_stream(first_stage)
    in_data = enc_bin.read()
    in_size = len(in_data)

# Allocate memory to store the output
out_addr = linobjs.heap.alloc(sb.jitter, in_size)
# Map the encrypted file in memory
sb.jitter.vm.add_memory_page(in_addr, PAGE_READ | PAGE_WRITE, in_data)

# Call the decryption function with the good arguments
sb.call(decrypt_func_addr, key_addr, 1, iv_addr, in_addr, out_addr, in_size)

# Get the decrypted data from memory
out_bin = sb.jitter.vm.get_mem(out_addr, in_size)
with open(options.out_filename, "wb") as dec_bin:
    dec_bin.write(out_bin)
Exemplo n.º 19
0
        print 'IN', [str(x) for x in irb_in_nodes[label]]
        print 'OUT', [str(x) for x in irb_out_nodes[label]]

    print '*' * 20, 'interblock', '*' * 20
    inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key,
                     irb_in_nodes, irb_out_nodes)

    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.dot', 'w').write(flow_graph.dot())


ad = int(args.addr, 16)

print 'disasm...'
cont = Container.from_stream(open(args.filename))
machine = Machine("x86_32")

mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
mdis.follow_call = True
asmcfg = mdis.dis_multiblock(ad)
print 'ok'

print 'generating dataflow graph for:'
ir_arch_analysis = machine.ira(mdis.loc_db)
ircfg = ir_arch_analysis.new_ircfg_from_asmcfg(asmcfg)

for irblock in ircfg.blocks.values():
    print irblock

if args.symb:
Exemplo n.º 20
0
def test_learn(args):
    machine = Machine("x86_64")

    # Compil tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names
    c_files = []

    for cur_dir, sub_dir, files in os.walk("."):
        c_files += [x[:-2] for x in files if x.endswith(".c")]

    # Ways to invoke
    to_invoke = {
        "Miasm": invoke_miasm,
    }
    if args.pin_tracer:
        to_invoke["PIN"] = invoke_pin

    # Learn + test
    fail = False
    for filename in c_files:

        if filename in unsupported:
            log_error("Skip %s (unsupported)" % filename)
            continue

        with open(filename) as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = filename
        func_addr = cont.loc_db.get_name_offset(func_name)
        header_filename = "%s.h" % filename

        for name, cb in to_invoke.iteritems():
            log_info("Learning %s over %s with %s" % (func_name,
                                                      filename, name))
            cmdline = cb(filename, func_name, header_filename, cont)

            print " ".join(cmdline)
            sibyl = subprocess.Popen(cmdline, env=os.environ,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout, stderr = sibyl.communicate()
            if sibyl.returncode != 0:
                log_error("Failed to learn with error:")
                print stderr
                fail = True
                continue

            log_info("Testing generated class")

            mod = imp.new_module("testclass")
            exec stdout in mod.__dict__
            classTest = getattr(mod, "TESTS")[0]
            tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV, [classTest],
                              config.jit_engine)

            possible_funcs = tl.run(func_addr)
            if tl.possible_funcs and possible_funcs == [filename]:
                log_success("Generated class recognize the function " \
                            "'%s'" % func_name)
            else:
                log_error("Generated class failed to recognize the function " \
                          "'%s'" % func_name)
                fail = True

    # Clean
    log_info( "Remove old files" )
    os.system("make clean")

    return fail
Exemplo n.º 21
0
                    help="Use implicit tracking",
                    action="store_true")
parser.add_argument("--unfollow-mem", action="store_true",
                    help="Stop on memory statements")
parser.add_argument("--unfollow-call", action="store_true",
                    help="Stop on call statements")
parser.add_argument("--do-not-simplify", action="store_true",
                    help="Do not simplify expressions")
args = parser.parse_args()

# Load the binary; its container also provides the default architecture
with open(args.filename) as fstream:
    cont = Container.from_stream(fstream)

# An architecture given in the arguments takes precedence
arch = args.architecture or cont.arch
machine = Machine(arch)

# Resolve the requested elements (upper-cased names) into registers
regs = machine.mn.regs.all_regs_ids_byname
elements = set()
for element in args.element:
    try:
        reg = regs[element.upper()]
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)
    elements.add(reg)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.symbol_pool)
Exemplo n.º 22
0
                    help="Display image representation of disasm")
parser.add_argument('-c', "--rawbinary", default=False, action="store_true",
                    help="Don't interpret input as ELF/PE/...")
parser.add_argument('-d', "--defuse", action="store_true",
                    help="Dump the def-use graph in file 'defuse.dot'."
                    "The defuse is dumped after simplifications if -s option is specified")

args = parser.parse_args()

if args.verbose:
    log_asmblock.setLevel(logging.DEBUG)

log.info('Load binary')
if args.rawbinary:
    shift = args.shiftoffset if args.shiftoffset is not None else 0
    cont = Container.fallback_container(open(args.filename).read(),
                                        None, addr=shift)
else:
    with open(args.filename) as fdesc:
        cont = Container.from_stream(fdesc, addr=args.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print "Architecture recognition fail. Please specify it in arguments"
    exit(-1)
Exemplo n.º 23
0
    def from_bytecode(self, bytecode):
        """Disassemble `bytecode` from offset 0 and store the resulting
        blocks in self.blks.

        @bytecode: raw bytes, wrapped in a container built from a string
        """
        bin_stream = Container.from_string(bytecode).bin_stream
        disasm = self.machine.dis_engine(bin_stream)
        self.blks = disasm.dis_multibloc(0)
Exemplo n.º 24
0
                    action="store_true")
parser.add_argument("--unfollow-call", action="store_true",
                    help="Stop on call statements")
parser.add_argument("--do-not-simplify", action="store_true",
                    help="Do not simplify expressions")
parser.add_argument("--rename-args", action="store_true",
                    help="Rename common arguments (@32[ESP_init] -> Arg1)")
parser.add_argument("--json", action="store_true",
                    help="Output solution in JSON")
args = parser.parse_args()

# Load the binary; its container also provides the default architecture
with open(args.filename) as fstream:
    cont = Container.from_stream(fstream)

# An architecture given in the arguments takes precedence
arch = args.architecture or cont.arch
machine = Machine(arch)

# Resolve the requested elements (exact register names) into registers
regs = machine.mn.regs.all_regs_ids_byname
elements = set()
for element in args.element:
    try:
        reg = regs[element]
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)
    elements.add(reg)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.symbol_pool)
Exemplo n.º 25
0
import sys
from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.core.asmbloc import bloc2graph
from miasm2.analysis.binary import Container
from pdb import pm

if len(sys.argv) != 3:
    print 'Example:'
    print "%s samples/box_upx.exe 0x407570" % sys.argv[0]
    sys.exit(0)

addr = int(sys.argv[2], 16)
cont = Container.from_stream(open(sys.argv[1]))
mdis = dis_x86_32(cont.bin_stream)
# Inform the engine to avoid disassembling null instructions
mdis.dont_dis_nulstart_bloc = True
blocs = mdis.dis_multibloc(addr)

graph = bloc2graph(blocs)
open('graph.txt', 'w').write(graph)
Exemplo n.º 26
0
from miasm2.analysis.data_flow import dead_simp
from miasm2.expression.simplifications import expr_simp


parser = ArgumentParser("Constant expression propagation")
parser.add_argument('filename', help="File to analyze")
parser.add_argument('address', help="Starting address for disassembly engine")
parser.add_argument('-s', "--simplify", action="store_true",
                    help="Apply simplifications rules (liveness, graph simplification, ...)")

args = parser.parse_args()


machine = Machine("x86_32")

# Close the binary once the container is built (no leaked descriptor)
with open(args.filename) as fdesc:
    cont = Container.from_stream(fdesc)
ira, dis_engine = machine.ira, machine.dis_engine
mdis = dis_engine(cont.bin_stream)
ir_arch = ira(mdis.symbol_pool)
# Base 0: the address may be given in decimal or with a "0x" prefix
addr = int(args.address, 0)


# Disassemble from the start address and lift every block to IR
asmcfg = mdis.dis_multiblock(addr)
for block in asmcfg.blocks:
    ir_arch.add_block(block)


# Propagate constants, starting from the initial register values
init_infos = ir_arch.arch.regs.regs_init
cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos)

if args.simplify:
Exemplo n.º 27
0
 def load_vm(self, filename, map_addr):
     """Load `filename` into the jitter VM at `map_addr`, then initialize
     the CPU registers and the stack.

     The resulting container is kept in self.ctr.
     """
     # Close the file once the container is built (no leaked descriptor)
     with open(filename) as fdesc:
         self.ctr = Container.from_stream(fdesc,
                                          vm=self.jitter.vm,
                                          addr=map_addr)
     self.jitter.cpu.init_regs()
     self.jitter.init_stack()
Exemplo n.º 28
0
    def from_bytecode(self, bytecode):
        """Disassemble `bytecode` from offset 0 and store the resulting
        blocks in self.blks.

        @bytecode: raw bytes, wrapped in a container built from a string
        """
        bin_stream = Container.from_string(bytecode).bin_stream
        disasm = self.machine.dis_engine(bin_stream)
        self.blks = disasm.dis_multibloc(0)
Exemplo n.º 29
0
"""This example illustrates the Sandbox.call API, used to directly call a
given function"""

from miasm2.analysis.sandbox import Sandbox_Linux_arml
from miasm2.analysis.binary import Container
from miasm2.os_dep.linux_stdlib import linobjs
from miasm2.core.utils import hexdump

# Parse arguments: the sandbox's own parser, extended with the ELF file name
parser = Sandbox_Linux_arml.parser(description="ELF sandboxer")
parser.add_argument("filename", help="ELF Filename")
options = parser.parse_args()

# Build the sandbox for the given file; this module's globals() are handed
# over as third argument
# NOTE(review): presumably used by the sandbox to look up names defined in
# this script -- confirm against Sandbox_Linux_arml
sb = Sandbox_Linux_arml(options.filename, options, globals())

# Resolve the offset of the "md5_starts" symbol through the container's
# location database
with open(options.filename, "rb") as fdesc:
    cont = Container.from_stream(fdesc)
    loc_key = cont.loc_db.get_name_location("md5_starts")
    addr_to_call = cont.loc_db.get_location_offset(loc_key)

# Calling md5_starts(malloc(0x64))
# Allocate 0x64 bytes on the emulated heap and pass their address as the
# single argument of the call
addr = linobjs.heap.alloc(sb.jitter, 0x64)
sb.call(addr_to_call, addr)
# Dump the 0x64 bytes at `addr` once the call has returned
hexdump(sb.jitter.vm.get_mem(addr, 0x64))
Exemplo n.º 30
0
 def load_vm(self, filename, map_addr):
     """Build `self.ctr` from the file `filename`, then reset the jitter.

     The container is created with the jitter's VM (`vm=`) and `map_addr`
     (`addr=`); afterwards the CPU registers and the stack of the jitter are
     initialised.
     """
     # Binary mode, and the handle is closed as soon as the container exists
     with open(filename, "rb") as fdesc:
         self.ctr = Container.from_stream(fdesc, vm=self.jitter.vm,
                                          addr=map_addr)
     self.jitter.cpu.init_regs()
     self.jitter.init_stack()
Exemplo n.º 31
0
};

struct ll_human {
        struct ll_human* next;
        struct human human;
};
"""

# Build the C types manager from the declarations held in `text`
base_types = CTypeAMD64_unk()
types_ast = CAstTypes()
types_ast.add_c_decl(text)

types_mngr = CTypesManagerNotPacked(types_ast, base_types)

# Analyze binary
cont = Container.fallback_container(data, None, addr=0)

machine = Machine("x86_64")
dis_engine, ira = machine.dis_engine, machine.ira

# Disassemble from offset 0 and fetch the label attached to that offset
mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool)
addr_head = 0
asmcfg = mdis.dis_multiblock(addr_head)
lbl_head = mdis.symbol_pool.getby_offset(addr_head)

# Register every disassembled block in the IR
ir_arch_a = ira(mdis.symbol_pool)
for block in asmcfg.blocks:
    ir_arch_a.add_block(block)

# Write the IR graph in dot format; the context manager makes sure the file
# is flushed and closed
with open('graph_irflow.dot', 'w') as fdesc:
    fdesc.write(ir_arch_a.graph.dot())
Exemplo n.º 32
0
from pdb import pm

from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.analysis.binary import Container
from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \
    AsmLabel, AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \
    bbl_simplifier
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker
from miasm2.expression.expression import ExprId

# Initial data: from 'samples/simple_test.bin'
# Hex-encoded machine code, turned into raw bytes with the Python 2 "hex" codec
data = "5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3".decode("hex")
cont = Container.from_string(data)

# Test Disasm engine
mdis = dis_x86_32(cont.bin_stream)
## Disassembly of one block: the block at offset 0 must hold 5 lines
first_block = mdis.dis_bloc(0)
assert len(first_block.lines) == 5
print first_block

## Disassembly of several blocks, with cache: nothing is returned here
## NOTE(review): presumably because offset 0 was already processed by the
## dis_bloc call above and is recorded in job_done -- confirm
blocks = mdis.dis_multibloc(0)
assert len(blocks) == 0

## Test cache: once job_done is cleared, the same call yields 17 blocks
mdis.job_done.clear()
blocks = mdis.dis_multibloc(0)
assert len(blocks) == 17
## Equality between assembly lines is not yet implemented
## Only the number of heads is checked: a single one is expected
assert len(blocks.heads()) == 1
Exemplo n.º 33
0
from pdb import pm

from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.analysis.binary import Container
from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \
    AsmLabel, AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \
    bbl_simplifier
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker
from miasm2.expression.expression import ExprId

# Initial data: from 'samples/simple_test.bin'
# Hex-encoded machine code, turned into raw bytes with the Python 2 "hex" codec
data = "5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3".decode(
    "hex")
cont = Container.from_string(data)

# Test Disasm engine
mdis = dis_x86_32(cont.bin_stream)
## Disassembly of one block: the block at offset 0 must hold 5 lines
first_block = mdis.dis_block(0)
assert len(first_block.lines) == 5
print first_block

## Test redisassemble blocks: a second pass on the same offset must give a
## block with the same number of lines
first_block_bis = mdis.dis_block(0)
assert len(first_block.lines) == len(first_block_bis.lines)
print first_block_bis

## Disassembly of several blocks, with cache: 17 blocks are expected even
## though offset 0 has already been disassembled above
blocks = mdis.dis_multiblock(0)
assert len(blocks) == 17
Exemplo n.º 34
0
def test_learn(args):
    machine = Machine("x86_64")

    # Compil tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names
    c_files = []

    for cur_dir, sub_dir, files in os.walk("."):
        c_files += [x[:-2] for x in files if x.endswith(".c")]

    # Ways to invoke
    to_invoke = {
        "Miasm": invoke_miasm,
    }
    if args.pin_tracer:
        to_invoke["PIN"] = invoke_pin

    # Learn + test
    fail = False
    for filename in c_files:

        if filename in unsupported:
            log_error("Skip %s (unsupported)" % filename)
            continue

        with open(filename) as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = filename
        func_addr = cont.loc_db.get_name_offset(func_name)
        header_filename = "%s.h" % filename

        for name, cb in to_invoke.iteritems():
            log_info("Learning %s over %s with %s" %
                     (func_name, filename, name))
            cmdline = cb(filename, func_name, header_filename, cont)

            print " ".join(cmdline)
            sibyl = subprocess.Popen(cmdline,
                                     env=os.environ,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout, stderr = sibyl.communicate()
            if sibyl.returncode != 0:
                log_error("Failed to learn with error:")
                print stderr
                fail = True
                continue

            log_info("Testing generated class")

            mod = imp.new_module("testclass")
            exec stdout in mod.__dict__
            classTest = getattr(mod, "TESTS")[0]
            tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV,
                              [classTest], config.jit_engine)

            possible_funcs = tl.run(func_addr)
            if tl.possible_funcs and possible_funcs == [filename]:
                log_success("Generated class recognize the function " \
                            "'%s'" % func_name)
            else:
                log_error("Generated class failed to recognize the function " \
                          "'%s'" % func_name)
                fail = True

    # Clean
    log_info("Remove old files")
    os.system("make clean")

    return fail
Exemplo n.º 35
0
};

struct ll_human {
        struct ll_human* next;
        struct human human;
};
"""

# Build the C types manager from the declarations held in `text`
base_types = CTypeAMD64_unk()
types_ast = CAstTypes()
types_ast.add_c_decl(text)

types_mngr = CTypesManagerNotPacked(types_ast, base_types)

# Analyze binary
cont = Container.fallback_container(data, None, addr=0)

machine = Machine("x86_64")
dis_engine, ira = machine.dis_engine, machine.ira

# Disassemble from offset 0 and fetch the label attached to that offset
mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool)
addr_head = 0
blocks = mdis.dis_multiblock(addr_head)
lbl_head = mdis.symbol_pool.getby_offset(addr_head)

# Register every disassembled block in the IR
ir_arch_a = ira(mdis.symbol_pool)
for block in blocks:
    ir_arch_a.add_block(block)

# Write the IR graph in dot format; the context manager makes sure the file
# is flushed and closed
with open('graph_irflow.dot', 'w') as fdesc:
    fdesc.write(ir_arch_a.graph.dot())
Exemplo n.º 36
0
    # Update next blocks to process in the disassembly engine
    cur_bloc.bto.clear()
    cur_bloc.add_cst(loc_key, AsmConstraint.c_next)


# Prepare a tiny shellcode
shellcode = ''.join([
    "\xe8\x00\x00\x00\x00",  # CALL $
    "X",  # POP EAX
    "\xc3",  # RET
])

# Instantiate a x86 32 bit architecture
machine = Machine("x86_32")
cont = Container.from_string(shellcode)
mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)

print "Without callback:\n"
asmcfg = mdis.dis_multiblock(0)
print "\n".join(str(block) for block in asmcfg.blocks)

# Enable callback
mdis.dis_block_callback = cb_x86_callpop

print "=" * 40
print "With callback:\n"
asmcfg_after = mdis.dis_multiblock(0)
print "\n".join(str(block) for block in asmcfg_after.blocks)

# Ensure the callback has been called
Exemplo n.º 37
0
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")


if __name__ == '__main__':

    translator_smt2 = Translator.to_language("smt2")

    # Start address is given in hexadecimal
    addr = int(options.address, 16)

    # Read the target in binary mode and close it once the container is built
    with open(args[0], "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
    ir_arch = machine.ir(mdis.loc_db)
    ircfg = ir_arch.new_ircfg()
    symbexec = SymbolicExecutionEngine(ir_arch)

    # Assemble a small 'init' stub pushing argv, argc and ret_addr; it shares
    # the disassembler's location database
    asmcfg, loc_db = parse_asm.parse_txt(machine.mn,
                                         32,
                                         '''
    init:
    PUSH argv
    PUSH argc
    PUSH ret_addr
    ''',
                                         loc_db=mdis.loc_db)
Exemplo n.º 38
0
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine

# The Container will provide a *bin_stream*, bytes source for the disasm engine
cont = Container.from_string(
    "\x83\xf8\x10\x74\x07\x89\xc6\x0f\x47\xc3\xeb\x08\x89\xc8\xe8\x31\x33\x22\x11\x40\xc3"
)

# Instantiate a x86 32 bit architecture
machine = Machine("x86_32")

# Instantiate a disassembler engine, using the previous bin_stream and its
# associated location DB.
mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)

# Run a recursive traversal disassembling from address 0
asmcfg = mdis.dis_multiblock(0)

# Display each basic blocks
for block in asmcfg.blocks:
    print block

# Output control flow graph in a dot file
open('str_cfg.dot', 'w').write(asmcfg.dot())
Exemplo n.º 39
0
parser.add_argument('-o', "--shiftoffset", default=None,
                    type=lambda x: int(x, 0),
                    help="Shift input binary by an offset")
parser.add_argument('-a', "--try-disasm-all", action="store_true",
                    help="Try to disassemble the whole binary")
parser.add_argument('-i', "--image", action="store_true",
                    help="Display image representation of disasm")

args = parser.parse_args()

if args.verbose:
    log_asmbloc.setLevel(logging.DEBUG)

log.info('Load binary')
with open(args.filename) as fdesc:
    cont = Container.from_stream(fdesc, addr=args.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print "Architecture recognition fail. Please specify it in arguments"
    exit(-1)

# Instance the arch-dependent machine
machine = Machine(arch)
Exemplo n.º 40
0
                    "--try-disasm-all",
                    action="store_true",
                    help="Try to disassemble the whole binary")
parser.add_argument('-i',
                    "--image",
                    action="store_true",
                    help="Display image representation of disasm")

args = parser.parse_args()

if args.verbose:
    log_asmbloc.setLevel(logging.DEBUG)

log.info('Load binary')
with open(args.filename) as fdesc:
    cont = Container.from_stream(fdesc, addr=args.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print "Architecture recognition fail. Please specify it in arguments"
    exit(-1)

# Instance the arch-dependent machine
machine = Machine(arch)
Exemplo n.º 41
0
    '-d',
    "--defuse",
    action="store_true",
    help="Dump the def-use graph in file 'defuse.dot'."
    "The defuse is dumped after simplifications if -s option is specified")

args = parser.parse_args()

if args.verbose:
    log_asmblock.setLevel(logging.DEBUG)

log.info('Load binary')
if args.rawbinary:
    shift = args.shiftoffset if args.shiftoffset is not None else 0
    cont = Container.fallback_container(open(args.filename).read(),
                                        None,
                                        addr=shift)
else:
    with open(args.filename) as fdesc:
        cont = Container.from_stream(fdesc, addr=args.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print "Architecture recognition fail. Please specify it in arguments"
Exemplo n.º 42
0
Arquivo: test_dis.py Projeto: 8l/miasm
# Verbose mode: debug-level output for the asmbloc logger
if options.verbose:
    log_asmbloc.setLevel(logging.DEBUG)

log.info("import machine...")
machine = Machine(options.machine)
mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira
log.info('ok')

# Numeric options are converted to int when given; None (identity test, as
# recommended for singletons) means the option was not set
if options.bw is not None:
    options.bw = int(options.bw)
if options.funcswd is not None:
    options.funcswd = int(options.funcswd)

log.info('Load binary')
with open(fname) as fdesc:
    cont = Container.from_stream(fdesc, addr=options.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable

log.info('ok')
mdis = dis_engine(bs)
# configure disasm engine
mdis.dontdis_retcall = options.dontdis_retcall
mdis.blocs_wd = options.bw
mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc

todo = []
# Remaining positional arguments are hexadecimal addresses
addrs = [int(a, 16) for a in args[1:]]