def readInstsCompared(inst_pb):
    """Load the compared tool's instructions from a serialized protobuf module.

    Every instruction found in ``inst_pb`` is recorded as ``(va, size)`` in
    the module-level list ``InstsInfoCompared`` and as ``va -> size`` in the
    module-level mapping ``InstsInfoComparedSet``.  The list is sorted by
    address before returning.

    Instructions that carry no size (``inst.size == 0``) get a size derived
    from the distance to the following instruction; the last instruction of
    a block is sized from the end of the block (``bb.va + bb.size``).

    Args:
        inst_pb: Path of the protobuf file to read.

    Returns:
        None.  If the file cannot be opened an error is logged and the
        globals are left without new entries.
    """
    global InstsInfoCompared
    global InstsInfoComparedSet
    module = blocks_pb2.module()
    try:
        # The context manager closes the file even if parsing raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        logging.error("Could not open the file %s!" % (inst_pb))

    for func in module.fuc:
        # Set once any size-less instruction has been seen in this function;
        # from then on the last instruction of each block is sized from the
        # block end.
        inst_compared = False
        for bb in func.bb:
            prev_va = -1
            for inst in bb.instructions:
                if inst.size != 0:
                    # The tool reported the size itself.
                    inst_size = inst.size
                    InstsInfoComparedSet[inst.va] = inst_size
                    InstsInfoCompared.append((inst.va, inst_size))
                elif prev_va != -1:
                    # No size reported: the previous instruction ends where
                    # this one starts.
                    inst_compared = True
                    inst_size = inst.va - prev_va
                    InstsInfoComparedSet[prev_va] = inst_size
                    InstsInfoCompared.append((prev_va, inst_size))
                prev_va = inst.va

            if inst_compared:
                # The last instruction runs up to the end of the block.
                inst_size = bb.size + bb.va - prev_va
                InstsInfoComparedSet[prev_va] = inst_size
                InstsInfoCompared.append((prev_va, inst_size))

    InstsInfoCompared.sort(key=(lambda tup: tup[0]))
def dumpBlocks(bv, output):
    """Export the functions, basic blocks and edges of ``bv`` as protobuf.

    For every function in ``bv.functions`` a ``fuc`` entry is created; every
    basic block of the function becomes a ``bb`` entry holding its
    instruction addresses and one ``child`` entry per outgoing edge.  Basic
    block and instruction sizes are not written.

    Args:
        bv: Object exposing ``functions``; each function is iterable over its
            basic blocks (presumably a Binary Ninja BinaryView - TODO confirm).
        output: Path of the file that receives the serialized module.
    """
    module = blocks_pb2.module()
    for (func_idx, func) in enumerate(bv.functions):
        pbFunc = module.fuc.add()
        pbFunc.va = func.start
        binja.log_info("Function {0}: {1}".format(func_idx, func.start))

        for (blk_idx, block) in enumerate(func):
            pbBB = pbFunc.bb.add()
            pbBB.va = block.start
            # can't get the basic block size for now
            pbBB.parent = pbFunc.va
            block_start = block.start
            binja.log_info("\tBasic Block {0:x}: {1:x}".format(
                blk_idx, block_start))

            if not block.can_exit:
                pbBB.type = 0x20  # ninja potentially non-return type
                # The block is flagged because it can NOT exit; the message
                # previously claimed the opposite.
                binja.log_info("\t bb 0x%x can not exit" % pbBB.va)

            # Iterating a block yields one item per instruction whose second
            # element is added to the running address, so addresses are
            # rebuilt by accumulating from the block start.
            insn_cur = block_start
            for insn in block:
                instruction = pbBB.instructions.add()
                instruction.va = insn_cur
                binja.log_info("\t\t{0:x}".format(insn_cur))
                insn_cur += insn[1]

            for (successor_idx, out_edge) in enumerate(block.outgoing_edges):
                print(out_edge)
                binja.log_info("\t\tsuccessor {0:x}: {1:x}".format(
                    successor_idx, out_edge.target.start))
                child = pbBB.child.add()
                child.va = out_edge.target.start

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
def dumpInsts(binary, output):
    """Dump the instruction addresses reported by ``objdump`` as protobuf.

    ``objdump -d`` is run through a shell pipeline that keeps only the
    instruction addresses and writes them to a temporary log under ``/tmp``.
    Because objdump provides no function or basic-block information for a
    stripped binary, all addresses are stored in a single dummy basic block
    (address 0) of a single dummy function (address 0).

    Args:
        binary: Path of the binary to disassemble.  It is interpolated into
            a shell command unquoted, so it must come from a trusted source.
        output: Path of the file that receives the serialized module.

    Returns:
        None.  Any failure is reported with a traceback and the function
        returns without raising.
    """
    output_tmp = None
    try:
        output_tmp = randomString()
        execute_str = "objdump --wide -d %s | egrep '^[[:space:]]*[0-9a-f]+:' | cut -d: -f1 | awk '{print \"0x\"$1}' | tee /tmp/%s.log" % (
            binary, output_tmp)
        print(execute_str)
        os.system(execute_str)

        module = blocks_pb2.module()
        # because objdump doesn't have function and basic block if the
        # binary is stripped
        dummy_func = module.fuc.add()
        dummy_func.va = 0x0
        dummy_bb = dummy_func.bb.add()
        dummy_bb.va = 0x0
        dummy_bb.parent = 0x0

        with open("/tmp/%s.log" % (output_tmp)) as objdump_file:
            for line in objdump_file:
                line = line.strip()
                if not line:
                    # A blank line carries no address; skipping it avoids
                    # aborting the whole dump on int('').
                    continue
                instruction = dummy_bb.instructions.add()
                instruction.va = int(line, 16)

        with open(output, "wb") as f:
            f.write(module.SerializeToString())
    except Exception:
        traceback.print_exc()
    finally:
        # Remove the temporary log on success and on failure alike;
        # previously it was left behind whenever an exception occurred.
        if output_tmp is not None:
            os.system("rm -f /tmp/%s.log" % (output_tmp))
def dumpBlocks(output):
    """Export the functions and basic blocks of ``currentProgram`` as protobuf.

    Pass 1 walks every non-external function once (keyed by entry offset)
    and adds the code blocks overlapping its body.  Pass 2 walks every code
    block of the program and attaches those not seen in pass 1 to a dummy
    function with address 0.  Per-block details are filled in by ``addBB``.

    Args:
        output: Path of the file that receives the serialized module.
    """
    bbModel = BasicBlockModel(currentProgram)
    functionManager = currentProgram.getFunctionManager()
    module = blocks_pb2.module()

    # record the basic block that has been added by functions
    bb_set = set()
    # entry offsets of the functions already exported
    funcs_set = set()

    for func in functionManager.getFunctions(True):
        # we skip external functions
        if func.isExternal():
            continue

        func_va = func.getEntryPoint().getOffset()
        if func_va in funcs_set:
            continue
        funcs_set.add(func_va)
        logging.debug("Function address is 0x{0:x}".format(func_va))

        codeBlockIterator = bbModel.getCodeBlocksContaining(
            func.getBody(), monitor)
        pbFunc = module.fuc.add()
        pbFunc.va = func_va

        if func.hasNoReturn():
            pbFunc.type = 0x5
            logging.debug("function at 0x%x does not return!" % pbFunc.va)

        # iter over the basic blocks
        while codeBlockIterator.hasNext():
            bb = codeBlockIterator.next()
            pbBB = pbFunc.bb.add()
            bb_set.add(bb.getMinAddress().getOffset())
            addBB(pbBB, bb, pbFunc)

    # Blocks that belong to no exported function go into a dummy function.
    codeBlocks = bbModel.getCodeBlocks(monitor)
    dummy_func = module.fuc.add()
    dummy_func.va = 0x0
    while codeBlocks.hasNext():
        bb = codeBlocks.next()
        bb_va = bb.getMinAddress().getOffset()
        if bb_va in bb_set:
            continue
        pbBB = dummy_func.bb.add()
        bb_set.add(bb_va)
        logging.debug("Find another basic block 0x%x" % (bb_va))
        addBB(pbBB, bb, dummy_func)

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
def readInstsInfo(inst_pb):
    """Load the ground-truth instructions and address ranges from protobuf.

    Fills these module-level containers:

    * ``InstsInfo`` - list of ``(va, size)``, sorted by address on return.
    * ``InstsInfoSet`` and ``groundTruthFuncRange`` - ``va -> size``.
    * ``groundTruthInstructionsRange`` - list of ``(start, end)`` ranges
      built by merging blocks that start exactly where the previous range
      ends; a block's padding bytes are excluded from its extent.
    * ``notIncludedLinkerFunc`` - every address of ``linkerFuncAddr`` that
      is not a function start in the parsed module.

    Args:
        inst_pb: Path of the protobuf file to read.

    Returns:
        None.  If the file cannot be opened an error is logged and the
        remaining steps run on an empty module.
    """
    global InstsInfo
    global InstsInfoSet
    global groundTruthInstructionsRange
    global groundTruthFuncRange
    module = blocks_pb2.module()
    tmpFuncSet = set()
    try:
        # The context manager closes the file even if parsing raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        logging.error("Could not open the file %s!" % (inst_pb))

    range_start = 0x0
    range_end = 0x0
    for func in module.fuc:
        tmpFuncSet.add(func.va)
        for bb in func.bb:
            # add the range
            if bb.va != range_end:
                # Not adjacent to the open range: emit the open range (if
                # non-empty) and start a new one at this block.
                if range_start != range_end:
                    groundTruthInstructionsRange.append(
                        (range_start, range_end))
                range_start = bb.va
                range_end = bb.va + bb.size - bb.padding
            else:
                # Adjacent block: extend the open range.
                range_end += bb.size - bb.padding

            for inst in bb.instructions:
                InstsInfo.append((inst.va, inst.size))
                groundTruthFuncRange[inst.va] = inst.size
                InstsInfoSet[inst.va] = inst.size

    # Emit the range that is still open after the last block.
    if range_start != range_end:
        groundTruthInstructionsRange.append((range_start, range_end))

    InstsInfo.sort(key=(lambda tup: tup[0]))

    for func in linkerFuncAddr:
        if func not in tmpFuncSet:
            notIncludedLinkerFunc.add(func)
def readInstsInfo(inst_pb):
    """Load ground-truth instructions, address ranges and branch sites.

    Fills these module-level containers:

    * ``InstsInfo`` - list of ``(va, size)``, sorted by address on return.
    * ``InstsInfoSet`` - ``va -> size``.
    * ``groundTruthInstructionsRange`` - list of ``(start, end)`` ranges
      built by merging blocks that start exactly where the previous range
      ends; a block's padding bytes are excluded from its extent.
    * ``BRANCH_RANGES`` - ``(va, va + size)`` of the last instruction of
      every non-empty block whose type is a branch or call type.

    Args:
        inst_pb: Path of the protobuf file to read.

    Returns:
        None.  If the file cannot be opened an error is logged and the
        remaining steps run on an empty module.
    """
    global InstsInfo
    global InstsInfoSet
    global groundTruthInstructionsRange
    global BRANCH_RANGES
    module = blocks_pb2.module()
    try:
        # The context manager closes the file even if parsing raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        logging.error("Could not open the file %s!" % (inst_pb))

    # Block types whose last instruction is recorded as a branch site;
    # built once instead of once per block.
    branch_types = {BlockType.COND_BRANCH, BlockType.DIRECT_BRANCH,
                    BlockType.INDIRECT_BRANCH, BlockType.INDIRECT_CALL,
                    BlockType.DIRECT_CALL}

    range_start = 0x0
    range_end = 0x0
    for func in module.fuc:
        for bb in func.bb:
            if bb.va != range_end:
                # Not adjacent to the open range: emit the open range (if
                # non-empty) and start a new one at this block.
                if range_start != range_end:
                    groundTruthInstructionsRange.append(
                        (range_start, range_end))
                range_start = bb.va
                range_end = bb.va + bb.size - bb.padding
            else:
                # Adjacent block: extend the open range.
                range_end += bb.size - bb.padding

            if len(bb.instructions) > 0 and bb.type in branch_types:
                last_inst = bb.instructions[-1]
                BRANCH_RANGES.append(
                    (last_inst.va, last_inst.va + last_inst.size))

            for inst in bb.instructions:
                InstsInfo.append((inst.va, inst.size))
                InstsInfoSet[inst.va] = inst.size

    # Emit the range that is still open after the last block; without this
    # the final contiguous range was silently dropped.
    if range_start != range_end:
        groundTruthInstructionsRange.append((range_start, range_end))

    InstsInfo.sort(key=(lambda tup: tup[0]))
# Parse the command line; the ground truth file, the compared file and the
# binary are all mandatory.
(options, args) = parser.parse_args()
if options.groundtruth is None:
    print("Please input the ground truth file")
    exit(-1)
if options.comparedfile is None:
    print("Please input the compared file")
    exit(-1)
if options.binaryFile is None:
    print("Please input the binary file")
    exit(-1)

exec_secs = parsePEExecSecs(options.binaryFile)
(IMAGE_BASE, ELFClasss) = parsePEFile(options.binaryFile)

# Load both protobuf modules.  The context managers close each file even
# when parsing raises.
mModule1 = blocks_pb2.module()
mModule2 = blocks_pb2.module()
try:
    with open(options.groundtruth, 'rb') as f1:
        mModule1.ParseFromString(f1.read())
    with open(options.comparedfile, 'rb') as f2:
        mModule2.ParseFromString(f2.read())
except IOError:
    print("Could not open the file\n")
    exit(-1)

# confirm which tool we are handling
confirmTools(options.comparedfile)
def dumpBlocks(binary, output, output_sta):
    """Recover a CFG with angr's ``CFGFast`` and export it as protobuf.

    Every function of the CFG that is not flagged as alignment becomes a
    ``fuc`` entry.  Each of its blocks that has a CFG node and a non-zero
    size becomes a ``bb`` entry with its size, its successor addresses and
    the addresses of its instructions (taken from the capstone view of the
    block).  Blocks whose address is one of the function's callout sites are
    typed ``BlockType.NON_RETURN_CALL``.

    Args:
        binary: Path of the binary to analyse.
        output: Path of the file that receives the serialized module.
        output_sta: Currently unused; only referenced by a disabled call to
            ``outputFuncMatching``.
    """
    # "force_complete_scan" default is True
    p = angr.Project(binary, load_options={'auto_load_libs': False})
    cfg = p.analyses.CFGFast(normalize=True, detect_tail_calls=True)

    # output func matching counts
    # outputFuncMatching(cfg, output_sta)

    module = blocks_pb2.module()

    # iter over the cfg functions
    for func_addr in cfg.functions:
        func = cfg.functions[func_addr]

        if func.returning is False:
            print("Non-return function at 0x%x" % func.addr)

        if func.alignment:
            print("function 0x%x is alignment function, skip!" % (func.addr))
            continue

        # collect non-return calls
        current_non_bbs = set()
        for non_ret in func.callout_sites:
            if non_ret is not None:
                print("non-return call at 0x%x" % non_ret.addr)
                current_non_bbs.add(non_ret.addr)

        pbFunc = module.fuc.add()
        pbFunc.va = func_addr
        print("function %s, its addr is 0x%x" % (func.name, func.addr))

        # iter over blocks
        for bb in func.blocks:
            if bb is None:
                continue

            print("basic block addr 0x%x, its size 0x%x" % (bb.addr, bb.size))
            cfg_node = cfg.get_any_node(bb.addr)

            # Blocks without a CFG node or with zero size are not exported.
            if cfg_node is None or bb.size == 0:
                continue

            pbBB = pbFunc.bb.add()
            pbBB.va = bb.addr
            pbBB.size = bb.size
            pbBB.parent = func_addr

            for suc in cfg_node.successors:
                child = pbBB.child.add()
                child.va = suc.addr
                print("Edge 0x%x -> 0x%x" % (bb.addr, suc.addr))

            # iter over instructions
            # bb.instruction_addrs may have bug
            # we use capstone instead to extract instruction
            for inst in bb.capstone.insns:
                instruction = pbBB.instructions.add()
                instruction.va = inst.address
                print("instruction: 0x%x" % (instruction.va))
                # can't get its size from angr for now

            if bb.addr in current_non_bbs:
                pbBB.type = BlockType.NON_RETURN_CALL

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
(options, args) = parser.parse_args() assert options.groundtruth != None, "Please input the ground truth file" assert options.binaryfile != None, "Please input the binary file" if options.ispe: IS_PE = True if not IS_PE: init_elf(options.binaryfile) else: init_pe(options.binaryfile) if ELFCLASS == 32: IAT_BASE = 0x0 module = blocks_pb2.module() try: f1 = open(options.groundtruth, 'rb') module.ParseFromString(f1.read()) f1.close() except IOError: logging.error("Could not open the file %s" % options.groundtruth) exit(-1) known_nonret_funcs = set() iat_known_nonret_funcs = set() if not IS_PE: getNonRetFuncsFromSymbols(options.binaryfile, known_nonret_funcs) else: iats = getNonRetFuncsFromImportObjs(options.binaryfile,
def dumpBB(binary, output):
    """Run ``bap`` on ``binary`` and export functions/blocks as protobuf.

    Step 1 runs ``bap`` once, writing its dump to ``/tmp/<random>.dump``,
    then greps that dump twice: once to collect the addresses of calls
    marked "with noreturn" and once to collect CFG edges (``->`` lines).

    Step 2 extracts the address column of the disassembly part of the dump
    and rebuilds functions, basic blocks and instructions from the way
    addresses repeat on consecutive lines (see the comments in the loop).

    Basic block sizes and instruction sizes are not written.

    Args:
        binary: Path of the binary to analyse.
        output: Path of the file that receives the serialized module.

    Returns:
        None.  Any exception in either step prints a traceback and returns
        early; temporary files are only removed on the success path.
    """
    # store the cfg edge: source address -> set of target addresses
    cfg_edge = dict()
    # addresses at which a call to a non-returning function was reported
    non_ret_sites = set()
    try:
        dump_tmp = randomString()
        abs_path = os.path.abspath(binary)
        basename = os.path.basename(binary)
        execute_str = "bap %s -d -drcfg -dasm --passes=with-no-return --print-bir-attr=address > /tmp/%s.dump" % (abs_path, dump_tmp)
        logging.info("execute string is %s" % (execute_str))
        os.system(execute_str)

        ## collect all non-ret call sites
        non_ret_tmp = randomString()
        execute_str1 = 'grep "address\|call @.* with noreturn" /tmp/%s.dump > /tmp/%s.log' % (dump_tmp, non_ret_tmp)
        os.system(execute_str1)
        # Most recent address parsed from an 'address' line; -1 means none.
        valid_address = -1
        with open("/tmp/%s.log" % (non_ret_tmp), "r") as non_ret_file:
            for line in non_ret_file:
                if 'address' in line:
                    try:
                        # Remember the address and move on to the next line,
                        # which may be the matching 'noreturn' line.
                        valid_address = int(line.split(' ')[-1].strip(), 16)
                        continue
                    except:
                        pass

                if 'noreturn' in line and valid_address != -1:
                    logging.debug('collect noret site %x' % valid_address)
                    non_ret_sites.add(valid_address)

                # An address is consumed by the line that follows it.
                valid_address = -1

        grep_cfg_tmp = randomString()
        execute_str2 = 'grep -e "->" /tmp/%s.dump | tr -s " " | cut -d \\" -f2,4 | awk "{print $1 $2}" > /tmp/%s.log' % (dump_tmp, grep_cfg_tmp)
        os.system(execute_str2)
        logging.info("excute string is %s" % (execute_str2))
        with open("/tmp/%s.log" % (grep_cfg_tmp), "r") as grep_cfg_file:
            for line in grep_cfg_file:
                # Each line is expected to be: <hex source>"<hex target>
                line = line.split('"')
                start = int(line[0], 16)
                end = int(line[1], 16)
                logging.info("edge 0x%x -> 0x%x" % (start, end))
                if start in cfg_edge:
                    cfg_edge[start].add(end)
                else:
                    tmp_set = set()
                    tmp_set.add(end)
                    cfg_edge[start] = tmp_set
        os.system('rm /tmp/%s.log' % (grep_cfg_tmp))
        os.system('rm /tmp/%s.log' % (non_ret_tmp))

    except Exception as e:
        traceback.print_exc()
        return

    # get the function and basic block information
    try:
        # NOTE: a separate `bap -dasm` run used to produce this dump; the
        # dump written in step 1 is reused instead.
        grep_bb_tmp = randomString()
        execute_str4 = 'sed -ne "/Disassembly of/,$ p" /tmp/%s.dump | egrep "^[[:space:]]*[0-9a-f]+:" | cut -d : -f1 | awk "{print $1}" > /tmp/%s.log' % (dump_tmp, grep_bb_tmp)
        logging.info("execute string is %s" % (execute_str4))
        os.system(execute_str4)
        # Addresses seen on the two preceding lines.
        last_last_inst_addr = None
        last_inst_addr = None
        cur_inst_addr = None
        last_func_addr = None
        last_bb_addr = None
        # Protobuf function / basic block currently being filled.
        pb_cur_func = None
        pb_cur_bb = None
        with open("/tmp/%s.log" % (grep_bb_tmp), "r") as grep_bb_file:
            module = blocks_pb2.module()
            # NOTE(review): presumably the dump lists a function's entry
            # address three times in a row (function, block, instruction)
            # and a block's start address twice (block, instruction) -
            # confirm against bap's -dasm output.
            for line in grep_bb_file:
                cur_inst_addr = int(line.strip(), 16)
                if cur_inst_addr == last_inst_addr:
                    # find the new function: same address on three
                    # consecutive lines
                    if last_inst_addr == last_last_inst_addr:
                        logging.info("current function addr 0x%x" % (cur_inst_addr))
                        # delete the last function's last basic block, which
                        # the second occurrence of this address created
                        if pb_cur_func != None:
                            del pb_cur_func.bb[-1]
                        pb_cur_func = module.fuc.add()
                        pb_cur_func.va = cur_inst_addr

                        # WARNING: the result lacks of basic block size
                        pb_cur_bb = pb_cur_func.bb.add()
                        pb_cur_bb.va = cur_inst_addr
                        pb_cur_bb.parent = pb_cur_func.va
                        # add the current basic block successors
                        # WARNING: the successors does not contain the `call` instruction target
                        successors = set()
                        if cur_inst_addr in cfg_edge:
                            successors = cfg_edge[cur_inst_addr]
                        for suc in successors:
                            child = pb_cur_bb.child.add()
                            child.va = suc

                        instruction = pb_cur_bb.instructions.add()
                        instruction.va = cur_inst_addr
                        if cur_inst_addr in non_ret_sites:
                            instruction.call_type = 4 # call a non-return
                            logging.debug("set non-return instruction at 0x%x, call_type is 0x%x" %
                                    (instruction.va, instruction.call_type))

                    elif pb_cur_func != None:
                        # find the new basic block: same address on two
                        # consecutive lines.  The first occurrence was
                        # appended to the previous block as an instruction,
                        # so remove it again.
                        if pb_cur_bb != None:
                            del pb_cur_bb.instructions[-1]
                        logging.info("current basic block addr 0x%x" % (cur_inst_addr))
                        # WARNING: the result lacks of basic block size
                        pb_cur_bb = pb_cur_func.bb.add()
                        pb_cur_bb.va = cur_inst_addr
                        pb_cur_bb.parent = pb_cur_func.va
                        # add the current basic block successors
                        # WARNING: the successors does not contain the `call` instruction target
                        successors = set()
                        if cur_inst_addr in cfg_edge:
                            successors = cfg_edge[cur_inst_addr]
                        for suc in successors:
                            child = pb_cur_bb.child.add()
                            child.va = suc

                        instruction = pb_cur_bb.instructions.add()
                        instruction.va = cur_inst_addr
                        if cur_inst_addr in non_ret_sites:
                            instruction.call_type = 4 # call a non-return
                            logging.debug("set non-return instruction at 0x%x, call_type is 0x%x" %
                                    (instruction.va, instruction.call_type))

                # current instruction: a new address, appended to the block
                # currently being filled
                elif pb_cur_bb != None:
                    # WARNING: the result lacks of basic block size
                    logging.info("current instruction addr 0x%x" % (cur_inst_addr))
                    instruction = pb_cur_bb.instructions.add()
                    instruction.va = cur_inst_addr
                    # set current bb type
                    if cur_inst_addr in non_ret_sites:
                        instruction.call_type = 4 # call a non-return

                # Shift the two-line address history.
                last_last_inst_addr = last_inst_addr
                last_inst_addr = cur_inst_addr

        f = open(output, "wb")
        f.write(module.SerializeToString())
        f.close()
        os.system('rm /tmp/%s.dump' % (dump_tmp))
        os.system('rm /tmp/%s.log' % (grep_bb_tmp))

    except Exception as e:
        traceback.print_exc()
        return
def dumpBlocks(binary, output, statics):
    """Analyse ``binary`` with radare2 and export its CFG as protobuf.

    The analysis commands ``aa``, ``afr`` (from ``main``), ``aac``, ``aap``
    and ``aanr`` are issued in that order.  Every function listed by
    ``aflj`` becomes a ``fuc`` entry; its blocks (``afbj``) become ``bb``
    entries with their size, their instructions (``pdj``) and their
    successors (``fail``, ``jump`` and de-duplicated switch cases).

    Function starts found by the prelude scan are collected from the ``aap``
    output lines that contain both "Binpang" and "Preclude".

    Args:
        binary: Path of the binary to analyse.
        output: Path of the file that receives the serialized module.
        statics: Passed through unchanged to ``outputFuncMatching``.

    Returns:
        None.  The process exits with status -1 if radare2 cannot open the
        binary.
    """
    prelude_funcs = set()
    try:
        r2 = r2pipe.open(binary)
    except Exception:
        # `except Exception` rather than a bare except, so Ctrl-C and
        # SystemExit are not reported as an open failure.
        logging.error("r2pipe open binary error!")
        exit(-1)

    pbModule = blocks_pb2.module()

    ## analyse all
    #r2.cmd('aaa')
    r2.cmd('aa')
    r2.cmd('e anal.depth = 0x10000000')

    # recursively disassemble from main function
    r2.cmd('s main')
    r2.cmd("afr")

    # aac heuristic, default is on in `aaa` analysis
    r2.cmd('aac')

    # scan the function prologue
    logging.debug("Before aap Analysis...")
    prelude_results = r2.cmd('aap')
    logging.debug(prelude_results)
    logging.debug("Done aap Analysis!")

    count_prelude = 0x0
    for res in prelude_results.split('\n'):
        # demo output: [Binpang Debug]: Preludecnt number is 3
        if "Binpang" not in res:
            continue
        if "Preclude" in res:
            prelude_funcs.add(int(res.split()[-1], 16))
        else:
            count_prelude += int(res.split()[-1], 10)
    logging.debug("prelude count reported by aap: %d" % (count_prelude))

    r2.cmd('aanr')
    afl_result = json.loads(r2.cmd('aflj'))
    all_func_result = set()

    for func in afl_result:
        func_addr = func['offset']
        if func_addr in all_func_result:
            continue
        all_func_result.add(func_addr)

        pbFunc = pbModule.fuc.add()
        pbFunc.va = func_addr
        # Tolerate listings without a 'noreturn' field.
        if func.get('noreturn', False):
            pbFunc.type = 0x5
        logging.info("Find function in %x" % (func_addr))

        # seek the function start address
        r2.cmd('s %d' % func_addr)

        # output current function's basic block information
        try:
            afb_result = json.loads(r2.cmd("afbj"))
        except Exception:
            logging.warning("Could not parse afbj output for function 0x%x" % (func_addr))
            continue

        for bb in afb_result:
            bb_addr = bb['addr']
            pbBB = pbFunc.bb.add()
            pbBB.va = bb_addr
            pbBB.size = bb['size']
            pbBB.parent = func_addr
            logging.info("Find basic block %x" % bb_addr)

            r2.cmd('s %d' % bb_addr)
            try:
                inst_result = json.loads(r2.cmd('pdj %d' % bb['ninstr']))
            except Exception:
                # The block stays in the output without instructions and
                # without successors, as before.
                logging.warning("Could not parse pdj output for basic block 0x%x" % (bb_addr))
                continue

            for inst in inst_result:
                inst_addr = inst['offset']
                inst_size = inst['size']
                logging.info("Find instruction %x, size %x" % (inst_addr, inst_size))
                instruction = pbBB.instructions.add()
                instruction.va = inst_addr
                instruction.size = inst_size

            # basic block fail address
            bb_fail = bb.get("fail", None)
            # basic block jump address
            bb_jmp = bb.get("jump", None)

            if bb_fail is not None:
                logging.info("Successor: 0x%x" % (bb_fail))
                child = pbBB.child.add()
                child.va = bb_fail

            if bb_jmp is not None:
                logging.info("Successor: 0x%x" % (bb_jmp))
                child = pbBB.child.add()
                child.va = bb_jmp

            # get switch cases successors
            switch_op = bb.get("switch_op", None)
            if switch_op is None:
                continue
            bb_cases = switch_op.get("cases", None)
            if bb_cases is None:
                continue

            # Several cases may share a target; emit each target once.
            visited_cases = set()
            for (idx, case) in enumerate(bb_cases):
                if case['addr'] in visited_cases:
                    continue
                visited_cases.add(case['addr'])
                logging.info("jmptbl successor#%d: 0x%x" % (idx, case['addr']))
                child = pbBB.child.add()
                child.va = case['addr']

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(pbModule.SerializeToString())

    # dump scan function information
    outputFuncMatching(all_func_result, prelude_funcs, statics)