def run_variable_recovery_analysis(project, func, groundtruth, is_fast):
    """
    Run a variable recovery analysis on a function and check it against the ground truth.

    :param project:     The angr project the function belongs to.
    :param func:        The function to run the analysis on.
    :param groundtruth: A mapping from instruction addresses to iterables of variable descriptions. Every
                        description is indexed with the key 'sort' and is handed to the matching
                        `_compare_*_variable` helper.
    :param is_fast:     Run VariableRecoveryFast when true, VariableRecovery otherwise.
    """
    # The analysis stores its results in a throwaway KnowledgeBase
    scratch_kb = angr.KnowledgeBase(project, project.loader.main_object)

    if is_fast:
        l.debug("Running VariableRecoveryFast on function %r.", func)
        analysis = project.analyses.VariableRecoveryFast(func, kb=scratch_kb)
    else:
        l.debug("Running VariableRecovery on function %r.", func)
        analysis = project.analyses.VariableRecovery(func, kb=scratch_kb)

    manager = analysis.variable_manager[func.addr]

    for insn_addr, expected_vars in groundtruth.items():
        for expected in expected_vars:
            sort = expected['sort']
            candidates = manager.find_variables_by_insn(insn_addr, sort)

            # Pick the comparison helper that fits the sort of the expected variable
            if sort == VariableType.MEMORY:
                matches = _compare_memory_variable
            elif sort == VariableType.REGISTER:
                matches = _compare_register_variable
            else:
                l.error('Unsupported variable sort %s.', sort)
                assert False

            # Take the first recovered variable that matches the description, if there is one
            found = None
            for candidate, _ in candidates:
                if matches(candidate, expected):
                    found = candidate
                    break

            nose.tools.assert_is_not_none(
                found,
                msg="The variable %s in groundtruth at instruction %#x cannot be "
                    "found in variable manager." % (expected, insn_addr),
            )
            l.debug("Found variable %s at %#x.", found, insn_addr)
def test_get_the_sp_from_a_reaching_definition(self):
    """
    The stack pointer of the state observed right before the first node of `main` must equal
    `LiveDefinitions.INITIAL_SP_64BIT`.
    """
    project = angr.Project(_binary_path('all'), auto_load_libs=False)
    cfg = project.analyses.CFGFast()

    tmp_kb = angr.KnowledgeBase(project)
    main_func = cfg.kb.functions['main']

    rda = project.analyses.ReachingDefinitions(subject=main_func, kb=tmp_kb, observe_all=True)

    # Every observation key is unpacked into three parts; take the first observation that was made on a
    # 'node', at the address of main, before the operation.
    reach_definition_at_main = next(
        state
        for (kind, ins_addr, op_type), state in rda.observed_results.items()
        if kind == 'node' and ins_addr == main_func.addr and op_type == OP_BEFORE
    )

    self.assertEqual(reach_definition_at_main.get_sp(), LiveDefinitions.INITIAL_SP_64BIT)
def test_get_the_sp_from_a_reaching_definition(self):
    """
    The stack pointer of the state observed right before the first node of `main` must equal the initial
    stack pointer of the project's architecture.
    """
    binary = os.path.join(TESTS_LOCATION, 'x86_64', 'all')
    project = angr.Project(binary, auto_load_libs=False)
    cfg = project.analyses.CFGFast()

    tmp_kb = angr.KnowledgeBase(project)
    main_func = cfg.kb.functions['main']

    rda = project.analyses.ReachingDefinitions(subject=main_func, kb=tmp_kb, observe_all=True)

    # Walk the observations in order and stop at the first one that was made on a 'node', at the address
    # of main, before the operation.
    for observation_key, observed_state in rda.observed_results.items():
        kind, ins_addr, op_type = observation_key
        if kind == 'node' and ins_addr == main_func.addr and op_type == OP_BEFORE:
            reach_definition_at_main = observed_state
            break
    else:
        # No matching observation: fail the same way an exhausted iterator would
        raise StopIteration

    nose.tools.assert_equal(reach_definition_at_main.get_sp(), project.arch.initial_sp)
def test_angr_reaching_definitions(request):
    """
    Run ReachingDefinitions on the function named by the request and collect all observed results.

    The request supplies 'file_id', 'file_name' and 'func_addr.hex'. The collected observations are sorted
    by their key; nothing is returned or asserted.
    """
    _file_id, file_name, func_addr = ReqParams.many(request, ['file_id', 'file_name', 'func_addr.hex'])

    sample_path = os.path.join(MyPath.samples(), file_name)
    project = angr.Project(sample_path, load_options={'auto_load_libs': False})

    # cfg = project.analyses.CFGFast()
    cfg = project.analyses.CFGEmulated()
    function = FunctionParse.func_by_addr(func_addr, cfg=cfg)

    tmp_kb = angr.KnowledgeBase(project)
    reaching_definition = project.analyses.ReachingDefinitions(subject=function, kb=tmp_kb, observe_all=True)

    # nose.tools.assert_equal(reaching_definition.subject.__class__ is Subject, True)

    # One entry per observation, carrying the backing storage of each kind of definitions
    observations = [
        {
            'key': key,
            'register_definitions': state.register_definitions._storage,
            'stack_definitions': state.stack_definitions._storage,
            'memory_definitions': state.memory_definitions._storage,
        }
        for key, state in reaching_definition.observed_results.items()
    ]
    result = sorted(observations, key=lambda entry: entry['key'])
def _fast_cfg(self):
    """
    Return a fast control flow graph for the project of this policy.

    When a policy manager is attached, its `fast_cfg` is returned. Otherwise a warning is logged and a new
    CFGFast analysis is run against a temporary KnowledgeBase on every call.

    :return: `self.manager.fast_cfg`, or the result of a fresh CFGFast analysis.
    """
    if self.manager is not None:
        return self.manager.fast_cfg

    # The message carries a %s placeholder, so the policy itself must be passed as the logging argument;
    # without it the log shows a literal "%s".
    l.warning(
        'Policy %s does not have an associated policy manager. The fast control flow graph is not cached.',
        self,
    )
    tmp_kb = angr.KnowledgeBase(self.project, self.project.loader.main_bin)
    return self.project.analyses.CFGFast(kb=tmp_kb)
def test_cfg_with_patches(self):
    """
    CFGFast must honor the patches of the KnowledgeBase it is given when `use_patches` is set.

    The authenticate function of fauxware is recovered three times: with a ret patched over its first
    instruction, with a ret patched over its second instruction, and without any patch.
    """
    proj = angr.Project(os.path.join(test_location, "x86_64", "fauxware"), auto_load_libs=False)

    unpatched_cfg = proj.analyses.CFGFast()
    auth_func = unpatched_cfg.functions["authenticate"]
    entry_addr = auth_func.addr

    def recover_authenticate(kb):
        """Run a patch-aware CFGFast into `kb` and return the authenticate function found in it."""
        proj.analyses.CFGFast(kb=kb, use_patches=True)
        return kb.functions["authenticate"]

    # Put a retn patch on the very first instruction: authenticate() must shrink to a single block that
    # holds a single instruction.
    first_kb = angr.KnowledgeBase(proj)
    first_kb.patches.add_patch(entry_addr, b"\xc3")
    patched_func = recover_authenticate(first_kb)
    assert len(patched_func.block_addrs_set) == 1
    assert len(patched_func._get_block(entry_addr).instruction_addrs) == 1

    # Put the ret on the second instruction instead: still a single block, now with two instructions.
    second_kb = angr.KnowledgeBase(proj)
    second_insn_addr = auth_func._get_block(entry_addr).instruction_addrs[1]
    second_kb.patches.add_patch(second_insn_addr, b"\xc3")
    patched_func = recover_authenticate(second_kb)
    assert len(patched_func.block_addrs_set) == 1
    assert len(patched_func._get_block(entry_addr).instruction_addrs) == 2

    # A KB without any patch must still yield the normal function (with 10 blocks).
    clean_kb = angr.KnowledgeBase(proj)
    not_patched_func = recover_authenticate(clean_kb)
    assert len(not_patched_func.block_addrs_set) == 10
def _run_reaching_definition_analysis_test(self, project, function, result_path, _extract_result):
    """
    Run ReachingDefinitions on a function and compare the extracted result with a pickled reference.

    :param project:         The angr project the function belongs to.
    :param function:        The subject of the analysis.
    :param result_path:     Path of the pickle file that holds the expected result.
    :param _extract_result: A callable that turns the finished analysis into the list to compare.
    """
    scratch_kb = angr.KnowledgeBase(project)
    analysis = project.analyses.ReachingDefinitions(
        subject=function,
        init_func=True,
        kb=scratch_kb,
        observe_all=True,
    )
    actual = _extract_result(analysis)

    # Uncomment these to regenerate the reference results... if you dare
    #with open(result_path, 'wb') as reference_file:
    #    pickle.dump(actual, reference_file)

    with open(result_path, 'rb') as reference_file:
        expected = pickle.load(reference_file)

    nose.tools.assert_list_equal(actual, expected)
def _run_reaching_definition_analysis_test(self, project, function, result_path, _extract_result):
    """
    Run ReachingDefinitionsAnalysis on a function and compare the extracted result with a pickled reference.

    :param project:         The angr project the function belongs to.
    :param function:        The subject of the analysis.
    :param result_path:     Path of the pickle file that holds the expected result.
    :param _extract_result: A callable that turns the finished analysis into the list to compare.
    """
    scratch_kb = angr.KnowledgeBase(project)

    # Bind the temporary KnowledgeBase first, then run the analysis itself
    run_rda = project.analyses[ReachingDefinitionsAnalysis].prep(kb=scratch_kb)
    analysis = run_rda(subject=function, observe_all=True, call_stack=[])

    actual = _extract_result(analysis)

    # Uncomment these to regenerate the reference results... if you dare
    #with open(result_path, 'wb') as reference_file:
    #    pickle.dump(actual, reference_file)

    with open(result_path, 'rb') as reference_file:
        expected = pickle.load(reference_file)

    self.assertListEqual(actual, expected)
def _run_reaching_definition_analysis(self, project, func, result_path):
    """
    Run ReachingDefinitions on a function and compare every observed result with a pickled reference.

    :param project:     The angr project the function belongs to.
    :param func:        The function to analyze.
    :param result_path: Path of the pickle file that holds the expected list of observations.
    """
    scratch_kb = angr.KnowledgeBase(project)
    analysis = project.analyses.ReachingDefinitions(func, init_func=True, kb=scratch_kb, observe_all=True)

    # One entry per observation, ordered by the observation key
    observations = [
        {
            'key': key,
            'register_definitions': state.register_definitions,
            'stack_definitions': state.stack_definitions,
            'memory_definitions': state.memory_definitions,
        }
        for key, state in analysis.observed_results.items()
    ]
    observations.sort(key=lambda entry: entry['key'])

    with open(result_path, 'rb') as reference_file:
        expected = pickle.load(reference_file)

    nose.tools.assert_list_equal(observations, expected)
def run_reaching_definition_analysis(project, func, groundtruth):  # pylint:disable=unused-argument
    """
    Run ReachingDefinitions on a function against a temporary KnowledgeBase.

    The result of the analysis is discarded and `groundtruth` is not used; nothing is checked here.

    :param project:     The angr project the function belongs to.
    :param func:        The function to analyze.
    :param groundtruth: Unused.
    """
    scratch_kb = angr.KnowledgeBase(project)
    project.analyses.ReachingDefinitions(func, kb=scratch_kb)
def run_variable_recovery_analysis(func_name, groundtruth, is_fast):
    """
    Run a variable recovery analysis on a function of fauxware and check it against the ground truth.

    :param func_name:   Name of the function to look up in the CFG and analyze.
    :param groundtruth: A dict with the keys 'variables_by_instruction' (instruction address -> variable
                        descriptions) and 'phi_variables_by_block' (block address -> variable descriptions).
                        Every description is indexed with the key 'sort'.
    :param is_fast:     Run VariableRecoveryFast when true, VariableRecovery otherwise.
    """
    binary_path = os.path.join(test_location, 'x86_64', 'fauxware')
    project = angr.Project(binary_path, load_options={'auto_load_libs': False})
    cfg = project.analyses.CFG(normalize=True)
    func = cfg.kb.functions[func_name]

    # The analysis stores its results in a throwaway KnowledgeBase
    scratch_kb = angr.KnowledgeBase(project)

    if is_fast:
        l.debug("Running VariableRecoveryFast on function %r.", func)
        analysis = project.analyses.VariableRecoveryFast(func, kb=scratch_kb)
    else:
        l.debug("Running VariableRecovery on function %r.", func)
        analysis = project.analyses.VariableRecovery(func, kb=scratch_kb)

    manager = analysis.variable_manager[func.addr]

    def comparator_for(sort):
        """Return the comparison helper for a variable sort; unsupported sorts are logged and asserted on."""
        if sort == VariableType.MEMORY:
            return _compare_memory_variable
        if sort == VariableType.REGISTER:
            return _compare_register_variable
        l.error('Unsupported variable sort %s.', sort)
        assert False

    # Variables that are expected at specific instructions
    for insn_addr, expected_vars in groundtruth['variables_by_instruction'].items():
        for expected in expected_vars:
            sort = expected['sort']
            candidates = manager.find_variables_by_insn(insn_addr, sort)
            matches = comparator_for(sort)

            # Take the first recovered variable that matches the description, if there is one
            found = None
            for candidate, _ in candidates:
                if matches(candidate, expected):
                    found = candidate
                    break

            nose.tools.assert_is_not_none(
                found,
                msg="The variable %s in groundtruth at instruction %#x cannot be "
                    "found in variable manager." % (expected, insn_addr),
            )
            l.debug("Found variable %s at %#x.", found, insn_addr)

    # Phi variables that are expected at specific blocks
    for block_addr, expected_vars in groundtruth['phi_variables_by_block'].items():
        phi_variables = manager.get_phi_variables(block_addr)
        for expected in expected_vars:
            matches = comparator_for(expected['sort'])

            found = None
            for candidate in phi_variables:
                if matches(candidate, expected):
                    found = candidate
                    break

            nose.tools.assert_is_not_none(
                found,
                msg="The phi variable %s in groundtruth at block %#x cannot be "
                    "found in variable manager." % (expected, block_addr),
            )
            l.debug("Found phi variable %s at %#x.", found, block_addr)
target_func = cfg.kb.functions.function(name="printf") target_node = cfg.get_any_node(target_func.addr) #bs = proj.analyses.BackwardSlice(cfg, cdg=cdg, ddg=ddg, targets=[ (target_node, -1) ]) #print(bs) #""" #bs.debug_repr() # symbolic execution simgr.explore(find=0x401278) #simgr.run() #bo = proj.analyses.BinaryOptimizer(cfg, {'register_reallocation', 'redundant_stack_variable_removal', 'constant_propagation'}) func_kb = angr.KnowledgeBase(proj, None) ddg = proj.analyses.DDG(kb=func_kb,cfg=cfg, call_depth=10) #ddg = proj.analyses.DDG(kb=proj.kb,cfg=cfg) fn = proj.kb.functions.get_by_addr(main.rebased_addr) main = cfg.functions.function(name='main') logging.getLogger('angr.analyses.vfg').setLevel(logging.DEBUG) # remove alloca statements with pattern matching """ idea : if we delete the useless variables, IDA/Ghidra will be able to perform constant propagation. * supprimer la paire d'instructions de la forme suivante: tYY/tZZ = Add64(tXX,0xffffffffffffffXX), where tXX is the same for both statements. * remplacer t124 = LDle:I32(tZZ) par la constante de l'instruction STle(tYY) = 0x024ced0a, à supprimer aussi. Ici, tyy vaut soit tYY, soit tZZ