def do_instruction_hash_match(self):
    """Print instruction hash matches and their control flow follow-ups.

    Builds a ``pybinkit.DiffAlgorithms`` from the first two entries of
    ``self.binaries`` and calls its ``do_instruction_hash_match``. For every
    returned match this prints the match itself, the child matches reported
    by ``do_control_flow_match``, and the match data combinations reported
    by ``do_control_flow_matches``. Nothing is returned.
    """
    binaries = self.binaries
    # The returned basic block objects are not used below; the calls are
    # kept exactly as they were.
    source_blocks = binaries[0].get_basic_blocks()
    target_blocks = binaries[1].get_basic_blocks()

    print('Loading DiffAlgorithms...')
    algorithms = pybinkit.DiffAlgorithms(binaries[0], binaries[1])

    print('Performing instruction hash matches...')
    for hash_match in algorithms.do_instruction_hash_match():
        print('>> Match: %x vs %x - match_rate: %d' % (
            hash_match.source, hash_match.target, hash_match.match_rate))

        print('\tPerforming do_control_flow_match:')
        # Only CREF_FROM is walked; CALL and DREF_FROM were left disabled.
        for flow_type in (CREF_FROM,):
            children = algorithms.do_control_flow_match(
                hash_match.source, hash_match.target, flow_type)
            for child in children:
                print('\t\t%d: %x - %x vs %x - match_rate: %d' % (
                    flow_type, child.type, child.source, child.target,
                    child.match_rate))

        print('\tPerforming do_control_flow_matches')
        for flow_type in (CREF_FROM,):
            # do_control_flow_matches takes a sequence of address pairs, so
            # the single pair is wrapped in a one-element tuple.
            seed_pairs = (
                pybinkit.AddressPair(hash_match.source, hash_match.target),
            )
            combinations = algorithms.do_control_flow_matches(
                seed_pairs, flow_type)
            print('\tCombinations counts: %d' % (len(combinations)))

            for combination in combinations:
                print('\t\tMatch Data Combination: count: %d match_rate: %d%%' % (
                    combination.count(), combination.get_match_rate()))
                for index in range(combination.count()):
                    entry = combination.get(index)
                    print('\t\t\t%x - %x : %d%%' % (
                        entry.source, entry.target, entry.match_rate))
def diff(self, algorithm='init', match_type='CREF_FROM', iteration=1):
    """Run one diffing algorithm over the first two binaries.

    Args:
        algorithm: ``'init'`` (re)creates ``self.function_matches`` and runs
            an instruction hash match on it. ``'inshash'``/``'hash'`` run
            the instruction hash match in each pass, ``'cf'``/``'controlflow'``
            run the control flow match in each pass. Any other value makes
            each pass a no-op, so the loop stops after the first pass.
        match_type: Name looked up (upper-cased) in ``matchTypeMap``; names
            missing from the map fall back to ``1``.
        iteration: Upper bound on the number of passes. The loop stops early
            as soon as a pass reports zero matches.

    Returns:
        The sum of the counts returned by the matching calls made during
        this invocation, or ``0`` when fewer than two binaries are present.
    """
    print('diff algorithm: %s' % algorithm)
    total_match_count = 0
    if len(self.binaries) < 2:
        # Nothing to compare against.
        return total_match_count

    match_type = matchTypeMap.get(match_type.upper(), 1)

    # Identity test instead of `== None`: equality could be redirected by a
    # custom __eq__, while `is None` always means "not created yet".
    if self.function_matches is None or algorithm == 'init':
        # NOTE(review): this object is never used in this method; it is kept
        # in case constructing it matters - confirm and remove if not.
        diff_algorithms = pybinkit.DiffAlgorithms(
            self.binaries[0], self.binaries[1])
        self.function_matches = pybinkit.FunctionMatching(
            self.binaries[0], self.binaries[1])
        total_match_count += self.function_matches.do_instruction_hash_match()

    i = 0
    while i < iteration:
        current_match_count = 0
        if algorithm in ('inshash', 'hash'):
            print('* do_instruction_hash_match:')
            current_match_count = (
                self.function_matches.do_instruction_hash_match())
        elif algorithm in ('cf', 'controlflow'):
            print('* do_control_flow_match:')
            current_match_count = self.function_matches.do_control_flow_match(
                0, match_type)

        print(' current_match_count: %d' % current_match_count)
        total_match_count += current_match_count

        # A pass that finds nothing new means further passes are pointless.
        if current_match_count == 0:
            break
        i += 1

    print(' total_match_count: %d' % total_match_count)
    return total_match_count
def do_instruction_hash_match_in_functions(self, src_function_address, target_function_address):
    """Print instruction hash matches between the blocks of two functions.

    Looks up the basic blocks of ``src_function_address`` in the first
    binary and of ``target_function_address`` in the second binary, passes
    both to ``DiffAlgorithms.do_instruction_hash_match_in_blocks`` and
    prints one line per returned match. Nothing is returned.
    """
    print('* do_instruction_hash_match_in_functions: %x - %x' % (
        src_function_address, target_function_address))

    binaries = self.binaries
    source_function_table = binaries[0].get_functions()
    target_function_table = binaries[1].get_functions()

    source_blocks = source_function_table.get_basic_blocks(
        src_function_address)
    target_blocks = target_function_table.get_basic_blocks(
        target_function_address)

    matcher = pybinkit.DiffAlgorithms(binaries[0], binaries[1])
    block_matches = matcher.do_instruction_hash_match_in_blocks(
        source_blocks, target_blocks)
    for block_match in block_matches:
        print('\t%x - %x : %d%%' % (
            block_match.source, block_match.target, block_match.match_rate))
def perform_multilevel_control_flow_matches(self, source, target):
    """Print two levels of CREF_FROM control flow matches for one pair.

    The first level calls ``do_control_flow_matches`` with the single
    ``(source, target)`` address pair. Each resulting combination is
    printed, then its own address pairs are passed to a second
    ``do_control_flow_matches`` call whose results are printed with a tab
    prefix. Nothing is returned.
    """
    matcher = pybinkit.DiffAlgorithms(self.binaries[0], self.binaries[1])
    print('Control Flow Match: %x - %x' % (source, target))

    root_pairs = (pybinkit.AddressPair(source, target),)
    for combination in matcher.do_control_flow_matches(root_pairs, CREF_FROM):
        self.print_match_data_combination(combination)

        matched_pairs = combination.get_address_pairs()
        pair_labels = [
            '%x - %x' % (pair.source, pair.target) for pair in matched_pairs
        ]
        print('\tControl Flow Match:' + ','.join(pair_labels))

        # Second level: continue from the pairs matched at the first level.
        nested_combinations = matcher.do_control_flow_matches(
            matched_pairs, CREF_FROM)
        self.print_match_data_combinations(nested_combinations, '\t')
def do_function_match(self):
    """Print function matches before and after a second hash matching pass.

    Instruction hash matches from ``pybinkit.DiffAlgorithms`` are added to a
    ``pybinkit.FunctionMatches``. Every function match is printed together
    with its match data combinations. After a separator line,
    ``FunctionMatches.do_instruction_hash_match`` is called and the function
    matches are printed again. Nothing is returned.
    """
    def print_summary(entry):
        # One line per function match: both addresses and the entry count.
        print('%x - %x (size: %d)' % (
            entry.source, entry.target, len(entry.match_data_list)))

    matcher = pybinkit.DiffAlgorithms(self.binaries[0], self.binaries[1])
    print('Performing instruction hash matches...')
    hash_matches = matcher.do_instruction_hash_match()

    grouped_matches = pybinkit.FunctionMatches(
        self.binaries[0], self.binaries[1])
    grouped_matches.add_matches(hash_matches)

    for entry in grouped_matches.get_matches():
        print_summary(entry)
        combinations = matcher.get_match_data_combinations(
            entry.match_data_list)
        self.print_match_data_combinations(combinations, '\t')

    # NOTE(review): the blank line is assumed to belong with the separator
    # below rather than to be printed once per function match - confirm.
    print('')
    print('='*80)

    grouped_matches.do_instruction_hash_match()
    for entry in grouped_matches.get_matches():
        print_summary(entry)