def __getcodeblockinfo(self, cu):
    """Return the start address of a code unit and the end of its basic block.

    :param cu: Code unit to look up; its ``getMinAddress()`` is used as the
        lookup address.
    :returns: Tuple ``(cu_addr, CB_maxaddr)``: the code unit's minimum address
        and the maximum address of the single basic block containing it.
    :raises err_in_address: If the number of basic blocks reported for the
        address is not exactly one.
    """
    start_addr = cu.getMinAddress()
    containing = BasicBlockModel(self.__currentProgram).getCodeBlocksContaining(
        start_addr, None)

    # Exactly one block must contain the address; anything else is ambiguous.
    if len(containing) != 1:
        raise err_in_address(
            "there is NOT only one codeblock that contains this codeunit")

    return start_addr, containing[0].getMaxAddress()
def has_only_one_basic_block(function):
    """Tell whether the body of ``function`` is covered by exactly one block.

    :param function: Function whose body (``getBody()``) is inspected.
    :returns: True when the basic block model yields exactly one block for
        the function body, False otherwise.
    """
    block_iter = BasicBlockModel(currentProgram).getCodeBlocksContaining(
        function.getBody(), monitor)

    # The iterator is drained completely; only the number of blocks matters.
    seen = 0
    while block_iter.hasNext():
        block_iter.next()
        seen += 1

    return seen == 1
def dumpBlocks(output):
    """Serialize the basic blocks of the current program to ``output``.

    Every non-external function gets one entry in the protobuf module with
    its basic blocks attached. Blocks that belong to no visited function are
    collected under a dummy function entry whose address is 0x0.

    :param output: Path of the file that receives the serialized module.
    """
    bbModel = BasicBlockModel(currentProgram)
    functionManager = currentProgram.getFunctionManager()
    module = blocks_pb2.module()

    # record the basic block that has been added by functions
    bb_set = set()
    # entry addresses of the functions already handled
    funcs_set = set()

    for func in functionManager.getFunctions(True):
        # we skip external functions
        if func.isExternal():
            continue

        func_va = func.getEntryPoint().getOffset()
        if func_va in funcs_set:
            continue
        funcs_set.add(func_va)

        logging.debug("Function address is 0x{0:x}".format(func_va))
        codeBlockIterator = bbModel.getCodeBlocksContaining(
            func.getBody(), monitor)
        pbFunc = module.fuc.add()
        pbFunc.va = func_va

        if func.hasNoReturn():
            pbFunc.type = 0x5
            logging.debug("function at 0x%x does not return!" % pbFunc.va)

        # iter over the basic blocks
        while codeBlockIterator.hasNext():
            bb = codeBlockIterator.next()
            pbBB = pbFunc.bb.add()
            bb_set.add(bb.getMinAddress().getOffset())
            addBB(pbBB, bb, pbFunc)

    # Second pass: pick up blocks that no function claimed.
    codeBlocks = bbModel.getCodeBlocks(monitor)
    dummy_func = module.fuc.add()
    dummy_func.va = 0x0
    while codeBlocks.hasNext():
        bb = codeBlocks.next()
        bb_va = bb.getMinAddress().getOffset()
        if bb_va in bb_set:
            continue
        pbBB = dummy_func.bb.add()
        bb_set.add(bb_va)
        logging.debug("Find another basic block 0x%x" % bb_va)
        addBB(pbBB, bb, dummy_func)

    # The context manager closes the file even if serialization or the write
    # raises, so no handle is leaked and buffered data is flushed.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
def main():
    """Print the run settings, mark blocks reached by the trace, dump blocks.

    Reads the edge list from ``/tmp/edges_uniq.lst``, clears existing markup,
    scans the program by edges and finally calls ``dump_blocks``.
    """
    bb_model = BasicBlockModel(getCurrentProgram())

    print("Block model: %s" % bb_model.getName())
    print("Ignore threshold: %d" % ignore_threshold)
    print("Print missing: %s" % print_missing)
    print("Verbose=%s" % verbose)

    ##
    # Read edges from file, then scan program and mark any reached blocks
    #
    # Input format:
    # One edge per line: "src,dst,num", where num is the number of times
    # the edge was hit
    ##
    clear_markup()
    trace_edges = read_edges("/tmp/edges_uniq.lst")
    # The count of unmapped edges is not reported by this variant.
    scan_by_edges(bb_model, trace_edges)

    dump_blocks(verbose)
def __init__(self, program):
    """Collect program helpers, then build string and function signatures.

    Signature generation is timed and a one-line summary is printed.

    :param program: Program to generate signatures for.
    """
    flat_api = FlatProgramAPI(program)

    self._program = program
    self._flat_api = flat_api
    self._memory_map = program.getMemory()
    self._simple_blk = BasicBlockModel(program)
    self._monitor = flat_api.getMonitor()
    self._function_manager = program.getFunctionManager()
    self._address_factory = program.getAddressFactory()

    self.signatures = None
    self._strings = {}
    self._find_strings()

    started = time.time()
    self._signatures = self._generate()
    elapsed = time.time() - started

    summary = ('Generated %d formal signatures and %d fuzzy signatures for %d '
               'functions in %.2f seconds.')
    print(summary % (len(self._signatures.formal),
                     len(self._signatures.fuzzy),
                     len(self._signatures.functions),
                     elapsed))
class Rizzo(object):
    """
    Generate, save, load and apply Rizzo function signatures for a program.
    """

    def __init__(self, program):
        """
        Collect program helpers, then build string and function signatures.

        :param program: Program to generate signatures for.
        """
        self._program = program
        self._flat_api = FlatProgramAPI(self._program)
        self._memory_map = self._program.getMemory()
        self._simple_blk = BasicBlockModel(self._program)
        self._monitor = self._flat_api.getMonitor()
        self._function_manager = self._program.getFunctionManager()
        self._address_factory = self._program.getAddressFactory()

        # Public attribute kept as-is for callers; the generated signatures
        # are stored in self._signatures below.
        self.signatures = None
        self._strings = {}
        self._find_strings()

        start = time.time()
        self._signatures = self._generate()
        end = time.time()

        print('Generated %d formal signatures and %d fuzzy signatures for %d '
              'functions in %.2f seconds.' % (len(self._signatures.formal),
                                              len(self._signatures.fuzzy),
                                              len(self._signatures.functions),
                                              end - start))

    def save(self, signature_file):
        """
        Save Rizzo signatures to the supplied signature file.

        :param signature_file: Full path to save signatures.
        :type signature_file: str
        """
        print('Saving signature to %s...' % signature_file)
        with open(signature_file, 'wb') as rizz_file:
            pickle.dump(self._signatures, rizz_file)
        print('done.')

    def load(self, signature_file):
        """
        Load Rizzo signatures from a file.

        :param signature_file: Full path to load signatures from.
        :type signature_file: str

        :returns: Loaded signatures
        :rtype: RizzoSignatures

        :raises Exception: If the signature file does not exist.
        """
        if not os.path.exists(signature_file):
            raise Exception('Signature file %s does not exist' %
                            signature_file)

        print('Loading signatures from %s...' % signature_file)
        with open(signature_file, 'rb') as rizz_file:
            # SECURITY NOTE: pickle.load executes code embedded in the file.
            # Only load signature files that come from a trusted source.
            try:
                signatures = pickle.load(rizz_file)
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                print('This does not appear to be a Rizzo signature file.')
                exit(1)
        print('done.')
        return signatures

    def apply(self, signatures):
        """
        Apply signatures to the current program.

        Functions matched directly are renamed first; afterwards functions
        called from uniquely matching blocks are renamed as well.

        :param signatures: Signatures to apply to current program.
        :type signatures: RizzoSignatures
        """
        rename_count = 0
        signature_matches = self._find_match(signatures)
        # Names already handled; only membership is ever tested.
        renamed = set()

        for matches in signature_matches:
            for curr_func, new_func in matches.items():
                # hex() only appends 'L' for Python 2 longs. Stripping the
                # suffix explicitly avoids cutting off a real hex digit when
                # the address is a plain int.
                curr_addr = self._address_factory.getAddress(
                    hex(curr_func.address).rstrip('L'))
                function = self._flat_api.getFunctionAt(curr_addr)
                if function and new_func.name not in renamed:
                    renamed.add(new_func.name)
                    if self._rename_functions(function, new_func.name):
                        rename_count += 1

                # Pair up blocks that match exactly once; a block that
                # matches more than once is ambiguous and is dropped.
                duplicates = []
                block_match = {}
                for block in new_func.blocks:
                    new_block = RizzoBlockDescriptor(block)
                    for curr_block in curr_func.blocks:
                        curr_block = RizzoBlockDescriptor(curr_block)

                        if curr_block == new_block:
                            if curr_block in block_match:
                                del block_match[curr_block]
                                duplicates.append(curr_block)
                            elif curr_block not in duplicates:
                                block_match[curr_block] = new_block

                # Rename the functions called from the matched blocks.
                for curr_block, new_block in block_match.items():
                    for curr_function, new_function in \
                            zip(curr_block.functions, new_block.functions):
                        functions = utils.find_function(
                            self._program, curr_function)
                        if len(functions) == 1:
                            if new_function not in renamed:
                                renamed.add(new_function)
                                if self._rename_functions(
                                        functions[0], new_function):
                                    rename_count += 1

        print('Renamed %d functions.' % rename_count)

    def _find_match(self, signatures):
        """
        Find matches to signatures in the current program.

        :param signatures: Signatures to find in current program.
        :type signatures: RizzoSignatures

        :returns: Tuple of matched signatures: (formal, string, immediate,
                  fuzzy)
        :rtype: tuple
        """
        formal_signatures = find_signature_matches(
            signatures.formal, self._signatures.formal,
            signatures.functions, self._signatures.functions,
            'formal signatures')

        string_signatures = find_signature_matches(
            signatures.strings, self._signatures.strings,
            signatures.functions, self._signatures.functions,
            'string signatures')

        immediate_signatures = find_signature_matches(
            signatures.immediates, self._signatures.immediates,
            signatures.functions, self._signatures.functions,
            'immediate signatures')

        # Fuzzy matches additionally require the same number of blocks.
        fuzzy_signatures = find_signature_matches(
            signatures.fuzzy, self._signatures.fuzzy,
            signatures.functions, self._signatures.functions,
            'fuzzy signatures',
            lambda x, y: len(x.blocks) == len(y.blocks))

        return (formal_signatures, string_signatures, immediate_signatures,
                fuzzy_signatures)

    def _rename_functions(self, function, name):
        """
        Rename a function if the function has not be renamed and new name is
        a valid new function name. Previous renamed are determined by
        searching for 'FUN_' in the function.

        :param function: Function to be renamed.
        :type function: ghidra.program.model.listing.Function

        :param name: New name to give function.
        :type name: unicode

        :returns: True if function renamed, False for no rename.
        :rtype: bool
        """
        if not function or not name:
            return False

        if 'FUN_' in function.name and 'FUN_' not in name:
            print('Renaming %s to %s' % (function.name, name))
            function.setName(name, SourceType.USER_DEFINED)
            return True
        elif 'FUN_' not in function.name and 'FUN_' not in name and \
                function.name != name:
            print('Found match with %s to %s but did not rename.' %
                  (function.name, name))

        return False

    def _signature_hash(self, value):
        """
        Simple hash function used to create a signature.

        :param value: Value to hash.
        :type value: variable

        :returns: Signature hash (lower 32 bits of the string hash)
        :rtype: int
        """
        return hash(str(value)) & 0xFFFFFFFF

    def _find_strings(self):
        """
        Find strings in the current program and create signatures for them.

        Results are stored in ``self._strings`` keyed by the hash code of the
        string's address.
        """
        memory = self._memory_map.getAllInitializedAddressSet()
        strings = self._flat_api.findStrings(memory, 2, 1, True, True)

        for string in strings:
            addr = string.getAddress()
            value = string.getString(self._memory_map)
            xref = self._flat_api.getReferencesTo(addr)
            self._strings[addr.hashCode()] = RizzoString(addr, value, xref)

    def _get_function_blocks(self, function):
        """
        Get all code blocks in the provided function.

        :param function: Function to get code blocks from.
        :type function: ghidra.program.model.listing.Function

        :returns: List of code blocks.
        :rtype: ghidra.program.model.block.CodeBlock
        """
        blocks = []
        code_blocks = self._simple_blk.getCodeBlocksContaining(
            function.body, self._monitor)

        while code_blocks.hasNext():
            blocks.append(code_blocks.next())

        return blocks

    def _hash_block(self, block):
        """
        Create signatures for the provided code block.

        :param block: Code block to hash.

        :returns: Tuple of (formal signature hash, fuzzy signature hash,
                  list of immediate values, list of called function names).
        """
        formal = []
        fuzzy = []
        functions = []
        immediates = []

        min_addr = block.minAddress
        max_addr = block.maxAddress

        curr_ins = self._flat_api.getInstructionAt(min_addr)

        while curr_ins and curr_ins.getAddress() < max_addr:
            code_ref = []
            data_ref = []

            # Create code and data reference signatures.
            references = curr_ins.getReferencesFrom()
            for reference in references:
                # Don't care about tracking stack references.
                if reference.isStackReference():
                    continue

                if is_code_ref(reference):
                    code_ref.append(reference)

                # Get data reads only if they are to valid memory.
                elif is_data_ref(reference) and \
                        self._memory_map.contains(reference.toAddress):
                    data_ref.append(reference)

            # Append the mnemonic string to the formal signature.
            formal.append(curr_ins.getMnemonicString())

            # If its a call instruction add the function call to the functions
            # signature and make note of the call in the fuzzy signature.
            if is_call_instruction(curr_ins):
                for cref in code_ref:
                    func = self._flat_api.getFunctionAt(cref.toAddress)
                    if func:
                        functions.append(func.getName())
                        fuzzy.append('funcref')
            # Make note of any data references.
            elif data_ref:
                for dref in data_ref:
                    addr_hash = dref.toAddress.hashCode()

                    if addr_hash in self._strings:
                        string_value = self._strings[addr_hash].value
                    else:
                        string_value = 'dataref'

                    formal.append(string_value)
                    fuzzy.append(string_value)
            # If not data or code then add everything to the formal signature.
            elif not data_ref and not code_ref:
                for i in range(curr_ins.getNumOperands()):
                    operand = curr_ins.getDefaultOperandRepresentation(i)
                    formal.append(operand)

                    op_type = curr_ins.getOperandRefType(i)
                    if op_type.isData():
                        # Indeterminate return values. Just put a try/except
                        # around it so the getValue AttributeError can be
                        # ignored. Not worth checking for types since those
                        # may come and go.
                        try:
                            op_value = curr_ins.getOpObjects(i)[0].getValue()
                            if op_value > 0xFFFF:
                                fuzzy.append(str(op_value))
                                immediates.append(op_value)
                        except (AttributeError, IndexError):
                            pass

            curr_ins = curr_ins.getNext()

        formal_sig = self._signature_hash(''.join(formal))
        fuzzy_sig = self._signature_hash(''.join(fuzzy))

        return (formal_sig, fuzzy_sig, immediates, functions)

    def _hash_function(self, function):
        """
        Create a block by block signature for the provided function.

        :param function: Function to create signature hash for.
        :type function: ghidra.program.model.listing.Function

        :returns: List of signatures per block found.
        """
        block_hash = []

        func_blocks = self._get_function_blocks(function)
        for block in func_blocks:
            block_hash.append(self._hash_block(block))

        return block_hash

    def _generate(self):
        """
        Create signatures for the current program.

        :returns: Signature container filled with string, formal, fuzzy and
                  immediate signatures.
        """
        signatures = RizzoSignature()

        # String based signatures
        for curr_string in self._strings.values():
            # Only create signatures on reasonably long strings with one ref.
            if len(curr_string.value) >= 8 and len(curr_string.xrefs) == 1:
                function = self._flat_api.getFunctionContaining(
                    curr_string.xrefs[0].fromAddress)
                if function:
                    string_hash = self._signature_hash(curr_string.value)
                    entry = utils.address_to_int(function.getEntryPoint())
                    signatures.add_string(string_hash, entry)

        # Formal, fuzzy, and immediate-based function signatures
        for function in self._function_manager.getFunctions(True):
            hashed_function_blocks = self._hash_function(function)

            formal = self._signature_hash(''.join(
                [str(e) for (e, _, _, _) in hashed_function_blocks]))
            fuzzy = self._signature_hash(''.join(
                [str(f) for (_, f, _, _) in hashed_function_blocks]))
            immediate = [str(i) for (_, _, i, _) in hashed_function_blocks]

            function_entry = utils.address_to_int(function.getEntryPoint())
            signatures.functions[function_entry] = (function.getName(),
                                                    hashed_function_blocks)

            signatures.add_formal(formal, function_entry)
            signatures.add_fuzzy(fuzzy, function_entry)
            for value in immediate:
                signatures.add_immediate(value, function_entry)

        signatures.reset_dups()

        return signatures
#Extract basic blocks of the program
#@author Hsin-Wei Hung
#@category _NEW_
#@keybinding
#@menupath
#@toolbar

from ghidra.program.model.block import BasicBlockModel

# Output format: the first line is the program name; every following line is
# "<start> <end>", both offsets relative to the image base. A range ends at
# each instruction whose flow type is not a fallthrough.
# The with block closes the file when the script finishes (or fails), so
# buffered output is not lost.
with open("{}.bbs".format(currentProgram.getName()), "w+") as f:
    f.write("{}\n".format(currentProgram.getName()))

    base = currentProgram.getImageBase().getOffset()
    bbm = BasicBlockModel(currentProgram)
    bbs = bbm.getCodeBlocks(monitor)

    while bbs.hasNext():
        bb = bbs.next()
        # print("cb {} {}".format(bb.getMinAddress(), bb.getMaxAddress()))
        addrs = bb.getAddresses(True)
        nextStartAddr = bb.getMinAddress()

        while addrs.hasNext():
            addr = addrs.next()
            instr = getInstructionAt(addr)
            # Addresses that are not the start of an instruction are skipped.
            if instr is None:
                continue
            if instr.getFlowType().isFallthrough():
                continue

            instrSize = instr.getLength()
            # print("bb {} {}".format(nextStartAddr, addr.add(instrSize-1)))
            f.write("{} {}\n".format(nextStartAddr.getOffset() - base,
                                     addr.getOffset() - base))
            # The next range starts right after this instruction.
            nextStartAddr = addr.add(instrSize)
# credits for code logic # AndrewFasano/VisualizingFuzzerCoverage - github from ghidra.program.model.block import BasicBlockModel blockiterator = BasicBlockModel(currentProgram).getCodeBlocks(monitor) # dictionary contains basic block information functions = {} def add_block(function, block): if function not in functions: functions[function] = [] functions[function].append(block) # For each block, look through the function list until we find a match # This is terribly inefficient (O(N^2)) while blockiterator.hasNext(): cur_block = blockiterator.next().getMinAddress() function = getFirstFunction() found = False # Search functions until we find a match or run out of functions while function is not None: b = function.getBody() if b.contains(cur_block): add_block(function.getName(), cur_block) found = True break
def main():
    """Report per-function and overall basic block coverage for a trace.

    Reads edges from ``/tmp/edges_uniq.lst``, marks the blocks they reach via
    ``scan_by_edges`` and prints how many blocks of each function are listed
    in ``blocklist``, followed by an overall summary.
    """
    def percent_of(part, whole):
        # Integer percentage; 0 when there is nothing to divide by, so an
        # empty program no longer raises ZeroDivisionError.
        return part * 100 // whole if whole else 0

    edges = read_edges("/tmp/edges_uniq.lst")
    blocks = BasicBlockModel(getCurrentProgram())
    print("Block Model: %s" % blocks.getName())

    ##
    # Scan program and mark any blocks we reached
    ##
    unmapped_edges = scan_by_edges(blocks, edges)

    ##
    # Compute blocks reached/unreached by function
    # While at it, also count the total number of base blocks
    ##
    block_iter = blocks.getCodeBlocks(monitor)
    total_blocks = 0
    reached_map = dict()  # map of reached blocks by function
    blocks_map = dict()   # map of total blocks by function

    while block_iter.hasNext():
        total_blocks += 1
        block = block_iter.next()
        addr = block.getFirstStartAddress()
        func = getFunctionContaining(addr)

        blocks_map[func] = blocks_map.get(func, 0) + 1
        if block in blocklist:
            reached_map[func] = reached_map.get(func, 0) + 1

    ##
    ## Summarize blocks reached/missed by function
    ##
    print("")
    # Loop variables are named `count` so the block model is not shadowed.
    for func, count in sorted(reached_map.items(), key=lambda x: x[1]):
        total = blocks_map[func]
        percent = percent_of(count, total)
        if total > ignore_threshold:
            print("Reached: %3d blocks (%3d%%) in %s" % (count, percent, func))

    print("")
    if print_missing:
        for func, count in sorted(blocks_map.items(), key=lambda x: x[1]):
            if func not in reached_map and count > ignore_threshold:
                print("Missed: %3d blocks in %s" % (count, func))

    ##
    # Overall Summary
    ##
    block_cov = percent_of(len(blocklist), total_blocks)
    func_cov = percent_of(len(reached_map), len(blocks_map))

    print("")
    print("Total blocks in file: %6d" % total_blocks)
    print("Total edges in trace: %6d" % len(edges))
    print("Failed to map edges: %6d" % unmapped_edges)
    print("")
    print("Total reached funcs: %5d / %5d (%d%%)" % (len(reached_map),
                                                     len(blocks_map),
                                                     func_cov))
    print("Total reached blocks: %5d / %5d (%d%%)" % (len(blocklist),
                                                      total_blocks,
                                                      block_cov))
    print("")
def main():
    """Report basic block coverage for a trace, honouring the blacklist.

    Reads edges from ``/tmp/edges_uniq.lst``, marks the blocks they reach via
    ``scan_by_edges`` and prints per-function and overall statistics. Blocks
    of functions listed in ``func_blacklist`` are counted per function but
    left out of the reached/missing totals.
    """
    def percent_of(part, whole):
        # Integer percentage; 0 when there is nothing to divide by, so an
        # empty or fully blacklisted program does not raise
        # ZeroDivisionError.
        return part * 100 // whole if whole else 0

    def by_name(item):
        # Case-insensitive function name. The original key returned the
        # bound method `.lower` without calling it, so entries were not
        # sorted by name.
        return str(item[0]).lower()

    model = BasicBlockModel(getCurrentProgram())
    print("Block model: %s" % model.getName())
    print("Ignore threshold: %d" % ignore_threshold)
    print("Print missing: %s" % print_missing)
    print("Verbose=%s" % verbose)

    ##
    # Read edges from file, then scan program and mark any reached blocks
    #
    # Input format:
    # One edge per line: "src,dst,num", where num is the number of times
    # the edge was hit
    ##
    clear_markup()
    edges = read_edges("/tmp/edges_uniq.lst")
    unmapped_edges = scan_by_edges(model, edges)

    ##
    # Compute blocks reached/unreached by function
    # While at it, also count the total number of base blocks
    ##
    block_iter = model.getCodeBlocks(monitor)
    total_blocks = 0
    missing_blocks = 0
    reached_blocks = 0
    reached_map = dict()  # map of reached blocks by function
    blocks_map = dict()   # map of total blocks by function

    while block_iter.hasNext():
        total_blocks += 1
        block = block_iter.next()
        addr = block.getFirstStartAddress()
        func = str(getFunctionContaining(addr))

        blocks_map[func] = blocks_map.get(func, 0) + 1

        if func in func_blacklist:
            # Blacklisted functions never count as reached or missing; a
            # reached block is only reported.
            if block in blocklist:
                print("Block: addr=%s, name=%s func=%s, reached=%d" % (
                    addr, id(block), func, (block in blocklist)))
            continue

        if block in blocklist:
            reached_blocks += 1
            reached_map[func] = reached_map.get(func, 0) + 1
        else:
            missing_blocks += 1

    ##
    ## Summarize blocks reached/missed by function
    ##
    print("")
    total_blocks_reachable = 0
    for func, count in sorted(reached_map.items(), key=by_name):
        total = blocks_map[func]
        percent = percent_of(count, total)
        total_blocks_reachable += total
        if total > ignore_threshold:
            print("Reached: %3d from %3d blocks (%3d%%) in %s" % (
                count, total, percent, func))

    print("")
    if print_missing:
        for func, count in sorted(blocks_map.items(), key=by_name):
            if func in func_blacklist:
                print("Ignore: %3d blocks in %s" % (count, func))
                continue
            if func not in reached_map and count > ignore_threshold:
                print("Missed: %3d blocks in %s" % (count, func))

    if print_implicit and len(implicit_blocks):
        print("\nMarked %d implicitly reached blocks:\n\t%s" % (
            len(implicit_blocks),
            ', '.join(str(x.name) for x in implicit_blocks)))

    ##
    # Overall Summary
    ##
    # Computed once; the original computed the same sum twice.
    blocks_ignored = sum([blocks_map.get(func, 0) for func in func_blacklist])
    filtered_blocks = reached_blocks + missing_blocks

    block_cov = percent_of(reached_blocks, filtered_blocks)
    func_cov = percent_of(len(reached_map), len(blocks_map))

    print("")
    print("Total blocks in file: %6d" % total_blocks)
    print(" blacklisted: %6d" % blocks_ignored)
    print(" remaining: %6d" % filtered_blocks)
    print("")
    print("Total edges in trace: %6d" % len(edges))
    print("Failed to map edges: %6d" % unmapped_edges)
    print("")
    print("Total reached funcs: %6d / %6d (%d%%)" % (
        len(reached_map), len(blocks_map), func_cov))
    print("Total reached blocks: %6d / %6d (%d%%)" % (
        reached_blocks, filtered_blocks, block_cov))
    print(" ..in reached funcs: %6d / %6d (%d%%)" % (
        reached_blocks, total_blocks_reachable,
        percent_of(reached_blocks, total_blocks_reachable)))
    print(" Blocks not reached: %6d" % missing_blocks)
    print("")
# Example ghidra script showing how to extract the address and
# names of basic blocks.
# Run this script on the target binary and then feed the
# output into breakpoints.txt
from ghidra.program.model.block import BasicBlockModel
from ghidra.util.task import TaskMonitor

bbm = BasicBlockModel(currentProgram)
blocks = bbm.getCodeBlocks(TaskMonitor.DUMMY)

# Print "<min address> <name>" plus an empty line for every block; the loop
# ends as soon as the iterator hands back a falsy value.
while True:
    block = blocks.next()
    if not block:
        break
    print("{} {}".format(block.minAddress, block.name))
    print("")
from ghidra.program.model.pcode import HighSymbol from ghidra.program.model.pcode import HighVariable from ghidra.program.model.pcode import LocalSymbolMap from ghidra.program.model.pcode import PcodeOp from ghidra.program.model.pcode import Varnode from ghidra.program.model.symbol import Symbol from ghidra.program.model.symbol import Reference from ghidra.program.model.symbol import ReferenceIterator from ghidra.program.model.symbol import ReferenceManager from ghidra.util.task import ConsoleTaskMonitor from ghidra.program.model.block import BasicBlockModel from ghidra.program.model.data import Structure from ghidra.program.model.data import StructureDataType # for static blocks blockiterator = BasicBlockModel(currentProgram).getCodeBlocks(monitor) fun_blocks = {} def add_block(function, block): if function not in fun_blocks: fun_blocks[function] = [] fun_blocks[function].append(block) # For each block, look through the function list until we find a match # This is terribly inefficient (O(N^2)) def basicblocks(): while blockiterator.hasNext():
def __init__(self, program):
    """Keep the program and create the helpers built on top of it.

    :param program: Program the flat API and the basic block model are
        created for.
    """
    flat_api = FlatProgramAPI(program)

    self._program = program
    self._flat_api = flat_api
    self._monitor = flat_api.getMonitor()
    self._basic_blocks = BasicBlockModel(program)
# Ghidra plugin to generate a mapping of function names to basic blocks in JSON #@author Andrew Fasano & Brendan Dolan-Gavitt #@category CodeAnalysis #@keybinding #@menupath #@toolbar from ghidra.program.model.block import BasicBlockModel import json bbmodel = BasicBlockModel(currentProgram) blockIterator = bbmodel.getCodeBlocks(monitor) block_map = {} def add_block(fn, block): if fn not in block_map.keys(): block_map[fn] = [] block_map[fn].append(str(block)) # For each block, look through the function list until we find a match # This is terribly inefficient (O(N^2)) while blockIterator.hasNext(): cur_block = blockIterator.next().getMinAddress() function = getFirstFunction() found = False # Search functions until we find a match or run out of functions while function is not None:
from ghidra.program.model.address import AddressSetView
from ghidra.app.decompiler import DecompInterface
from ghidra.program.model.pcode import PcodeOp
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.listing.Function import FunctionUpdateType
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.listing import ReturnParameterImpl
from ghidra.program.model.listing import ParameterImpl

# API tables filled by load_api_names_types(); each maps the first element of
# a JSON entry to a tuple of its second and third elements.
TL_APIS = {}
MAX_TL_API = 0xC2  # presumably the highest trustlet API index - TODO confirm
DR_APIS = {}
MAX_DR_API = 0x3D  # presumably the highest driver API index - TODO confirm

# Shared analysis helpers for the current program.
blockModel = BasicBlockModel(currentProgram)
functionManager = currentProgram.getFunctionManager()
decompInterface = DecompInterface()
decompInterface.openProgram(currentProgram)


def load_api_names_types():
    """Fill TL_APIS and DR_APIS from the JSON files next to this script.

    Reads ``tl_apis.json`` and ``dr_apis.json`` from the directory of this
    source file. Every JSON entry is unpacked into three values ``(i, n, t)``
    and stored as ``{i: (n, t)}``.
    """
    # Directory of this script, located through the source file of a lambda
    # defined here.
    curdir = os.path.dirname(os.path.abspath(inspect.getsourcefile(lambda: 0)))
    with open(os.path.join(curdir, 'tl_apis.json'), 'r') as f:
        TL_APIS.update({i: (n, t) for i, n, t in json.loads(f.read())})
    print("[*] Loaded %s trustlets APIs names" % len(TL_APIS))
    # DR_APIS is only mutated in place below, never rebound.
    global DR_APIS
    with open(os.path.join(curdir, 'dr_apis.json'), 'r') as f:
        DR_APIS.update({i: (n, t) for i, n, t in json.loads(f.read())})
def get_basic_blocks_containing(self, addrset):
    """Return the basic blocks containing ``addrset`` wrapped in a FakeIter.

    :param addrset: Addresses handed to the basic block model lookup.
    :returns: ``FakeIter`` around the block iterator produced by the model.
    """
    block_iter = BasicBlockModel(self.cp).getCodeBlocksContaining(
        addrset, self.flatapi.monitor)
    return FakeIter(block_iter)
def _basic_block_model( self) -> "ghidra.program.model.block.BasicBlockModel": if not self.__basic_block_model: from ghidra.program.model.block import BasicBlockModel self.__basic_block_model = BasicBlockModel(self._program) return self.__basic_block_model