class Plugin:
    """Binary Ninja commands backed by a FunctionSimSearch SimHash index.

    The plugin hashes the control-flow graph of a function, stores the hash
    in an on-disk search index and queries that index for similar functions.
    Human-readable details (file name, function name) live in a ``Metadata``
    store kept next to the index file.
    """

    def __init__(self):
        # Path of the SimHash index file; stays None until init_db() runs.
        self.sim_hash_location = None
        # Metadata store created by init_db() at "<index path>.meta".
        self.metadata = None

    def init_db(self):
        """Ask the user for a SimHash database and set up its metadata store.

        Falls back to ``default_sim_hash_location`` when the dialog returns
        nothing.
        """
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function):
        """Build a FunctionSimSearch flowgraph for ``function`` and hash it.

        Every basic block is split after each ``call`` instruction so that
        calls terminate a flowgraph node.

        :param function: iterable of basic blocks; each block yields
            instructions and exposes ``start`` and ``outgoing_edges``.
        :returns: whatever ``fss.SimHasher().calculate_hash`` returns for the
            flowgraph (callers unpack it as ``h1, h2``).
        """
        nodes = []  # (node start address, [instruction text, ...])
        graph = []  # (source address, destination address)

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0  # running sum of instruction[1] within the block
            position = block.start

            for instruction in block:
                # presumably the mnemonic token of the instruction
                mnemonic = instruction[0][0].text
                local_node.append(mnemonic)
                shift += instruction[1]

                # Split on call with the assumption that we only care about
                # x86/64 for now.
                if mnemonic == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                # Nothing was collected after the last split, so no node
                # starts at `position`; the most recently recorded edge is
                # dropped.
                # NOTE(review): this removes a single edge even when several
                # outgoing edges were just appended - confirm that is intended.
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()
        for address, mnemonics in nodes:
            flowgraph.add_node(address)
            # Format conversion: each instruction becomes (mnemonic, ()).
            flowgraph.add_instructions(
                address, tuple((text, ()) for text in mnemonics))

        for source, destination in graph:
            flowgraph.add_edge(source, destination)

        hasher = fss.SimHasher()
        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        """Return an integer id derived from the SHA-256 of a file.

        The id is the first 16 hex digits of the digest of the file's
        contents, parsed as an integer.

        :param filename: path of the file to hash.
        """
        digest = hashlib.sha256()
        # Binary mode: the input is an executable, and text mode would either
        # corrupt the bytes or hand str objects to hashlib.
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b''):
                digest.update(chunk)
        return int(digest.hexdigest()[0:16], 16)

    def _open_search_index(self):
        """Open the SimHash index, creating it when the file is missing."""
        create_index = not os.path.isfile(self.sim_hash_location)
        return fss.SimHashSearchIndex(self.sim_hash_location, create_index, 28)

    def save_hash(self, bv, current_function):
        """Save hash of current function into search index.

        :returns: ``-1`` when the platform is unsupported, otherwise ``None``.
        """
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        search_index = self._open_search_index()

        # TODO: detect if we are opening database instead of binary
        exec_id = self.get_exec_id(bv.file.filename)
        search_index.add_function(h1, h2, exec_id, current_function.start)
        bn.log_info(
            '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.format(
                exec_id, current_function.start, h1, h2))
        self.metadata.add(exec_id, current_function.start, bv.file.filename,
                          current_function.name)

    def find_hash(self, bv, current_function):
        """Find functions similar to the current one and show a report.

        :returns: ``-1`` when the platform is unsupported, otherwise ``None``.
        """
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        search_index = self._open_search_index()
        results = search_index.query_top_N(h1, h2, 5)

        # TODO: refactor, possibly with report template
        if not results:
            report = "# No similar functions found"
        else:
            # TODO: add better header, but that will require some refactoring
            # of extract function
            lines = ["# Best match results"]
            for r in results:
                bn.log_debug('[*] SimHash match: {}'.format(r))
                # Same score expression as before: r[0] / 128 shifted by 0.5,
                # clamped at zero and doubled.
                score = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
                m = self.metadata.get(r[1], r[2])  # file name, function name
                if not m:
                    lines.append(
                        "- {:f} - {:x}:0x{:x}".format(score, r[1], r[2]))
                else:
                    lines.append("- {:f} - {:x}:0x{:x} {} '{}'".format(
                        score, r[1], r[2], m[0], m[1]))
            report = "\n".join(lines) + "\n"

        # Display results
        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)
class Plugin:
    """Binary Ninja commands backed by a FunctionSimSearch SimHash index.

    Functions are hashed from their control-flow graph, stored in an on-disk
    search index and looked up again to report similar functions. File and
    function names are kept in a ``Metadata`` store next to the index file.
    """

    def __init__(self):
        # Path of the SimHash index file; stays None until init_db() runs.
        self.sim_hash_location = None
        # Metadata store created by init_db() at "<index path>.meta".
        self.metadata = None

    def init_db(self):
        """Ask the user for a SimHash database and set up its metadata store.

        Falls back to ``default_sim_hash_location`` when the dialog returns
        nothing.
        """
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function, minimum_size=5):
        """Build a FunctionSimSearch flowgraph for ``function`` and hash it.

        Every basic block is split after each ``call`` instruction so that
        calls terminate a flowgraph node.

        :param function: iterable of basic blocks; each block yields
            instructions and exposes ``start`` and ``outgoing_edges``.
        :param minimum_size: flowgraphs with fewer branching nodes than this
            are not hashed.
        :returns: ``(None, None)`` for flowgraphs below ``minimum_size``,
            otherwise the result of ``fss.SimHasher().calculate_hash``
            (callers unpack it as ``h1, h2``).
        """
        nodes = []  # (node start address, [instruction text, ...])
        graph = []  # (source address, destination address)

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0  # running sum of instruction[1] within the block
            position = block.start

            for instruction in block:
                # presumably the mnemonic token of the instruction
                mnemonic = instruction[0][0].text
                local_node.append(mnemonic)
                shift += instruction[1]

                # Split on call with the assumption that we only care about
                # x86/64 for now.
                if mnemonic == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                # Nothing was collected after the last split, so no node
                # starts at `position`; the most recently recorded edge is
                # dropped.
                # NOTE(review): this removes a single edge even when several
                # outgoing edges were just appended - confirm that is intended.
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()
        for address, mnemonics in nodes:
            flowgraph.add_node(address)
            # Format conversion: each instruction becomes (mnemonic, ()).
            flowgraph.add_instructions(
                address, tuple((text, ()) for text in mnemonics))

        for source, destination in graph:
            flowgraph.add_edge(source, destination)

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)

        hasher = fss.SimHasher()
        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        """Return an integer id derived from the SHA-256 of a file.

        The id is the first 16 hex digits of the digest of the file's
        contents, parsed as an integer.

        :param filename: path of the file to hash.
        """
        digest = hashlib.sha256()
        # Binary mode: the input is an executable, and text mode would either
        # corrupt the bytes or hand str objects to hashlib.
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b''):
                digest.update(chunk)
        return int(digest.hexdigest()[0:16], 16)

    def save_single_function_hash(self, bv, search_index, function,
                                  exec_id=None):
        """Save the hash of a given function into a given search index.

        :param bv: binary view; ``bv.file.filename`` identifies the binary.
        :param search_index: index object returned by ``init_index``.
        :param function: function to hash and store.
        :param exec_id: optional precomputed ``get_exec_id`` value, so bulk
            callers do not re-hash the whole binary for every function.
        """
        # TODO: detect if we are opening database instead of binary
        if exec_id is None:
            exec_id = self.get_exec_id(bv.file.filename)

        h1, h2 = self.extract_flowgraph_hash(function)
        # Compare against None explicitly: a hash value of 0 is still valid.
        if h1 is None or h2 is None:
            bn.log_info(
                '[-] Did not add function <{:x}:0x{:x}> to search index.'.
                format(exec_id, function.start))
            return

        search_index.add_function(h1, h2, exec_id, function.start)
        bn.log_info(
            '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.
            format(exec_id, function.start, h1, h2))
        self.metadata.add(exec_id, function.start, bv.file.filename,
                          function.name)

    def init_index(self, bv, current_function):
        """Open (or create) the SimHash search index.

        :param bv: binary view whose ``platform.name`` is validated.
        :param current_function: unused; kept for the command signature.
        :returns: the ``fss.SimHashSearchIndex``, or ``-1`` when the platform
            is not in ``supported_arch``.
        """
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        create_index = not os.path.isfile(self.sim_hash_location)
        return fss.SimHashSearchIndex(self.sim_hash_location, create_index, 50)

    def save_hash(self, bv, current_function):
        """Save hash of current function into search index.

        :returns: ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1
        self.save_single_function_hash(bv, search_index, current_function)

    def save_all_functions(self, bv, current_function):
        """Walk through all functions and save them into the index.

        :returns: ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1
        # Hash the binary once instead of once per function.
        exec_id = self.get_exec_id(bv.file.filename)
        for function in bv.functions:
            self.save_single_function_hash(bv, search_index, function,
                                           exec_id)

    def add_report_from_result(self, results, report, address,
                               minimal_match=100):
        """Append the matches for one function to a markdown report.

        :param results: sequence of match tuples; index 0 is the raw score,
            indexes 1 and 2 are the keys passed to ``self.metadata.get``.
        :param report: report text accumulated so far.
        :param address: address of the queried function (used in the header).
        :param minimal_match: only results with a raw score strictly greater
            than this are reported.
        :returns: ``report`` with a section appended, or ``report`` unchanged
            when no result passes the threshold.
        """
        results = [r for r in results if r[0] > minimal_match]
        if not results:
            return report

        report += "## Best match results for 0x{:x}\n".format(address)
        for r in results:
            # Same score expression as before: r[0] / 128 shifted by 0.5,
            # clamped at zero and doubled.
            score = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
            m = self.metadata.get(r[1], r[2])  # file name, function name
            if not m:
                line = "- {:f} - {:x}:0x{:x}".format(score, r[1], r[2])
            else:
                line = "- {:f} - {:x}:0x{:x} {} '{}'".format(
                    score, r[1], r[2], m[0], m[1])
            report += line + "\n"
        return report

    def find_function_hash(self, bv, h1, h2, address, search_index, report):
        """Query the top 5 matches for one hash and append them to ``report``."""
        results = search_index.query_top_N(h1, h2, 5)
        return self.add_report_from_result(results, report, address)

    def find_hash(self, bv, current_function):
        """Find functions similar to the current one and show a report.

        :returns: ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        if h1 is None or h2 is None:
            # The function was too small to hash; log instead of querying.
            bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                current_function.start))
            return

        report = self.find_function_hash(bv, h1, h2, current_function.start,
                                         search_index, "")
        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)

    def find_all_hashes(self, bv, current_function):
        """Search the index for every function in ``bv`` and show one report.

        :returns: ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        report = ""
        for function in bv.functions:
            h1, h2 = self.extract_flowgraph_hash(function)
            if h1 is None or h2 is None:
                bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                    function.start))
                continue
            report = self.find_function_hash(bv, h1, h2, function.start,
                                             search_index, report)

        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)
class FreyaFS(Operations):
    """FUSE operations exposing a Mix&Slice encrypted tree stored under ``root``.

    Paths known to the ``Metadata`` store are presented as regular files whose
    contents are served through a ``Cache``; every other path is passed
    through to the backing directory.
    """

    # stat fields copied verbatim from os.lstat() for passthrough entries.
    _STAT_KEYS = ('st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime',
                  'st_nlink', 'st_size', 'st_uid')
    # statvfs fields reported by statfs().
    _STATVFS_KEYS = ('f_bavail', 'f_bfree', 'f_blocks', 'f_bsize', 'f_favail',
                     'f_ffree', 'f_files', 'f_flag', 'f_frsize', 'f_namemax')

    def __init__(self, root, mountpoint):
        """Load the FreyaFS metadata found under ``root`` and announce the mount.

        :param root: backing directory where the encrypted data is persisted.
        :param mountpoint: mount location; only used in the printed banner.
        """
        self.root = root

        # Retrieve FreyaFS metadata
        self.metadata = Metadata(os.path.join(root, ".freyafs"))

        # Keep track of open files
        self.cache = Cache()

        print("[*] FreyaFS mounted")
        print(f"Now, through the FreyaFS mountpoint ({mountpoint}), you can use a Mix&Slice encrypted filesystem seemlessly.")
        print(f"FreyaFS will persist your encrypted data at {root}.")

    # --------------------------------------------------------------------- Helpers

    def _full_path(self, partial):
        """Map a path inside the mount to the matching path under ``root``."""
        # Drop leading slashes so os.path.join does not discard self.root.
        return os.path.join(self.root, partial.lstrip("/"))

    def _is_file(self, path):
        """Return True when ``path`` exists and getattr() reports the S_IFREG bit."""
        if not os.path.exists(self._full_path(path)):
            return False
        attr = self.getattr(path)
        # NOTE(review): this is a bit test, not stat.S_ISREG(); other file
        # types whose mode includes the S_IFREG bit also pass - confirm.
        return attr['st_mode'] & stat.S_IFREG == stat.S_IFREG

    def _passthrough_attrs(self, st):
        """Copy the reported stat fields from an ``os.lstat`` result."""
        return {key: getattr(st, key) for key in self._STAT_KEYS}

    # --------------------------------------------------------------------- Filesystem methods

    def access(self, path, mode):
        """Raise ``FuseOSError(EACCES)`` when the backing path is not accessible."""
        full_path = self._full_path(path)
        if not os.access(full_path, mode):
            raise FuseOSError(errno.EACCES)

    def chmod(self, path, mode):
        """Change the mode of the backing path."""
        return os.chmod(self._full_path(path), mode)

    def chown(self, path, uid, gid):
        """Change the owner of the backing path."""
        return os.chown(self._full_path(path), uid, gid)

    def getattr(self, path, fh=None):
        """Return the attributes of ``path`` (file or directory).

        Paths tracked by the metadata store are reported as regular files with
        the size recorded in the metadata; everything else reports the
        ``os.lstat`` values of the backing path.
        """
        st = os.lstat(self._full_path(path))

        if path not in self.metadata:
            return self._passthrough_attrs(st)

        try:
            return {
                # Clear the directory bit and present the entry as a file.
                'st_mode': stat.S_IFREG | (st.st_mode & ~stat.S_IFDIR),
                'st_nlink': 1,
                'st_atime': st.st_atime,
                'st_ctime': st.st_ctime,
                'st_gid': st.st_gid,
                'st_mtime': st.st_mtime,
                'st_size': self.metadata[path].size,
                'st_uid': st.st_uid
            }
        except Exception:
            # Best effort: fall back to the raw stat values when the metadata
            # entry cannot be read. KeyboardInterrupt/SystemExit propagate.
            return self._passthrough_attrs(st)

    def readdir(self, path, fh):
        """Yield directory entries, hiding FreyaFS metadata files."""
        full_path = self._full_path(path)

        dirents = ['.', '..']
        if os.path.isdir(full_path):
            dirents.extend(
                entry for entry in os.listdir(full_path)
                if not is_metadata(entry))
        for entry in dirents:
            yield entry

    def readlink(self, path):
        """Return the target of a symlink, relative to ``root`` if absolute."""
        pathname = os.readlink(self._full_path(path))
        if pathname.startswith("/"):
            # Path name is absolute, sanitize it.
            return os.path.relpath(pathname, self.root)
        return pathname

    def mknod(self, path, mode, dev):
        """Create a filesystem node at the backing path."""
        return os.mknod(self._full_path(path), mode, dev)

    def rmdir(self, path):
        """Remove the backing directory."""
        os.rmdir(self._full_path(path))

    def mkdir(self, path, mode):
        """Create the backing directory."""
        os.mkdir(self._full_path(path), mode)

    def statfs(self, path):
        """Return the ``os.statvfs`` fields of the backing path as a dict."""
        stv = os.statvfs(self._full_path(path))
        return {key: getattr(stv, key) for key in self._STATVFS_KEYS}

    def unlink(self, path):
        """Remove ``path`` from the backing store and from the metadata."""
        full_path = self._full_path(path)
        # presumably encrypted files are backed by a directory (getattr()
        # masks S_IFDIR for them), hence rmtree; symlinks and plain files
        # cannot go through rmtree and need a regular unlink.
        if os.path.isdir(full_path) and not os.path.islink(full_path):
            shutil.rmtree(full_path)
        else:
            os.unlink(full_path)
        if path in self.metadata:
            self.metadata.remove(path)

    def symlink(self, name, target):
        """Create a symlink at ``name`` (inside the mount) pointing to ``target``.

        fusepy passes the path of the new link first and the link contents
        second, so the link is created under ``root`` and ``target`` is
        stored as-is.
        """
        return os.symlink(target, self._full_path(name))

    def rename(self, old, new):
        """Rename a file or directory, keeping cache and metadata in sync."""
        full_old_path = self._full_path(old)
        full_new_path = self._full_path(new)

        if self._is_file(old):
            # Renaming a file: an existing destination file is replaced.
            if self._is_file(new):
                self.unlink(new)

            os.rename(full_old_path, full_new_path)
            if full_old_path in self.cache:
                self.cache.rename(full_old_path, full_new_path)
            self.metadata.rename(old, new)
        else:
            # Renaming a directory.
            os.rename(full_old_path, full_new_path)
            self.metadata.renamedir(old, new)

    def link(self, target, name):
        """Create a hard link at ``target`` referring to the existing ``name``.

        fusepy passes the path of the new link first and the existing file
        second, while ``os.link`` expects (existing, new).
        """
        return os.link(self._full_path(name), self._full_path(target))

    def utimens(self, path, times=None):
        """Set access and modification times of the backing path."""
        os.utime(self._full_path(path), times)

    # --------------------------------------------------------------------- File methods

    def open(self, path, flags):
        """Open ``path`` in the cache using its metadata key and IV.

        Always returns 0 as the file handle.
        """
        full_path = self._full_path(path)
        info = self.metadata[path]
        mtime = self.getattr(path)['st_mtime']
        self.cache.open(full_path, info.key, info.iv, mtime)
        return 0

    def create(self, path, mode, fi=None):
        """Register ``path`` in the metadata and create it in the cache.

        Always returns 0 as the file handle.
        """
        full_path = self._full_path(path)
        key, iv = self.metadata.add(path)
        self.cache.create(full_path, key, iv)
        return 0

    def read(self, path, length, offset, fh):
        """Read ``length`` bytes at ``offset`` from the cache, else from ``fh``."""
        full_path = self._full_path(path)
        if full_path in self.cache:
            return self.cache.read_bytes(full_path, offset, length)

        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        """Write ``buf`` at ``offset``; cached files also update the stored size."""
        full_path = self._full_path(path)
        if full_path in self.cache:
            bytes_written = self.cache.write_bytes(full_path, buf, offset)
            self.metadata.update(path, self.cache.get_size(full_path))
            return bytes_written

        os.lseek(fh, offset, os.SEEK_SET)
        return os.write(fh, buf)

    def truncate(self, path, length, fh=None):
        """Truncate ``path`` to ``length`` bytes in the cache or on disk."""
        full_path = self._full_path(path)
        if full_path in self.cache:
            self.cache.truncate_bytes(full_path, length)
            self.metadata.update(path, length)
            return

        with open(full_path, 'r+') as f:
            f.truncate(length)

    def flush(self, path, fh):
        """Flush a cached file with its key and IV, else fsync ``fh``."""
        full_path = self._full_path(path)
        if full_path in self.cache:
            info = self.metadata[path]
            self.cache.flush(full_path, info.key, info.iv)
            return 0

        return os.fsync(fh)

    def release(self, path, fh):
        """Release a cached file, else close ``fh``."""
        full_path = self._full_path(path)
        if full_path in self.cache:
            self.cache.release(full_path)
            return 0

        return os.close(fh)

    def fsync(self, path, fdatasync, fh):
        """Delegate to flush()."""
        return self.flush(path, fh)