def fuzzy_search(self, args, file, opts):
    """Find stored files whose ssdeep hash is similar to *file*'s.

    Computes the ssdeep hash of the target file, compares it against the
    ssdeep of every FILE-type document in the database, and collects the
    matches scoring above 40.

    Returns a list of (name, sha256, '/store/<sha256>', score) tuples
    (name and score as strings).
    """
    # ssdeep of the target file, computed on demand from its sha256.
    fuzzy = bytes(
        self.ssdeep(None, file.sha256_digest)['ssdeep'], 'utf-8')  # pylint: disable=no-value-for-parameter
    # Only get file type 'file'
    documents = db.file_collection.select_all(
        {'file_type': enums.FileType.FILE})
    results = []
    for document in documents:
        if 'ssdeep' not in document:
            # Hash not stored on the document: compute it now.
            _ssdeep = self.ssdeep(None, document['sha256_digest'])['ssdeep']  # pylint: disable=no-value-for-parameter
        else:
            _ssdeep = document['ssdeep']
        _ssdeep = bytes(_ssdeep, 'utf-8')
        if _ssdeep == fuzzy:
            # Identical hash: almost certainly the target file itself.
            continue
        score = pydeep.compare(fuzzy, _ssdeep)
        if score > 40:
            # BUG FIX: reuse the already-computed score instead of
            # calling pydeep.compare() a second time.
            results.append((document['name'], document['sha256_digest'],
                            document['file_type'], score))
    # Shape the matches for the caller: (name, sha256, store path, score).
    return [(str(name), sha256, '/store/' + sha256, str(score))
            for name, sha256, _file_type, score in results]
def ssdeeprate(self):
    """Collect ssdeep hashes for the samples in self.md5_list and print
    the pydeep similarity score for every distinct pair (Python 2)."""
    json_list = self.get_result(self.md5_list)
    sdeep_list = []
    for sample in json_list:
        # Walk each report down the path target -> file -> ssdeep.
        key_list = ['target', 'file', 'ssdeep']
        kee = None
        result = self.get_item(sample, kee, *key_list)
        sdeep_list.append(result)
        print result
    i = 0
    # Compare each hash against every later one: i pairs with i+1..end,
    # so each unordered pair is printed exactly once.
    for x in sdeep_list[i:]:
        for y in sdeep_list[i + 1:]:
            print pydeep.compare(x, y), x, ' || ', y
        i += 1
def run(self, obj, config):
    """Compare *obj*'s ssdeep hash against candidate samples and record
    every match at or above the configured threshold (default 50).

    Candidates are pre-filtered by mimetype (when known) and by ssdeep
    chunk size (2x, 1x and 0.5x the target's), since ssdeep only yields
    meaningful scores for hashes with related block sizes.
    """
    threshold = config.get("threshold", 50)
    target_ssdeep = obj.ssdeep
    target_md5 = obj.md5
    target_mimetype = obj.mimetype
    if not target_ssdeep:
        # BUG FIX: logger.error was being *assigned* a string instead of
        # being called, so the failure was never logged.
        logger.error("Could not get the target ssdeep value for sample")
        self._error("Could not get the target ssdeep value for sample")
        return
    # setup the sample space to compare against
    # first use the mimetype as a comparator if available
    query_filter = {}
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype
    # then use only samples with a multiple of chunksize
    chunk_size = int(target_ssdeep.split(":")[0])
    # BUG FIX: '"^%d:" % chunk_size * 2' repeated the *string* twice
    # ("^64:^64:") due to precedence; parenthesize the arithmetic.
    query_filter["$or"] = [
        {"ssdeep": {"$regex": "^%d:" % (chunk_size * 2)}},
        {"ssdeep": {"$regex": "^%d:" % chunk_size}},
        {"ssdeep": {"$regex": "^%d:" % (chunk_size // 2)}},
    ]
    result_filter = {'md5': 1, 'ssdeep': 1, 'description': 1}
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_filter)
    match_list = []
    for candidate in candidate_space:
        if "ssdeep" in candidate:
            score = pydeep.compare(target_ssdeep, candidate["ssdeep"])
            # Skip the sample itself and anything below threshold.
            if score >= threshold and candidate["md5"] != target_md5:
                match_list.append({'md5': candidate["md5"],
                                   'description': candidate["description"],
                                   'score': score})
    # finally sort the results, best score first
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        self._add_result("ssdeep_match (MD5)", match["md5"],
                         {'description': match["description"],
                          'score': match["score"]})
def run(self):
    """Match dropped files that are fuzzily similar to the submitted
    sample (possible polymorphic copies of itself).

    Returns the signature's match status via self.has_matches().
    """
    if self.results["target"]["category"] == "file":
        target_ssdeep = self.results["target"]["file"]["ssdeep"]
        target_sha1 = self.results["target"]["file"]["sha1"]
        target_size = self.results["target"]["file"]["size"]
        # Idiom fix: '== ""' / '== None' replaced with a truthiness test.
        if not target_ssdeep:
            return False
        for drop in self.results["dropped"]:
            if drop["sha1"] == target_sha1:
                continue  # the exact same file
            # Only consider drops within 1 KiB of the target's size.
            if fabs(target_size - drop["size"]) >= 1024:
                continue
            drop_ssdeep = drop["ssdeep"]
            if not drop_ssdeep:
                continue
            try:
                if pydeep.compare(target_ssdeep, drop_ssdeep) > 20:
                    self.add_match(None, 'dropped file', drop)
            except Exception:
                # pydeep raises on malformed hashes; skip such drops
                # (was a bare 'except:', which also swallowed SystemExit).
                continue
    return self.has_matches()
def _scan(self, context):
    """Compare the sample's ssdeep hash against candidate metadata and
    record matches at or above the configured threshold (default 50).

    Candidates are pre-filtered by mimetype (when known) and by ssdeep
    chunk size (2x, 1x and 0.5x the target's).
    """
    threshold = self.config.get("threshold", 50)
    target_ssdeep = context.sample_dict.get("ssdeep", None)
    target_md5 = context.md5
    target_mimetype = context.sample_dict.get("mimetype", None)
    if not target_ssdeep:
        # BUG FIX: logger.error was being *assigned* a string instead of
        # being called, so the failure was never logged.
        logger.error("Could not get the target ssdeep value for sample")
        self._error("Could not get the target ssdeep value for sample")
        return
    # setup the sample space to compare against
    # first use the mimetype as a comparator if available
    query_filter = {}
    if target_mimetype:
        query_filter["mimetype"] = target_mimetype
    # then use only samples with a multiple of chunksize
    chunk_size = int(target_ssdeep.split(":")[0])
    # BUG FIX: '"^%d:" % chunk_size * 2' repeated the *string* twice
    # ("^64:^64:") due to precedence; parenthesize the arithmetic.
    query_filter["$or"] = [
        {"ssdeep": {"$regex": "^%d:" % (chunk_size * 2)}},
        {"ssdeep": {"$regex": "^%d:" % chunk_size}},
        {"ssdeep": {"$regex": "^%d:" % (chunk_size // 2)}},
    ]
    result_filter = {"md5": 1, "ssdeep": 1}
    candidate_space = self._fetch_meta(query_filter, result_filter)
    match_list = []
    for candidate in candidate_space:
        if "ssdeep" in candidate:
            score = pydeep.compare(target_ssdeep, candidate["ssdeep"])
            # Skip the sample itself and anything below threshold.
            if score >= threshold and candidate["md5"] != target_md5:
                match_list.append({"md5": candidate["md5"], "score": score})
    # finally sort the results, best score first
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        self._add_result("ssdeep_match", match["md5"],
                         {"md5": match["md5"], "score": match["score"]})
def on_complete(self):
    """Check dropped files for fuzzy similarity to the submitted sample,
    marking matching guest paths and the match percentage as IOCs.

    Returns True when at least one dropped file looks like a polymorphic
    version of the target.
    """
    package = self.get_results("info", {})["package"]
    found_polymorphic = False
    target_ssdeep = self.get_results("target", {})["file"]["ssdeep"]
    target_sha1 = self.get_results("target", {})["file"]["sha1"]
    target_size = self.get_results("target", {})["file"]["size"]
    # Idiom fix: truthiness test instead of '== ""' / '== None'; also
    # removed a dead 'if True:' wrapper around the loop.
    if not target_ssdeep:
        return False
    for drop in self.get_results("dropped", {}):
        # Excel drops a copy of the document itself into %TEMP%; that is
        # not a polymorphic drop, so skip it.
        if package == "xls" and len(drop["guest_paths"]) == 1 and \
                drop["guest_paths"][0].endswith(
                    "\\Temp\\" + self.get_results("target", {})["file"]["name"]):
            continue
        if drop["sha1"] == target_sha1:
            continue  # the exact same file
        # Only consider drops within 1 KiB of the target's size.
        if fabs(target_size - drop["size"]) >= 1024:
            continue
        drop_ssdeep = drop["ssdeep"]
        if not drop_ssdeep:
            continue
        try:
            percent = pydeep.compare(target_ssdeep, drop_ssdeep)
            if percent > 20:
                found_polymorphic = True
                for path in drop["guest_paths"]:
                    self.mark_ioc("file", path)
                self.mark_ioc("percent_match", percent)
        except Exception:
            # pydeep raises on malformed hashes; skip such drops
            # (was a bare 'except:').
            continue
    return found_polymorphic
def run(self):
    """Report dropped files that are fuzzily similar to the submitted
    sample; appends matching paths and percentages to self.data.

    Returns True when any dropped file scores above 20%.
    """
    found_polymorphic = False
    if self.results["target"]["category"] == "file":
        target_ssdeep = self.results["target"]["file"]["ssdeep"]
        target_sha1 = self.results["target"]["file"]["sha1"]
        target_size = self.results["target"]["file"]["size"]
        # Idiom fix: truthiness test instead of '== ""' / '== None'.
        if not target_ssdeep:
            return False
        for drop in self.results["dropped"]:
            if drop["sha1"] == target_sha1:
                continue  # the exact same file
            # Only consider drops within 1 KiB of the target's size.
            if fabs(target_size - drop["size"]) >= 1024:
                continue
            drop_ssdeep = drop["ssdeep"]
            if not drop_ssdeep:
                continue
            try:
                percent = pydeep.compare(target_ssdeep, drop_ssdeep)
                if percent > 20:
                    found_polymorphic = True
                    for path in drop["guest_paths"]:
                        self.data.append({"file" : path})
                    self.data.append({"percent_match" : percent})
            except Exception:
                # pydeep raises on malformed hashes; skip such drops
                # (was a bare 'except:').
                continue
    return found_polymorphic
def ssdeep_compare(fuzzy, md5):
    """Compare *fuzzy* (ssdeep hash) against every URL sample in the db
    and render the matches above fuzzy_threshold as HTML.

    fuzzy_threshold is defined in settings.py - default = 10.
    Returns one "<a href='../<md5>'>uri</a>\\tscore\\r\\n" row per match.
    """
    all_samples = URL.objects.all()
    res_dict = {}
    for sample in all_samples:
        # Skip the file being compared and samples with no fuzzy hash.
        if sample.md5 != md5 and sample.fuzzy:
            fuzzy_res = pydeep.compare(fuzzy, sample.fuzzy)
            if fuzzy_res > fuzzy_threshold:
                res_dict[sample.md5] = [str(fuzzy_res), sample.uri]
    # Build the output with join instead of quadratic '+=' concatenation;
    # .items() replaces py2-only .iteritems().
    parts = []
    for k, v in res_dict.items():
        parts.append("<a href='../" + k + "'>" + v[1] + "</a>\t" +
                     v[0] + "\r\n")
    return "".join(parts)
def run(self):
    """Report dropped files fuzzily similar to the submitted sample,
    ignoring the self-copy Excel leaves in %TEMP% for xls packages.

    Appends matching paths/percentages to self.data; returns True when
    any dropped file scores above 20%.
    """
    package = self.results["info"]["package"]
    found_polymorphic = False
    target_ssdeep = self.results["target"]["file"]["ssdeep"]
    target_sha1 = self.results["target"]["file"]["sha1"]
    target_size = self.results["target"]["file"]["size"]
    # Idiom fix: truthiness test instead of '== ""' / '== None'.
    if not target_ssdeep:
        return False
    for drop in self.results["dropped"]:
        # Excel drops a copy of the document itself into %TEMP%; skip it.
        if package == "xls" and len(drop["guest_paths"]) == 1 and \
                drop["guest_paths"][0].endswith(
                    "\\Temp\\" + self.results["target"]["file"]["name"]):
            continue
        if drop["sha1"] == target_sha1:
            continue  # the exact same file
        # Only consider drops within 1 KiB of the target's size.
        if fabs(target_size - drop["size"]) >= 1024:
            continue
        drop_ssdeep = drop["ssdeep"]
        if not drop_ssdeep:
            continue
        try:
            percent = pydeep.compare(target_ssdeep, drop_ssdeep)
            if percent > 20:
                found_polymorphic = True
                for path in drop["guest_paths"]:
                    self.data.append({"file" : path})
                self.data.append({"percent_match" : percent})
        except Exception:
            # pydeep raises on malformed hashes; skip such drops
            # (was a bare 'except:').
            continue
    return found_polymorphic
def run(self):
    """Print every stored sample whose ssdeep hash is more than 40%
    similar to the currently opened file."""
    # Preconditions: an open session, pydeep available, and an ssdeep
    # hash for the current file.
    if not __session__.is_set():
        print_error("No session opened")
        return
    if not HAVE_PYDEEP:
        print_error("Missing dependency, install pydeep (`pip install pydeep`)")
        return
    if not __session__.file.ssdeep:
        print_error("No ssdeep hash available for opened file")
        return
    db = Database()
    for entry in db.find(key='all'):
        # Never compare the file to itself; skip hashless entries.
        if entry.sha256 == __session__.file.sha256 or not entry.ssdeep:
            continue
        similarity = pydeep.compare(__session__.file.ssdeep, entry.ssdeep)
        if similarity > 40:
            print("Match {0}%: {1}".format(similarity, entry.sha256))
def run(self):
    """Detect dropped files that are fuzzy-hash-similar to the target
    file and record them in self.data.

    Returns True when any dropped file scores above 20% similarity.
    """
    found_polymorphic = False
    if self.results["target"]["category"] == "file":
        target_ssdeep = self.results["target"]["file"]["ssdeep"]
        target_sha1 = self.results["target"]["file"]["sha1"]
        target_size = self.results["target"]["file"]["size"]
        # Idiom fix: truthiness test instead of '== ""' / '== None'.
        if not target_ssdeep:
            return False
        for drop in self.results["dropped"]:
            if drop["sha1"] == target_sha1:
                continue  # the exact same file
            # Only consider drops within 1 KiB of the target's size.
            if fabs(target_size - drop["size"]) >= 1024:
                continue
            drop_ssdeep = drop["ssdeep"]
            if not drop_ssdeep:
                continue
            try:
                percent = pydeep.compare(target_ssdeep, drop_ssdeep)
                if percent > 20:
                    found_polymorphic = True
                    for path in drop["guest_paths"]:
                        self.data.append({"file": path})
                    self.data.append({"percent_match": percent})
            except Exception:
                # pydeep raises on malformed hashes; skip such drops
                # (was a bare 'except:').
                continue
    return found_polymorphic
def search(md5, page=0):
    """Rank all cached files by ssdeep similarity to the file with the
    given md5 and return one page (10 entries) of results as JSON.

    Note: page numbering is effectively 1-based; any out-of-range page
    (including the default 0) falls back to the first 10 results.
    Note: the searched file itself is included in the ranking (score 100).
    """
    import pydeep
    if not md5:
        return jsonify({"result": False, "msg": "File not found"})
    fileobjs = cache_ssdeep()
    # Locate the ssdeep hash of the requested file.
    search_ssdeep = ""
    for fileobj in fileobjs:
        # Idiom fix: membership test on the dict, not on .keys().
        if "md5" in fileobj and fileobj['md5'] == md5:
            search_ssdeep = fileobj['ssdeep']
    if not search_ssdeep:
        return jsonify({"result": False, "msg": "No such file:" + str(md5)})
    # Score every cached file against the target hash.
    results = list()
    for fileobj in fileobjs:
        if "ssdeep" in fileobj:
            score = pydeep.compare(search_ssdeep, fileobj['ssdeep'])
            results.append((score, fileobj))
    # Best score first (kept as sort-then-reverse to preserve the
    # original tie ordering exactly).
    results = sorted(results, key=lambda x: x[0])[::-1]
    start = (page - 1) * 10
    end = page * 10
    if 0 < start < len(results):
        return jsonify({"result": True, "data": results[start: end]})
    return jsonify({"result": True, "data": results[0: 10]})
def run(self):
    """Flag dropped files fuzzily similar to the submitted sample,
    skipping the self-copy Excel leaves in %TEMP% for xls packages.

    Appends matches to self.data; returns True on any score above 20%.
    """
    package = self.results["info"]["package"]
    found_polymorphic = False
    target_ssdeep = self.results["target"]["file"]["ssdeep"]
    target_sha1 = self.results["target"]["file"]["sha1"]
    target_size = self.results["target"]["file"]["size"]
    # Idiom fix: truthiness test instead of '== ""' / '== None'.
    if not target_ssdeep:
        return False
    for drop in self.results["dropped"]:
        # Excel drops a copy of the document itself into %TEMP%; skip it.
        if package == "xls" and len(
                drop["guest_paths"]) == 1 and drop["guest_paths"][
                    0].endswith("\\Temp\\" +
                                self.results["target"]["file"]["name"]):
            continue
        if drop["sha1"] == target_sha1:
            continue  # the exact same file
        # Only consider drops within 1 KiB of the target's size.
        if fabs(target_size - drop["size"]) >= 1024:
            continue
        drop_ssdeep = drop["ssdeep"]
        if not drop_ssdeep:
            continue
        try:
            percent = pydeep.compare(target_ssdeep, drop_ssdeep)
            if percent > 20:
                found_polymorphic = True
                for path in drop["guest_paths"]:
                    self.data.append({"file": path})
                self.data.append({"percent_match": percent})
        except Exception:
            # pydeep raises on malformed hashes; skip such drops
            # (was a bare 'except:').
            continue
    return found_polymorphic
def on_complete(self):
    """Mark dropped files that are fuzzily similar (>20%) to the
    analyzed target file as possible polymorphic versions of it."""
    if not HAVE_SSDEEP:
        return
    if self.get_results("target", {}).get("category") != "file":
        return
    target_file = self.get_results("target", {})["file"]
    ref_hash = target_file["ssdeep"]
    ref_sha1 = target_file["sha1"]
    ref_size = target_file["size"]
    if not ref_hash:
        return
    for dropped in self.get_results("dropped", []):
        # Ignore the sample itself and files differing by >= 1 KiB.
        if dropped["sha1"] == ref_sha1:
            continue
        if fabs(ref_size - dropped["size"]) >= 1024:
            continue
        candidate = dropped["ssdeep"]
        if not candidate:
            continue
        if pydeep.compare(ref_hash, candidate) > 20:
            self.mark(
                file=dropped,
                description="Possibly a polymorphic version of itself")
    return self.has_marks()
def run(self, obj, config):
    """Compare *obj*'s ssdeep hash against candidate samples and record
    every match at or above the configured threshold (default 50).

    Candidates are pre-filtered by mimetype (when known) and by ssdeep
    chunk size (2x, 1x and 0.5x the target's).
    """
    threshold = config.get("threshold", 50)
    target_ssdeep = obj.ssdeep
    target_md5 = obj.md5
    target_mimetype = obj.mimetype
    if not target_ssdeep:
        # BUG FIX: logger.error was being *assigned* a string instead of
        # being called, so the failure was never logged.
        logger.error("Could not get the target ssdeep value for sample")
        self._error("Could not get the target ssdeep value for sample")
        return
    # setup the sample space to compare against
    # first use the mimetype as a comparator if available
    query_filter = {}
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype
    # then use only samples with a multiple of chunksize
    chunk_size = int(target_ssdeep.split(":")[0])
    # BUG FIX: '"^%d:" % chunk_size * 2' repeated the *string* twice
    # ("^64:^64:") due to precedence; parenthesize the arithmetic.
    query_filter["$or"] = [
        {"ssdeep": {"$regex": "^%d:" % (chunk_size * 2)}},
        {"ssdeep": {"$regex": "^%d:" % chunk_size}},
        {"ssdeep": {"$regex": "^%d:" % (chunk_size // 2)}},
    ]
    result_filter = {'md5': 1, 'ssdeep': 1}
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_filter)
    match_list = []
    for candidate in candidate_space:
        if "ssdeep" in candidate:
            score = pydeep.compare(target_ssdeep, candidate["ssdeep"])
            # Skip the sample itself and anything below threshold.
            if score >= threshold and candidate["md5"] != target_md5:
                match_list.append({'md5': candidate["md5"], 'score': score})
    # finally sort the results, best score first
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        self._add_result("ssdeep_match", match["md5"],
                         {'md5': match["md5"], 'score': match["score"]})
def hash_compare(hash, mem_hashes):
    """Print every memory-region hash that is >20% similar to *hash*.

    hash       -- ssdeep hash to compare against (NOTE: shadows the
                  builtin `hash`; name kept for interface compatibility).
    mem_hashes -- mapping of memory offset -> ssdeep hash (Python 2:
                  iterated with iteritems()).
    """
    # Running counter; presumably a row index for the printed output —
    # TODO confirm whether it should count all entries or only matches.
    i = 1
    for offset, mem_hash in mem_hashes.iteritems():
        like = pydeep.compare(hash, mem_hash)
        if like > 20:
            # counter, decimal offset, hex offset, similarity score
            print i, offset, hex(offset), like
        i += 1
def on_complete(self):
    """Flag dropped files whose ssdeep similarity to the analyzed file
    exceeds 20% as possible polymorphic versions of it."""
    if not HAVE_SSDEEP:
        return
    if self.get_results("target", {}).get("category") != "file":
        return
    target_ssdeep = self.get_results("target", {})["file"]["ssdeep"]
    target_sha1 = self.get_results("target", {})["file"]["sha1"]
    target_size = self.get_results("target", {})["file"]["size"]
    if not target_ssdeep:
        return
    for drop in self.get_results("dropped", []):
        skip = (
            # the sample itself
            drop["sha1"] == target_sha1 or
            # size differs by a kilobyte or more
            fabs(target_size - drop["size"]) >= 1024 or
            # no hash available for the drop
            not drop["ssdeep"]
        )
        if skip:
            continue
        if pydeep.compare(target_ssdeep, drop["ssdeep"]) > 20:
            self.mark(file=drop,
                      description="Possibly a polymorphic version of itself")
    return self.has_marks()
def ssdeep_compare_ssdeep(hash1, hash2):
    """Return a JSON response carrying the pydeep similarity score of
    two ssdeep hashes; on any failure, result is False and data holds
    the error text."""
    import pydeep
    try:
        return jsonify({
            "result": True,
            "data": pydeep.compare(hash1, hash2),
        })
    except Exception as e:
        return jsonify({"result": False, "data": str(e)})
def fuzzycompare(Rows, thehash):
    """Compare *thehash* against the fuzzy hash stored in column 1 of
    each row; return [[row_id, percent], ...] for scores above zero."""
    matches = []
    if Rows:
        for row in Rows:
            score = pydeep.compare(thehash, row[1])
            if score > 0:
                matches.append([row[0], score])
    return matches
def find_matches(key, r):
    """For the redis set of similar hashes at *key*, score one member's
    ssdeep against the rest and store every nonzero score symmetrically
    in 'matches_<sha256>' sorted sets, via a single pipeline."""
    members = r.smembers(key)
    if len(members) <= 1:
        return
    base = members.pop()
    base_ssdeep = r.hget(base, 'ssdeep')
    pipe = r.pipeline(False)
    for other in members:
        score = pydeep.compare(base_ssdeep, r.hget(other, 'ssdeep'))
        if score > 0:
            # Record the match in both directions.
            pipe.zadd('matches_{}'.format(base), score, other)
            pipe.zadd('matches_{}'.format(other), score, base)
    pipe.execute()
def run(self):
    """List stored samples whose ssdeep score against the open file
    exceeds 40%, as a table (plus per-match lines when --verbose)."""
    super(Fuzzy, self).run()
    # Preconditions: open session, pydeep installed, ssdeep available.
    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return
    if not HAVE_PYDEEP:
        self.log(
            'error',
            "Missing dependency, install pydeep (`pip install pydeep`)")
        return
    if not __sessions__.current.file.ssdeep:
        self.log('error', "No ssdeep hash available for opened file")
        return
    verbose = bool(self.args and self.args.verbose)
    matches = []
    for sample in Database().find(key='all'):
        # Never match the open file against itself; skip hashless rows.
        if sample.sha256 == __sessions__.current.file.sha256:
            continue
        if not sample.ssdeep:
            continue
        score = pydeep.compare(__sessions__.current.file.ssdeep,
                               sample.ssdeep)
        if score <= 40:
            continue
        matches.append(['{0}%'.format(score), sample.name, sample.sha256])
        if verbose:
            self.log(
                'info',
                "Match {0}%: {2} [{1}]".format(score, sample.name,
                                               sample.sha256))
    self.log('info', "{0} relevant matches found".format(bold(len(matches))))
    if matches:
        self.log('table',
                 dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def ssdeep_compare_tasks(task_id1, task_id2):
    """Compare the ssdeep hashes of two analysis tasks' target files.

    Loads each task's report.json, extracts target->file->ssdeep, and
    returns a JSON response with the pydeep similarity score; on any
    failure the response carries result=False and the error text.
    """
    import pydeep

    def _target_ssdeep(task_id):
        # Read one task's report and pull out the target's ssdeep.
        # BUG FIX: the report files were opened and never closed; use a
        # context manager so the handle is always released.
        report_path = cwd("storage", "analyses", "%d" % task_id,
                          "reports", "report.json")
        with open(report_path, "rb") as report_file:
            report_content = json.loads(report_file.read())
        return report_content['target']['file']['ssdeep']

    try:
        result = pydeep.compare(_target_ssdeep(task_id1),
                                _target_ssdeep(task_id2))
        return jsonify({"result": True, "data": result})
    except Exception as e:
        return jsonify({"result": False, "data": str(e)})
def find_matches(self, key):
    """Score one member of the similar-hash set at *key* against the
    rest; persist nonzero scores symmetrically in 'ssdeep:matches_*'
    sorted sets and index those set names in 'ssdeep:all_matches'."""
    members = self.r.smembers(key)
    if len(members) <= 1:
        return
    base = members.pop()
    base_ssdeep = self.r.hget(base, "ssdeep")
    pipe = self.r.pipeline(False)
    for other in members:
        score = pydeep.compare(base_ssdeep.encode("utf-8"),
                               self.r.hget(other, "ssdeep").encode("utf-8"))
        if score <= 0:
            continue
        match_key_base = "ssdeep:matches_{}".format(base)
        match_key_other = "ssdeep:matches_{}".format(other)
        # Record the match in both directions and index both sets.
        pipe.zadd(match_key_base, score, other)
        pipe.zadd(match_key_other, score, base)
        pipe.sadd("ssdeep:all_matches", match_key_base)
        pipe.sadd("ssdeep:all_matches", match_key_other)
    pipe.execute()
def ssdeep_compare(ssdeep, md5, all_samples):
    # Compare ssdeep hash of file to all files in db
    # fuzzy_threshold defined in settings.py - default = 10
    # Returns matches as comma-separated MD5 hashes
    # NOTE: Python 2 code; the print statements below are debug output.
    matches = []
    for sample in all_samples:
        print sample
        # Skip the file being compared against itself.
        if md5 != sample.md5:
            print "needle", ssdeep
            print "haystack", sample.ssdeep
            fuzzy_res = pydeep.compare(ssdeep, sample.ssdeep)
            if fuzzy_res >= fuzzy_threshold:
                matches.append(sample.md5)
    return ",".join(matches)
def scan(self, offset, maxlen):
    """Yield (page_hash, address, score) for each 0x1000-byte page in
    [offset, offset + maxlen) whose ssdeep hash is more than 10%
    similar to self.pydeep_hash."""
    reference = self.pydeep_hash
    address = offset
    end = offset + maxlen
    while address < end:
        # Read one page of memory and hash it.
        page = self.address_space.zread(address, 0x1000)
        if page:
            page_hash = pydeep.hash_buf(page)
            score = pydeep.compare(reference, page_hash)
            if score > 10:
                yield page_hash, address, score
        address += 0x1000
def run(self):
    """Compare the open file's ssdeep hash against every stored sample
    and report the ones scoring above 40%."""
    super(Fuzzy, self).run()
    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return
    if not HAVE_PYDEEP:
        self.log('error', "Missing dependency, install pydeep (`pip install pydeep`)")
        return
    current_file = __sessions__.current.file
    if not current_file.ssdeep:
        self.log('error', "No ssdeep hash available for opened file")
        return
    # --verbose prints each match as it is found.
    arg_verbose = True if (self.args and self.args.verbose) else False
    db = Database()
    samples = db.find(key='all')
    matches = []
    for sample in samples:
        # Skip the open file itself and samples without an ssdeep hash.
        if sample.sha256 == current_file.sha256 or not sample.ssdeep:
            continue
        score = pydeep.compare(current_file.ssdeep, sample.ssdeep)
        if score > 40:
            matches.append(['{0}%'.format(score), sample.name, sample.sha256])
            if arg_verbose:
                self.log('info', "Match {0}%: {2} [{1}]".format(
                    score, sample.name, sample.sha256))
    self.log('info', "{0} relevant matches found".format(bold(len(matches))))
    if matches:
        self.log('table', dict(header=['Score', 'Name', 'SHA256'],
                               rows=matches))
def compare_hashes(self, config, my_fuzzy):
    """ Compare the current hash to all of the fuzzy hashes already collected.

        Returns a list of [md5, percent] pairs for samples whose fuzzy
        hash scores above zero (excluding this sample itself), or None
        if the database query fails.
    """
    log = logging.getLogger('Mastiff.Plugins.' + self.name + '.compare')
    db = DB.open_db_conf(config)
    conn = db.cursor()
    log.info('Comparing fuzzy hashes.')
    fuzz_results = list()
    my_md5 = config.get_var('Misc', 'hashes')[0]
    query = 'SELECT md5, fuzzy FROM mastiff WHERE fuzzy NOT NULL'
    try:
        # compare current hash for all fuzzy hashes
        for results in conn.execute(query):
            percent = pydeep.compare(my_fuzzy, results[1])
            # ignore non-matches and this sample itself
            if percent > 0 and my_md5 != results[0]:
                fuzz_results.append([results[0], percent])
    # FIX: 'except sqlite3.OperationalError, err' is Python-2-only
    # syntax; 'as' works on both 2.6+ and 3.
    except sqlite3.OperationalError as err:
        log.error('Could not grab other fuzzy hashes: %s', err)
        return None
    # BUG FIX: the collected results were never returned.
    return fuzz_results
#!/usr/bin/env python
# ssdeep massive comparison engine.
#
# Usage: script.py <file-to-hash> <hash-list-file>
# Hashes the first argument with pydeep and compares it against every
# ssdeep hash listed (one per line) in the second argument.
#
# BUG FIXES vs. the original (self-described as broken):
#  - deepImp returned the undefined name 'ssdeepDir' (NameError);
#    it now returns the list it builds.
#  - 'row[1]' took the second *character* of each line instead of the
#    line itself; lines are now stripped and kept whole.
#  - pydeep.compare was called with the hash-list *filename* instead of
#    the hashes read from it.
#  - the result message concatenated a list into a string (TypeError);
#    one line is now printed per candidate hash.
import sys
import pydeep

pd0 = pydeep.hash_file(sys.argv[1])
pd1 = sys.argv[2]


def deepImp(hash_file_path):
    """Return the list of ssdeep hashes read from *hash_file_path*,
    one non-empty line per hash."""
    hashes = []
    with open(hash_file_path, 'r') as deep_files:
        for row in deep_files:
            row = row.strip()
            if row:
                hashes.append(row)
    return hashes


for candidate in deepImp(pd1):
    score = pydeep.compare(pd0, candidate)
    print("SSDeep has determined that the DeepImportHash between %s and %s "
          "are %s%% alike." % (sys.argv[1], candidate, score))
def process_file(self, file_path, CAPE_output, append_file): """Process file. @return: file_info """ global cape_config cape_name = "" strings = [] buf = self.options.get("buffer", BUFSIZE) if file_path.endswith("_info.txt"): return texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f}) is_binary_file = lambda bytes: bool(bytes.translate(None, textchars)) if os.path.exists(file_path + "_info.txt"): with open(file_path + "_info.txt", 'r') as f: metastring = f.readline() else: metastring = "" file_info = File(file_path, metastring).get_all() # Get the file data with open(file_info["path"], "r") as file_open: file_data = file_open.read(buf + 1) if is_binary_file(file_data[:8192]): file_info["data"] = None else: if len(file_data) > buf: file_info["data"] = convert_to_printable(file_data[:buf] + " <truncated>") else: file_info["data"] = convert_to_printable(file_data) metastrings = metastring.split(",") if len(metastrings) > 1: file_info["pid"] = metastrings[1] if len(metastrings) > 2: file_info["process_path"] = metastrings[2] file_info["process_name"] = metastrings[2].split("\\")[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[3] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings != "": try: file_info["cape_type_code"] = int(metastrings[0]) except Exception as e: pass if file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" if file_info["cape_type_code"] == INJECTION_PE: file_info["cape_type"] = "Injected PE Image" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == INJECTION_SHELLCODE: file_info["cape_type"] = "Injected Shellcode/Data" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] 
file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == INJECTION_SECTION: file_info["cape_type"] = "Injected Section" if len(metastrings) > 4: file_info["section_handle"] = metastrings[4] if file_info["cape_type_code"] == EXTRACTION_PE: file_info["cape_type"] = "Extracted PE Image" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] if file_info["cape_type_code"] == EXTRACTION_SHELLCODE: file_info["cape_type"] = "Extracted Shellcode" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # PlugX if file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" plugx_parser = plugx.PlugXConfig() plugx_config = plugx_parser.parse_config( file_data, len(file_data)) if not "cape_config" in cape_config and plugx_config: cape_config["cape_config"] = {} for key, value in plugx_config.items(): cape_config["cape_config"].update({key: [value]}) cape_name = "PlugX" else: log.error( "CAPE: PlugX config parsing failure - size many not be handled." 
) append_file = False if file_info["cape_type_code"] == PLUGX_PAYLOAD: file_info["cape_type"] = "PlugX Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # EvilGrab if file_info["cape_type_code"] == EVILGRAB_PAYLOAD: file_info["cape_type"] = "EvilGrab Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] == EVILGRAB_DATA: cape_name = "EvilGrab" file_info["cape_type"] = "EvilGrab Data" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if file_info["size"] == 256 or file_info["size"] == 260: ConfigItem = "filepath" ConfigData = format(file_data) cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if file_info["size"] > 0x1000: append_file = True else: append_file = False # Sedreco if file_info["cape_type_code"] == SEDRECO_DATA: cape_name = "Sedreco" cape_config["cape_type"] = "Sedreco Config" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if len(metastrings) > 4: SedrecoConfigIndex = metastrings[4] if SedrecoConfigIndex == '0x0': ConfigItem = "Timer1" elif SedrecoConfigIndex == '0x1': ConfigItem = "Timer2" elif SedrecoConfigIndex == '0x2': ConfigItem = "Computer Name" elif SedrecoConfigIndex == '0x3': ConfigItem = "C&C1" elif SedrecoConfigIndex == '0x4': ConfigItem = "C&C2" elif 
SedrecoConfigIndex == '0x5': ConfigItem = "Operation Name" elif SedrecoConfigIndex == '0x6': ConfigItem = "Keylogger MaxBuffer" elif SedrecoConfigIndex == '0x7': ConfigItem = "Keylogger MaxTimeout" elif SedrecoConfigIndex == '0x8': ConfigItem = "Keylogger Flag" elif SedrecoConfigIndex == '0x9': ConfigItem = "C&C3" else: ConfigItem = "Unknown" ConfigData = format(file_data) if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) append_file = False # Cerber if file_info["cape_type_code"] == CERBER_CONFIG: file_info["cape_type"] = "Cerber Config" cape_config["cape_type"] = "Cerber Config" cape_name = "Cerber" if not "cape_config" in cape_config: cape_config["cape_config"] = {} ConfigItem = "JSON Data" parsed = json.loads(file_data.rstrip(b'\0')) ConfigData = json.dumps(parsed, indent=4, sort_keys=True) cape_config["cape_config"].update({ConfigItem: [ConfigData]}) append_file = True if file_info["cape_type_code"] == CERBER_PAYLOAD: file_info["cape_type"] = "Cerber Payload" cape_config["cape_type"] = "Cerber Payload" cape_name = "Cerber" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" append_file = True # Ursnif if file_info["cape_type_code"] == URSNIF_CONFIG: file_info["cape_type"] = "Ursnif Config" cape_config["cape_type"] = "Ursnif Config" cape_name = "Ursnif" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported 
malwareconfig.com parser %s", cape_name) except ImportError: log.info( "CAPE: malwareconfig.com parser: No module named %s", cape_name) if malwareconfig_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} malwareconfig_config = module.config(file_data) if isinstance(malwareconfig_config, list): for (key, value) in malwareconfig_config[0].iteritems(): cape_config["cape_config"].update( {key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in malwareconfig_config.iteritems(): cape_config["cape_config"].update( {key: [value]}) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = False # Hancitor if file_info["cape_type_code"] == HANCITOR_PAYLOAD: cape_name = "Hancitor" cape_config["cape_type"] = "Hancitor Payload" file_info["cape_type"] = "Hancitor Payload" if file_info["cape_type_code"] == HANCITOR_CONFIG: cape_name = "Hancitor" cape_config["cape_type"] = "Hancitor Config" file_info["cape_type"] = "Hancitor Config" if not "cape_config" in cape_config: cape_config["cape_config"] = {} ConfigStrings = file_data.split('\0') ConfigStrings = filter(None, ConfigStrings) ConfigItem = "Campaign Code" cape_config["cape_config"].update( {ConfigItem: [ConfigStrings[0]]}) GateURLs = ConfigStrings[1].split('|') for index, value in enumerate(GateURLs): ConfigItem = "Gate URL " + str(index + 1) cape_config["cape_config"].update({ConfigItem: [value]}) append_file = False # QakBot if file_info["cape_type_code"] == QAKBOT_CONFIG: file_info["cape_type"] = "QakBot Config" cape_config["cape_type"] = "QakBot Config" cape_name = "QakBot" if not "cape_config" in cape_config: cape_config["cape_config"] = {} for line in file_data.splitlines(): if '=' in line: index = line.split('=')[0] data = line.split('=')[1] if index == '10': ConfigItem = "Botnet name" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '11': ConfigItem = 
"Number of C2 servers" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '47': ConfigItem = "Bot ID" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '3': ConfigItem = "Config timestamp" ConfigData = datetime.datetime.fromtimestamp( int(data)).strftime('%H:%M:%S %d-%m-%Y') if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '22': values = data.split(':') ConfigItem = "Password #1" try: ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "Username #1" try: ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "C2 #1" try: ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass if index == '23': values = data.split(':') ConfigItem = "Password #2" try: ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "Username #2" try: ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "C2 #2" try: ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass if index == '24': values = data.split(':') ConfigItem = "Password #3" try: ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "Username #3" try: ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "C2 #3" try: ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass if index == '25': values = data.split(':') ConfigItem = "Password #4" try: ConfigData = values[2] if 
ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "Username #4" try: ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "C2 #4" try: ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass if index == '26': values = data.split(':') ConfigItem = "Password #5" try: ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "Username #5" try: ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass ConfigItem = "C2 #5" try: ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) except: pass append_file = False if file_info["cape_type_code"] == QAKBOT_PAYLOAD: file_info["cape_type"] = "QakBot Payload" cape_config["cape_type"] = "QakBot Payload" cape_name = "QakBot" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" append_file = True # UPX package output if file_info["cape_type_code"] == UPX: file_info["cape_type"] = "Unpacked PE Image" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # Attempt to decrypt script dump if 
file_info["cape_type_code"] == SCRIPT_DUMP: data = file_data.decode("utf-16").replace("\x00", "") file_info["data"] = data cape_name = "ScriptDump" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.info( "CAPE: malwareconfig.com parser: No module named %s", cape_name) if malwareconfig_loaded: try: script_data = module.config(self, data) if script_data and "more_eggs" in script_data["type"]: bindata = script_data["data"] sha256 = hashlib.sha256(bindata).hexdigest() filepath = os.path.join(self.CAPE_path, sha256) tmpstr = file_info["pid"] tmpstr += "," + file_info["process_path"] tmpstr += "," + file_info["module_path"] if "text" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsJS" outstr = str( MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n" with open(filepath + "_info.txt", "w") as infofd: infofd.write(outstr) with open(filepath, 'w') as cfile: cfile.write(bindata) elif "binary" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsBin" outstr = str( MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n" with open(filepath + "_info.txt", "w") as infofd: infofd.write(outstr) with open(filepath, 'wb') as cfile: cfile.write(bindata) if os.path.exists(filepath): self.script_dump_files.append(filepath) else: file_info["cape_type"] = "Script Dump" log.info( "CAPE: Script Dump does not contain known encrypted payload." 
) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = True # More_Eggs if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD: file_info["cape_type"] = "More Eggs JS Payload" cape_name = "MoreEggs" append_file = True if file_info["cape_type_code"] == MOREEGGSBIN_PAYLOAD: file_info["cape_type"] = "More_Eggs Binary Payload" cape_name = "MoreEggs" type_strings = file_info["type"].split() if type_strings[0] == "PE32+": file_info["cape_type"] += ": 64-bit " if type_strings[2] == "(DLL)": file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == "PE32": file_info["cape_type"] += ": 32-bit " if type_strings[2] == "(DLL)": file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" append_file = True # Process CAPE Yara hits for hit in file_info["cape_yara"]: # Check to see if file is packed with UPX if hit["name"] == "UPX": log.info( "CAPE: Found UPX Packed sample - attempting to unpack") self.upx_unpack(file_data, CAPE_output) # Check for a payload or config hit extraction_types = ["payload", "config", "loader"] try: for type in extraction_types: if type in hit["meta"]["cape_type"].lower(): file_info["cape_type"] = hit["meta"]["cape_type"] cape_name = hit["name"].replace('_', ' ') except: pass type_strings = file_info["type"].split() if "-bit" not in file_info["cape_type"]: if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" suppress_parsing_list = [ "Cerber", "Emotet_Payload", "Ursnif", "QakBot" ] if hit["name"] in suppress_parsing_list: continue # Attempt to import a parser for the hit # DC3-MWCP mwcp_loaded = False if cape_name: try: mwcp_parsers = 
os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "mwcp", "parsers") mwcp = reporter.Reporter(parserdir=mwcp_parsers) kwargs = {} mwcp.run_parser(cape_name, data=file_data, **kwargs) if mwcp.errors == []: log.info("CAPE: Imported DC3-MWCP parser %s", cape_name) mwcp_loaded = True else: error_lines = mwcp.errors[0].split("\n") for line in error_lines: if line.startswith('ImportError: '): log.info("CAPE: DC3-MWCP parser: %s", line.split(': ')[1]) except ImportError: pass # malwareconfig malwareconfig_loaded = False if cape_name and mwcp_loaded == False: try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.info( "CAPE: malwareconfig.com parser: No module named %s", cape_name) # Get config data if mwcp_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} cape_config["cape_config"] = convert(mwcp.metadata) else: cape_config["cape_config"].update( convert(mwcp.metadata)) except Exception as e: log.error( "CAPE: DC3-MWCP config parsing error with %s: %s", cape_name, e) elif malwareconfig_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} malwareconfig_config = module.config(file_data) if isinstance(malwareconfig_config, list): for (key, value) in malwareconfig_config[0].iteritems(): cape_config["cape_config"].update({key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in malwareconfig_config.iteritems(): cape_config["cape_config"].update({key: [value]}) except Exception as e: log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e) if "cape_config" in cape_config: if cape_config["cape_config"] == {}: del cape_config["cape_config"] if cape_name: 
if "cape_config" in cape_config and "cape_name" not in cape_config: cape_config["cape_name"] = format(cape_name) if not "cape" in self.results: if cape_name != "UPX": self.results["cape"] = cape_name # Remove duplicate payloads from web ui for cape_file in CAPE_output: if file_info["size"] == cape_file["size"]: if HAVE_PYDEEP: ssdeep_grade = pydeep.compare(file_info["ssdeep"], cape_file["ssdeep"]) if ssdeep_grade >= ssdeep_threshold: append_file = False if file_info["entrypoint"] and file_info["entrypoint"] == cape_file["entrypoint"] \ and file_info["ep_bytes"] == cape_file["ep_bytes"]: append_file = False if append_file == True: CAPE_output.append(file_info) return file_info
def process_file(self, file_path, append_file, metadata=None):
    """Process one CAPE output file and record it as a payload.

    Builds ``file_info`` for *file_path*, decodes the ";?"-separated CAPE
    metadata string, maps the numeric ``cape_type_code`` to a readable
    type, runs yara-driven config parsers over the file and its extracted
    children, and appends the payload to ``self.cape["payloads"]`` unless
    it duplicates an already-recorded payload.

    @param file_path: path to the dumped/extracted file on disk.
    @param append_file: caller's initial decision on whether to record the
        file; the type-code handling and dedup logic below may override it.
    @param metadata: optional dict carrying "metadata" (the monitor's
        ";?"-separated string) and "pids" keys.
    @return: None (returns early when *file_path* does not exist).
    """
    if metadata is None:
        metadata = {}
    cape_name = ""
    type_string = ""
    if not os.path.exists(file_path):
        return
    file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
    cape_names = set()
    if pefile_object:
        self.results.setdefault("pefiles", {}).setdefault(file_info["sha256"], pefile_object)
    if file_info.get("clamav") and processing_conf.detections.clamav:
        clamav_detection = get_clamav_consensus(file_info["clamav"])
        if clamav_detection:
            add_family_detection(self.results, clamav_detection, "ClamAV", file_info["sha256"])
    # should we use dropped path here?
    static_file_info(
        file_info,
        file_path,
        str(self.task["id"]),
        self.task.get("package", ""),
        self.task.get("options", ""),
        self.self_extracted,
        self.results,
    )
    # Get the file data
    with open(file_info["path"], "rb") as file_open:
        file_data = file_open.read()
    if metadata.get("pids", False):
        # A single pid stays scalar; multiple pids become a CSV string.
        file_info["pid"] = metadata["pids"][0] if len(metadata["pids"]) == 1 else ",".join(metadata["pids"])
    # metadata string layout (";?"-separated) as consumed below:
    # [0]=cape_type_code, [1]=process path, [2]=module path, then
    # type-specific fields -- assumed from the indexing here; TODO confirm
    # against the monitor side.
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]
    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    if metastrings and metastrings[0] and metastrings[0].isdigit():
        file_info["cape_type_code"] = int(metastrings[0])
        if file_info["cape_type_code"] == TYPE_STRING:
            if len(metastrings) > 4:
                type_string = metastrings[3]
        elif file_info["cape_type_code"] == COMPRESSION:
            file_info["cape_type"] = "Decompressed PE Image"
        elif file_info["cape_type_code"] in inject_map:
            file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["target_path"] = metastrings[3]
                file_info["target_process"] = metastrings[3].rsplit("\\", 1)[-1]
                file_info["target_pid"] = metastrings[4]
        elif file_info["cape_type_code"] in unpack_map:
            file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["virtual_address"] = metastrings[3]
            # Tag bitness/DLL-vs-exe from the magic string, e.g.
            # "PE32+ executable ... (DLL)".
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        if file_info["cape_type_code"] in code_mapping:
            file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] in name_mapping:
                cape_name = name_mapping[file_info["cape_type_code"]]
            append_file = True
        # PlugX
        elif file_info["cape_type_code"] == PLUGX_CONFIG:
            file_info["cape_type"] = "PlugX Config"
            if plugx_parser:
                plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                if plugx_config:
                    cape_name = "PlugX"
                    self.update_cape_configs(cape_name, plugx_config)
                    cape_names.add(cape_name)
                else:
                    log.error("CAPE: PlugX config parsing failure - size many not be handled")
                # Config blobs are never stored as payloads.
                append_file = False
        # Attempt to decrypt script dump
        elif file_info["cape_type_code"] == SCRIPT_DUMP:
            data = file_data.decode("utf-16").replace("\x00", "")
            cape_name = "ScriptDump"
            malwareconfig_loaded = False
            try:
                malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
                file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname, description)
                malwareconfig_loaded = True
                log.debug("CAPE: Imported parser %s", cape_name)
            except ImportError:
                log.debug("CAPE: parser: No module named %s", cape_name)
            if malwareconfig_loaded:
                try:
                    script_data = module.config(self, data)
                    if script_data and "more_eggs" in script_data["type"]:
                        bindata = script_data["data"]
                        sha256 = hashlib.sha256(bindata).hexdigest()
                        filepath = os.path.join(self.CAPE_path, sha256)
                        if "text" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsJS"
                        elif "binary" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsBin"
                        with open(filepath, "w") as cfile:
                            cfile.write(bindata)
                        self.script_dump_files.append(filepath)
                    else:
                        file_info["cape_type"] = "Script Dump"
                        log.info("CAPE: Script Dump does not contain known encrypted payload")
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
            append_file = True
        # More_Eggs
        elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
            file_info["cape_type"] = "More Eggs JS Payload"
            cape_name = "MoreEggs"
            append_file = True
    # Process CAPE Yara hits
    # Prefilter extracted data + beauty is better than oneliner:
    all_files = []
    for extracted_file in file_info.get("extracted_files", []):
        yara_hits = extracted_file["cape_yara"]
        if not yara_hits:
            continue
        # Prefer in-memory data; fall back to re-reading from disk.
        if extracted_file.get("data", b""):
            extracted_file_data = make_bytes(extracted_file["data"])
        else:
            with open(extracted_file["path"], "rb") as fil:
                extracted_file_data = fil.read()
        for yara in yara_hits:
            all_files.append(
                (
                    f"[{extracted_file.get('sha256', '')}]{file_info['path']}",
                    extracted_file_data,
                    yara,
                )
            )
    for yara in file_info["cape_yara"]:
        all_files.append((file_info["path"], file_data, yara))
    # Tracks which (path -> {parser names}) have already been run so the
    # same parser is not invoked twice for one file.
    executed_config_parsers = collections.defaultdict(set)
    for tmp_path, tmp_data, hit in all_files:
        # Check for a payload or config hit
        try:
            if File.yara_hit_provides_detection(hit):
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = File.get_cape_name_from_yara_hit(hit)
                cape_names.add(cape_name)
        except Exception as e:
            print(f"Cape type error: {e}")
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                file_info["cape_type"] += "DLL" if type_strings[2] == ("(DLL)") else "executable"
        if cape_name and cape_name not in executed_config_parsers[tmp_path]:
            tmp_config = static_config_parsers(cape_name, tmp_path, tmp_data)
            self.update_cape_configs(cape_name, tmp_config)
            executed_config_parsers[tmp_path].add(cape_name)
    if type_string:
        log.info("CAPE: type_string: %s", type_string)
        tmp_cape_name = File.get_cape_name_from_cape_type(type_string)
        # NOTE(review): this membership test runs against the defaultdict's
        # *path* keys, not the per-path parser-name sets -- looks suspicious,
        # confirm it is intentional.
        if tmp_cape_name and tmp_cape_name not in executed_config_parsers:
            tmp_config = static_config_parsers(tmp_cape_name, file_info["path"], file_data)
            if tmp_config:
                cape_name = tmp_cape_name
                cape_names.add(cape_name)
                log.info("CAPE: config returned for: %s", cape_name)
                self.update_cape_configs(cape_name, tmp_config)
    self.add_family_detections(file_info, cape_names)
    # Remove duplicate payloads from web ui
    for cape_file in self.cape["payloads"] or []:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                # Fuzzy-hash similarity above the configured threshold counts
                # as a duplicate.
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
                if ssdeep_grade >= ssdeep_threshold:
                    log.debug(
                        "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d",
                        ssdeep_grade,
                        ssdeep_threshold,
                    )
                    append_file = False
            # Identical entrypoint + type code + entrypoint bytes also counts
            # as a duplicate.
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if (
                    file_info["entrypoint"] == cape_file["entrypoint"]
                    and file_info["cape_type_code"] == cape_file["cape_type_code"]
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]
                ):
                    log.debug("CAPE duplicate output file skipped: matching entrypoint")
                    append_file = False
    if append_file:
        if HAVE_FLARE_CAPA:
            pretime = timeit.default_timer()
            capa_details = flare_capa_details(file_path, "cape")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
        self.cape["payloads"].append(file_info)
def ssdeep_cluster(ssdeep_values, score_threshold=0):
    """Group ssdeep hashes into clusters of mutually-similar values.

    Candidate neighbours are found cheaply via an n-gram index over each
    hash's chunk (and its double-chunk at twice the block size, as produced
    by ``preprocess_hash``); candidates are then scored with
    ``pydeep.compare`` and values whose pairwise score exceeds
    *score_threshold* are merged into sorted groups.
    """
    hashes = {}
    ngram_index = {}
    neighbours = {}
    pair_scores = {}

    def index_chunks(block_size, chunk, value):
        # Register every n-gram of `chunk` under `block_size` and return the
        # set of previously-indexed values sharing at least one n-gram.
        bucket = ngram_index.setdefault(block_size, {})
        seen = set()
        for gram in chunk:
            if gram in bucket:
                seen |= bucket[gram]
            else:
                bucket[gram] = set()
            bucket[gram].add(value)
        return seen

    for value in ssdeep_values:
        hashes[value] = value
        block_size, chunk, double_chunk = preprocess_hash(hashes[value])
        candidates = index_chunks(block_size, chunk, value) | index_chunks(
            block_size * 2, double_chunk, value)
        current_hash = hashes[value]
        neighbours[value] = set()
        for other in candidates:
            score = pydeep.compare(current_hash, hashes[other])
            if score > score_threshold:
                # Record the match symmetrically; first-seen score wins.
                neighbours[value].add(other)
                neighbours[other].add(value)
                pair_scores.setdefault(value, {}).setdefault(other, score)
                pair_scores.setdefault(other, {}).setdefault(value, score)

    groups = []
    for value in neighbours.keys():
        placed = False
        for group in groups:
            if value in group:
                placed = True
                continue
            # Only join a group when similar to every current member.
            if all(member in neighbours[value] for member in group):
                group.append(value)
                placed = True
        if not placed:
            groups.append([value])
    for group in groups:
        group.sort()
    return groups
def process_file(self, file_path, append_file, metadata=None):
    """Process one CAPE output file and record it as a payload.

    Builds ``file_info`` for *file_path*, decodes the ";?"-separated CAPE
    metadata string, maps the numeric ``cape_type_code`` to a readable
    type, runs yara-driven config parsers, and appends the payload to
    ``self.cape["payloads"]`` (and any config to ``self.cape["configs"]``)
    unless it duplicates an already-recorded payload.

    Fixes: ``metadata`` previously used a mutable default argument
    (``metadata={}``), which is shared between calls; the unused local
    ``buf`` was removed; the redundant repeated ``file_info.get("entrypoint")``
    check in the dedup condition was dropped.

    @param file_path: path to the dumped/extracted file on disk.
    @param append_file: caller's initial decision on whether to record the
        file; may be overridden below.
    @param metadata: optional dict with "metadata" (monitor string) and
        "pids" keys.
    @return: None (returns early when *file_path* does not exist).
    """
    if metadata is None:
        metadata = {}
    config = {}
    cape_name = ""
    type_string = ""
    if not os.path.exists(file_path):
        return
    file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
    if pefile_object:
        self.results.setdefault("pefiles", {})
        self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)
    # Get the file data
    with open(file_info["path"], "rb") as file_open:
        file_data = file_open.read()
    if metadata.get("pids", False):
        # A single pid stays scalar; multiple pids become a CSV string.
        if len(metadata["pids"]) == 1:
            file_info["pid"] = metadata["pids"][0]
        else:
            file_info["pid"] = ",".join(metadata["pids"])
    # metadata layout (";?"-separated): [0]=cape_type_code, [1]=process
    # path, [2]=module path, then type-specific fields -- assumed from the
    # indexing below; TODO confirm against the monitor side.
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].split("\\")[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]
    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    if metastrings and metastrings[0] and metastrings[0].isdigit():
        file_info["cape_type_code"] = int(metastrings[0])
        if file_info["cape_type_code"] == TYPE_STRING:
            if len(metastrings) > 4:
                type_string = metastrings[3]
        if file_info["cape_type_code"] == COMPRESSION:
            file_info["cape_type"] = "Decompressed PE Image"
        if file_info["cape_type_code"] in inject_map:
            file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["target_path"] = metastrings[3]
                file_info["target_process"] = metastrings[3].split("\\")[-1]
                file_info["target_pid"] = metastrings[4]
        if file_info["cape_type_code"] in unpack_map:
            file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["virtual_address"] = metastrings[3]
            # Tag bitness/DLL-vs-exe from the magic string.
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        if file_info["cape_type_code"] in code_mapping:
            file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] in name_mapping:
                cape_name = name_mapping[file_info["cape_type_code"]]
            append_file = True
            # NOTE(review): append_file is immediately reset to False below,
            # making the assignment above dead -- preserved as-is because the
            # intent (config-only extraction?) is not clear from this file.
            append_file = False
        # PlugX
        if file_info["cape_type_code"] == PLUGX_CONFIG:
            file_info["cape_type"] = "PlugX Config"
            if plugx_parser:
                plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                if plugx_config:
                    cape_name = "PlugX"
                    config[cape_name] = dict()
                    for key, value in plugx_config.items():
                        config[cape_name].update({key: [value]})
                else:
                    log.error("CAPE: PlugX config parsing failure - size many not be handled.")
                # Config blobs are never stored as payloads.
                append_file = False
        # Attempt to decrypt script dump
        if file_info["cape_type_code"] == SCRIPT_DUMP:
            data = file_data.decode("utf-16").replace("\x00", "")
            cape_name = "ScriptDump"
            malwareconfig_loaded = False
            try:
                malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
                file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname, description)
                malwareconfig_loaded = True
                log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name)
            except ImportError:
                log.debug("CAPE: malwareconfig.com parser: No module named %s", cape_name)
            if malwareconfig_loaded:
                try:
                    script_data = module.config(self, data)
                    if script_data and "more_eggs" in script_data["type"]:
                        bindata = script_data["data"]
                        sha256 = hashlib.sha256(bindata).hexdigest()
                        filepath = os.path.join(self.CAPE_path, sha256)
                        tmpstr = file_info["pid"]
                        tmpstr += "," + file_info["process_path"]
                        tmpstr += "," + file_info["module_path"]
                        if "text" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsJS"
                            outstr = str(MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                            with open(filepath + "_info.txt", "w") as infofd:
                                infofd.write(outstr)
                            with open(filepath, "w") as cfile:
                                cfile.write(bindata)
                        elif "binary" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsBin"
                            outstr = str(MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                            with open(filepath + "_info.txt", "w") as infofd:
                                infofd.write(outstr)
                            with open(filepath, "wb") as cfile:
                                cfile.write(bindata)
                        if os.path.exists(filepath):
                            self.script_dump_files.append(filepath)
                    else:
                        file_info["cape_type"] = "Script Dump"
                        log.info("CAPE: Script Dump does not contain known encrypted payload.")
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
            append_file = True
        # More_Eggs
        if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
            file_info["cape_type"] = "More Eggs JS Payload"
            cape_name = "MoreEggs"
            append_file = True
    # Process CAPE Yara hits
    for hit in file_info["cape_yara"]:
        # Check to see if file is packed with UPX
        if hit["name"] == "UPX":
            log.info("CAPE: Found UPX Packed sample - attempting to unpack")
            self.upx_unpack(file_data)
        # Check for a payload or config hit
        extraction_types = ("payload", "config", "loader")
        try:
            if any(file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types):
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = hit["name"].replace("_", " ")
        except Exception as e:
            print("Cape type error: {}".format(e))
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        if hit["name"] == "GuLoader":
            self.detect2pid(file_info["pid"], "GuLoader")
        tmp_config = static_config_parsers(hit["name"], file_data)
        if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
            config.update(tmp_config)
    if type_string:
        log.info("CAPE: type_string: %s", type_string)
        tmp_config = static_config_parsers(type_string.split(" ")[0], file_data)
        if tmp_config:
            cape_name = type_string.split(" ")[0]
            log.info("CAPE: config returned for: %s", cape_name)
            config.update(tmp_config)
    if cape_name:
        if "detections" not in self.results:
            if cape_name != "UPX":
                # ToDo list of keys
                self.results["detections"] = cape_name
        if file_info.get("pid"):
            self.detect2pid(file_info["pid"], cape_name)
    # Remove duplicate payloads from web ui
    for cape_file in self.cape["payloads"] or []:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                ssdeep_grade = pydeep.compare(
                    file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8"))
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if (
                    file_info["entrypoint"] == cape_file["entrypoint"]
                    and file_info["cape_type_code"] == cape_file["cape_type_code"]
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]
                ):
                    log.debug("CAPE duplicate output file skipped")
                    append_file = False
    if append_file is True:
        if HAVE_FLARE_CAPA:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "cape")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
        self.cape["payloads"].append(file_info)
    if config and config not in self.cape["configs"]:
        self.cape["configs"].append(config)
import pydeep

# Known-good fixtures: ssdeep hashes of stock Windows calc.exe and
# notepad.exe at their expected byte sizes.
file1 = 'calc.exe'
file2 = 'notepad.exe'
file1hash = '1536:JEl14rQcWAkN7GAlqbkfAGQGV8aMbrNyrf1w+noPvLV6eBsCXKc:JYmZWXyaiedMbrN6pnoXL1BsC'
file2hash = '1536:0awOnbNQKLjWDyy1o5RefYMJUEbooPRrKKRl1P3:0YNQKPWDyDRefVJltZrpRl1P3'

# Read in binary mode: these are PE executables, and pydeep hashes raw
# bytes. Text mode would mangle the data on Windows and break on Python 3.
with open(file1, 'rb') as f1:
    data1 = f1.read()
with open(file2, 'rb') as f2:
    data2 = f2.read()

assert len(data1) == 114688, "File length error"
assert len(data2) == 69120, "File length error"  # fixed typo: was "lendth"

# hash_buf and hash_file must both agree with the known-good hashes.
hash01 = pydeep.hash_buf(data1)
hash02 = pydeep.hash_buf(data2)
assert hash01 == file1hash, "Error hashing file1"
assert hash02 == file2hash, "Error hashing file2"

hash1 = pydeep.hash_file(file1)
hash2 = pydeep.hash_file(file2)
assert hash1 == file1hash, "Error hashing file1"
assert hash2 == file2hash, "Error hashing file2"

# Two unrelated binaries must have zero similarity.
assert pydeep.compare(hash1, hash2) == 0, "Error fuzzy compare value"
print('Stuff looks fine..')
def test_pydeep_compare():
    """pydeep.compare must score two unrelated fixture hashes as 0.

    Hashes come from the module-level ``testL`` fixture table (last element
    of each row). The previously-read ``hash3`` (testL[2][-1]) was unused
    and has been removed.
    """
    hash1 = testL[0][-1]
    hash2 = testL[1][-1]
    assert pydeep.compare(hash1, hash2) == 0, "Error fuzzy compare value"
def render_text(self, outfd, data):
    """Render detected API hooks as text.

    For each (process, module, hook, addr_space) tuple from the plugin's
    calculate step: optionally drop whitelisted hooks, optionally suppress
    hooks whose containing memory page fuzzy-matches a known-good ssdeep
    whitelist entry, and otherwise print the hook's full details plus a
    disassembly of its hops.

    @param outfd: writable text stream for the report.
    @param data: iterable of (process, module, hook, addr_space) tuples.
    """
    for process, module, hook, addr_space in data:
        if not self._config.NO_WHITELIST:
            # Kernel-mode hooks have no owning process.
            if process:
                process_name = str(process.ImageFileName)
            else:
                process_name = ''
            if self.whitelist(hook.hook_mode | hook.hook_type, process_name,
                              hook.VictimModule, hook.HookModule, hook.Function):
                #debug.debug("Skipping whitelisted function: {0} {1} {2} {3}".format(
                #    process_name, hook.VictimModule, hook.HookModule,
                #    hook.Function))
                continue
        if self._config.SSDEEP and has_pydeep:
            skip = False
            # read from the start of the page containing the hook, then hash it
            page_address = hook.hook_address & 0xFFFFF000
            hook_buf = addr_space.zread(page_address, 0x1000)
            hook_hash = pydeep.hash_buf(hook_buf)
            # loop through all the whitelist hashes and compare
            for (whitelist_name, whitelist_hash) in whitelist_ssdeep:
                alike = pydeep.compare(hook_hash, whitelist_hash)
                # the comparison is greater than the threshold so display an
                # informational message then skip the rest of the output in
                # normal malfind
                if alike > self._config.THRESHOLD:
                    if process:
                        outfd.write('Process: {0} ({1})\n'.format(
                            process.UniqueProcessId, process.ImageFileName))
                    outfd.write(
                        "Hook at 0x{0:x} in page 0x{1:x} is {2}% similar to whitelist hook {3}\n"
                        .format(hook.hook_address, page_address, alike,
                                whitelist_name))
                    #outfd.write("  hook: {0}\n".format(hook_hash))
                    #outfd.write("  whitelist: {0}\n".format(whitelist_hash))
                    outfd.write("\n")
                    skip = True
                    continue
            if skip:
                continue
        # Full report for a hook that survived both filters.
        outfd.write("*" * 72 + "\n")
        outfd.write("Hook mode: {0}\n".format(hook.Mode))
        outfd.write("Hook type: {0}\n".format(hook.Type))
        if process:
            outfd.write('Process: {0} ({1})\n'.format(
                process.UniqueProcessId, process.ImageFileName))
        # Prefer the short module name; fall back to the basename of the
        # full path when BaseDllName is empty.
        outfd.write("Victim module: {0} ({1:#x} - {2:#x})\n".format(
            str(module.BaseDllName or '') or ntpath.basename(
                str(module.FullDllName or '')), module.DllBase,
            module.DllBase + module.SizeOfImage))
        outfd.write("Function: {0}\n".format(hook.Detail))
        outfd.write("Hook address: {0:#x}\n".format(hook.hook_address))
        outfd.write("Hooking module: {0}\n\n".format(hook.HookModule))
        for n, info in enumerate(hook.disassembled_hops):
            # NOTE(review): `data` is rebound here, shadowing the iterated
            # parameter -- harmless because the outer loop has already
            # unpacked its tuple, but worth renaming some day.
            (address, data) = info
            s = [
                "{0:#x} {1:<16} {2}".format(o, h, i)
                for o, i, h in malfind.Disassemble(data, int(address),
                                                   bits="32bit" if hook.decode_bits == distorm3.Decode32Bits else "64bit")
            ]
            outfd.write("Disassembly({0}):\n{1}".format(n, "\n".join(s)))
            outfd.write("\n\n")
def ssdeep_cluster(root_paths, recursive=False, dontcompute=False,
                   dontcompute_sha256=False, calculate_sha256=False,
                   should_print=False, score_threshold=0):
    """This signature line is documentation placeholder only."""
def run(self):
    """Viper `fuzzy` command: ssdeep similarity search / clustering.

    In cluster mode (`--cluster`) every stored sample is greedily assigned
    to the first existing cluster containing a member whose ssdeep score
    against it exceeds 40, otherwise it seeds a new cluster; clusters with
    more than one member are printed. Without `--cluster`, the currently
    opened session file is compared against every stored sample and
    matches scoring above 40 are listed.
    """
    super(Fuzzy, self).run()
    if not HAVE_PYDEEP:
        self.log(
            'error',
            "Missing dependency, install pydeep (`pip install pydeep`)")
        return
    arg_verbose = False
    arg_cluster = False
    if self.args:
        if self.args.verbose:
            arg_verbose = self.args.verbose
        if self.args.cluster:
            arg_cluster = self.args.cluster
    db = Database()
    samples = db.find(key='all')
    # Check if we're operating in cluster mode, otherwise we run on the
    # currently opened file.
    if arg_cluster:
        self.log('info', "Generating clusters, this might take a while...")
        clusters = dict()
        for sample in samples:
            # Samples without an ssdeep hash cannot be clustered.
            if not sample.ssdeep:
                continue
            if arg_verbose:
                self.log(
                    'info', "Testing file {0} with ssdeep {1}".format(
                        sample.md5, sample.ssdeep))
            clustered = False
            for cluster_name, cluster_members in clusters.items():
                # Check if sample is already in the cluster.
                if sample.md5 in cluster_members:
                    continue
                if arg_verbose:
                    self.log(
                        'info', "Testing {0} in cluser {1}".format(
                            sample.md5, cluster_name))
                for member in cluster_members:
                    if sample.md5 == member[0]:
                        continue
                    member_hash = member[0]
                    member_name = member[1]
                    # Re-fetch the member's ssdeep from the database by md5.
                    member_ssdeep = db.find(
                        key='md5', value=member_hash)[0].ssdeep
                    # 40 is the fixed similarity cutoff for cluster admission.
                    if pydeep.compare(sample.ssdeep, member_ssdeep) > 40:
                        if arg_verbose:
                            self.log(
                                'info',
                                "Found home for {0} in cluster {1}".
                                format(sample.md5, cluster_name))
                        clusters[cluster_name].append(
                            [sample.md5, sample.name])
                        clustered = True
                        break
            if not clustered:
                # No similar cluster member found: start a new cluster.
                cluster_id = len(clusters) + 1
                clusters[cluster_id] = [
                    [sample.md5, sample.name],
                ]
        ordered_clusters = collections.OrderedDict(
            sorted(clusters.items()))
        self.log(
            'info',
            "Following are the identified clusters with more than one member"
        )
        for cluster_name, cluster_members in ordered_clusters.items():
            # We include in the results only clusters with more than just
            # one member.
            if len(cluster_members) <= 1:
                continue
            self.log('info',
                     "Ssdeep cluster {0}".format(bold(cluster_name)))
            self.log(
                'table', dict(header=['MD5', 'Name'], rows=cluster_members))
    # We're running against the already opened file.
    else:
        if not __sessions__.is_set():
            self.log('error', "No open session")
            return
        if not __sessions__.current.file.ssdeep:
            self.log('error', "No ssdeep hash available for opened file")
            return
        matches = []
        for sample in samples:
            # Skip the opened file itself and samples without a hash.
            if sample.sha256 == __sessions__.current.file.sha256:
                continue
            if not sample.ssdeep:
                continue
            score = pydeep.compare(__sessions__.current.file.ssdeep,
                                   sample.ssdeep)
            if score > 40:
                matches.append(
                    ['{0}%'.format(score), sample.name, sample.sha256])
                if arg_verbose:
                    self.log(
                        'info', "Match {0}%: {2} [{1}]".format(
                            score, sample.name, sample.sha256))
        self.log(
            'info',
            "{0} relevant matches found".format(bold(len(matches))))
        if len(matches) > 0:
            self.log(
                'table',
                dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def run(self):
    """Find stored samples whose ssdeep hash is similar (>40%) to the
    file in the currently opened session and print them as a table.
    """
    # Guard clauses: an open session, the pydeep module, and an ssdeep
    # hash on the opened file are all required.
    if not __session__.is_set():
        print_error("No session opened")
        return
    if not HAVE_PYDEEP:
        print_error("Missing dependency, install pydeep (`pip install pydeep`)")
        return
    if not __session__.file.ssdeep:
        print_error("No ssdeep hash available for opened file")
        return

    def show_usage():
        print("usage: fuzzy [-v]")

    def show_help():
        show_usage()
        print("")
        print("Options:")
        print("\t--help (-h)\tShow this help message")
        print("\t--verbose (-v)\tPrints verbose logging")
        print("")

    verbose = False
    try:
        parsed_opts, _ = getopt.getopt(self.args[0:], 'hv', ['help', 'verbose'])
    except getopt.GetoptError as err:
        print(err)
        return
    for flag, _value in parsed_opts:
        if flag in ('-h', '--help'):
            show_help()
            return
        if flag in ('-v', '--verbose'):
            verbose = True

    current = __session__.file
    matches = []
    for candidate in Database().find(key='all'):
        # Skip the opened file itself and anything without an ssdeep hash.
        if candidate.sha256 == current.sha256 or not candidate.ssdeep:
            continue
        score = pydeep.compare(current.ssdeep, candidate.ssdeep)
        if score > 40:
            matches.append(['{0}%'.format(score), candidate.name, candidate.sha256])
            if verbose:
                print("Match {0}%: {2} [{1}]".format(score, candidate.name, candidate.sha256))

    print_info("{0} relevant matches found".format(bold(len(matches))))
    if len(matches) > 0:
        print(table(header=['Score', 'Name', 'SHA256'], rows=matches))
def render_text(self, outfd, data):
    """Render detected API hooks as text, filtering whitelisted entries.

    For each (process, module, hook, addr_space) tuple produced by the
    plugin's calculate step, optionally skips hooks that match the static
    whitelist or (with --ssdeep) whose containing page fuzzy-matches a
    known-good ssdeep hash, then writes the hook details and a
    disassembly of the hook's hops to `outfd`.
    """
    for process, module, hook, addr_space in data:
        if not self._config.NO_WHITELIST:
            # Kernel hooks have no owning process; use an empty name.
            if process:
                process_name = str(process.ImageFileName)
            else:
                process_name = ''
            if self.whitelist(hook.hook_mode | hook.hook_type, process_name, hook.VictimModule, hook.HookModule, hook.Function):
                #debug.debug("Skipping whitelisted function: {0} {1} {2} {3}".format(
                #    process_name, hook.VictimModule, hook.HookModule,
                #    hook.Function))
                continue
        if self._config.SSDEEP and has_pydeep:
            skip = False
            # read from the start of the page containing the hook, then hash it
            # (mask assumes 4 KiB pages; zread zero-fills unreadable bytes)
            page_address = hook.hook_address & 0xFFFFF000
            hook_buf = addr_space.zread(page_address, 0x1000)
            hook_hash = pydeep.hash_buf(hook_buf)
            # loop through all the whitelist hashes and compare
            for (whitelist_name, whitelist_hash) in whitelist_ssdeep:
                alike = pydeep.compare(hook_hash, whitelist_hash)
                # the comparison is greater than the threshold so display an informational message
                # then skip the rest of the output in normal malfind
                if alike > self._config.THRESHOLD:
                    if process:
                        outfd.write('Process: {0} ({1})\n'.format(
                            process.UniqueProcessId, process.ImageFileName))
                    outfd.write("Hook at 0x{0:x} in page 0x{1:x} is {2}% similar to whitelist hook {3}\n".format(hook.hook_address, page_address, alike, whitelist_name))
                    #outfd.write("     hook: {0}\n".format(hook_hash))
                    #outfd.write("whitelist: {0}\n".format(whitelist_hash))
                    outfd.write("\n")
                    skip = True
                    continue
            if skip:
                continue
        # Full (non-whitelisted) hook report follows.
        outfd.write("*" * 72 + "\n")
        outfd.write("Hook mode: {0}\n".format(hook.Mode))
        outfd.write("Hook type: {0}\n".format(hook.Type))
        if process:
            outfd.write('Process: {0} ({1})\n'.format(
                process.UniqueProcessId, process.ImageFileName))
        # Fall back to the basename of FullDllName when BaseDllName is empty.
        outfd.write("Victim module: {0} ({1:#x} - {2:#x})\n".format(
            str(module.BaseDllName or '') or ntpath.basename(str(module.FullDllName or '')),
            module.DllBase, module.DllBase + module.SizeOfImage))
        outfd.write("Function: {0}\n".format(hook.Detail))
        outfd.write("Hook address: {0:#x}\n".format(hook.hook_address))
        outfd.write("Hooking module: {0}\n\n".format(hook.HookModule))
        # Disassemble each hop of the hook chain in the right bitness.
        for n, info in enumerate(hook.disassembled_hops):
            (address, data) = info
            s = ["{0:#x} {1:<16} {2}".format(o, h, i)
                 for o, i, h in
                 malfind.Disassemble(data, int(address), bits="32bit" if hook.decode_bits == distorm3.Decode32Bits else "64bit")
                 ]
            outfd.write("Disassembly({0}):\n{1}".format(n, "\n".join(s)))
            outfd.write("\n\n")
def process_file(self, file_path, CAPE_output, append_file, metadata={}):
    """Process a single CAPE-dumped file and classify it.

    Builds a file_info dict for `file_path`, decodes the ";?"-separated
    CAPE metadata string, maps the numeric cape_type_code to a
    human-readable cape_type, runs any applicable malware-config parsers
    into the module-level `cape_config`, and finally appends file_info
    to CAPE_output unless it is detected as a duplicate payload.

    @param file_path: path of the dumped file to process.
    @param CAPE_output: list of file_info dicts accumulated so far
        (mutated in place).
    @param append_file: caller's request to add this file to CAPE_output;
        may be overridden by the classification logic below.
    @param metadata: dict optionally carrying a "metadata" string and a
        "pids" list.  NOTE(review): mutable default argument — shared
        across calls; harmless only while it is never mutated here.
    @return: file_info dict for the processed file.
    """
    global cape_config
    cape_name = ""
    buf = self.options.get("buffer", BUFSIZE)
    file_info = File(file_path, metadata.get("metadata", "")).get_all()
    # Get the file data; only the first `buf` bytes are kept for display.
    try:
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()
            if len(file_data) > buf:
                # NOTE(review): file_data is bytes (opened "rb") but
                # " <truncated>" is str — this concatenation raises
                # TypeError on Python 3; confirm the intended marker.
                file_info["data"] = convert_to_printable(file_data[:buf] + " <truncated>")
            else:
                file_info["data"] = convert_to_printable(file_data)
    except UnicodeDecodeError as e:
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()
    # Record the originating PID(s), comma-joined when there are several.
    if metadata.get("pids", False):
        if len(metadata["pids"]) == 1:
            file_info["pid"] = metadata["pids"][0]
        else:
            file_info["pid"] = ",".join(metadata["pids"])
    # Metadata layout: type_code;?process_path;?module_path;?target;?extra
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].split("\\")[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]
    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    # NOTE(review): metastrings is a list, so `!= ""` is always True;
    # presumably the guard was meant for the raw metadata string.
    if metastrings != "":
        try:
            file_info["cape_type_code"] = int(metastrings[0])
        except Exception as e:
            pass
    if file_info["cape_type_code"] == COMPRESSION:
        file_info["cape_type"] = "Decompressed PE Image"
    # Injection types carry the target process/pid in the metadata tail.
    if file_info["cape_type_code"] in inject_map:
        file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["target_path"] = metastrings[3]
            file_info["target_process"] = metastrings[3].split("\\")[-1]
            file_info["target_pid"] = metastrings[4]
    if file_info["cape_type_code"] == INJECTION_SECTION:
        file_info["cape_type"] = "Injected Section"
        if len(metastrings) > 4:
            file_info["section_handle"] = metastrings[4]
    simple_cape_type_map = {
        UNPACKED_PE: "Unpacked PE Image",
        UNPACKED_SHELLCODE: "Unpacked Shellcode",
    }
    if file_info["cape_type_code"] in simple_cape_type_map:
        file_info["cape_type"] = simple_cape_type_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["virtual_address"] = metastrings[3]
    # Append the PE bitness/DLL-vs-exe suffix derived from the magic type.
    type_strings = file_info["type"].split()
    if type_strings[0] in ("PE32+", "PE32"):
        file_info["cape_type"] += pe_map[type_strings[0]]
        if type_strings[2] == ("(DLL)"):
            file_info["cape_type"] += "DLL"
        else:
            file_info["cape_type"] += "executable"
    # PlugX
    if file_info["cape_type_code"] == PLUGX_CONFIG:
        file_info["cape_type"] = "PlugX Config"
        if plugx_parser:
            plugx_config = plugx_parser.parse_config(file_data, len(file_data))
            if not "cape_config" in cape_config and plugx_config:
                cape_config["cape_config"] = {}
                for key, value in plugx_config.items():
                    cape_config["cape_config"].update({key: [value]})
                cape_name = "PlugX"
            else:
                log.error("CAPE: PlugX config parsing failure - size many not be handled.")
            append_file = False
    if file_info["cape_type_code"] in code_mapping:
        file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
    # NOTE(review): this branch tests config_mapping but reads
    # code_mapping — looks like a copy-paste slip; confirm intent.
    if file_info["cape_type_code"] in config_mapping:
        file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
    type_strings = file_info["type"].split()
    if type_strings[0] in ("PE32+", "PE32"):
        file_info["cape_type"] += pe_map[type_strings[0]]
        if type_strings[2] == ("(DLL)"):
            file_info["cape_type"] += "DLL"
        else:
            file_info["cape_type"] += "executable"
    if file_info["cape_type_code"] in name_mapping:
        cape_name = name_mapping[file_info["cape_type_code"]]
        append_file = True
    if file_info["cape_type_code"] == EVILGRAB_DATA:
        cape_name = "EvilGrab"
        file_info["cape_type"] = "EvilGrab Data"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # 256/260-byte dumps hold the file path; larger blobs are kept.
        if file_info["size"] == 256 or file_info["size"] == 260:
            cape_config["cape_config"].update({"filepath": [format(file_data)]})
        if file_info["size"] > 0x1000:
            append_file = True
        else:
            append_file = False
    # Sedreco
    if file_info["cape_type_code"] == SEDRECO_DATA:
        cape_name = "Sedreco"
        cape_config["cape_type"] = "Sedreco Config"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # metastrings[4] selects which config field this dump contains.
        if len(metastrings) > 4:
            SedrecoConfigIndex = metastrings[4]
            if SedrecoConfigIndex in sedreco_map:
                ConfigItem = sedreco_map[SedrecoConfigIndex]
            else:
                ConfigItem = "Unknown"
            ConfigData = format(file_data)
            if ConfigData:
                cape_config["cape_config"].update({ConfigItem: [ConfigData]})
        append_file = False
    # Cerber
    if file_info["cape_type_code"] == CERBER_CONFIG:
        file_info["cape_type"] = "Cerber Config"
        cape_config["cape_type"] = "Cerber Config"
        cape_name = "Cerber"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # Cerber configs are NUL-padded JSON blobs.
        parsed = json.loads(file_data.rstrip(b'\0'))
        cape_config["cape_config"].update({"JSON Data": [json.dumps(parsed, indent=4, sort_keys=True)]})
        append_file = True
    # Ursnif
    if file_info["cape_type_code"] == URSNIF_PAYLOAD:
        cape_name = "Ursnif"
        cape_config["cape_type"] = "Ursnif Payload"
        file_info["cape_type"] = "Ursnif Payload"
    if file_info["cape_type_code"] == URSNIF_CONFIG:
        file_info["cape_type"] = "Ursnif Config"
        cape_config["cape_type"] = "Ursnif Config"
        cape_name = "Ursnif"
        # Dynamically import the community parser named after the family.
        malwareconfig_loaded = False
        try:
            malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig")
            file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname, description)
            malwareconfig_loaded = True
            log.info("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.info("CAPE: malwareconfig.com parser: No module named %s", cape_name)
        if malwareconfig_loaded:
            try:
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                malwareconfig_config = module.config(file_data)
                # Parsers return either a list of dicts or a single dict.
                if isinstance(malwareconfig_config, list):
                    for (key, value) in malwareconfig_config[0].items():
                        cape_config["cape_config"].update({key: [value]})
                elif isinstance(malwareconfig_config, dict):
                    for (key, value) in malwareconfig_config.items():
                        cape_config["cape_config"].update({key: [value]})
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
        append_file = False
    # Hancitor
    if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
        cape_name = "Hancitor"
        cape_config["cape_type"] = "Hancitor Payload"
        file_info["cape_type"] = "Hancitor Payload"
    if file_info["cape_type_code"] == HANCITOR_CONFIG:
        cape_name = "Hancitor"
        cape_config["cape_type"] = "Hancitor Config"
        file_info["cape_type"] = "Hancitor Config"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # Config layout: NUL-separated strings — campaign code first,
        # then a '|'-separated list of gate URLs.
        ConfigStrings = file_data.split(b'\0')
        ConfigStrings = [_f for _f in ConfigStrings if _f]
        ConfigItem = "Campaign Code"
        cape_config["cape_config"].update({ConfigItem: [ConfigStrings[0]]})
        GateURLs = ConfigStrings[1].split(b'|')
        for index, value in enumerate(GateURLs):
            ConfigItem = "Gate URL " + str(index+1)
            cape_config["cape_config"].update({ConfigItem: [value]})
        append_file = False
    # QakBot
    if file_info["cape_type_code"] == QAKBOT_CONFIG:
        file_info["cape_type"] = "QakBot Config"
        cape_config["cape_type"] = "QakBot Config"
        cape_name = "QakBot"
        cape_config = static_config_parsers(cape_name, file_data, cape_config)
        append_file = False
    # Attempt to decrypt script dump
    if file_info["cape_type_code"] == SCRIPT_DUMP:
        # Script dumps are UTF-16; strip embedded NULs after decoding.
        data = file_data.decode("utf-16").replace("\x00", "")
        file_info["data"] = data
        cape_name = "ScriptDump"
        malwareconfig_loaded = False
        try:
            malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig")
            file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname, description)
            malwareconfig_loaded = True
            log.info("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.info("CAPE: malwareconfig.com parser: No module named %s", cape_name)
        if malwareconfig_loaded:
            try:
                script_data = module.config(self, data)
                if script_data and "more_eggs" in script_data["type"]:
                    # Extracted more_eggs payload: write it (plus a CAPE
                    # "_info.txt" sidecar) under its SHA-256 name.
                    bindata = script_data["data"]
                    sha256 = hashlib.sha256(bindata).hexdigest()
                    filepath = os.path.join(self.CAPE_path, sha256)
                    tmpstr = file_info["pid"]
                    tmpstr += "," + file_info["process_path"]
                    tmpstr += "," + file_info["module_path"]
                    if "text" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsJS"
                        outstr = str(MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, 'w') as cfile:
                            cfile.write(bindata)
                    elif "binary" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsBin"
                        outstr = str(MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, 'wb') as cfile:
                            cfile.write(bindata)
                    if os.path.exists(filepath):
                        self.script_dump_files.append(filepath)
                else:
                    file_info["cape_type"] = "Script Dump"
                    log.info("CAPE: Script Dump does not contain known encrypted payload.")
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
        append_file = True
    # More_Eggs
    if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
        file_info["cape_type"] = "More Eggs JS Payload"
        cape_name = "MoreEggs"
        append_file = True
    # Process CAPE Yara hits
    for hit in file_info["cape_yara"]:
        # Check to see if file is packed with UPX
        if hit["name"] == "UPX":
            log.info("CAPE: Found UPX Packed sample - attempting to unpack")
            self.upx_unpack(file_data, CAPE_output)
        # Check for a payload or config hit
        extraction_types = ["payload", "config", "loader"]
        try:
            # NOTE(review): `type` shadows the builtin here.
            for type in extraction_types:
                if type in hit["meta"].get("cape_type", "").lower():
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"].replace('_', ' ')
        except Exception as e:
            print("Cape type error: {}".format(e))
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        # These families were already parsed above; skip re-parsing.
        suppress_parsing_list = ["Cerber", "Ursnif", "QakBot"]
        if hit["name"] in suppress_parsing_list:
            continue
        cape_config = static_config_parsers(hit["name"], file_data, cape_config)
    if cape_name:
        if "cape_config" in cape_config and "cape_name" not in cape_config:
            cape_config["cape_name"] = format(cape_name)
        if not "detections" in self.results:
            # UPX is a packer, not a detection in its own right.
            if cape_name != "UPX":
                self.results["detections"] = cape_name
    # Remove duplicate payloads from web ui
    for cape_file in CAPE_output:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                # pydeep.compare needs bytes, hence the utf-8 encode.
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8"))
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            # Same entry point and entry bytes => same payload.
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if file_info.get("entrypoint") and file_info["entrypoint"] == cape_file["entrypoint"] \
                        and file_info["ep_bytes"] == cape_file["ep_bytes"]:
                    append_file = False
    if append_file is True:
        CAPE_output.append(file_info)
    return file_info
#!/usr/bin/env python
# ssdeep insto hashin'! - since ssdeep is natively an unwieldy PITA.
# https://pypi.python.org/pypi/pydeep
# PhG
import sys

import pydeep


def main(argv):
    """Hash two files with ssdeep (via pydeep) and print their similarity.

    argv: [program, file1, file2]. Returns a process exit status
    (0 on success, 2 on usage error).
    """
    # Guard: the original crashed with IndexError when run without
    # exactly two file arguments.
    if len(argv) < 3:
        print("usage: %s <file1> <file2>" % argv[0])
        return 2
    pd1 = pydeep.hash_file(argv[1])
    pd2 = pydeep.hash_file(argv[2])
    percent = str(pydeep.compare(pd1, pd2))
    # print() as a function: the original used the Python 2 print
    # statement, which is a SyntaxError on Python 3.
    print("SSDeep has determined that these files are " + percent + "% alike.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

#pd0 = pydeep.hash_file("VirusShare_ab208f0b517ba9850f1551c9555b5313")
#pd1 = pydeep.hash_file("VirusShare_6570163cd34454b3d1476c134d44b9d9")
def process_file(self, file_path, CAPE_output, append_file):
    """Process a single CAPE-dumped file and classify it (Python 2 era).

    Reads the "_info.txt" sidecar metadata, maps the numeric
    cape_type_code to a human-readable cape_type, runs DC3-MWCP or
    malwareconfig.com parsers into the module-level `cape_config`, and
    appends file_info to CAPE_output unless it duplicates an earlier
    payload.

    @param file_path: path of the dumped file to process; sidecar
        metadata files ("*_info.txt") are skipped outright.
    @param CAPE_output: list of file_info dicts accumulated so far
        (mutated in place).
    @param append_file: caller's request to add this file to CAPE_output;
        may be overridden by the classification logic below.
    @return: file_info dict, or None for sidecar files.
    """
    global cape_config
    cape_name = ""
    # NOTE(review): `strings` and `texttypes` are assigned but never
    # used in this function.
    strings = []
    buf = self.options.get("buffer", BUFSIZE)
    if file_path.endswith("_info.txt"):
        return
    texttypes = [
        "ASCII",
        "Windows Registry text",
        "XML document text",
        "Unicode text",
    ]
    # Sidecar metadata: a single comma-separated line, if present.
    if os.path.exists(file_path + "_info.txt"):
        with open(file_path + "_info.txt", 'r') as f:
            metastring = f.readline()
    else:
        metastring = ""
    file_info = File(file_path, metastring).get_all()
    # Get the file data
    # NOTE(review): text-mode ("r") read of binary content — relies on
    # Python 2 str semantics; would corrupt data on Python 3.
    with open(file_info["path"], "r") as file_open:
        file_data = file_open.read(buf + 1)
    if len(file_data) > buf:
        file_info["data"] = binascii.b2a_hex(file_data[:buf] + " <truncated>")
    else:
        file_info["data"] = binascii.b2a_hex(file_data)
    # Metadata layout: type_code,pid,process_path,module_path,extra,...
    metastrings = metastring.split(",")
    if len(metastrings) > 1:
        file_info["pid"] = metastrings[1]
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[2]
        file_info["process_name"] = metastrings[2].split("\\")[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[3]
    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    # NOTE(review): metastrings is a list, so `!= ""` is always True.
    if metastrings != "":
        try:
            file_info["cape_type_code"] = int(metastrings[0])
        except Exception as e:
            pass
    if file_info["cape_type_code"] == COMPRESSION:
        file_info["cape_type"] = "Decompressed PE Image"
    # Injection types carry the target process/pid in the metadata tail.
    if file_info["cape_type_code"] == INJECTION_PE:
        file_info["cape_type"] = "Injected PE Image"
        if len(metastrings) > 4:
            file_info["target_path"] = metastrings[4]
            file_info["target_process"] = metastrings[4].split("\\")[-1]
            file_info["target_pid"] = metastrings[5]
    if file_info["cape_type_code"] == INJECTION_SHELLCODE:
        file_info["cape_type"] = "Injected Shellcode/Data"
        if len(metastrings) > 4:
            file_info["target_path"] = metastrings[4]
            file_info["target_process"] = metastrings[4].split("\\")[-1]
            file_info["target_pid"] = metastrings[5]
    if file_info["cape_type_code"] == INJECTION_SECTION:
        file_info["cape_type"] = "Injected Section"
        if len(metastrings) > 4:
            file_info["section_handle"] = metastrings[4]
    if file_info["cape_type_code"] == EXTRACTION_PE:
        file_info["cape_type"] = "Extracted PE Image"
        if len(metastrings) > 4:
            file_info["virtual_address"] = metastrings[4]
    if file_info["cape_type_code"] == EXTRACTION_SHELLCODE:
        file_info["cape_type"] = "Extracted Shellcode"
        if len(metastrings) > 4:
            file_info["virtual_address"] = metastrings[4]
    # Append the PE bitness/DLL-vs-exe suffix derived from the magic type.
    type_strings = file_info["type"].split()
    if type_strings[0] == ("PE32+"):
        file_info["cape_type"] += ": 64-bit "
        if type_strings[2] == ("(DLL)"):
            file_info["cape_type"] += "DLL"
        else:
            file_info["cape_type"] += "executable"
    if type_strings[0] == ("PE32"):
        file_info["cape_type"] += ": 32-bit "
        if type_strings[2] == ("(DLL)"):
            file_info["cape_type"] += "DLL"
        else:
            file_info["cape_type"] += "executable"
    # PlugX
    if file_info["cape_type_code"] == PLUGX_CONFIG:
        file_info["cape_type"] = "PlugX Config"
        plugx_parser = plugx.PlugXConfig()
        plugx_config = plugx_parser.parse_config(file_data, len(file_data))
        if not "cape_config" in cape_config and plugx_config:
            cape_config["cape_config"] = {}
            for key, value in plugx_config.items():
                cape_config["cape_config"].update({key: [value]})
            cape_name = "PlugX"
        else:
            log.error(
                "CAPE: PlugX config parsing failure - size many not be handled."
            )
        append_file = False
    if file_info["cape_type_code"] == PLUGX_PAYLOAD:
        file_info["cape_type"] = "PlugX Payload"
        type_strings = file_info["type"].split()
        if type_strings[0] == ("PE32+"):
            file_info["cape_type"] += ": 64-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        if type_strings[0] == ("PE32"):
            file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
    # EvilGrab
    if file_info["cape_type_code"] == EVILGRAB_PAYLOAD:
        file_info["cape_type"] = "EvilGrab Payload"
        type_strings = file_info["type"].split()
        if type_strings[0] == ("PE32+"):
            file_info["cape_type"] += ": 64-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        if type_strings[0] == ("PE32"):
            file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
    if file_info["cape_type_code"] == EVILGRAB_DATA:
        cape_name = "EvilGrab"
        file_info["cape_type"] = "EvilGrab Data"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # 256/260-byte dumps hold the file path; larger blobs are kept.
        if file_info["size"] == 256 or file_info["size"] == 260:
            ConfigItem = "filepath"
            ConfigData = format(file_data)
            cape_config["cape_config"].update(
                {ConfigItem: [ConfigData]})
        if file_info["size"] > 0x1000:
            append_file = True
        else:
            append_file = False
    # Sedreco
    if file_info["cape_type_code"] == SEDRECO_DATA:
        cape_name = "Sedreco"
        cape_config["cape_type"] = "Sedreco Config"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # metastrings[4] selects which config field this dump contains.
        if len(metastrings) > 4:
            SedrecoConfigIndex = metastrings[4]
            if SedrecoConfigIndex == '0x0':
                ConfigItem = "Timer1"
            elif SedrecoConfigIndex == '0x1':
                ConfigItem = "Timer2"
            elif SedrecoConfigIndex == '0x2':
                ConfigItem = "Computer Name"
            elif SedrecoConfigIndex == '0x3':
                ConfigItem = "C&C1"
            elif SedrecoConfigIndex == '0x4':
                ConfigItem = "C&C2"
            elif SedrecoConfigIndex == '0x5':
                ConfigItem = "Operation Name"
            elif SedrecoConfigIndex == '0x6':
                ConfigItem = "Keylogger MaxBuffer"
            elif SedrecoConfigIndex == '0x7':
                ConfigItem = "Keylogger MaxTimeout"
            elif SedrecoConfigIndex == '0x8':
                ConfigItem = "Keylogger Flag"
            elif SedrecoConfigIndex == '0x9':
                ConfigItem = "C&C3"
            else:
                ConfigItem = "Unknown"
            ConfigData = format(file_data)
            if ConfigData:
                cape_config["cape_config"].update(
                    {ConfigItem: [ConfigData]})
        append_file = False
    # Cerber
    if file_info["cape_type_code"] == CERBER_CONFIG:
        file_info["cape_type"] = "Cerber Config"
        cape_config["cape_type"] = "Cerber Config"
        cape_name = "Cerber"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        ConfigItem = "JSON Data"
        # NOTE(review): rstrip(b'\0') on a text-mode read — bytes/str
        # mixing that only works on Python 2.
        parsed = json.loads(file_data.rstrip(b'\0'))
        ConfigData = json.dumps(parsed, indent=4, sort_keys=True)
        cape_config["cape_config"].update({ConfigItem: [ConfigData]})
        append_file = True
    if file_info["cape_type_code"] == CERBER_PAYLOAD:
        file_info["cape_type"] = "Cerber Payload"
        cape_config["cape_type"] = "Cerber Payload"
        cape_name = "Cerber"
        type_strings = file_info["type"].split()
        if type_strings[0] == ("PE32+"):
            file_info["cape_type"] += ": 64-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        if type_strings[0] == ("PE32"):
            file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        append_file = True
    # Ursnif
    if file_info["cape_type_code"] == URSNIF_CONFIG:
        file_info["cape_type"] = "Ursnif Config"
        cape_config["cape_type"] = "Ursnif Config"
        cape_name = "Ursnif"
        # Dynamically import the community parser named after the family.
        malwareconfig_loaded = False
        try:
            malwareconfig_parsers = os.path.join(
                CUCKOO_ROOT, "modules", "processing", "parsers",
                "malwareconfig")
            file, pathname, description = imp.find_module(
                cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname,
                                     description)
            malwareconfig_loaded = True
            log.info("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.info(
                "CAPE: malwareconfig.com parser: No module named %s",
                cape_name)
        if malwareconfig_loaded:
            try:
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                malwareconfig_config = module.config(file_data)
                # Parsers return either a list of dicts or a single dict.
                if isinstance(malwareconfig_config, list):
                    for (key, value) in malwareconfig_config[0].iteritems():
                        cape_config["cape_config"].update(
                            {key: [value]})
                elif isinstance(malwareconfig_config, dict):
                    for (key, value) in malwareconfig_config.iteritems():
                        cape_config["cape_config"].update(
                            {key: [value]})
            except Exception as e:
                log.error(
                    "CAPE: malwareconfig parsing error with %s: %s",
                    cape_name, e)
        append_file = False
    # Hancitor
    if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
        cape_name = "Hancitor"
        cape_config["cape_type"] = "Hancitor Payload"
        file_info["cape_type"] = "Hancitor Payload"
    if file_info["cape_type_code"] == HANCITOR_CONFIG:
        cape_name = "Hancitor"
        cape_config["cape_type"] = "Hancitor Config"
        file_info["cape_type"] = "Hancitor Config"
        if not "cape_config" in cape_config:
            cape_config["cape_config"] = {}
        # Config layout: NUL-separated strings — campaign code first,
        # then a '|'-separated list of gate URLs.
        ConfigStrings = file_data.split('\0')
        ConfigStrings = filter(None, ConfigStrings)
        ConfigItem = "Campaign Code"
        cape_config["cape_config"].update(
            {ConfigItem: [ConfigStrings[0]]})
        GateURLs = ConfigStrings[1].split('|')
        for index, value in enumerate(GateURLs):
            ConfigItem = "Gate URL " + str(index + 1)
            cape_config["cape_config"].update({ConfigItem: [value]})
        append_file = False
    # UPX package output
    if file_info["cape_type_code"] == UPX:
        file_info["cape_type"] = "Unpacked PE Image"
        type_strings = file_info["type"].split()
        if type_strings[0] == ("PE32+"):
            file_info["cape_type"] += ": 64-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        if type_strings[0] == ("PE32"):
            file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
    # Process CAPE Yara hits
    for hit in file_info["cape_yara"]:
        # Check to see if file is packed with UPX
        if hit["name"] == "UPX":
            log.info(
                "CAPE: Found UPX Packed sample - attempting to unpack")
            self.upx_unpack(file_data, CAPE_output)
        # Check for a payload or config hit
        try:
            if "payload" in hit["meta"]["cape_type"].lower(
            ) or "config" in hit["meta"]["cape_type"].lower():
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = hit["name"]
        except:
            pass
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        # These families were already parsed above; skip re-parsing.
        suppress_parsing_list = ["Cerber", "Ursnif"]
        if hit["name"] in suppress_parsing_list:
            continue
        # Attempt to import a parser for the hit
        # DC3-MWCP
        mwcp_loaded = False
        if cape_name:
            try:
                mwcp = malwareconfigreporter.malwareconfigreporter(
                    analysis_path=self.analysis_path)
                kwargs = {}
                mwcp.run_parser(cape_name, data=file_data, **kwargs)
                if mwcp.errors == []:
                    log.info("CAPE: Imported DC3-MWCP parser %s", cape_name)
                    mwcp_loaded = True
                else:
                    error_lines = mwcp.errors[0].split("\n")
                    for line in error_lines:
                        if line.startswith('ImportError: '):
                            log.info("CAPE: DC3-MWCP parser: %s",
                                     line.split(': ')[1])
            except ImportError:
                pass
        # malwareconfig (fallback when no DC3-MWCP parser loaded)
        malwareconfig_loaded = False
        if cape_name and mwcp_loaded == False:
            try:
                malwareconfig_parsers = os.path.join(
                    CUCKOO_ROOT, "modules", "processing", "parsers",
                    "malwareconfig")
                file, pathname, description = imp.find_module(
                    cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname,
                                         description)
                malwareconfig_loaded = True
                log.info("CAPE: Imported malwareconfig.com parser %s",
                         cape_name)
            except ImportError:
                log.info(
                    "CAPE: malwareconfig.com parser: No module named %s",
                    cape_name)
        # Get config data
        if mwcp_loaded:
            try:
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                    cape_config["cape_config"] = convert(mwcp.metadata)
                else:
                    cape_config["cape_config"].update(
                        convert(mwcp.metadata))
            except Exception as e:
                log.error(
                    "CAPE: DC3-MWCP config parsing error with %s: %s",
                    cape_name, e)
        elif malwareconfig_loaded:
            try:
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                malwareconfig_config = module.config(file_data)
                if isinstance(malwareconfig_config, list):
                    for (key, value) in malwareconfig_config[0].iteritems():
                        cape_config["cape_config"].update({key: [value]})
                elif isinstance(malwareconfig_config, dict):
                    for (key, value) in malwareconfig_config.iteritems():
                        cape_config["cape_config"].update({key: [value]})
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s",
                          cape_name, e)
    # Drop an empty config dict so the report stays clean.
    if "cape_config" in cape_config:
        if cape_config["cape_config"] == {}:
            del cape_config["cape_config"]
    if cape_name:
        if "cape_config" in cape_config:
            cape_config["cape_name"] = format(cape_name)
        if not "cape" in self.results:
            # UPX is a packer, not a detection in its own right.
            if cape_name != "UPX":
                self.results["cape"] = cape_name
    # Remove duplicate payloads from web ui
    for cape_file in CAPE_output:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                ssdeep_grade = pydeep.compare(file_info["ssdeep"],
                                              cape_file["ssdeep"])
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            # Same entry point and entry bytes => same payload.
            if file_info["entrypoint"] and file_info["entrypoint"] == cape_file["entrypoint"] \
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]:
                append_file = False
    if append_file == True:
        CAPE_output.append(file_info)
    return file_info
def run(self):
    """Fuzzy-match samples by ssdeep.

    In --cluster mode, greedily groups all database samples whose ssdeep
    hashes compare above 40% into clusters and prints every cluster with
    more than one member.  Otherwise compares the currently opened file
    against every sample in the database and reports matches above 40%.
    """
    super(Fuzzy, self).run()

    if not HAVE_PYDEEP:
        self.log('error', "Missing dependency, install pydeep (`pip install pydeep`)")
        return

    arg_verbose = False
    arg_cluster = False
    if self.args:
        if self.args.verbose:
            arg_verbose = self.args.verbose
        if self.args.cluster:
            arg_cluster = self.args.cluster

    db = Database()
    samples = db.find(key='all')

    # Check if we're operating in cluster mode, otherwise we run on the
    # currently opened file.
    if arg_cluster:
        self.log('info', "Generating clusters, this might take a while...")

        clusters = dict()
        for sample in samples:
            if not sample.ssdeep:
                continue

            if arg_verbose:
                self.log('info', "Testing file {0} with ssdeep {1}".format(sample.md5, sample.ssdeep))

            clustered = False
            for cluster_name, cluster_members in clusters.items():
                # Check if sample is already in the cluster.
                # BUG FIX: members are [md5, name] pairs, so the original
                # test `sample.md5 in cluster_members` compared a string
                # against two-element lists and never matched.
                if any(member[0] == sample.md5 for member in cluster_members):
                    continue

                if arg_verbose:
                    self.log('info', "Testing {0} in cluster {1}".format(sample.md5, cluster_name))

                for member in cluster_members:
                    if sample.md5 == member[0]:
                        continue

                    member_hash = member[0]
                    member_ssdeep = db.find(key='md5', value=member_hash)[0].ssdeep
                    if pydeep.compare(self._get_ssdeep_bytes(sample.ssdeep),
                                      self._get_ssdeep_bytes(member_ssdeep)) > 40:
                        if arg_verbose:
                            self.log('info', "Found home for {0} in cluster {1}".format(sample.md5, cluster_name))

                        clusters[cluster_name].append([sample.md5, sample.name])
                        clustered = True
                        break

                if clustered:
                    # Stop scanning further clusters once the sample has
                    # been placed, so it cannot join several clusters.
                    break

            if not clustered:
                cluster_id = len(clusters) + 1
                clusters[cluster_id] = [[sample.md5, sample.name], ]

        ordered_clusters = collections.OrderedDict(sorted(clusters.items()))

        self.log('info', "Following are the identified clusters with more than one member")

        for cluster_name, cluster_members in ordered_clusters.items():
            # We include in the results only clusters with more than just
            # one member.
            if len(cluster_members) <= 1:
                continue

            self.log('info', "Ssdeep cluster {0}".format(bold(cluster_name)))
            self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members))
    # We're running against the already opened file.
    else:
        if not __sessions__.is_set():
            self.log('error', "No open session")
            return

        if not __sessions__.current.file.ssdeep:
            self.log('error', "No ssdeep hash available for opened file")
            return

        matches = []
        for sample in samples:
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            if not sample.ssdeep:
                continue

            score = pydeep.compare(self._get_ssdeep_bytes(__sessions__.current.file.ssdeep),
                                   self._get_ssdeep_bytes(sample.ssdeep))
            if score > 40:
                matches.append(['{0}%'.format(score), sample.name, sample.sha256])

                if arg_verbose:
                    self.log('info', "Match {0}%: {2} [{1}]".format(score, sample.name, sample.sha256))

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if len(matches) > 0:
            self.log('table', dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def render_text(self, outfd, data):
    """Render suspected injected VADs as text, with optional ssdeep filtering.

    For each task produced by calculate, walks its injection-filtered
    VADs, optionally skips regions whose first page fuzzy-matches a
    whitelisted ssdeep hash (--ssdeep), writes a hexdump/disassembly
    preview of the region to `outfd`, and optionally dumps the VAD to
    --dump-dir.
    """
    if not has_distorm3:
        debug.warning("For best results please install distorm3")

    if self._config.DUMP_DIR and not os.path.isdir(self._config.DUMP_DIR):
        debug.error(self._config.DUMP_DIR + " is not a directory")

    for task in data:
        for vad, address_space in task.get_vads(vad_filter=task._injection_filter):
            if self._is_vad_empty(vad, address_space):
                continue

            if self._config.SSDEEP and has_pydeep:
                skip = False
                # read the first page of the VAD then hash it
                inject_buf = address_space.zread(vad.Start, 0x1000)
                inject_hash = pydeep.hash_buf(inject_buf)
                # loop through all the whitelist hashes and compare
                for (whitelist_name, whitelist_hash) in whitelist_ssdeep:
                    alike = pydeep.compare(inject_hash, whitelist_hash)
                    # the comparison is greater than the threshold so display an informational message
                    # then skip the rest of the output in normal malfind
                    if alike > self._config.THRESHOLD:
                        outfd.write("Process: {0} Pid: {1} Address: {2:#x}\n".format(
                            task.ImageFileName, task.UniqueProcessId, vad.Start))
                        outfd.write("Injection is {0}% similar to whitelist hook {1}\n".format(alike, whitelist_name))
                        #outfd.write("     hook: {0}\n".format(inject_hash))
                        #outfd.write("whitelist: {0}\n".format(whitelist_hash))
                        skip = True
                        continue
                if skip:
                    continue

            # Preview only the first 64 bytes of the region.
            content = address_space.zread(vad.Start, 64)

            outfd.write("Process: {0} Pid: {1} Address: {2:#x}\n".format(
                task.ImageFileName, task.UniqueProcessId, vad.Start))

            outfd.write("Vad Tag: {0} Protection: {1}\n".format(
                vad.Tag, vadinfo.PROTECT_FLAGS.get(vad.u.VadFlags.Protection.v(), "")))

            outfd.write("Flags: {0}\n".format(str(vad.u.VadFlags)))
            outfd.write("\n")

            # Hexdump of the preview bytes.
            outfd.write("{0}\n".format("\n".join(
                ["{0:#010x}  {1:<48}  {2}".format(vad.Start + o, h, ''.join(c))
                 for o, h, c in utils.Hexdump(content)
                 ])))

            outfd.write("\n")
            # Disassembly of the same preview bytes.
            outfd.write("\n".join(
                ["{0:#x} {1:<16} {2}".format(o, h, i)
                 for o, i, h in malfind.Disassemble(content, vad.Start)
                 ]))

            # Dump the data if --dump-dir was supplied
            if self._config.DUMP_DIR:
                filename = os.path.join(self._config.DUMP_DIR, "process.{0:#x}.{1:#x}.dmp".format(
                    task.obj_offset, vad.Start))
                self.dump_vad(filename, vad, address_space)

            outfd.write("\n\n")
def run(self):
    """Compare the opened session file's ssdeep hash against every sample
    in the database and report those scoring above 40%."""

    def _usage():
        print("usage: fuzzy [-v]")

    def _help():
        _usage()
        print("")
        print("Options:")
        print("\t--help (-h)\tShow this help message")
        print("\t--verbose (-v)\tPrints verbose logging")
        print("")

    # Guard clauses: need an open session, pydeep, and a stored hash.
    if not __sessions__.is_set():
        print_error("No session opened")
        return
    if not HAVE_PYDEEP:
        print_error("Missing dependency, install pydeep (`pip install pydeep`)")
        return
    current_file = __sessions__.current.file
    if not current_file.ssdeep:
        print_error("No ssdeep hash available for opened file")
        return

    verbose = False
    try:
        opts, argv = getopt.getopt(self.args[0:], 'hv', ['help', 'verbose'])
    except getopt.GetoptError as e:
        print(e)
        return
    for opt, value in opts:
        if opt in ('-h', '--help'):
            _help()
            return
        elif opt in ('-v', '--verbose'):
            verbose = True

    matches = []
    for sample in Database().find(key='all'):
        # Skip the opened file itself and samples with no stored hash.
        if sample.sha256 == current_file.sha256 or not sample.ssdeep:
            continue
        score = pydeep.compare(current_file.ssdeep, sample.ssdeep)
        if score > 40:
            matches.append(['{0}%'.format(score), sample.name, sample.sha256])
            if verbose:
                print("Match {0}%: {2} [{1}]".format(score, sample.name, sample.sha256))

    print_info("{0} relevant matches found".format(bold(len(matches))))
    if len(matches) > 0:
        print(table(header=['Score', 'Name', 'SHA256'], rows=matches))
import pydeep

# Self-test for pydeep: hash three fixture files (buffer and file APIs)
# and verify the results against known-good ssdeep values.
file1 = 'calc.exe'
file2 = 'notepad.exe'
file3 = 'bc'

file1hash = '1536:JEl14rQcWAkN7GAlqbkfAGQGV8aMbrNyrf1w+noPvLV6eBsCXKc:JYmZWXyaiedMbrN6pnoXL1BsC'
file2hash = '1536:0awOnbNQKLjWDyy1o5RefYMJUEbooPRrKKRl1P3:0YNQKPWDyDRefVJltZrpRl1P3'
file3hash = '1536:MsjYdR3Bul8hcURWhEcg4/btZzDcQflbCUPEBEh8wkcGDioxMYeo7:TYf8l8htRWA4ztZsGlWUPEBEh8wmxMYe'

# Read as bytes and close promptly: the original used bare open() in text
# mode, which leaked the handles and would mangle binary content (and
# break pydeep.hash_buf) on Python 3.
with open(file1, 'rb') as f:
    data1 = f.read()
with open(file2, 'rb') as f:
    data2 = f.read()
with open(file3, 'rb') as f:
    data3 = f.read()

assert len(data1) == 114688, "File length error"
assert len(data2) == 69120, "File length error"
assert len(data3) == 77168, "File length error"

hash01 = pydeep.hash_buf(data1)
hash02 = pydeep.hash_buf(data2)
hash03 = pydeep.hash_buf(data3)

assert hash01 == file1hash, "Error hashing file1"
assert hash02 == file2hash, "Error hashing file2"
# Fixed copy-paste: this message previously said "file2".
assert hash03 == file3hash, "Error hashing file3"

hash1 = pydeep.hash_file(file1)
hash2 = pydeep.hash_file(file2)
hash3 = pydeep.hash_file(file3)

assert hash1 == file1hash, "Error hashing file1"
assert hash2 == file2hash, "Error hashing file2"
assert hash3 == file3hash, "Error hashing file3"

# calc.exe and notepad.exe share nothing: expect a 0% match.
assert pydeep.compare(hash1, hash2) == 0, "Error fuzzy compare value"

# print() as a function: the original used the Python 2 print statement.
print('Stuff looks fine..')