def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): if not dmp.endswith(".dmp"): continue dump_path = os.path.join(self.pmemory_path, dmp) dump_file = File(dump_path) if "-" in os.path.basename(dump_path): pid = int(os.path.basename(dump_path).split("-")[0]) else: pid = int(os.path.basename(dump_path).split(".")[0]) proc = dict( file=dump_path, pid=pid, yara=dump_file.get_yara("memory"), urls=list(dump_file.get_urls()), regions=list(self.read_dump(dump_path)), ) if self.options.get("idapro"): self.create_idapy(proc) results.append(proc) return results
def run(self): """Run analysis. @return: list of dropped files with related information. """ self.key = "dropped" dropped_files, meta = [], {} if os.path.exists(self.dropped_meta_path): for line in open(self.dropped_meta_path, "rb"): entry = json.loads(line) filepath = os.path.join(self.analysis_path, entry["path"]) meta[filepath] = { "pids": entry["pids"], "filepath": entry["filepath"], } for dir_name, dir_names, file_names in os.walk(self.dropped_path): for file_name in file_names: file_path = os.path.join(dir_name, file_name) file_info = File(file_path=file_path).get_all() file_info.update(meta.get(file_info["path"], {})) dropped_files.append(file_info) for dir_name, dir_names, file_names in os.walk(self.package_files): for file_name in file_names: file_path = os.path.join(dir_name, file_name) file_info = File(file_path=file_path).get_all() dropped_files.append(file_info) return dropped_files
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): dmp_path = os.path.join(self.pmemory_path, dmp) dmp_file = File(dmp_path) process_name = "" process_path = "" process_id = int(os.path.splitext(os.path.basename(dmp_path))[0]) if "behavior" in self.results and "processes" in self.results["behavior"]: for process in self.results["behavior"]["processes"]: if process_id == process["process_id"]: process_name = process["process_name"] process_path = process["module_path"] proc = dict( file=dmp_path, pid=process_id, name=process_name, path=process_path, yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")), address_space=self.parse_dump(dmp_path) ) results.append(proc) return results
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): dmp_path = os.path.join(self.pmemory_path, dmp) dmp_file = File(dmp_path) # Let's hope the file is not too big. buf = open(dmp_path, "rb").read() urls = set() for url in re.findall(HTTP_REGEX, buf): if not is_whitelisted_domain(url[1]): urls.add("".join(url)) proc = dict( file=dmp_path, pid=os.path.splitext(os.path.basename(dmp_path))[0], yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")), urls=list(urls), ) results.append(proc) return results
def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"] or not HAVE_ANDROGUARD: return f = File(self.task["target"]) if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) manifest["package"] = a.get_package() # manifest["permissions"]=a.get_details_permissions_new() manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() # manifest["receivers_actions"]=a.get__extended_receivers() manifest["providers"] = a.get_providers() manifest["libraries"] = a.get_libraries() apkinfo["manifest"] = manifest # apkinfo["certificate"] = a.get_certificate() static_calls = {} if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = uVMAnalysis(vm) static_calls["all_methods"] = self.get_methods(vmx) static_calls["is_native_code"] = analysis.is_native_code(vmx) static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx) static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx) # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx) # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx) # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx) # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx) # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx) else: log.warning("Dex size bigger than: %s", self.options.decompilation_threshold) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if self.options.get("extract_img") and not HAVE_PEFILE: log.warning( "In order to extract PE files from memory dumps it is " "required to have pefile installed (`pip install pefile`)." ) if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): if not dmp.endswith(".dmp"): continue dump_path = os.path.join(self.pmemory_path, dmp) dump_file = File(dump_path) pid, num = map(int, re.findall("(\\d+)", dmp)) proc = dict( file=dump_path, pid=pid, num=num, yara=dump_file.get_yara("memory"), urls=list(dump_file.get_urls()), regions=list(self.read_dump(dump_path)), ) if self.options.get("idapro"): self.create_idapy(proc) if self.options.get("extract_img") and HAVE_PEFILE: proc["extracted"] = list(self.dump_images(proc)) if self.options.get("dump_delete"): try: os.remove(dump_path) except OSError: log.error("Unable to delete memory dump file at path \"%s\"", dump_path) results.append(proc) results.sort(key=lambda x: (x["pid"], x["num"])) return results
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] for dmp in os.listdir(self.pmemory_path): dmp_path = os.path.join(self.pmemory_path, dmp) dmp_file = File(dmp_path) proc = dict( yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")) ) results.append(proc) return results
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): if not dmp.endswith(".dmp"): continue dump_path = os.path.join(self.pmemory_path, dmp) dump_file = File(dump_path) dump_name = os.path.basename(dump_path) pid = int(re.findall("(\d{2,5})", dump_name)[0]) proc = dict( file=dump_path, pid=pid, yara=dump_file.get_yara("memory"), urls=list(dump_file.get_urls()), regions=list(self.read_dump(dump_path)), ) if self.options.get("idapro"): self.create_idapy(proc) if self.options.get("dump_delete"): try: os.remove(dump_path) except OSError: log.error("Unable to delete memory dump file at path \"%s\"", dump_path) results.append(proc) return results
def demux_sample(filename, package, options):
    """
    If file is a ZIP, extract its included files and return their file paths.
    If file is an email, extract its attachments and return their file paths
    (later we'll also extract URLs).
    If file is a password-protected Office doc and password is supplied,
    return path to decrypted doc.
    """
    magic = File(filename).get_type()

    # if file is an Office doc and password is supplied, try to decrypt the doc
    if "Microsoft" in magic or "Composite Document File" in magic or "CDFV2 Encrypted" in magic:
        password = None
        if "password=" in options:
            fields = options.split(",")
            for field in fields:
                try:
                    key, value = field.split("=", 1)
                    if key == "password":
                        password = value
                        break
                except:
                    pass
        if password:
            return demux_office(filename, password)
        else:
            return [filename]

    # if a package was specified, then don't do anything special
    # this will allow for the ZIP package to be used to analyze binaries with included DLL dependencies
    # do the same if file= is specified in the options
    if package or "file=" in options:
        return [filename]

    # don't try to extract from Java archives or executables
    if "Java Jar" in magic:
        return [filename]
    if "PE32" in magic or "MS-DOS executable" in magic:
        return [filename]

    retlist = demux_zip(filename, options)
    if not retlist:
        retlist = demux_rar(filename, options)
    if not retlist:
        retlist = demux_tar(filename, options)
    if not retlist:
        retlist = demux_email(filename, options)
    if not retlist:
        retlist = demux_msg(filename, options)
    if not retlist:
        retlist = demux_tnef(filename, options)

    # handle ZIPs/RARs inside extracted files
    if retlist:
        newretlist = []
        for item in retlist:
            zipext = demux_zip(item, options)
            if zipext:
                newretlist.extend(zipext)
            else:
                rarext = demux_rar(item, options)
                if rarext:
                    newretlist.extend(rarext)
                else:
                    tarext = demux_tar(item, options)
                    if tarext:
                        newretlist.extend(tarext)
                    else:
                        newretlist.append(item)
        retlist = newretlist

    # if it wasn't a ZIP or an email or we weren't able to obtain anything
    # interesting from either, then just submit the original file
    if not retlist:
        retlist.append(filename)

    return retlist
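# A brief usage sketch of the demuxer above. The paths are hypothetical and the
# helper functions (demux_zip, demux_office, ...) are assumed to be importable
# from the same module; this is illustration, not a test of real files.
paths = demux_sample("/tmp/incoming/suspicious.zip", package=None, options="")
for path in paths:
    print("would submit:", path)

# With an analysis package or an explicit file= option the archive is passed
# through untouched, e.g. to analyze a binary together with bundled DLLs.
paths = demux_sample("/tmp/incoming/bundle.zip", package="zip", options="file=run_me.exe")
print(paths)  # expected to be just the original archive path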
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to S3.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    self.s3_region = self.options.get("region", "us-west-2")
    self.s3_access_key = self.options.get("access_key", "")
    self.s3_secret_key = self.options.get("secret_key", "")
    s3_reports_bucket_name = self.options.get("reports_bucket", "")
    s3_shots_bucket_name = self.options.get("shots_bucket", "")
    s3_samples_bucket_name = self.options.get("samples_bucket", "")
    s3_files_bucket_name = self.options.get("files_bucket", "")
    s3_aux_bucket_name = self.options.get("aux_bucket", "")
    s3_logs_bucket_name = self.options.get("logs_bucket", "")
    s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
    s3_md5_bucket_name = self.options.get("md5_bucket", "")
    cleanup = self.options.get("cleanup", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            chunk_count = 0
            # Using this type of prefix is useful because you can always
            # re-construct it from the original results.
            # chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
            chunk_prefix = str(results['info']['id']) + '/' + str(process['process_id'])

            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in S3.
                if len(chunk) == 100:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    # log.debug("INFO TIME!")
                    # log.debug("%s %s %s" % (s3_reports_bucket_name, chunk_name, chunk_prefix))
                    # log.debug(chunk_prefix)
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                    if err != '':
                        log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
                    chunk_count += 1
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                # log.debug("%s %s %s" % (s3_reports_bucket_name, chunk_name, chunk_prefix))
                err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                if err != '':
                    log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                else:
                    chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI
    if "virustotal" in results and results["virustotal"] and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"], results["virustotal"]["total"])
    if "suricata" in results and results["suricata"]:
        if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
            report["suri_http_cnt"] = len(results["suricata"]["http"])
        if "ssh" in results["suricata"] and len(results["suricata"]["ssh"]) > 0:
            report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
        if "dns" in results["suricata"] and len(results["suricata"]["dns"]) > 0:
            report["suri_dns_cnt"] = len(results["suricata"]["dns"])

    # Store the report (its 'object id' is simply the analysis id).
    # First make sure it's not too big (5GB limit).
    data = json.dumps(report)
    if len(data) < 5000000000:
        err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data)
        if err != '':
            log.error("Non-size related issue saving analysis JSON to S3 for report {0} - {1}".format(results['info']['id'], err))
    else:
        log.error("JSON for analysis id {0} is greater than 5GB".format(results['info']['id']))

    # Process the rest of the analysis files and put them in S3.
    if s3_shots_bucket_name != '':
        shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots")
        self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name)

    if s3_pcap_bucket_name != '':
        if os.path.isfile(self.analysis_path + '/dump.pcap'):
            with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name, "{0}/dump.pcap".format(results['info']['id']), infile.read())
        if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
            with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name, "{0}/dump_sorted.pcap".format(results['info']['id']), infile.read())

    if s3_aux_bucket_name != '':
        aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux")
        self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name)

    if s3_logs_bucket_name != '':
        logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs")
        self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name)

    if s3_samples_bucket_name != '':
        sample = os.path.realpath(self.analysis_path + '/binary')
        with open(sample, 'rb') as infile:
            self.save_to_s3(s3_samples_bucket_name, results['target']['file']['sha256'], infile.read())

    # log.debug(s3_files_bucket_name)
    if s3_files_bucket_name != '':
        # log.debug(self.analysis_path)
        for root, dirnames, filenames in os.walk(self.analysis_path + '/files'):
            # log.debug(filenames)
            for filename in filenames:
                key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1] + '/' + filename
                if not filename.endswith('_info.txt'):
                    key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1]
                # log.debug(key_name)
                with open(os.path.join(root, filename), 'rb') as infile:
                    self.save_to_s3(s3_files_bucket_name, key_name, infile.read())

    if s3_md5_bucket_name != '':
        info = {}
        info.update(report['info'])
        info.update(report['target'])
        self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'], json.dumps(info))

    if cleanup:
        shutil.rmtree(self.analysis_path)
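# The save_to_s3() and relocate_to_s3() helpers are referenced above but not
# shown here. A hedged sketch of what save_to_s3() could look like with boto3,
# following the convention above of returning "" on success and an error string
# otherwise; the name and return convention come from the calls above, the
# boto3 usage is my assumption (the original module may use another client).
import boto3
from botocore.exceptions import BotoCoreError, ClientError

def save_to_s3(self, bucket_name, key_name, data):
    """Upload one object to S3; return "" on success or the error text."""
    try:
        client = boto3.client(
            "s3",
            region_name=self.s3_region,
            aws_access_key_id=self.s3_access_key,
            aws_secret_access_key=self.s3_secret_key,
        )
        client.put_object(Bucket=bucket_name, Key=str(key_name), Body=data)
        return ""
    except (BotoCoreError, ClientError) as e:
        return str(e)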
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    self.db.fs.files.ensure_index("sha256", unique=True,
                                  sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Store the sample in GridFS.
    if results["info"]["category"] == "file":
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"] = {"pcap_id": pcap_id}
        report["network"].update(results["network"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    for dropped in report["dropped"]:
        new_drop = dict(dropped)
        drop = File(dropped["path"])
        if drop.valid():
            dropped_id = self.store_file(drop, filename=dropped["name"])
            new_drop["object_id"] = dropped_id

        new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report["behavior"]["processes"]:
        new_process = dict(process)

        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []

            # Append call to the chunk.
            chunk.append(call)

        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)

        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.disconnect()
def index(request, resubmit_hash=False): if request.method == "POST": static, package, timeout, priority, options, machine, platform, tags, custom, memory, \ clock, enforce_timeout, shrike_url, shrike_msg, shrike_sid, shrike_refer, unique, referrer, \ tlp = parse_request_arguments(request) # This is done to remove spaces in options but not breaks custom paths options = ",".join("=".join(value.strip() for value in option.split("=", 1)) for option in options.split(",") if option and "=" in option) opt_filename = get_user_filename(options, custom) if priority and web_conf.public.enabled and web_conf.public.priority: priority = web_conf.public.priority if timeout and web_conf.public.enabled and web_conf.public.timeout: timeout = web_conf.public.timeout if options: options += "," if referrer: options += "referrer=%s," % (referrer) if request.POST.get("free"): options += "free=yes," if request.POST.get("nohuman"): options += "nohuman=yes," if request.POST.get("tor"): options += "tor=yes," if request.POST.get("route", None): options += "route={0},".format(request.POST.get("route", None)) if request.POST.get("process_dump"): options += "procdump=0," if request.POST.get("process_memory"): options += "procmemdump=1," if request.POST.get("import_reconstruction"): options += "import_reconstruction=1," if request.POST.get("disable_cape"): options += "disable_cape=1," if request.POST.get("kernel_analysis"): options += "kernel_analysis=yes," if request.POST.get("norefer"): options += "norefer=1," if request.POST.get("oldloader"): options += "loader=oldloader.exe,loader_64=oldloader_x64.exe," if request.POST.get("unpack"): options += "unpack=yes," options = options[:-1] task_machines = [] opt_apikey = False opts = get_options(options) if opts: opt_apikey = opts.get("apikey", False) status = "ok" task_ids_tmp = list() details = { "errors": [], "content": False, "request": request, "task_ids": [], "url": False, "params": {}, "headers": {}, "service": "Local", "path": "", "fhash": False, "options": options, "only_extraction": False, "task_machines": task_machines, } if "hash" in request.POST and request.POST.get( "hash", False) and request.POST.get("hash")[0] != '': resubmission_hash = request.POST.get("hash").strip() paths = db.sample_path_by_hash(resubmission_hash) if paths: content = get_file_content(paths) if not content: return render( request, "error.html", { "error": "Can't find {} on disk, {}".format( resubmission_hash, str(paths)) }) folder = os.path.join(settings.TEMP_PATH, "cape-resubmit") if not os.path.exists(folder): os.makedirs(folder) base_dir = tempfile.mkdtemp(prefix='resubmit_', dir=folder) if opt_filename: filename = base_dir + "/" + opt_filename else: filename = base_dir + "/" + sanitize_filename( resubmission_hash) path = store_temp_file(content, filename) details["path"] = path details["content"] = content status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append( {os.path.basename(filename): task_ids_tmp}) else: details["task_ids"] = task_ids_tmp else: return render( request, "error.html", {"error": "File not found on hdd for resubmission"}) elif "sample" in request.FILES: samples = request.FILES.getlist("sample") details["service"] = "WebGUI" for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. 
if not sample.size: details["errors"].append( {sample.name: "You uploaded an empty file."}) continue elif sample.size > settings.MAX_UPLOAD_SIZE: details["errors"].append({ sample.name: "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) continue if opt_filename: filename = opt_filename else: filename = sanitize_filename(sample.name) # Moving sample from django temporary file to CAPE temporary storage to let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), filename) if unique and db.check_file_uniq(File(path).get_sha256()): return render( request, "error.html", { "error": "Duplicated file, disable unique option to force submission" }) if timeout and web_conf.public.enabled and web_conf.public.timeout and timeout > web_conf.public.timeout: timeout = web_conf.public.timeout magic_type = get_magic_type(path) platform = get_platform(magic_type) if machine.lower() == "all": details["task_machines"] = [ vm.name for vm in db.list_machines(platform=platform) ] elif machine: machine_details = db.view_machine(machine) if hasattr(machine_details, "platform" ) and not machine_details.platform == platform: return render( request, "error.html", { "error": "Wrong platform, {} VM selected for {} sample". format(machine_details.platform, platform) }, ) else: details["task_machines"] = [machine] else: details["task_machines"] = ["first"] details["path"] = path details["content"] = get_file_content(path) status, task_ids = download_file(**details) elif "quarantine" in request.FILES: samples = request.FILES.getlist("quarantine") for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty quarantine file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a quarantine file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). tmp_path = store_temp_file(sample.read(), sample.name) path = unquarantine(tmp_path) try: os.remove(tmp_path) except Exception as e: print(e) if not path: return render(request, "error.html", { "error": "You uploaded an unsupported quarantine file." 
}) if machine.lower() == "all": task_machines = [ vm.name for vm in db.list_machines(platform="windows") ] elif machine: machine_details = db.view_machine(machine) if not machine_details.platform == "windows": return render( request, "error.html", { "error": "Wrong platform, linux VM selected for {} sample" .format(machine_details.platform) }) else: task_machines = [machine] details["path"] = path details["content"] = get_file_content(path) status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({sample.name: task_ids_tmp}) else: details["task_ids"] = task_ids_tmp elif "static" in request.FILES: samples = request.FILES.getlist("static") for sample in samples: if not sample.size: if len(samples) != 1: continue return render(request, "error.html", {"error": "You uploaded an empty file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), sample.name) task_id = db.add_static(file_path=path, priority=priority, tlp=tlp) if not task_id: return render( request, "error.html", {"error": "We don't have static extractor for this"}) task_ids.append(task_id) elif "pcap" in request.FILES: samples = request.FILES.getlist("pcap") for sample in samples: if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty PCAP file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a PCAP file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), sample.name) if sample.name.lower().endswith(".saz"): saz = saz_to_pcap(path) if saz: try: os.remove(path) except Exception as e: pass path = saz else: return render( request, "error.html", {"error": "Conversion from SAZ to PCAP failed."}) task_id = db.add_pcap(file_path=path, priority=priority, tlp=tlp) if task_id: task_ids.append(task_id) elif "url" in request.POST and request.POST.get("url").strip(): url = request.POST.get("url").strip() if not url: return render(request, "error.html", {"error": "You specified an invalid URL!"}) url = url.replace("hxxps://", "https://").replace( "hxxp://", "http://").replace("[.]", ".") if machine.lower() == "all": details["task_machines"] = [ vm.name for vm in db.list_machines(platform="windows") ] elif machine: machine_details = db.view_machine(machine) if hasattr(machine_details, "platform" ) and not machine_details.platform == "windows": details["errors"].append({ os.path.basename(url): "Wrong platform, linux VM selected for {} sample". 
format(machine_details.platform) }) else: details["task_machines"] = [machine] details["path"] = path details["content"] = get_file_content(path) status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({sample.name: task_ids_tmp}) else: details["task_ids"] = task_ids_tmp elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip(): url = request.POST.get("dlnexec").strip() if not url: return render(request, "error.html", {"error": "You specified an invalid URL!"}) url = url.replace("hxxps://", "https://").replace( "hxxp://", "http://").replace("[.]", ".") response = _download_file(request.POST.get("route", None), url, options) if not response: return render(request, "error.html", {"error": "Was impossible to retrieve url"}) name = os.path.basename(url) if not "." in name: name = get_user_filename(options, custom) or generate_fake_name() if machine.lower() == "all": details["task_machines"] = [ vm.name for vm in db.list_machines(platform=platform) ] elif machine: machine_details = db.view_machine(machine[0]) if hasattr(machine_details, "platform" ) and not machine_details.platform == platform: return render( request, "error.html", { "error": "Wrong platform, {} VM selected for {} sample". format(machine_details.platform, platform) }) else: details["task_machines"] = [machine] path = store_temp_file(response, name) details["path"] = path details["content"] = get_file_content(path) details["service"] = "DLnExec" status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({name: task_ids_tmp}) else: details["task_ids"] = task_ids_tmp elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get( "vtdl", False) and request.POST.get("vtdl")[0] != "": if not settings.VTDL_KEY or not settings.VTDL_PATH: return render( request, "error.html", { "error": "You specified VirusTotal but must edit the file and specify your VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory" }) else: if opt_apikey: details["apikey"] = opt_apikey details = download_from_vt( request.POST.get("vtdl").strip(), details, opt_filename, settings) if details.get("task_ids"): tasks_count = len(details["task_ids"]) else: tasks_count = 0 if tasks_count > 0: data = { "tasks": details["task_ids"], "tasks_count": tasks_count, "errors": details["errors"] } return render(request, "submission/complete.html", data) else: return render( request, "error.html", { "error": "Error adding task(s) to CAPE's database.", "errors": details["errors"] }) else: enabledconf = dict() enabledconf["vt"] = settings.VTDL_ENABLED enabledconf["kernel"] = settings.OPT_ZER0M0N enabledconf["memory"] = processing.memory.get("enabled") enabledconf["procmemory"] = processing.procmemory.get("enabled") enabledconf["dlnexec"] = settings.DLNEXEC enabledconf["url_analysis"] = settings.URL_ANALYSIS enabledconf["tags"] = False enabledconf[ "dist_master_storage_only"] = repconf.distributed.master_storage_only enabledconf["linux_on_gui"] = web_conf.linux.enabled enabledconf["tlp"] = web_conf.tlp.enabled if all_vms_tags: enabledconf["tags"] = True if not enabledconf["tags"]: # load multi machinery tags: # Get enabled machinery machinery = cfg.cuckoo.get("machinery") if machinery == "multi": for mmachinery in Config(machinery).multi.get( "machinery").split(","): vms = [ x.strip() for x in getattr(Config( mmachinery), mmachinery).get("machines").split(",") ] if any([ "tags" in list(getattr(Config(mmachinery), vmtag).keys()) for vmtag in vms ]): enabledconf["tags"] 
= True break else: # Get VM names for machinery config elements vms = [ x.strip() for x in getattr(Config( machinery), machinery).get("machines").split(",") ] # Check each VM config element for tags if any([ "tags" in list(getattr(Config(machinery), vmtag).keys()) for vmtag in vms ]): enabledconf["tags"] = True packages, machines = get_form_data("windows") socks5s = _load_socks5_operational() socks5s_random = "" if socks5s: socks5s_random = random.choice(list(socks5s.values())).get( "description", False) return render( request, "submission/index.html", { "packages": sorted(packages), "machines": machines, "vpns": list(vpns.values()), "socks5s": list(socks5s.values()), "socks5s_random": socks5s_random, "route": routing.routing.route, "internet": routing.routing.internet, "inetsim": routing.inetsim.enabled, "tor": routing.tor.enabled, "config": enabledconf, "resubmit": resubmit_hash, "tags": sorted(list(set(all_vms_tags))), }, )
def index(request, resubmit_hash=False): if request.method == "POST": package = request.POST.get("package", "") timeout = min(force_int(request.POST.get("timeout")), 60 * 60 * 24) options = request.POST.get("options", "") lin_options = request.POST.get("lin_options", "") priority = force_int(request.POST.get("priority")) machine = request.POST.get("machine", "") clock = request.POST.get( "clock", datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")) if not clock: clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S") if "1970" in clock: clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S") custom = request.POST.get("custom", "") memory = bool(request.POST.get("memory", False)) enforce_timeout = bool(request.POST.get("enforce_timeout", False)) referrer = validate_referrer(request.POST.get("referrer", None)) tags = request.POST.get("tags", None) static = bool(request.POST.get("static", False)) all_tags = load_vms_tags() if tags and not all( [tag.strip() in all_tags for tag in tags.split(",")]): return render(request, "error.html", { "error": "Check Tags help, you have introduced incorrect tag(s)" }) if lin_options: options = lin_options # This is done to remove spaces in options but not breaks custom paths options = ','.join('='.join(value.strip() for value in option.split("=", 1)) for option in options.split(",") if option and '=' in option) opt_filename = get_user_filename(options, custom) if referrer: if options: options += "," options += "referrer=%s" % (referrer) if request.POST.get("free"): if options: options += "," options += "free=yes" if request.POST.get("nohuman"): if options: options += "," options += "nohuman=yes" if request.POST.get("tor"): if options: options += "," options += "tor=yes" if request.POST.get("route", None): if options: options += "," options += "route={0}".format(request.POST.get("route", None)) if request.POST.get("process_dump"): if options: options += "," options += "procmemdump=1,procdump=1" if request.POST.get("process_memory"): if options: options += "," options += "procmemdump=1,procdump=1" if request.POST.get("import_reconstruction"): if options: options += "," options += "import_reconstruction=1" if request.POST.get("disable_cape"): if options: options += "," options += "disable_cape=1" if request.POST.get("kernel_analysis"): if options: options += "," options += "kernel_analysis=yes" if request.POST.get("norefer"): if options: options += "," options += "norefer=1" if request.POST.get("oldloader"): if options: options += "," options += "loader=oldloader.exe,loader_64=oldloader_x64.exe" if request.POST.get("unpack"): if options: options += "," options += "unpack=yes" unique = request.POST.get("unique", False) orig_options = options task_ids = [] task_machines = [] status = "ok" failed_hashes = list() task_ids_tmp = list() if "hash" in request.POST and request.POST.get( "hash", False) and request.POST.get("hash")[0] != '': resubmission_hash = request.POST.get("hash").strip() paths = db.sample_path_by_hash(resubmission_hash) if paths: paths = [ _f for _f in [ path if os.path.exists(path) else False for path in paths ] if _f ] if not paths and FULL_DB: tasks = results_db.analysis.find( {"dropped.sha256": resubmission_hash}) if tasks: for task in tasks: # grab task id and replace in path if needed aka distributed hack path = os.path.join(settings.CUCKOO_PATH, "storage", "analyses", str(task["info"]["id"]), "files", resubmission_hash) if os.path.exists(path): paths = [path] break if paths: content = False content = get_file_content(paths) if 
not content: return render( request, "error.html", { "error": "Can't find {} on disk, {}".format( resubmission_hash, str(paths)) }) base_dir = tempfile.mkdtemp(prefix='resubmit_', dir=settings.TEMP_PATH) if opt_filename: filename = base_dir + "/" + opt_filename else: filename = base_dir + "/" + sanitize_filename( resubmission_hash) path = store_temp_file(content, filename) headers = {} url = 'local' params = {} status, task_ids = download_file( False, content, request, db, task_ids, url, params, headers, "Local", path, package, timeout, options, priority, machine, clock, custom, memory, enforce_timeout, referrer, tags, orig_options, "", static) else: return render( request, "error.html", {"error": "File not found on hdd for resubmission"}) elif "sample" in request.FILES: samples = request.FILES.getlist("sample") for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. if not sample.size: if len(samples) != 1: continue return render(request, "error.html", {"error": "You uploaded an empty file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a file that exceeds the maximum allowed upload size " "specified in web/web/local_settings.py." }) if opt_filename: filename = opt_filename else: filename = sample.name # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), filename) if unique and db.check_file_uniq(File(path).get_sha256()): return render( request, "error.html", { "error": "Duplicated file, disable unique option to force submission" }) magic_type = get_magic_type(path) if disable_x64 is True: if magic_type and ("x86-64" in magic_type or "PE32+" in magic_type): if len(samples) == 1: return render( request, "error.html", {"error": "Sorry no x64 support yet"}) else: continue orig_options, timeout, enforce_timeout = recon( path, orig_options, timeout, enforce_timeout) platform = get_platform(magic_type) if machine.lower() == "all": task_machines = [ vm.name for vm in db.list_machines(platform=platform) ] elif machine: machine_details = db.view_machine(machine) if not machine_details.platform == platform: return render( request, "error.html", { "error": "Wrong platform, {} VM selected for {} sample". format(machine_details.platform, platform) }) else: task_machines.append(machine) else: task_machines.append("first") for entry in task_machines: if entry == "first": entry = None try: task_ids_new = db.demux_sample_and_add_to_db( file_path=path, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, platform=platform, enforce_timeout=enforce_timeout, tags=tags, clock=clock, static=static) task_ids.extend(task_ids_new) except CuckooDemuxError as err: return render(request, "error.html", {"error": err}) elif "quarantine" in request.FILES: samples = request.FILES.getlist("quarantine") for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. 
if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty quarantine file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a quarantine file that exceeds the maximum \ allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). tmp_path = store_temp_file(sample.read(), sample.name) path = unquarantine(tmp_path) try: os.remove(tmp_path) except Exception as e: pass if not path: return render(request, "error.html", { "error": "You uploaded an unsupported quarantine file." }) if machine.lower() == "all": task_machines = [ vm.name for vm in db.list_machines(platform="windows") ] elif machine: machine_details = db.view_machine(machine) if not machine_details.platform == "windows": return render( request, "error.html", { "error": "Wrong platform, linux VM selected for {} sample" .format(machine_details.platform) }) else: task_machines.append(machine) if not task_machines: task_machines.append("first") for entry in task_machines: if entry == "first": entry = None task_ids_new = db.demux_sample_and_add_to_db( file_path=path, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, tags=tags, enforce_timeout=enforce_timeout, clock=clock) if task_ids_new: task_ids.extend(task_ids_new) elif "static" in request.FILES: samples = request.FILES.getlist("static") for sample in samples: if not sample.size: if len(samples) != 1: continue return render(request, "error.html", {"error": "You uploaded an empty file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a file that exceeds the maximum \ allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), sample.name) task_id = db.add_static(file_path=path, priority=priority) if not task_id: return render( request, "error.html", {"error": "We don't have static extractor for this"}) task_ids.append(task_id) elif "pcap" in request.FILES: samples = request.FILES.getlist("pcap") for sample in samples: if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty PCAP file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a PCAP file that exceeds the maximum \ allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). 
path = store_temp_file(sample.read(), sample.name) if sample.name.lower().endswith(".saz"): saz = saz_to_pcap(path) if saz: try: os.remove(path) except Exception as e: pass path = saz else: return render( request, "error.html", {"error": "Conversion from SAZ to PCAP failed."}) task_id = db.add_pcap(file_path=path, priority=priority) if task_id: task_ids.append(task_id) elif "url" in request.POST and request.POST.get("url").strip(): url = request.POST.get("url").strip() if not url: return render(request, "error.html", {"error": "You specified an invalid URL!"}) url = url.replace("hxxps://", "https://").replace( "hxxp://", "http://").replace("[.]", ".") if machine.lower() == "all": task_machines = [ vm.name for vm in db.list_machines(platform="windows") ] elif machine: machine_details = db.view_machine(machine) if not machine_details.platform == "windows": return render( request, "error.html", { "error": "Wrong platform, linux VM selected for {} sample". format(machine_details.platform) }) else: task_machines.append(machine) else: task_machines.append("first") for entry in task_machines: if entry == "first": entry = None task_ids_new = db.add_url(url=url, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, clock=clock) if task_ids_new: task_ids.extend(task_ids_new) elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip(): url = request.POST.get("dlnexec").strip() if not url: return render(request, "error.html", {"error": "You specified an invalid URL!"}) url = url.replace("hxxps://", "https://").replace( "hxxp://", "http://").replace("[.]", ".") response = _download_file(request.POST.get("route", None), url, options) if not response: return render(request, "error.html", {"error": "Was impossible to retrieve url"}) name = os.path.basename(url) if not "." in name: name = get_user_filename(options, custom) or generate_fake_name() path = store_temp_file(response, name) magic_type = get_magic_type(path) platform = get_platform(magic_type) if machine.lower() == "all": task_machines = [ vm.name for vm in db.list_machines(platform=platform) ] elif machine: machine_details = db.view_machine(machine[0]) if not machine_details.platform == platform: return render( request, "error.html", { "error": "Wrong platform, {} VM selected for {} sample". 
format(machine_details.platform, platform) }) else: task_machines.append(machine) else: task_machines.append("first") for entry in task_machines: if entry == "first": entry = None task_ids_new = db.demux_sample_and_add_to_db( file_path=path, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, platform=platform, clock=clock) if task_ids_new: task_ids.extend(task_ids_new) elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get("vtdl", False) \ and request.POST.get("vtdl")[0] != '': vtdl = request.POST.get("vtdl").strip() if (not settings.VTDL_PRIV_KEY and not settings.VTDL_INTEL_KEY) or not settings.VTDL_PATH: return render( request, "error.html", { "error": "You specified VirusTotal but must edit the file and specify your " "VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory" }) else: hashlist = [] if "," in vtdl: hashlist = [ _f for _f in vtdl.replace(" ", "").strip().split(",") if _f ] else: hashlist.append(vtdl) for h in hashlist: base_dir = tempfile.mkdtemp(prefix='cuckoovtdl', dir=settings.VTDL_PATH) task_ids_tmp = list() if opt_filename: filename = base_dir + "/" + opt_filename else: filename = base_dir + "/" + sanitize_filename(h) headers = {} paths = db.sample_path_by_hash(h) content = False if paths: content = get_file_content(paths) if settings.VTDL_PRIV_KEY: headers = {'x-apikey': settings.VTDL_PRIV_KEY} elif settings.VTDL_INTEL_KEY: headers = {'x-apikey': settings.VTDL_INTEL_KEY} url = "https://www.virustotal.com/api/v3/files/{id}/download".format( id=h) params = {} if not content: status, task_ids_tmp = download_file( False, content, request, db, task_ids, url, params, headers, "VirusTotal", filename, package, timeout, options, priority, machine, clock, custom, memory, enforce_timeout, referrer, tags, orig_options, "", static, h) else: status, task_ids_tmp = download_file( False, content, request, db, task_ids, url, params, headers, "Local", filename, package, timeout, options, priority, machine, clock, custom, memory, enforce_timeout, referrer, tags, orig_options, "", static, h) if status is "ok": task_ids = task_ids_tmp else: failed_hashes.append(h) if not isinstance(task_ids, list) and status == "error": # is render msg return task_ids if not isinstance(task_ids_tmp, list) and status == "error": # is render msg return task_ids_tmp if isinstance(task_ids, list): tasks_count = len(task_ids) else: # ToDo improve error msg tasks_count = 0 tasks_count = len(task_ids) if tasks_count > 0: data = {"tasks": task_ids, "tasks_count": tasks_count} if failed_hashes: data["failed_hashes"] = failed_hashes return render(request, "submission/complete.html", data) else: return render(request, "error.html", {"error": "Error adding task to Cuckoo's database."}) else: enabledconf = dict() enabledconf["vt"] = settings.VTDL_ENABLED enabledconf["kernel"] = settings.OPT_ZER0M0N enabledconf["memory"] = processing.memory.get("enabled") enabledconf["procmemory"] = processing.procmemory.get("enabled") enabledconf["dlnexec"] = settings.DLNEXEC enabledconf["tags"] = False enabledconf[ "dist_master_storage_only"] = repconf.distributed.master_storage_only all_tags = load_vms_tags() if all_tags: enabledconf["tags"] = True if not enabledconf["tags"]: # load multi machinery tags: # Get enabled machinery machinery = cfg.cuckoo.get("machinery") if machinery == "multi": for mmachinery in Config(machinery).multi.get( "machinery").split(","): vms = [ x.strip() for x in 
getattr(Config( mmachinery), mmachinery).get("machines").split(",") ] if any([ "tags" in list(getattr(Config(mmachinery), vmtag).keys()) for vmtag in vms ]): enabledconf["tags"] = True break else: # Get VM names for machinery config elements vms = [ x.strip() for x in getattr(Config( machinery), machinery).get("machines").split(",") ] # Check each VM config element for tags if any([ "tags" in list(getattr(Config(machinery), vmtag).keys()) for vmtag in vms ]): enabledconf["tags"] = True packages, machines = get_form_data("windows") socks5s = _load_socks5_operational() socks5s_random = "" if socks5s: socks5s_random = random.choice(list(socks5s.values())).get( "description", False) return render( request, "submission/index.html", { "packages": sorted(packages), "machines": machines, "vpns": list(vpns.values()), "socks5s": list(socks5s.values()), "socks5s_random": socks5s_random, "route": routing.routing.route, "internet": routing.routing.internet, "inetsim": routing.inetsim.enabled, "tor": routing.tor.enabled, "config": enabledconf, "resubmit": resubmit_hash, "tags": sorted(list(set(all_tags))) })
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to Elasticsearch.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_ELASTICSEARCH:
        raise CuckooDependencyError("Unable to import elasticsearch "
                                    "(install with `pip install elasticsearch`)")

    self.connect()
    index_prefix = self.options.get("index", "cuckoo")
    search_only = self.options.get("searchonly", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    idxdate = report["info"]["started"].split(" ")[0]
    self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

    if not search_only:
        if "network" not in report:
            report["network"] = {}

        # Store API calls in chunks for pagination in Django
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)
                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in Elasticsearch.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                    chunk_id = pchunk['_id']
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots", shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        if "suricata" in results and results["suricata"]:
            if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"] and "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
    else:
        report = {}
        report["task_id"] = results["info"]["id"]
        report["info"] = results.get("info")
        report["target"] = results.get("target")
        report["summary"] = results.get("behavior", {}).get("summary")
        report["network"] = results.get("network")
        report["virustotal"] = results.get("virustotal")

    # Other info we want quick access to from the web UI
    if "virustotal" in results and results["virustotal"] and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"], results["virustotal"]["total"])

    # Store the report and retrieve its object id.
    self.es.index(index=self.index_name, doc_type="analysis", id=results["info"]["id"], body=report)
def vt_lookup(category: str, target: str, results: dict = {}, on_demand: bool = False): if not processing_conf.virustotal.enabled or processing_conf.virustotal.get("on_demand", False) and not on_demand: return {} if category not in ("file", "url"): return {"error": True, "msg": "VT category isn't supported"} if category == "file": if not do_file_lookup: return {"error": True, "msg": "VT File lookup disabled in processing.conf"} if not os.path.exists(target) and len(target) != 64: return {"error": True, "msg": "File doesn't exist"} sha256 = target if len(target) == 64 else File(target).get_sha256() url = VIRUSTOTAL_FILE_URL.format(id=sha256) elif category == "url": if not do_url_lookup: return {"error": True, "msg": "VT URL lookup disabled in processing.conf"} if urlscrub: urlscrub_compiled_re = None try: urlscrub_compiled_re = re.compile(urlscrub) except Exception as e: log.error(f"Failed to compile urlscrub regex: {e}") return {} try: target = re.sub(urlscrub_compiled_re, "", target) except Exception as e: return {"error": True, "msg": f"Failed to scrub url: {e}"} # normalize the URL the way VT appears to if not target.lower().startswith(("http://", "https://")): target = f"http://{target}" slashsplit = target.split("/") slashsplit[0] = slashsplit[0].lower() slashsplit[2] = slashsplit[2].lower() if len(slashsplit) == 3: slashsplit.append("") target = "/".join(slashsplit) sha256 = hashlib.sha256(target.encode()).hexdigest() url = VIRUSTOTAL_URL_URL.format(id=target) try: r = requests.get(url, headers=headers, verify=True, timeout=timeout) if not r.ok: return {"error": True, "msg": f"Unable to complete connection to VirusTotal. Status code: {r.status_code}"} vt_response = r.json() engines = vt_response.get("data", {}).get("attributes", {}).get("last_analysis_results", {}) if not engines: return {} virustotal = { "names": vt_response.get("data", {}).get("attributes", {}).get("names"), "scan_id": vt_response.get("data", {}).get("id"), "md5": vt_response.get("data", {}).get("attributes", {}).get("md5"), "sha1": vt_response.get("data", {}).get("attributes", {}).get("sha1"), "sha256": vt_response.get("data", {}).get("attributes", {}).get("sha256"), "tlsh": vt_response.get("data", {}).get("attributes", {}).get("tlsh"), "positive": vt_response.get("data", {}).get("attributes", {}).get("last_analysis_stats", {}).get("malicious"), "total": len(engines.keys()), "permalink": vt_response.get("data", {}).get("links", {}).get("self"), } if remove_empty: virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items() if block["result"]} else: virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items()} virustotal["resource"] = sha256 virustotal["results"] = [] detectnames = [] for engine, block in engines.items(): virustotal["results"] += [{"vendor": engine.replace(".", "_"), "sig": block["result"]}] if block["result"] and "Trojan.Heur." 
not in block["result"]: # weight Microsoft's detection, they seem to be more accurate than the rest if engine == "Microsoft": detectnames.append(block["result"]) detectnames.append(block["result"]) virustotal["detection"] = get_vt_consensus(detectnames) if virustotal.get("detection", False) and results: add_family_detection(results, virustotal["detection"], "VirusTotal", virustotal["sha256"]) if virustotal.get("positive", False) and virustotal.get("total", False): virustotal["summary"] = f"{virustotal['positive']}/{virustotal['total']}" return virustotal except requests.exceptions.RequestException as e: return { "error": True, "msg": f"Unable to complete connection to VirusTotal: {e}", } return {}
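# Illustrative sketch: before hashing a URL submission, vt_lookup() above lowercases the scheme
# and host and appends a trailing slash to bare domains so the digest matches what VirusTotal
# computes. The same normalization, isolated with only the standard library; the helper name
# is mine.

import hashlib

def normalize_url_for_vt(target):
    """Lowercase scheme and host and ensure a path component, as done above."""
    if not target.lower().startswith(("http://", "https://")):
        target = "http://" + target
    parts = target.split("/")
    parts[0] = parts[0].lower()   # scheme ("http:" or "https:")
    parts[2] = parts[2].lower()   # host
    if len(parts) == 3:           # bare domain -> add the trailing slash
        parts.append("")
    return "/".join(parts)

url = normalize_url_for_vt("HTTP://Example.COM")
print(url)                                        # http://example.com/
print(hashlib.sha256(url.encode()).hexdigest())   # digest stored as the lookup's "resource"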
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to Elasticsearch. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_ELASTICSEARCH: raise CuckooDependencyError( "Unable to import elasticsearch " "(install with `pip3 install elasticsearch`)") self.connect() index_prefix = self.options.get("index", "cuckoo") search_only = self.options.get("searchonly", False) # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) idxdate = report["info"]["started"].split(" ")[0] self.index_name = '{0}-{1}'.format(index_prefix, idxdate) if not search_only: if not "network" in report: report["network"] = {} # Store API calls in chunks for pagination in Django if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in Elastcisearch. if len(chunk) == 100: to_insert = { "pid": process["process_id"], "calls": chunk } pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert) chunk_id = pchunk['_id'] chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = { "pid": process["process_id"], "calls": chunk } pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert) chunk_id = pchunk['_id'] chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. 
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Add screenshot paths report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [ shot for shot in os.listdir(shots_path) if shot.endswith(".jpg") ] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) screenshot = File(shot_path) if screenshot.valid(): # Strip the extension as it's added later # in the Django view report["shots"].append(shot_file.replace(".jpg", "")) # Other info we want Quick access to from the web UI if "virustotal" in results and results[ "virustotal"] and "positives" in results[ "virustotal"] and "total" in results["virustotal"]: report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) if "suricata" in results and results["suricata"]: if "tls" in results["suricata"] and len( results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"] and "alerts" in results[ "suricata"] and len(results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len( results["suricata"]["alerts"]) if "files" in results["suricata"] and len( results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if "http" in results["suricata"] and len( results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) else: report = {} report["task_id"] = results["info"]["id"] report["info"] = results.get("info") report["target"] = results.get("target") report["summary"] = results.get("behavior", {}).get("summary") report["network"] = results.get("network") report["virustotal"] = results.get("virustotal") report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) # Store the report and retrieve its object id. self.es.index(index=self.index_name, doc_type="analysis", id=results["info"]["id"], body=report)
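# Illustrative sketch: both Elasticsearch reporters above page API calls by writing them in
# chunks of 100 documents and keeping only the returned chunk ids on the process record.
# The chunking itself is datastore-agnostic; a minimal version with the size as a parameter.

def chunk_calls(calls, chunk_size=100):
    """Yield successive lists of at most chunk_size calls, mirroring the loop above."""
    chunk = []
    for call in calls:
        if len(chunk) == chunk_size:
            yield chunk
            chunk = []
        chunk.append(call)
    if chunk:  # store leftovers
        yield chunk

# Each yielded chunk would be indexed as {"pid": process_id, "calls": chunk} and replaced
# in the process record by the id the datastore returns.
calls = [{"api": "NtCreateFile"}] * 250
print([len(c) for c in chunk_calls(calls)])  # [100, 100, 50]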
def run(self): """Run analysis. @return: list of process dumps with related information. """ self.key = "procdump" procdump_files = [] buf = self.options.get("buffer", 8192) if not hasattr(self, "procdump_path") or not os.path.exists( self.procdump_path): return None file_names = os.listdir(self.procdump_path) for file_name in file_names: file_path = os.path.join(self.procdump_path, file_name) if not os.path.isfile(file_path): continue if file_name.endswith("_info.txt"): continue with open(file_path + "_info.txt", 'r') as f: metastring = f.readline() file_info = File(file_path=file_path, guest_paths=metastring, file_name=file_name).get_all() metastrings = metastring.split(",") file_info["process_path"] = metastrings[2] file_info["module_path"] = metastrings[3] file_info["process_name"] = file_info["process_path"].split( "\\")[-1] file_info["pid"] = metastrings[1] file_info["cape_type"] = "PE image" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " elif type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] readit = False for texttype in texttypes: if texttype in file_info["type"]: readit = True break if readit: with open(file_info["path"], "r") as drop_open: filedata = drop_open.read(buf + 1) if len(filedata) > buf: file_info["data"] = convert_to_printable(filedata[:buf] + " <truncated>") else: file_info["data"] = convert_to_printable(filedata) procdump_files.append(file_info) return procdump_files
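# Illustrative sketch: the procdump module above reads a companion "<dump>_info.txt" file whose
# single line is a comma-separated metadata string; per the indexing used above, field 1 is the
# pid, field 2 the process path and field 3 the module path. The example metadata line below is
# made up for illustration.

def parse_procdump_meta(metastring):
    """Split the _info.txt line into the fields consumed above."""
    fields = metastring.strip().split(",")
    return {
        "pid": fields[1],
        "process_path": fields[2],
        "module_path": fields[3],
        "process_name": fields[2].split("\\")[-1],
    }

meta = "0,1234,C:\\Windows\\System32\\svchost.exe,C:\\Windows\\System32\\evil.dll"
print(parse_procdump_meta(meta)["process_name"])  # svchost.exe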
def process_file(self, file_path, CAPE_files, append_file): """Process file. @return: file_info """ global cape_config cape_name = "" strings = [] buf = self.options.get("buffer", BUFSIZE) if file_path.endswith("_info.txt"): return texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] if os.path.exists(file_path + "_info.txt"): with open(file_path + "_info.txt", 'r') as f: metastring = f.readline() else: metastring = "" file_info = File(file_path, metastring).get_all() # Get the file data with open(file_info["path"], "r") as file_open: filedata = file_open.read(buf + 1) if len(filedata) > buf: file_info["data"] = binascii.b2a_hex(filedata[:buf] + " <truncated>") else: file_info["data"] = binascii.b2a_hex(filedata) metastrings = metastring.split(",") if len(metastrings) > 1: file_info["pid"] = metastrings[1] if len(metastrings) > 2: file_info["process_path"] = metastrings[2] file_info["process_name"] = metastrings[2].split("\\")[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[3] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings != "": try: file_info["cape_type_code"] = int(metastrings[0]) except Exception as e: pass if file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" if file_info["cape_type_code"] == INJECTION_PE: file_info["cape_type"] = "Injected PE Image" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == INJECTION_SHELLCODE: file_info["cape_type"] = "Injected Shellcode/Data" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == EXTRACTION_PE: file_info["cape_type"] = "Extracted PE Image" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] if file_info["cape_type_code"] == EXTRACTION_SHELLCODE: file_info["cape_type"] = "Extracted Shellcode" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # PlugX if file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" plugx_parser = plugx.PlugXConfig() plugx_config = plugx_parser.parse_config( filedata, len(filedata)) if not "cape_config" in cape_config and plugx_config: cape_config["cape_config"] = {} for key, value in plugx_config.items(): cape_config["cape_config"].update({key: [value]}) cape_name = "PlugX" append_file = False if file_info["cape_type_code"] == PLUGX_PAYLOAD: file_info["cape_type"] = "PlugX Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # EvilGrab if file_info["cape_type_code"] == 
EVILGRAB_PAYLOAD: file_info["cape_type"] = "EvilGrab Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] == EVILGRAB_DATA: cape_name = "EvilGrab" file_info["cape_type"] = "EvilGrab Data" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if file_info["size"] == 256 or file_info["size"] == 260: ConfigItem = "filepath" ConfigData = format(filedata) cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if file_info["size"] > 0x1000: append_file = True else: append_file = False # Azzy if file_info["cape_type_code"] == AZZY_DATA: cape_name = "Azzy" cape_config["cape_type"] = "Azzy Config" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if len(metastrings) > 4: AzzyConfigIndex = metastrings[4] if AzzyConfigIndex == '0x0': ConfigItem = "Timer1" elif AzzyConfigIndex == '0x1': ConfigItem = "Timer2" elif AzzyConfigIndex == '0x2': ConfigItem = "Computer Name" elif AzzyConfigIndex == '0x3': ConfigItem = "C&C1" elif AzzyConfigIndex == '0x4': ConfigItem = "C&C2" elif AzzyConfigIndex == '0x5': ConfigItem = "Operation Name" elif AzzyConfigIndex == '0x6': ConfigItem = "Keylogger MaxBuffer" elif AzzyConfigIndex == '0x7': ConfigItem = "Keylogger MaxTimeout" elif AzzyConfigIndex == '0x8': ConfigItem = "Keylogger Flag" elif AzzyConfigIndex == '0x9': ConfigItem = "C&C3" else: ConfigItem = "Unknown" ConfigData = format(filedata) if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) append_file = False # UPX if file_info["cape_type_code"] == UPX: file_info["cape_type"] = "Unpacked PE Image" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # Process CAPE Yara hits for hit in file_info["cape_yara"]: cape_name = hit["name"] try: file_info["cape_type"] = hit["meta"]["cape_type"] except: file_info["cape_type"] = cape_name + " Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # UPX Check and unpack if cape_name == 'UPX': log.info( "CAPE: Found UPX Packed sample - attempting to unpack") unpacked_file = upx_unpack(filedata) if unpacked_file and os.path.exists(unpacked_file): unpacked_yara = File(unpacked_file).get_yara( CAPE_YARA_RULEPATH) for unpacked_hit in unpacked_yara: unpacked_name = unpacked_hit["name"] if unpacked_name == 'UPX': # Failed to unpack log.info("CAPE: Failed to unpack UPX") os.unlink(unpacked_file) break if not os.path.exists(self.CAPE_path): os.makedirs(self.CAPE_path) newname = os.path.join(self.CAPE_path, os.path.basename(unpacked_file)) 
os.rename(unpacked_file, newname) infofd = open(newname + "_info.txt", "a") infofd.write(os.path.basename(unpacked_file) + "\n") infofd.close() # Recursive process of unpacked file upx_extract = self.process_file(newname, CAPE_files, True) if upx_extract["type"]: upx_extract["cape_type"] = "UPX-extracted " type_strings = upx_extract["type"].split() if type_strings[0] == ("PE32+"): upx_extract["cape_type"] += " 64-bit " if type_strings[2] == ("(DLL)"): upx_extract["cape_type"] += "DLL" else: upx_extract["cape_type"] += "executable" if type_strings[0] == ("PE32"): upx_extract["cape_type"] += " 32-bit " if type_strings[2] == ("(DLL)"): upx_extract["cape_type"] += "DLL" else: upx_extract["cape_type"] += "executable" # Attempt to import a parser for the yara hit # DC3-MWCP try: mwcp = malwareconfigreporter.malwareconfigreporter() kwargs = {} mwcp.run_parser(cape_name, data=filedata, **kwargs) if mwcp.errors == []: log.info("CAPE: Imported DC3-MWCP parser %s", cape_name) mwcp_loaded = True else: error_lines = mwcp.errors[0].split("\n") for line in error_lines: if line.startswith('ImportError: '): log.info("CAPE: DC3-MWCP parser: %s", line.split(': ')[1]) mwcp_loaded = False except ImportError: mwcp_loaded = False # malwareconfig try: malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: #log.info("CAPE: No malwareconfig.com parser for %s", cape_name) malwareconfig_loaded = False # Get config data if mwcp_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} cape_config["cape_config"] = convert(mwcp.metadata) else: cape_config["cape_config"].update( convert(mwcp.metadata)) except Exception as e: log.error( "CAPE: DC3-MWCP config parsing error with %s: %s", cape_name, e) elif malwareconfig_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} malwareconfig_config = module.config(filedata) if isinstance(malwareconfig_config, list): for (key, value) in module.config(filedata)[0].iteritems(): cape_config["cape_config"].update({key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in module.config(filedata).iteritems(): cape_config["cape_config"].update({key: [value]}) except Exception as e: log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e) if cape_name: cape_config["cape_name"] = format(cape_name) if not "cape" in self.results: #self.results["cape"] = [] self.results["cape"] = cape_name #if cape_name not in self.results["cape"]: # self.results["cape"].append(cape_name) if append_file == True: CAPE_files.append(file_info) return file_info
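# Illustrative sketch: the branches above repeat the same pattern many times, appending
# ": 64-bit "/": 32-bit " plus "DLL" or "executable" to cape_type based on the libmagic type
# string. A later revision of this module folds that into a pe_map lookup; the helper below is
# my own condensation of the repeated logic, not code from the module.

PE_MAP = {"PE32+": ": 64-bit ", "PE32": ": 32-bit "}

def pe_type_suffix(type_string):
    """Return the cape_type suffix derived from a libmagic-style type string."""
    words = type_string.split()
    if not words or words[0] not in PE_MAP:
        return ""
    suffix = PE_MAP[words[0]]
    # In the strings handled above the third token is "(DLL)" for libraries.
    suffix += "DLL" if len(words) > 2 and words[2] == "(DLL)" else "executable"
    return suffix

print(pe_type_suffix("PE32+ executable (DLL) (GUI) x86-64, for MS Windows"))  # : 64-bit DLL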
def run(self): """Runs VirusTotal processing @return: full VirusTotal report. """ self.key = "virustotal" virustotal = [] key = self.options.get("key", None) timeout = self.options.get("timeout", 60) urlscrub = self.options.get("urlscrub", None) do_file_lookup = self.getbool(self.options.get("do_file_lookup", False)) do_url_lookup = self.getbool(self.options.get("do_url_lookup", False)) if not key: raise CuckooProcessingError("VirusTotal API key not " "configured, skipping") if self.task["category"] == "file" and do_file_lookup: if not os.path.exists(self.file_path): raise CuckooProcessingError( "File {0} not found, skipping it".format(self.file_path)) resource = File(self.file_path).get_sha256() url = VIRUSTOTAL_FILE_URL elif self.task["category"] == "url" and do_url_lookup: resource = self.task["target"] if urlscrub: urlscrub_compiled_re = None try: urlscrub_compiled_re = re.compile(urlscrub) except Exception as e: raise CuckooProcessingError( "Failed to compile urlscrub regex: {0}".format(e)) try: resource = re.sub(urlscrub_compiled_re, "", resource) except Exception as e: raise CuckooProcessingError("Failed to scrub url: {0}".format(e)) # normalize the URL the way VT appears to if not resource.lower().startswith( "http://") and not resource.lower().startswith("https://"): resource = "http://" + resource slashsplit = resource.split('/') slashsplit[0] = slashsplit[0].lower() slashsplit[2] = slashsplit[2].lower() if len(slashsplit) == 3: slashsplit.append("") resource = "/".join(slashsplit) resource = hashlib.sha256(resource).hexdigest() url = VIRUSTOTAL_URL_URL else: # Not supported type, exit. return virustotal data = {"resource": resource, "apikey": key} try: r = requests.get(url, params=data, verify=True, timeout=int(timeout)) response_data = r.content except requests.exceptions.RequestException as e: raise CuckooProcessingError("Unable to complete connection " "to VirusTotal: {0}".format(e)) try: virustotal = json.loads(response_data) except ValueError as e: raise CuckooProcessingError("Unable to convert response to " "JSON: {0}".format(e)) # Work around VT brain-damage if isinstance(virustotal, list) and len(virustotal): virustotal = virustotal[0] if "scans" in virustotal: items = virustotal["scans"].items() virustotal["scans"] = dict((engine.replace(".", "_"), signature) for engine, signature in items) virustotal["resource"] = resource virustotal["results"] = list(({ "vendor": engine.replace(".", "_"), "sig": signature["result"] }) for engine, signature in items) return virustotal
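# Illustrative sketch: after parsing the JSON, the module above reshapes the v2 "scans" mapping
# by replacing dots in engine names with underscores (dots are not usable as MongoDB field
# names) and deriving a flat vendor/signature list for the UI. The same transformation on a
# fabricated response fragment:

def reshape_scans(vt_report):
    """Rename engines and build the flat results list, as done above."""
    scans = vt_report.get("scans", {})
    vt_report["scans"] = dict((engine.replace(".", "_"), sig) for engine, sig in scans.items())
    vt_report["results"] = [
        {"vendor": engine.replace(".", "_"), "sig": sig.get("result")}
        for engine, sig in scans.items()
    ]
    return vt_report

report = {"scans": {"ESET.NOD32": {"result": "Win32/Agent.ABC"}, "ClamAV": {"result": None}}}
print(reshape_scans(report)["results"])
# [{'vendor': 'ESET_NOD32', 'sig': 'Win32/Agent.ABC'}, {'vendor': 'ClamAV', 'sig': None}]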
def process_file(self, file_path, append_file, metadata=None): """Process file. @return: file_info """ if metadata is None: metadata = {} cape_name = "" type_string = "" if not os.path.exists(file_path): return file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all() cape_names = set() if pefile_object: self.results.setdefault("pefiles", {}).setdefault(file_info["sha256"], pefile_object) if file_info.get("clamav") and processing_conf.detections.clamav: clamav_detection = get_clamav_consensus(file_info["clamav"]) if clamav_detection: add_family_detection(self.results, clamav_detection, "ClamAV", file_info["sha256"]) # should we use dropped path here? static_file_info( file_info, file_path, str(self.task["id"]), self.task.get("package", ""), self.task.get("options", ""), self.self_extracted, self.results, ) # Get the file data with open(file_info["path"], "rb") as file_open: file_data = file_open.read() if metadata.get("pids", False): file_info["pid"] = metadata["pids"][0] if len( metadata["pids"]) == 1 else ",".join(metadata["pids"]) metastrings = metadata.get("metadata", "").split(";?") if len(metastrings) > 2: file_info["process_path"] = metastrings[1] file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[2] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings and metastrings[0] and metastrings[0].isdigit(): file_info["cape_type_code"] = int(metastrings[0]) if file_info["cape_type_code"] == TYPE_STRING: if len(metastrings) > 4: type_string = metastrings[3] elif file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" elif file_info["cape_type_code"] in inject_map: file_info["cape_type"] = inject_map[ file_info["cape_type_code"]] if len(metastrings) > 4: file_info["target_path"] = metastrings[3] file_info["target_process"] = metastrings[3].rsplit( "\\", 1)[-1] file_info["target_pid"] = metastrings[4] elif file_info["cape_type_code"] in unpack_map: file_info["cape_type"] = unpack_map[ file_info["cape_type_code"]] if len(metastrings) > 4: file_info["virtual_address"] = metastrings[3] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] in code_mapping: file_info["cape_type"] = code_mapping[ file_info["cape_type_code"]] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] in name_mapping: cape_name = name_mapping[file_info["cape_type_code"]] append_file = True # PlugX elif file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" if plugx_parser: plugx_config = plugx_parser.parse_config( file_data, len(file_data)) if plugx_config: cape_name = "PlugX" self.update_cape_configs(cape_name, plugx_config) cape_names.add(cape_name) else: log.error( "CAPE: PlugX config parsing failure - size many not be handled" ) append_file = False # Attempt to decrypt script dump elif file_info["cape_type_code"] == SCRIPT_DUMP: data = file_data.decode("utf-16").replace("\x00", "") cape_name = "ScriptDump" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", 
"processing", "parsers", "CAPE") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.debug("CAPE: Imported parser %s", cape_name) except ImportError: log.debug("CAPE: parser: No module named %s", cape_name) if malwareconfig_loaded: try: script_data = module.config(self, data) if script_data and "more_eggs" in script_data["type"]: bindata = script_data["data"] sha256 = hashlib.sha256(bindata).hexdigest() filepath = os.path.join(self.CAPE_path, sha256) if "text" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsJS" elif "binary" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsBin" with open(filepath, "w") as cfile: cfile.write(bindata) self.script_dump_files.append(filepath) else: file_info["cape_type"] = "Script Dump" log.info( "CAPE: Script Dump does not contain known encrypted payload" ) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = True # More_Eggs elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD: file_info["cape_type"] = "More Eggs JS Payload" cape_name = "MoreEggs" append_file = True # Process CAPE Yara hits # Prefilter extracted data + beauty is better than oneliner: all_files = [] for extracted_file in file_info.get("extracted_files", []): yara_hits = extracted_file["cape_yara"] if not yara_hits: continue if extracted_file.get("data", b""): extracted_file_data = make_bytes(extracted_file["data"]) else: with open(extracted_file["path"], "rb") as fil: extracted_file_data = fil.read() for yara in yara_hits: all_files.append(( f"[{extracted_file.get('sha256', '')}]{file_info['path']}", extracted_file_data, yara, )) for yara in file_info["cape_yara"]: all_files.append((file_info["path"], file_data, yara)) executed_config_parsers = collections.defaultdict(set) for tmp_path, tmp_data, hit in all_files: # Check for a payload or config hit try: if File.yara_hit_provides_detection(hit): file_info["cape_type"] = hit["meta"]["cape_type"] cape_name = File.get_cape_name_from_yara_hit(hit) cape_names.add(cape_name) except Exception as e: print(f"Cape type error: {e}") type_strings = file_info["type"].split() if "-bit" not in file_info["cape_type"]: if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] file_info["cape_type"] += "DLL" if type_strings[2] == ( "(DLL)") else "executable" if cape_name and cape_name not in executed_config_parsers[tmp_path]: tmp_config = static_config_parsers(cape_name, tmp_path, tmp_data) self.update_cape_configs(cape_name, tmp_config) executed_config_parsers[tmp_path].add(cape_name) if type_string: log.info("CAPE: type_string: %s", type_string) tmp_cape_name = File.get_cape_name_from_cape_type(type_string) if tmp_cape_name and tmp_cape_name not in executed_config_parsers: tmp_config = static_config_parsers(tmp_cape_name, file_info["path"], file_data) if tmp_config: cape_name = tmp_cape_name cape_names.add(cape_name) log.info("CAPE: config returned for: %s", cape_name) self.update_cape_configs(cape_name, tmp_config) self.add_family_detections(file_info, cape_names) # Remove duplicate payloads from web ui for cape_file in self.cape["payloads"] or []: if file_info["size"] == cape_file["size"]: if HAVE_PYDEEP: ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode()) if ssdeep_grade >= ssdeep_threshold: log.debug( "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d", 
ssdeep_grade, ssdeep_threshold) append_file = False if file_info.get("entrypoint") and file_info.get( "ep_bytes") and cape_file.get("entrypoint"): if (file_info["entrypoint"] == cape_file["entrypoint"] and file_info["cape_type_code"] == cape_file["cape_type_code"] and file_info["ep_bytes"] == cape_file["ep_bytes"]): log.debug( "CAPE duplicate output file skipped: matching entrypoint" ) append_file = False if append_file: if HAVE_FLARE_CAPA: pretime = timeit.default_timer() capa_details = flare_capa_details(file_path, "cape") if capa_details: file_info["flare_capa"] = capa_details self.add_statistic_tmp("flare_capa", "time", pretime=pretime) self.cape["payloads"].append(file_info)
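# Illustrative sketch: the dedup loop above compares ssdeep digests with pydeep and drops a
# payload once the similarity score reaches the configured threshold, falling back to an
# entrypoint/ep_bytes comparison. A condensed version, assuming pydeep is installed
# (pydeep.compare takes two ssdeep digests as bytes and returns a 0-100 score); the threshold
# value below is a placeholder, the real one comes from configuration.

try:
    import pydeep
    HAVE_PYDEEP = True
except ImportError:
    HAVE_PYDEEP = False

SSDEEP_THRESHOLD = 90  # placeholder

def is_duplicate_payload(new_info, existing_info, threshold=SSDEEP_THRESHOLD):
    """Return True when two payload records appear to describe the same file."""
    if new_info["size"] != existing_info["size"]:
        return False
    if HAVE_PYDEEP:
        score = pydeep.compare(new_info["ssdeep"].encode(), existing_info["ssdeep"].encode())
        if score >= threshold:
            return True
    # Fallback used above: same entrypoint, CAPE type code and entrypoint bytes.
    return (
        new_info.get("entrypoint") is not None
        and new_info.get("entrypoint") == existing_info.get("entrypoint")
        and new_info.get("cape_type_code") == existing_info.get("cape_type_code")
        and new_info.get("ep_bytes") == existing_info.get("ep_bytes")
    )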
def tasks_create_file(): response = {} data = request.files.file pcap = request.POST.get("pcap", "") package = request.forms.get("package", "") timeout = request.forms.get("timeout", "") priority = request.forms.get("priority", 1) options = request.forms.get("options", "") machine = request.forms.get("machine", "") platform = request.forms.get("platform", "") tags = request.forms.get("tags", None) custom = request.forms.get("custom", "") memory = request.forms.get("memory", "False") clock = request.forms.get("clock", datetime.now().strftime("%m-%d-%Y %H:%M:%S")) if clock is False or clock is None: clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S") if "1970" in clock: clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S") shrike_url = request.forms.get("shrike_url", None) shrike_msg = request.forms.get("shrike_msg", None) shrike_sid = request.forms.get("shrike_sid", None) shrike_refer = request.forms.get("shrike_refer", None) static = bool(request.POST.get("static", False)) unique = bool(request.forms.get("unique", False)) if memory.upper() == "FALSE" or memory == "0": memory = False else: memory = True enforce_timeout = request.forms.get("enforce_timeout", "False") if enforce_timeout.upper() == "FALSE" or enforce_timeout == "0": enforce_timeout = False else: enforce_timeout = True temp_file_path = store_temp_file(data.file.read(), data.filename) if unique and db.check_file_uniq(File(temp_file_path).get_sha256()): resp = { "error": True, "error_value": "Duplicated file, disable unique option to force submission" } return jsonize(resp) if pcap: if data.filename.lower().endswith(".saz"): saz = saz_to_pcap(temp_file_path) if saz: path = saz try: os.remove(temp_file_path) except: pass else: resp = { "error": True, "error_value": "Failed to convert SAZ to PCAP" } return jsonize(resp) else: path = temp_file_path task_id = db.add_pcap(file_path=path) task_ids = [task_id] else: try: task_ids, extra_details = db.demux_sample_and_add_to_db( file_path=temp_file_path, package=package, timeout=timeout, options=options, priority=priority, machine=machine, platform=platform, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, clock=clock, shrike_url=shrike_url, shrike_msg=shrike_msg, shrike_sid=shrike_sid, shrike_refer=shrike_refer, static=static, ) except CuckooDemuxError as e: return HTTPError(500, e) response["task_ids"] = task_ids return jsonize(response)
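# Illustrative sketch: a client-side submission against the endpoint above, assuming the route
# is exposed as /tasks/create/file on the REST API (the mount point is not shown in this
# excerpt) and that the multipart field is named "file" as in request.files.file. Host, port
# and option values are placeholders.

import requests

API = "http://127.0.0.1:8090/tasks/create/file"

with open("sample.exe", "rb") as sample:
    resp = requests.post(
        API,
        files={"file": ("sample.exe", sample)},
        data={
            "package": "exe",
            "timeout": 200,
            "priority": 1,
            "memory": "False",
            "enforce_timeout": "False",
            "unique": "1",  # reject resubmission of an already-seen sha256
        },
        timeout=60,
    )

print(resp.json().get("task_ids"))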
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION: CuckooReportError("Mongo schema version not expected, check data migration tool") else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually # created. # Returns None if the index already exists. # TODO: This is not optimal because it run each analysis. Need to run # only one time at startup. self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique") # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if not "network" in report: report["network"] = {} # Store the sample in GridFS. if results["info"]["category"] == "file" and "target" in results: sample = File(self.file_path) if sample.valid(): fname = results["target"]["file"]["name"] sample_id = self.store_file(sample, filename=fname) report["target"] = {"file_id": sample_id} report["target"].update(results["target"]) # Store the PCAP file in GridFS and reference it back in the report. pcap_path = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_path) if pcap.valid(): pcap_id = self.store_file(pcap) report["network"]["pcap_id"] = pcap_id sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap") spcap = File(sorted_pcap_path) if spcap.valid(): spcap_id = self.store_file(spcap) report["network"]["sorted_pcap_id"] = spcap_id if "procmemory" in report: # Store the process memory dump file in GridFS and reference it back in the report. for idx, procmem in enumerate(report['procmemory']): procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem['pid'])) procmem_file = File(procmem_path) if procmem_file.valid(): procmem_id = self.store_file(procmem_file) report["procmemory"][idx].update({"procmem_id": procmem_id}) # Store the suri extracted files in GridFS and reference it back in the report. suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip") suri_extracted_zip = File(suri_extracted_zip_path) if suri_extracted_zip.valid(): suri_extracted_zip_id = self.store_file(suri_extracted_zip) report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id} report["suricata"].update(results["suricata"]) # Walk through the dropped files, store them in GridFS and update the # report with the ObjectIds. new_dropped = [] if "dropped" in report: for dropped in report["dropped"]: new_drop = dict(dropped) drop = File(dropped["path"]) if drop.valid(): dropped_id = self.store_file(drop, filename=dropped["name"]) new_drop["object_id"] = dropped_id new_dropped.append(new_drop) report["dropped"] = new_dropped # Store the Zipped Droppings file in GridFS and reference it back in the report. 
#cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip") #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path) #if cuckoo_dropped_zip.valid(): # cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip) # report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id} # report["zippeddroppings"].update(results["zippeddroppings"]) # Walk through the suricata extracted files, store them in GridFS and update the # report with the ObjectIds. new_suricata_files = [] if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("files") and results["suricata"]["files"]: for suricata_file_e in results["suricata"]["files"]: if suricata_file_e.has_key("file_info"): tmp_suricata_file_d = dict(suricata_file_e) suricata_file = File(suricata_file_e["file_info"]["path"]) if suricata_file.valid(): suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"]) tmp_suricata_file_d["object_id"] = suricata_file_id new_suricata_files.append(tmp_suricata_file_d) report["suricata"]["files"] = new_suricata_files # Add screenshots. report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): # Walk through the files and select the JPGs. shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) # If the screenshot path is a valid file, store it and # reference it back in the report. if shot.valid(): shot_id = self.store_file(shot) report["shots"].append(shot_id) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. 
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes #Other info we want Quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"]) if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len(results["suricata"]["alerts"]) if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) # Store the report and retrieve its object id. try: self.db.analysis.save(report) except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size(report[parent_key])[0] if not self.options.get("fix_large_docs", False): # Just log the error and problem keys log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576)) else: # Delete the problem keys and check for more error_saved = True while error_saved: log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" % (parent_key, child_key, int(psize) / 1048576)) del report[parent_key][child_key] try: self.db.analysis.save(report) error_saved = False except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size(report[parent_key])[0] log.error(str(e)) log.error("Largest parent key: %s (%d bytes)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d bytes)" % (child_key, int(csize) / 1048576)) self.conn.close()
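# Hypothetical sketch: when MongoDB rejects an oversized document, the reporter above asks a
# debug_dict_size() helper (not shown in this excerpt) for the heaviest keys before optionally
# deleting them. One way such a helper could work, measuring keys by their BSON-encoded size
# via pymongo's bson package; this is an assumption about its behaviour, not the project's
# actual implementation.

from bson import BSON  # ships with pymongo

def largest_keys(document):
    """Return (key, encoded_size_in_bytes) pairs, largest first."""
    sizes = []
    for key, value in document.items():
        try:
            size = len(BSON.encode({key: value}))
        except Exception:
            size = len(str(value))  # rough fallback for values BSON cannot encode
        sizes.append((key, size))
    return sorted(sizes, key=lambda item: item[1], reverse=True)

# largest_keys(report)[0] corresponds to the parent key blamed in the log messages above.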
def process_file(self, file_path, append_file, metadata={}): """Process file. @return: file_info """ config = {} cape_name = "" if not os.path.exists(file_path): return buf = self.options.get("buffer", BUFSIZE) file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all() if pefile_object: self.results.setdefault("pefiles", {}) self.results["pefiles"].setdefault(file_info["sha256"], pefile_object) # Get the file data try: with open(file_info["path"], "rb") as file_open: file_data = file_open.read() except UnicodeDecodeError as e: with open(file_info["path"], "rb") as file_open: file_data = file_open.read() if metadata.get("pids", False): if len(metadata["pids"]) == 1: file_info["pid"] = metadata["pids"][0] else: file_info["pid"] = ",".join(metadata["pids"]) metastrings = metadata.get("metadata", "").split(";?") if len(metastrings) > 2: file_info["process_path"] = metastrings[1] file_info["process_name"] = metastrings[1].split("\\")[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[2] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings != "": try: file_info["cape_type_code"] = int(metastrings[0]) except Exception as e: pass if file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" if file_info["cape_type_code"] in inject_map: file_info["cape_type"] = inject_map[ file_info["cape_type_code"]] if len(metastrings) > 4: file_info["target_path"] = metastrings[3] file_info["target_process"] = metastrings[3].split( "\\")[-1] file_info["target_pid"] = metastrings[4] if file_info["cape_type_code"] == INJECTION_SECTION: file_info["cape_type"] = "Injected Section" if len(metastrings) > 4: file_info["section_handle"] = metastrings[4] simple_cape_type_map = { UNPACKED_PE: "Unpacked PE Image", UNPACKED_SHELLCODE: "Unpacked Shellcode", } if file_info["cape_type_code"] in simple_cape_type_map: file_info["cape_type"] = simple_cape_type_map[ file_info["cape_type_code"]] if len(metastrings) > 4: file_info["virtual_address"] = metastrings[3] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # PlugX if file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" if plugx_parser: plugx_config = plugx_parser.parse_config( file_data, len(file_data)) if plugx_config: cape_name = "PlugX" config[cape_name] = dict() for key, value in plugx_config.items(): config[cape_name].update({key: [value]}) else: log.error( "CAPE: PlugX config parsing failure - size many not be handled." 
) append_file = False if file_info["cape_type_code"] in code_mapping: file_info["cape_type"] = code_mapping[ file_info["cape_type_code"]] if file_info["cape_type_code"] in config_mapping: file_info["cape_type"] = code_mapping[ file_info["cape_type_code"]] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] in name_mapping: cape_name = name_mapping[file_info["cape_type_code"]] append_file = True if file_info["cape_type_code"] == EVILGRAB_DATA: cape_name = "EvilGrab" file_info["cape_type"] = "EvilGrab Data" if file_info["size"] == 256 or file_info["size"] == 260: config[cape_name].update({"filepath": [format(file_data)]}) if file_info["size"] > 0x1000: append_file = True else: append_file = False if file_info["cape_type_code"] == SEDRECO_DATA: cape_name = "Sedreco" config[cape_name] = dict() config[cape_name]["cape_type"] = "Sedreco Config" if len(metastrings) > 4: SedrecoConfigIndex = metastrings[4] if SedrecoConfigIndex in sedreco_map: ConfigItem = sedreco_map[SedrecoConfigIndex] else: ConfigItem = "Unknown" ConfigData = format(file_data) if ConfigData: config[cape_name].update({ConfigItem: [ConfigData]}) append_file = False if file_info["cape_type_code"] == CERBER_CONFIG: file_info["cape_type"] = "Cerber Config" cape_name = "Cerber" config[cape_name] = dict() config["cape_type"] = "Cerber Config" parsed = json.loads(file_data.rstrip(b"\0")) config[cape_name].update({ "JSON Data": [json.dumps(parsed, indent=4, sort_keys=True)] }) append_file = True if file_info["cape_type_code"] == URSNIF_PAYLOAD: cape_name = "Ursnif" config[cape_name] = dict() config[cape_name]["cape_type"] = "Ursnif Payload" file_info["cape_type"] = "Ursnif Payload" if file_info["cape_type_code"] == URSNIF_CONFIG: file_info["cape_type"] = "Ursnif Config" cape_name = "Ursnif" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.debug( "CAPE: malwareconfig.com parser: No module named %s", cape_name) if malwareconfig_loaded: try: malwareconfig_config = module.config(file_data) if malwareconfig_config: config[cape_name] = dict() config[cape_name]["cape_type"] = "Ursnif Config" if isinstance(malwareconfig_config, list): for (key, value) in malwareconfig_config[0].items(): config[cape_name].update({key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in malwareconfig_config.items(): config[cape_name].update({key: [value]}) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = False # Hancitor if file_info["cape_type_code"] == HANCITOR_PAYLOAD: cape_name = "Hancitor" config[cape_name] = dict() config[cape_name]["cape_type"] = "Hancitor Payload" file_info["cape_type"] = "Hancitor Payload" if file_info["cape_type_code"] == HANCITOR_CONFIG: cape_name = "Hancitor" file_info["cape_type"] = "Hancitor Config" ConfigStrings = file_data.split(b"\0") ConfigStrings = [_f for _f in ConfigStrings if _f] ConfigItem = "Campaign Code" config[cape_name] = dict() config[cape_name]["cape_type"] = "Hancitor Config" 
config[cape_name].update({ConfigItem: [ConfigStrings[0]]}) GateURLs = ConfigStrings[1].split(b"|") for index, value in enumerate(GateURLs): ConfigItem = "Gate URL " + str(index + 1) config[cape_name].update({ConfigItem: [value]}) append_file = False # QakBot if file_info["cape_type_code"] == QAKBOT_CONFIG: file_info["cape_type"] = "QakBot Config" cape_name = "QakBot" config[cape_name] = dict() config[cape_name]["cape_type"] = "QakBot Config" config_tmp = static_config_parsers(cape_name, file_data) if config_tmp and config_tmp[cape_name]: config.update(config_tmp) append_file = False # Attempt to decrypt script dump if file_info["cape_type_code"] == SCRIPT_DUMP: data = file_data.decode("utf-16").replace("\x00", "") cape_name = "ScriptDump" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.debug( "CAPE: malwareconfig.com parser: No module named %s", cape_name) if malwareconfig_loaded: try: script_data = module.config(self, data) if script_data and "more_eggs" in script_data["type"]: bindata = script_data["data"] sha256 = hashlib.sha256(bindata).hexdigest() filepath = os.path.join(self.CAPE_path, sha256) tmpstr = file_info["pid"] tmpstr += "," + file_info["process_path"] tmpstr += "," + file_info["module_path"] if "text" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsJS" outstr = str( MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n" with open(filepath + "_info.txt", "w") as infofd: infofd.write(outstr) with open(filepath, "w") as cfile: cfile.write(bindata) elif "binary" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsBin" outstr = str( MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n" with open(filepath + "_info.txt", "w") as infofd: infofd.write(outstr) with open(filepath, "wb") as cfile: cfile.write(bindata) if os.path.exists(filepath): self.script_dump_files.append(filepath) else: file_info["cape_type"] = "Script Dump" log.info( "CAPE: Script Dump does not contain known encrypted payload." 
) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = True # More_Eggs if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD: file_info["cape_type"] = "More Eggs JS Payload" cape_name = "MoreEggs" append_file = True # Process CAPE Yara hits for hit in file_info["cape_yara"]: # Check to see if file is packed with UPX if hit["name"] == "UPX": log.info( "CAPE: Found UPX Packed sample - attempting to unpack") self.upx_unpack(file_data) # Check for a payload or config hit extraction_types = ["payload", "config", "loader"] try: for type in extraction_types: if type in hit["meta"].get("cape_type", "").lower(): file_info["cape_type"] = hit["meta"]["cape_type"] cape_name = hit["name"].replace("_", " ") except Exception as e: print("Cape type error: {}".format(e)) type_strings = file_info["type"].split() if "-bit" not in file_info["cape_type"]: if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" suppress_parsing_list = ["Cerber", "Ursnif"] if hit["name"] in suppress_parsing_list: continue tmp_config = static_config_parsers(hit["name"].replace("_", " "), file_data) if tmp_config and tmp_config[hit["name"].replace("_", " ")]: config.update(tmp_config) if cape_name: if not "detections" in self.results: if cape_name != "UPX": #ToDo list of keys self.results["detections"] = cape_name # Remove duplicate payloads from web ui for cape_file in self.cape["payloads"] or []: if file_info["size"] == cape_file["size"]: if HAVE_PYDEEP: ssdeep_grade = pydeep.compare( file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8")) if ssdeep_grade >= ssdeep_threshold: append_file = False if file_info.get("entrypoint") and file_info.get( "ep_bytes") and cape_file.get("entrypoint"): if (file_info.get("entrypoint") and file_info["entrypoint"] == cape_file["entrypoint"] and file_info["cape_type_code"] == cape_file["cape_type_code"] and file_info["ep_bytes"] == cape_file["ep_bytes"]): log.debug("CAPE duplicate output file skipped") append_file = False if append_file is True: pretime = datetime.now() capa_details = flare_capa_details(file_path, "CAPE") if capa_details: file_info["flare_capa"] = capa_details self.add_statistic_tmp("flare_capa", "time", pretime=pretime) self.cape["payloads"].append(file_info) if config and config not in self.cape["configs"]: self.cape["configs"].append(config)
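# Illustrative sketch: the parsers above all feed a shared structure of the form
# {family: {config_item: [values]}} which later ends up in self.cape["configs"]. A small helper
# showing how one parser's flat output folds into that shape; the helper name and the example
# values are mine.

def merge_config(configs, family, parsed):
    """Fold a parser's flat {key: value} output into the {family: {key: [values]}} shape."""
    entry = configs.setdefault(family, {})
    for key, value in (parsed or {}).items():
        entry.setdefault(key, [])
        if value not in entry[key]:
            entry[key].append(value)
    return configs

configs = {}
merge_config(configs, "Hancitor", {"Campaign Code": "example_campaign",
                                   "Gate URL 1": "http://gate.example.invalid/forum.php"})
print(configs["Hancitor"]["Campaign Code"])  # ['example_campaign']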
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION: CuckooReportError("Mongo schema version not expected, check data migration tool") else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually # created. # Returns None if the index already exists. # TODO: This is not optimal because it run each analysis. Need to run # only one time at startup. self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique") # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) # Store the sample in GridFS. if results["info"]["category"] == "file": sample = File(self.file_path) if sample.valid(): fname = results["target"]["file"]["name"] sample_id = self.store_file(sample, filename=fname) report["target"] = {"file_id": sample_id} report["target"].update(results["target"]) # Store the PCAP file in GridFS and reference it back in the report. pcap_path = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_path) if pcap.valid(): pcap_id = self.store_file(pcap) report["network"] = {"pcap_id": pcap_id} report["network"].update(results["network"]) # Walk through the dropped files, store them in GridFS and update the # report with the ObjectIds. new_dropped = [] for dropped in report["dropped"]: new_drop = dict(dropped) drop = File(dropped["path"]) if drop.valid(): dropped_id = self.store_file(drop, filename=dropped["name"]) new_drop["object_id"] = dropped_id new_dropped.append(new_drop) report["dropped"] = new_dropped # Store the Zipped Droppings file in GridFS and reference it back in the report. #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip") #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path) #if cuckoo_dropped_zip.valid(): # cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip) # report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id} # report["zippeddroppings"].update(results["zippeddroppings"]) # Add screenshots. report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): # Walk through the files and select the JPGs. shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) # If the screenshot path is a valid file, store it and # reference it back in the report. if shot.valid(): shot_id = self.store_file(shot) report["shots"].append(shot_id) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. 
# Also allows paging of the reports. new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes #Other info we want Quick access to from the web UI if report.has_key("virustotal") and report["virustotal"] and report["virustotal"].has_key("positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % (report["virustotal"]["positives"],report["virustotal"]["total"]) new_suricata_files = [] if report.has_key("suricata") and report["suricata"]: suricata={} suricata["info"]={} suricata["info"]["id"]=report["info"]["id"] # Walk through the suricata extracted files, store them in GridFS and update the # report with the ObjectIds # Store the suri extracted files in GridFS and reference it back in the report. suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip") suri_extracted_zip = File(suri_extracted_zip_path) if suri_extracted_zip.valid(): suri_extracted_zip_id = self.store_file(suri_extracted_zip) suricata["suri_extracted_zip"]=suri_extracted_zip_id if report["suricata"].has_key("files") and len(report["suricata"]["files"]) > 0: suricata["file_cnt"] = len(report["suricata"]["files"]) for suricata_file_e in report["suricata"]["files"]: if suricata_file_e.has_key("file_info"): tmp_suricata_file_d = dict(suricata_file_e) suricata_file = File(suricata_file_e["file_info"]["path"]) if suricata_file.valid(): suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"]) tmp_suricata_file_d["object_id"] = suricata_file_id new_suricata_files.append(tmp_suricata_file_d) suricata["files"] = new_suricata_files if report["suricata"].has_key("alert_log_full_path") and report["suricata"]["alert_log_full_path"]: suricata_alert_log = File(report["suricata"]["alert_log_full_path"]) if suricata_alert_log.valid(): suricata_alert_log_id = self.store_file(suricata_alert_log) suricata["alert_log_id"] = suricata_alert_log_id if report["suricata"].has_key("tls_log_full_path") and report["suricata"]["tls_log_full_path"]: tls_log = File(report["suricata"]["tls_log_full_path"]) if tls_log.valid(): tls_log_id = self.store_file(tls_log) suricata["tls_log_id"] = tls_log_id if report["suricata"].has_key("http_log_full_path") and report["suricata"]["http_log_full_path"]: http_log = File(report["suricata"]["http_log_full_path"]) if http_log.valid(): http_log_id = self.store_file(http_log) suricata["http_log_id"] = http_log_id if report["suricata"].has_key("file_log_full_path") and report["suricata"]["file_log_full_path"]: file_log = File(report["suricata"]["file_log_full_path"]) if file_log.valid(): file_log_id = self.store_file(file_log) suricata["file_log_id"] = file_log_id if 
report["suricata"].has_key("dns_log_full_path") and report["suricata"]["dns_log_full_path"]: dns_log = File(report["suricata"]["dns_log_full_path"]) if dns_log.valid(): dns_log_id = self.store_file(dns_log) suricata["dns_log_id"] = dns_log_id if report["suricata"].has_key("ssh_log_full_path") and report["suricata"]["ssh_log_full_path"]: ssh_log = File(report["suricata"]["ssh_log_full_path"]) if ssh_log.valid(): ssh_log_id = self.store_file(ssh_log) suricata["ssh_log_id"] = ssh_log_id if report["suricata"].has_key("tls") and len(report["suricata"]["tls"]) > 0: suricata["tls_cnt"] = len(report["suricata"]["tls"]) suricata["tls"]=report["suricata"]["tls"] if report["suricata"] and report["suricata"].has_key("alerts") and len(report["suricata"]["alerts"]) > 0: suricata["alert_cnt"] = len(report["suricata"]["alerts"]) suricata["alerts"]=report["suricata"]["alerts"] if results["suricata"].has_key("http") and len(report["suricata"]["http"]) > 0: suricata["http_cnt"] = len(report["suricata"]["http"]) suricata["http"]=report["suricata"]["http"] self.db.suricata.save(suricata) #do not store this in analysis collection del report["suricata"] if results.has_key("behavior") and results["behavior"].has_key("martianlist") and results["behavior"]["martianlist"] and len(results["behavior"]["martianlist"]) > 0: report["mlist_cnt"] = len(results["behavior"]["martianlist"]) # Store the report and retrieve its object id. self.db.analysis.save(report) self.conn.disconnect()
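# Illustrative sketch: both MongoDB reporters keep large artifacts (the PCAP, dropped files,
# Suricata logs) out of the analysis document by pushing them to GridFS through a store_file()
# helper and keeping only the returned ObjectId. The helper itself is not part of this excerpt;
# the equivalent operation with pymongo's gridfs module looks roughly like this, with
# connection details as placeholders.

import gridfs
from pymongo import MongoClient

client = MongoClient("127.0.0.1", 27017)  # placeholder connection settings
db = client["cuckoo"]
fs = gridfs.GridFS(db)

def store_file_in_gridfs(path, filename=None):
    """Store a file in GridFS and return its ObjectId."""
    with open(path, "rb") as fd:
        return fs.put(fd, filename=filename or path)

# pcap_id = store_file_in_gridfs("dump.pcap", filename="dump.pcap")
# report["network"]["pcap_id"] = pcap_id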
def run(self): """Run Suricata. @return: hash with alerts """ self.key = "suricata" # General SURICATA_CONF = self.options.get("conf", None) SURICATA_EVE_LOG = self.options.get("evelog", None) SURICATA_ALERT_LOG = self.options.get("alertlog", None) SURICATA_TLS_LOG = self.options.get("tlslog", None) SURICATA_HTTP_LOG = self.options.get("httplog", None) SURICATA_SSH_LOG = self.options.get("sshlog", None) SURICATA_DNS_LOG = self.options.get("dnslog", None) SURICATA_FILE_LOG = self.options.get("fileslog", None) SURICATA_FILES_DIR = self.options.get("filesdir", None) SURICATA_RUNMODE = self.options.get("runmode", None) SURICATA_FILE_BUFFER = self.options.get("buffer", 8192) Z7_PATH = self.options.get("7zbin", None) FILES_ZIP_PASS = self.options.get("zippass", None) SURICATA_FILE_COPY_DST_DIR = self.options.get("file_copy_dest_dir", None) SURICATA_FILE_COPY_MAGIC_RE = self.options.get("file_magic_re", None) if SURICATA_FILE_COPY_MAGIC_RE: try: SURICATA_FILE_COPY_MAGIC_RE = re.compile( SURICATA_FILE_COPY_MAGIC_RE) except: log.warning("Failed to compile suricata copy magic RE" % (SURICATA_FILE_COPY_MAGIC_RE)) SURICATA_FILE_COPY_MAGIC_RE = None # Socket SURICATA_SOCKET_PATH = self.options.get("socket_file", None) SURICATA_SOCKET_PYLIB = self.options.get("pylib_dir", None) # Command Line SURICATA_BIN = self.options.get("bin", None) suricata = {} suricata["alerts"] = [] suricata["tls"] = [] suricata["perf"] = [] suricata["files"] = [] suricata["http"] = [] suricata["dns"] = [] suricata["ssh"] = [] suricata["file_info"] = [] suricata["eve_log_full_path"] = None suricata["alert_log_full_path"] = None suricata["tls_log_full_path"] = None suricata["http_log_full_path"] = None suricata["file_log_full_path"] = None suricata["ssh_log_full_path"] = None suricata["dns_log_full_path"] = None SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_ALERT_LOG) SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_TLS_LOG) SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_HTTP_LOG) SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_SSH_LOG) SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_DNS_LOG) SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_EVE_LOG) SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILE_LOG) SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILES_DIR) if not os.path.exists(SURICATA_CONF): log.warning("Unable to Run Suricata: Conf File %s Does Not Exist" % (SURICATA_CONF)) return suricata["alerts"] if not os.path.exists(self.pcap_path): log.warning("Unable to Run Suricata: Pcap file %s Does Not Exist" % (self.pcap_path)) return suricata["alerts"] # Add to this if you wish to ignore any SIDs for the suricata alert logs # Useful for ignoring SIDs without disabling them. Ex: surpress an alert for # a SID which is a dependent of another. 
        # (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]
        if SURICATA_RUNMODE == "socket":
            if SURICATA_SOCKET_PYLIB is not None:
                sys.path.append(SURICATA_SOCKET_PYLIB)
            try:
                from suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib %s" % (e))
                return suricata
            loopcnt = 0
            maxloops = 24
            loopsleep = 5
            args = {}
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path
            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning("Failed to connect to socket and send command %s: %s" % (SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: %s current pcap: %s" % (pcap_flist, current_pcap))
                    if self.pcap_path not in pcap_flist["message"]["files"] and current_pcap["message"] != self.pcap_path:
                        log.debug("Pcap not in list and not the current pcap, assuming it has been processed")
                        break
                    else:
                        loopcnt += 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning("Failed to get pcap status, breaking out of loop: %s" % (e))
                    break
            if loopcnt == maxloops:
                log.warning("Loop timeout of %s sec occurred waiting for file %s to finish processing" % (maxloops * loopsleep, self.pcap_path))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning("Unable to Run Suricata: Bin File %s Does Not Exist" % (SURICATA_BIN))
                return suricata["alerts"]
            cmd = "%s -c %s -k none -l %s -r %s" % (SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning("Suricata returned an exit value other than zero: %s" % (stderr))
                return suricata
        else:
            log.warning("Unknown Suricata runmode")
            return suricata
        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            paths = [("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
                     ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
                     ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
                     ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
                     ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)]
            for path in paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())
        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")
        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except:
                    log.warning("Suricata: Failed to parse line as json: %s" % (line))
                    continue
                if parsed["event_type"] == "alert":
                    if (parsed["alert"]["signature_id"] not in sid_blacklist and
                            not parsed["alert"]["signature"].startswith("SURICATA STREAM")):
                        alog = dict()
                        if parsed["alert"]["gid"] == '':
                            alog["gid"] = "None"
                        else:
                            alog["gid"] = parsed["alert"]["gid"]
                        if parsed["alert"]["rev"] == '':
                            alog["rev"] = "None"
                        else:
                            alog["rev"] = parsed["alert"]["rev"]
                        if parsed["alert"]["severity"] == '':
                            alog["severity"] = "None"
                        else:
                            alog["severity"] = parsed["alert"]["severity"]
                        alog["sid"] = parsed["alert"]["signature_id"]
                        alog["srcport"] = 
parsed["src_port"] alog["srcip"] = parsed["src_ip"] alog["dstport"] = parsed["dest_port"] alog["dstip"] = parsed["dest_ip"] alog["protocol"] = parsed["proto"] alog["timestamp"] = parsed["timestamp"].replace( "T", " ") if parsed["alert"]["category"] == '': alog["category"] = "None" else: alog["category"] = parsed["alert"]["category"] alog["signature"] = parsed["alert"]["signature"] suricata["alerts"].append(alog) elif parsed["event_type"] == "http": hlog = dict() hlog["srcport"] = parsed["src_port"] hlog["srcip"] = parsed["src_ip"] hlog["dstport"] = parsed["dest_port"] hlog["dstip"] = parsed["dest_ip"] hlog["timestamp"] = parsed["timestamp"].replace("T", " ") try: hlog["uri"] = parsed["http"]["url"] except: hlog["uri"] = "None" hlog["length"] = parsed["http"]["length"] try: hlog["hostname"] = parsed["http"]["hostname"] except: hlog["hostname"] = "None" try: hlog["status"] = str(parsed["http"]["status"]) except: hlog["status"] = "None" try: hlog["method"] = parsed["http"]["http_method"] except: hlog["method"] = "None" try: hlog["contenttype"] = parsed["http"][ "http_content_type"] except: hlog["contenttype"] = "None" try: hlog["ua"] = parsed["http"]["http_user_agent"] except: hlog["ua"] = "None" try: hlog["referrer"] = parsed["http"]["http_refer"] except: hlog["referrer"] = "None" suricata["http"].append(hlog) elif parsed["event_type"] == "tls": tlog = dict() tlog["srcport"] = parsed["src_port"] tlog["srcip"] = parsed["src_ip"] tlog["dstport"] = parsed["dest_port"] tlog["dstip"] = parsed["dest_ip"] tlog["timestamp"] = parsed["timestamp"].replace("T", " ") tlog["fingerprint"] = parsed["tls"]["fingerprint"] tlog["issuer"] = parsed["tls"]["issuerdn"] tlog["version"] = parsed["tls"]["version"] tlog["subject"] = parsed["tls"]["subject"] suricata["tls"].append(tlog) elif parsed["event_type"] == "ssh": suricata["ssh"].append(parsed) elif parsed["event_type"] == "dns": suricata["dns"].append(parsed) if os.path.exists(SURICATA_FILE_LOG_FULL_PATH): suricata["file_log_full_path"] = SURICATA_FILE_LOG_FULL_PATH f = open(SURICATA_FILE_LOG_FULL_PATH, "rb").readlines() for l in f: try: d = json.loads(l) except: log.warning("failed to load JSON from file log") continue # Some log entries do not have an id if "id" not in d: continue src_file = "%s/file.%s" % (SURICATA_FILES_DIR_FULL_PATH, d["id"]) if os.path.exists(src_file): if SURICATA_FILE_COPY_MAGIC_RE and SURICATA_FILE_COPY_DST_DIR and os.path.exists( SURICATA_FILE_COPY_DST_DIR): try: m = re.search(SURICATA_FILE_COPY_MAGIC_RE, d["magic"]) if m: dst_file = "%s/%s" % ( SURICATA_FILE_COPY_DST_DIR, d["md5"]) shutil.copy2(src_file, dst_file) log.warning("copied %s to %s" % (src_file, dst_file)) except Exception, e: log.warning("Unable to copy suricata file: %s" % e) file_info = File(file_path=src_file).get_all() texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] readit = False for texttype in texttypes: if texttype in file_info["type"]: readit = True break if readit: with open(file_info["path"], "rb") as drop_open: filedata = drop_open.read(SURICATA_FILE_BUFFER + 1) if len(filedata) > SURICATA_FILE_BUFFER: file_info["data"] = convert_to_printable( filedata[:SURICATA_FILE_BUFFER] + " <truncated>") else: file_info["data"] = convert_to_printable(filedata) d["file_info"] = file_info if "/" in d["filename"]: d["filename"] = d["filename"].split("/")[-1] suricata["files"].append(d)
def process_file(self, file_path, append_file, metadata={}): """Process file. @return: file_info """ config = {} cape_name = "" type_string = "" if not os.path.exists(file_path): return buf = self.options.get("buffer", BUFSIZE) file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all() if pefile_object: self.results.setdefault("pefiles", {}) self.results["pefiles"].setdefault(file_info["sha256"], pefile_object) # Allows to put execute file extractors/unpackers generic_file_extractors(file_path, self.dropped_path, file_info.get("type", ""), file_info) # Get the file data with open(file_info["path"], "rb") as file_open: file_data = file_open.read() is_text_file(file_info, self.CAPE_path, buf, file_data) if metadata.get("pids", False): if len(metadata["pids"]) == 1: file_info["pid"] = metadata["pids"][0] else: file_info["pid"] = ",".join(metadata["pids"]) metastrings = metadata.get("metadata", "").split(";?") if len(metastrings) > 2: file_info["process_path"] = metastrings[1] file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[2] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings and metastrings[0] and metastrings[0].isdigit(): file_info["cape_type_code"] = int(metastrings[0]) if file_info["cape_type_code"] == TYPE_STRING: if len(metastrings) > 4: type_string = metastrings[3] if file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" if file_info["cape_type_code"] in inject_map: file_info["cape_type"] = inject_map[file_info["cape_type_code"]] if len(metastrings) > 4: file_info["target_path"] = metastrings[3] file_info["target_process"] = metastrings[3].rsplit("\\", 1)[-1] file_info["target_pid"] = metastrings[4] if file_info["cape_type_code"] in unpack_map: file_info["cape_type"] = unpack_map[file_info["cape_type_code"]] if len(metastrings) > 4: file_info["virtual_address"] = metastrings[3] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] in code_mapping: file_info["cape_type"] = code_mapping[file_info["cape_type_code"]] type_strings = file_info["type"].split() if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] in name_mapping: cape_name = name_mapping[file_info["cape_type_code"]] append_file = True """ ConfigData = format(file_data) if ConfigData: config[cape_name].update({ConfigItem: [ConfigData]}) """ # PlugX if file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" if plugx_parser: plugx_config = plugx_parser.parse_config(file_data, len(file_data)) if plugx_config: cape_name = "PlugX" config[cape_name] = {} for key, value in plugx_config.items(): config[cape_name].update({key: [value]}) else: log.error("CAPE: PlugX config parsing failure - size many not be handled") append_file = False # Attempt to decrypt script dump if file_info["cape_type_code"] == SCRIPT_DUMP: data = file_data.decode("utf-16").replace("\x00", "") cape_name = "ScriptDump" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE") file, pathname, description = 
imp.find_module(cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.debug("CAPE: Imported parser %s", cape_name) except ImportError: log.debug("CAPE: parser: No module named %s", cape_name) if malwareconfig_loaded: try: script_data = module.config(self, data) if script_data and "more_eggs" in script_data["type"]: bindata = script_data["data"] sha256 = hashlib.sha256(bindata).hexdigest() filepath = os.path.join(self.CAPE_path, sha256) tmpstr = file_info["pid"] tmpstr += f",{file_info['process_path']}" tmpstr += f",{file_info['module_path']}" if "text" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsJS" outstr = f"{MOREEGGSJS_PAYLOAD},{tmpstr}\n" # with open(f"{filepath}_info.txt", "w") as infofd: # infofd.write(outstr) with open(filepath, "w") as cfile: cfile.write(bindata) elif "binary" in script_data["datatype"]: file_info["cape_type"] = "MoreEggsBin" outstr = f"{MOREEGGSBIN_PAYLOAD},{tmpstr}\n" # with open(f"{filepath}_info.txt", "w") as infofd: # infofd.write(outstr) with open(filepath, "wb") as cfile: cfile.write(bindata) if os.path.exists(filepath): self.script_dump_files.append(filepath) else: file_info["cape_type"] = "Script Dump" log.info("CAPE: Script Dump does not contain known encrypted payload") except Exception as e: log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = True # More_Eggs if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD: file_info["cape_type"] = "More Eggs JS Payload" cape_name = "MoreEggs" append_file = True # Process CAPE Yara hits for hit in file_info["cape_yara"]: # Check to see if file is packed with UPX if hit["name"] == "UPX": log.info("CAPE: Found UPX Packed sample - attempting to unpack") self.upx_unpack(file_data) # Check for a payload or config hit extraction_types = ("payload", "config", "loader") try: if any([file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types]): file_info["cape_type"] = hit["meta"]["cape_type"] cape_name = hit["name"].replace("_", " ") except Exception as e: print(f"Cape type error: {e}") type_strings = file_info["type"].split() if "-bit" not in file_info["cape_type"]: if type_strings[0] in ("PE32+", "PE32"): file_info["cape_type"] += pe_map[type_strings[0]] if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if hit["name"] == "GuLoader": self.detect2pid(file_info["pid"], "GuLoader") cape_name = hit["name"].replace("_", " ") tmp_config = static_config_parsers(hit["name"], file_data) if tmp_config and tmp_config.get(cape_name): config.update(tmp_config[cape_name]) if type_string: log.info("CAPE: type_string: %s", type_string) tmp_config = static_config_parsers(type_string.split(" ", 1)[0], file_data) if tmp_config: cape_name = type_string.split(" ", 1)[0] log.info("CAPE: config returned for: %s", cape_name) config.update(tmp_config) if cape_name: if "detections" not in self.results: if cape_name != "UPX": # ToDo list of keys self.results["detections"] = cape_name if file_info.get("pid"): self.detect2pid(file_info["pid"], cape_name) # Remove duplicate payloads from web ui for cape_file in self.cape["payloads"] or []: if file_info["size"] == cape_file["size"]: if HAVE_PYDEEP: ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode()) if ssdeep_grade >= ssdeep_threshold: append_file = False if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"): 
if ( file_info.get("entrypoint") and file_info["entrypoint"] == cape_file["entrypoint"] and file_info["cape_type_code"] == cape_file["cape_type_code"] and file_info["ep_bytes"] == cape_file["ep_bytes"] ): log.debug("CAPE duplicate output file skipped") append_file = False if append_file: if HAVE_FLARE_CAPA: pretime = datetime.now() capa_details = flare_capa_details(file_path, "cape") if capa_details: file_info["flare_capa"] = capa_details self.add_statistic_tmp("flare_capa", "time", pretime=pretime) self.cape["payloads"].append(file_info) if config and config not in self.cape["configs"]: if cape_name in multi_block_config and self.cape["configs"]: for conf in self.cape["configs"]: if cape_name in conf: conf[cape_name].update(config) else: # in case if malware name is missed it will break conf visualization if cape_name not in config: config = {cape_name: config} if config not in self.cape["configs"]: self.cape["configs"].append(config)
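# Editor's sketch: the fuzzy-hash de-duplication used above, reduced to a standalone
# helper. Assumes pydeep is installed; `known_hashes` (previously stored ssdeep hashes)
# and the threshold value are illustrative, not taken from the original code.
import pydeep

def is_duplicate_payload(data, known_hashes, threshold=95):
    """Return True if `data` fuzzy-matches any previously stored payload hash."""
    candidate = pydeep.hash_buf(data)
    return any(pydeep.compare(candidate, known) >= threshold for known in known_hashes)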
def launch_analysis(self): """Start analysis.""" succeeded = False dead_machine = False self.socks5s = _load_socks5_operational() log.info("Task #{0}: Starting analysis of {1} '{2}'".format( self.task.id, self.task.category.upper(), convert_to_printable(self.task.target))) # Initialize the analysis folders. if not self.init_storage(): log.debug("Failed to initialize the analysis folder") return False sha256 = File(self.task.target).get_sha256() if self.task.category in ["file", "pcap", "static"]: # Check whether the file has been changed for some unknown reason. # And fail this analysis if it has been modified. if not self.check_file(sha256): return False # Store a copy of the original file. if not self.store_file(sha256): return False if self.task.category in ("pcap", "static"): if self.task.category == "pcap": if hasattr(os, "symlink"): os.symlink(self.binary, os.path.join(self.storage, "dump.pcap")) else: shutil.copy(self.binary, os.path.join(self.storage, "dump.pcap")) # create the logs/files directories as # normally the resultserver would do it dirnames = ["logs", "files", "aux"] for dirname in dirnames: try: os.makedirs(os.path.join(self.storage, dirname)) except: pass return True # Acquire analysis machine. try: self.acquire_machine() self.db.set_task_vm(self.task.id, self.machine.label, self.machine.id) # At this point we can tell the ResultServer about it. except CuckooOperationalError as e: machine_lock.release() log.error("Task #{0}: Cannot acquire machine: {1}".format( self.task.id, e)) return False # Generate the analysis configuration file. options = self.build_options() try: ResultServer().add_task(self.task, self.machine) except Exception as e: machinery.release(self.machine.label) log.exception(e) self.errors.put(e) aux = RunAuxiliary(task=self.task, machine=self.machine) try: unlocked = False # Mark the selected analysis machine in the database as started. guest_log = self.db.guest_start(self.task.id, self.machine.name, self.machine.label, machinery.__class__.__name__) # Start the machine. machinery.start(self.machine.label) # Enable network routing. self.route_network() # By the time start returns it will have fully started the Virtual # Machine. We can now safely release the machine lock. machine_lock.release() unlocked = True aux.start() # Initialize the guest manager. guest = GuestManager(self.machine.name, self.machine.ip, self.machine.platform, self.task.id, self) options["clock"] = self.db.update_clock(self.task.id) self.db.guest_set_status(self.task.id, "starting") # Start the analysis. guest.start_analysis(options) if self.db.guest_get_status(self.task.id) == "starting": self.db.guest_set_status(self.task.id, "running") guest.wait_for_completion() self.db.guest_set_status(self.task.id, "stopping") succeeded = True except CuckooMachineError as e: if not unlocked: machine_lock.release() log.error(str(e), extra={"task_id": self.task.id}) dead_machine = True except CuckooGuestError as e: if not unlocked: machine_lock.release() log.error(str(e), extra={"task_id": self.task.id}) finally: # Stop Auxiliary modules. aux.stop() # Take a memory dump of the machine before shutting it off. if self.cfg.cuckoo.memory_dump or self.task.memory: try: dump_path = get_memdump_path(self.task.id) need_space, space_available = free_space_monitor( os.path.dirname(dump_path), return_value=True) if need_space: log.error( "Not enough free disk space! 
Could not dump ram (Only %d MB!)", space_available) else: machinery.dump_memory(self.machine.label, dump_path) except NotImplementedError: log.error("The memory dump functionality is not available " "for the current machine manager.") except CuckooMachineError as e: log.error(e) try: # Stop the analysis machine. machinery.stop(self.machine.label) except CuckooMachineError as e: log.warning( "Task #{0}: Unable to stop machine {1}: {2}".format( self.task.id, self.machine.label, e)) # Mark the machine in the database as stopped. Unless this machine # has been marked as dead, we just keep it as "started" in the # database so it'll not be used later on in this session. self.db.guest_stop(guest_log) # After all this, we can make the ResultServer forget about the # internal state for this analysis task. ResultServer().del_task(self.task, self.machine) # Drop the network routing rules if any. self.unroute_network() if dead_machine: # Remove the guest from the database, so that we can assign a # new guest when the task is being analyzed with another # machine. self.db.guest_remove(guest_log) # Remove the analysis directory that has been created so # far, as launch_analysis() is going to be doing that again. shutil.rmtree(self.storage) # This machine has turned dead, so we throw an exception here # which informs the AnalysisManager that it should analyze # this task again with another available machine. raise CuckooDeadMachine() try: # Release the analysis machine. But only if the machine has # not turned dead yet. machinery.release(self.machine.label) except CuckooMachineError as e: log.error("Task #{0}: Unable to release machine {1}, reason " "{2}. You might need to restore it manually.".format( self.task.id, self.machine.label, e)) return succeeded
def run(self, results): """Writes report. @param results: Cuckoo results dict. @raise CuckooReportError: if fails to write report. """ if not HAVE_JINJA2: raise CuckooReportError( "Failed to generate HTML report: Jinja2 library is not " "installed (install `pip install jinja2`)") shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [] counter = 1 for shot_name in os.listdir(shots_path): if not shot_name.endswith(".jpg"): continue shot_path = os.path.join(shots_path, shot_name) if not os.path.getsize(shot_path): continue shot = {} shot["id"] = os.path.splitext(File(shot_path).get_name())[0] shot["data"] = base64.b64encode(open(shot_path, "rb").read()) shots.append(shot) counter += 1 shots.sort(key=lambda shot: shot["id"]) results["screenshots"] = shots else: results["screenshots"] = [] env = Environment(autoescape=True) env.loader = FileSystemLoader(os.path.join(CUCKOO_ROOT, "data", "html")) processed = None mapping = [ ("file_read", "File", "Read"), ("file_written", "File", "Written"), ("file_deleted", "File", "Deleted"), ("file_opened", "File", "Opened"), ("file_copied", "File", "Copied"), ("file_moved", "File", "Moved"), ("connects_ip", "Network", "Connects IP"), ("resolves_url", "Network", "Resolves URL"), ("fetches_url", "Network", "Fetches URL"), ("connects_host", "Network", "Connects Host"), ("downloads_file_url", "Network", "Downloads File URL"), ("directory_created", "Directory", "Created"), ("directory_removed", "Directory", "Removed"), ("directory_enumerated", "Directory", "Enumerated"), ("regkey_opened", "Registry Key", "Opened"), ("regkey_deleted", "Registry Key", "Deleted"), ("regkey_read", "Registry Key", "Read"), ("regkey_written", "Registry Key", "Written"), ("mutex", "Mutex", "Accessed"), ] processed = {} for proc in results.get("behavior", {}).get("generic", []): for orig, cat, subcat in mapping: if cat not in processed: processed[cat] = {} if subcat not in processed[cat]: processed[cat][subcat] = [] # Special handling required for file moved/copied. if orig == "file_moved" or orig == "file_copied": for src, dst in proc.get("summary", {}).get(orig, []): entry = "%s -> %s" % (src, dst) processed[cat][subcat].append(entry) continue if "summary" in proc and orig in proc["summary"]: for content in proc["summary"][orig]: processed[cat][subcat].append(content) try: tpl = env.get_template("report.html") html = tpl.render({ "results": results, "processed": processed, "mapping": mapping }) except Exception as e: raise CuckooReportError("Failed to generate HTML report: %s" % e) try: report_path = os.path.join(self.reports_path, "report.html") with codecs.open(report_path, "w", encoding="utf-8") as report: report.write(html) except (TypeError, IOError) as e: raise CuckooReportError("Failed to write HTML report: %s" % e) return True
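# Editor's sketch: the template-rendering step above in minimal form, assuming a
# directory `template_dir` that contains report.html; autoescape mirrors the setting
# used by the reporting module.
from jinja2 import Environment, FileSystemLoader

def render_report(template_dir, results, processed, mapping):
    env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
    template = env.get_template("report.html")
    return template.render({"results": results, "processed": processed, "mapping": mapping})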
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] do_strings = self.options.get("strings", False) nulltermonly = self.options.get("nullterminated_only", True) minchars = self.options.get("minchars", 5) if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): # if we're re-processing this task, this means if zips are enabled, we won't do any reprocessing on the # process dumps (only matters for now for Yara) if not dmp.endswith(".dmp"): continue dmp_path = os.path.join(self.pmemory_path, dmp) if os.path.getsize(dmp_path) == 0: continue dmp_file = File(dmp_path) process_name = "" process_path = "" process_id = int( os.path.splitext(os.path.basename(dmp_path))[0]) for process in self.results.get("behavior", {}).get( "processes", []) or []: if process_id == process["process_id"]: process_name = process["process_name"] process_path = process["module_path"] procdump = ProcDump(dmp_path, pretty=True) proc = dict( file=dmp_path, pid=process_id, name=process_name, path=process_path, yara=dmp_file.get_yara( os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")), address_space=procdump.pretty_print(), ) endlimit = "" if not HAVE_RE2: endlimit = "8192" if do_strings: if nulltermonly: apat = "([\x20-\x7e]{" + str( minchars) + "," + endlimit + "})\x00" upat = "((?:[\x20-\x7e][\x00]){" + str( minchars) + "," + endlimit + "})\x00\x00" else: apat = "[\x20-\x7e]{" + str( minchars) + "," + endlimit + "}" upat = "(?:[\x20-\x7e][\x00]){" + str( minchars) + "," + endlimit + "}" matchdict = procdump.search(apat, all=True) strings = matchdict["matches"] matchdict = procdump.search(upat, all=True) ustrings = matchdict["matches"] for ws in ustrings: strings.append(str(ws.decode("utf-16le"))) proc["strings_path"] = dmp_path + ".strings" f = open(proc["strings_path"], "w") f.write("\n".join(strings)) f.close() procdump.close() results.append(proc) return results
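# Editor's sketch: the string-carving approach used above, reduced to its core. The
# patterns match printable ASCII and UTF-16LE runs of at least `minchars` characters;
# the function name and defaults are illustrative only.
import re

def carve_strings(buf, minchars=5):
    """Extract ASCII and UTF-16LE strings from a bytes buffer."""
    ascii_pat = rb"[\x20-\x7e]{%d,}" % minchars
    wide_pat = rb"(?:[\x20-\x7e]\x00){%d,}" % minchars
    strings = [match.decode("ascii") for match in re.findall(ascii_pat, buf)]
    strings += [match.decode("utf-16le") for match in re.findall(wide_pat, buf)]
    return strings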
def process_file(self, file_path, CAPE_output, append_file): """Process file. @return: file_info """ global cape_config cape_name = "" strings = [] buf = self.options.get("buffer", BUFSIZE) if file_path.endswith("_info.txt"): return texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] if os.path.exists(file_path + "_info.txt"): with open(file_path + "_info.txt", 'r') as f: metastring = f.readline() else: metastring = "" file_info = File(file_path, metastring).get_all() # Get the file data with open(file_info["path"], "r") as file_open: file_data = file_open.read(buf + 1) if len(file_data) > buf: file_info["data"] = binascii.b2a_hex(file_data[:buf] + " <truncated>") else: file_info["data"] = binascii.b2a_hex(file_data) metastrings = metastring.split(",") if len(metastrings) > 1: file_info["pid"] = metastrings[1] if len(metastrings) > 2: file_info["process_path"] = metastrings[2] file_info["process_name"] = metastrings[2].split("\\")[-1] if len(metastrings) > 3: file_info["module_path"] = metastrings[3] file_info["cape_type_code"] = 0 file_info["cape_type"] = "" if metastrings != "": try: file_info["cape_type_code"] = int(metastrings[0]) except Exception as e: pass if file_info["cape_type_code"] == COMPRESSION: file_info["cape_type"] = "Decompressed PE Image" if file_info["cape_type_code"] == INJECTION_PE: file_info["cape_type"] = "Injected PE Image" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == INJECTION_SHELLCODE: file_info["cape_type"] = "Injected Shellcode/Data" if len(metastrings) > 4: file_info["target_path"] = metastrings[4] file_info["target_process"] = metastrings[4].split( "\\")[-1] file_info["target_pid"] = metastrings[5] if file_info["cape_type_code"] == INJECTION_SECTION: file_info["cape_type"] = "Injected Section" if len(metastrings) > 4: file_info["section_handle"] = metastrings[4] if file_info["cape_type_code"] == EXTRACTION_PE: file_info["cape_type"] = "Extracted PE Image" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] if file_info["cape_type_code"] == EXTRACTION_SHELLCODE: file_info["cape_type"] = "Extracted Shellcode" if len(metastrings) > 4: file_info["virtual_address"] = metastrings[4] type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # PlugX if file_info["cape_type_code"] == PLUGX_CONFIG: file_info["cape_type"] = "PlugX Config" plugx_parser = plugx.PlugXConfig() plugx_config = plugx_parser.parse_config( file_data, len(file_data)) if not "cape_config" in cape_config and plugx_config: cape_config["cape_config"] = {} for key, value in plugx_config.items(): cape_config["cape_config"].update({key: [value]}) cape_name = "PlugX" else: log.error( "CAPE: PlugX config parsing failure - size many not be handled." 
) append_file = False if file_info["cape_type_code"] == PLUGX_PAYLOAD: file_info["cape_type"] = "PlugX Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # EvilGrab if file_info["cape_type_code"] == EVILGRAB_PAYLOAD: file_info["cape_type"] = "EvilGrab Payload" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if file_info["cape_type_code"] == EVILGRAB_DATA: cape_name = "EvilGrab" file_info["cape_type"] = "EvilGrab Data" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if file_info["size"] == 256 or file_info["size"] == 260: ConfigItem = "filepath" ConfigData = format(file_data) cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if file_info["size"] > 0x1000: append_file = True else: append_file = False # Sedreco if file_info["cape_type_code"] == SEDRECO_DATA: cape_name = "Sedreco" cape_config["cape_type"] = "Sedreco Config" if not "cape_config" in cape_config: cape_config["cape_config"] = {} if len(metastrings) > 4: SedrecoConfigIndex = metastrings[4] if SedrecoConfigIndex == '0x0': ConfigItem = "Timer1" elif SedrecoConfigIndex == '0x1': ConfigItem = "Timer2" elif SedrecoConfigIndex == '0x2': ConfigItem = "Computer Name" elif SedrecoConfigIndex == '0x3': ConfigItem = "C&C1" elif SedrecoConfigIndex == '0x4': ConfigItem = "C&C2" elif SedrecoConfigIndex == '0x5': ConfigItem = "Operation Name" elif SedrecoConfigIndex == '0x6': ConfigItem = "Keylogger MaxBuffer" elif SedrecoConfigIndex == '0x7': ConfigItem = "Keylogger MaxTimeout" elif SedrecoConfigIndex == '0x8': ConfigItem = "Keylogger Flag" elif SedrecoConfigIndex == '0x9': ConfigItem = "C&C3" else: ConfigItem = "Unknown" ConfigData = format(file_data) if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) append_file = False # Cerber if file_info["cape_type_code"] == CERBER_CONFIG: file_info["cape_type"] = "Cerber Config" cape_config["cape_type"] = "Cerber Config" cape_name = "Cerber" if not "cape_config" in cape_config: cape_config["cape_config"] = {} ConfigItem = "JSON Data" parsed = json.loads(file_data.rstrip(b'\0')) ConfigData = json.dumps(parsed, indent=4, sort_keys=True) cape_config["cape_config"].update({ConfigItem: [ConfigData]}) append_file = True if file_info["cape_type_code"] == CERBER_PAYLOAD: file_info["cape_type"] = "Cerber Payload" cape_config["cape_type"] = "Cerber Payload" cape_name = "Cerber" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" append_file = True # Ursnif if file_info["cape_type_code"] == URSNIF_CONFIG: 
file_info["cape_type"] = "Ursnif Config" cape_config["cape_type"] = "Ursnif Config" cape_name = "Ursnif" malwareconfig_loaded = False try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.info( "CAPE: malwareconfig.com parser: No module named %s", cape_name) if malwareconfig_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} malwareconfig_config = module.config(file_data) if isinstance(malwareconfig_config, list): for (key, value) in malwareconfig_config[0].iteritems(): cape_config["cape_config"].update( {key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in malwareconfig_config.iteritems(): cape_config["cape_config"].update( {key: [value]}) except Exception as e: log.error( "CAPE: malwareconfig parsing error with %s: %s", cape_name, e) append_file = False # Hancitor if file_info["cape_type_code"] == HANCITOR_PAYLOAD: cape_name = "Hancitor" cape_config["cape_type"] = "Hancitor Payload" file_info["cape_type"] = "Hancitor Payload" if file_info["cape_type_code"] == HANCITOR_CONFIG: cape_name = "Hancitor" cape_config["cape_type"] = "Hancitor Config" file_info["cape_type"] = "Hancitor Config" if not "cape_config" in cape_config: cape_config["cape_config"] = {} ConfigStrings = file_data.split('\0') ConfigStrings = filter(None, ConfigStrings) ConfigItem = "Campaign Code" cape_config["cape_config"].update( {ConfigItem: [ConfigStrings[0]]}) GateURLs = ConfigStrings[1].split('|') for index, value in enumerate(GateURLs): ConfigItem = "Gate URL " + str(index + 1) cape_config["cape_config"].update({ConfigItem: [value]}) append_file = False # QakBot if file_info["cape_type_code"] == QAKBOT_CONFIG: file_info["cape_type"] = "QakBot Config" cape_config["cape_type"] = "QakBot Config" cape_name = "QakBot" if not "cape_config" in cape_config: cape_config["cape_config"] = {} for line in file_data.splitlines(): if '=' in line: index = line.split('=')[0] data = line.split('=')[1] if index == '10': ConfigItem = "Botnet name" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '11': ConfigItem = "Number of C2 servers" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '47': ConfigItem = "Bot ID" ConfigData = data if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '3': ConfigItem = "Config timestamp" ConfigData = datetime.datetime.fromtimestamp( int(data)).strftime('%H:%M:%S %d-%m-%Y') if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '22': values = data.split(':') ConfigItem = "Password #1" ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "Username #1" ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "C2 #1" ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '23': values = data.split(':') ConfigItem = "Password #2" ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "Username #2" ConfigData = values[1] if 
ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "C2 #2" ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '24': values = data.split(':') ConfigItem = "Password #3" ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "Username #3" ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "C2 #3" ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '25': values = data.split(':') ConfigItem = "Password #4" ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "Username #4" ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "C2 #4" ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) if index == '26': values = data.split(':') ConfigItem = "Password #5" ConfigData = values[2] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "Username #5" ConfigData = values[1] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) ConfigItem = "C2 #5" ConfigData = values[0] if ConfigData: cape_config["cape_config"].update( {ConfigItem: [ConfigData]}) append_file = False if file_info["cape_type_code"] == QAKBOT_PAYLOAD: file_info["cape_type"] = "QakBot Payload" cape_config["cape_type"] = "QakBot Payload" cape_name = "QakBot" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" append_file = True # UPX package output if file_info["cape_type_code"] == UPX: file_info["cape_type"] = "Unpacked PE Image" type_strings = file_info["type"].split() if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" # Process CAPE Yara hits for hit in file_info["cape_yara"]: # Check to see if file is packed with UPX if hit["name"] == "UPX": log.info( "CAPE: Found UPX Packed sample - attempting to unpack") self.upx_unpack(file_data, CAPE_output) # Check for a payload or config hit try: if "payload" in hit["meta"]["cape_type"].lower( ) or "config" in hit["meta"]["cape_type"].lower(): file_info["cape_type"] = hit["meta"]["cape_type"] cape_name = hit["name"] except: pass type_strings = file_info["type"].split() if "-bit" not in file_info["cape_type"]: if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" if type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" suppress_parsing_list = ["Cerber", "Ursnif", "QakBot"] if hit["name"] in 
suppress_parsing_list: continue # Attempt to import a parser for the hit # DC3-MWCP mwcp_loaded = False if cape_name: try: mwcp_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "mwcp", "parsers") mwcp = reporter.Reporter(parserdir=mwcp_parsers) kwargs = {} mwcp.run_parser(cape_name, data=file_data, **kwargs) if mwcp.errors == []: log.info("CAPE: Imported DC3-MWCP parser %s", cape_name) mwcp_loaded = True else: error_lines = mwcp.errors[0].split("\n") for line in error_lines: if line.startswith('ImportError: '): log.info("CAPE: DC3-MWCP parser: %s", line.split(': ')[1]) except ImportError: pass # malwareconfig malwareconfig_loaded = False if cape_name and mwcp_loaded == False: try: malwareconfig_parsers = os.path.join( CUCKOO_ROOT, "modules", "processing", "parsers", "malwareconfig") file, pathname, description = imp.find_module( cape_name, [malwareconfig_parsers]) module = imp.load_module(cape_name, file, pathname, description) malwareconfig_loaded = True log.info("CAPE: Imported malwareconfig.com parser %s", cape_name) except ImportError: log.info( "CAPE: malwareconfig.com parser: No module named %s", cape_name) # Get config data if mwcp_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} cape_config["cape_config"] = convert(mwcp.metadata) else: cape_config["cape_config"].update( convert(mwcp.metadata)) except Exception as e: log.error( "CAPE: DC3-MWCP config parsing error with %s: %s", cape_name, e) elif malwareconfig_loaded: try: if not "cape_config" in cape_config: cape_config["cape_config"] = {} malwareconfig_config = module.config(file_data) if isinstance(malwareconfig_config, list): for (key, value) in malwareconfig_config[0].iteritems(): cape_config["cape_config"].update({key: [value]}) elif isinstance(malwareconfig_config, dict): for (key, value) in malwareconfig_config.iteritems(): cape_config["cape_config"].update({key: [value]}) except Exception as e: log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e) if "cape_config" in cape_config: if cape_config["cape_config"] == {}: del cape_config["cape_config"] if cape_name: if "cape_config" in cape_config: cape_config["cape_name"] = format(cape_name) if not "cape" in self.results: if cape_name != "UPX": self.results["cape"] = cape_name # Remove duplicate payloads from web ui for cape_file in CAPE_output: if file_info["size"] == cape_file["size"]: if HAVE_PYDEEP: ssdeep_grade = pydeep.compare(file_info["ssdeep"], cape_file["ssdeep"]) if ssdeep_grade >= ssdeep_threshold: append_file = False if file_info["entrypoint"] and file_info["entrypoint"] == cape_file["entrypoint"] \ and file_info["ep_bytes"] == cape_file["ep_bytes"]: append_file = False if append_file == True: CAPE_output.append(file_info) return file_info
def run(self): """Run analysis. @return: list of process dumps with related information. """ self.key = "procdump" procdump_files = [] buf = self.options.get("buffer", 8192) if not os.path.exists(self.procdump_path): return None meta = dict() if os.path.exists(self.files_metadata): for line in open(self.files_metadata, "rb"): entry = json.loads(line) filepath = os.path.join(self.analysis_path, entry["path"]) meta[filepath] = { "pids": entry["pids"], "filepath": entry["filepath"], "metadata": entry["metadata"], } file_names = os.listdir(self.procdump_path) for file_name in file_names: file_path = os.path.join(self.procdump_path, file_name) file_info = File(file_path=file_path, guest_paths=meta[file_path]["metadata"], file_name=file_name).get_all() metastrings = meta[file_path].get("metadata", "").split(";?") if len(metastrings) < 3: continue file_info["process_path"] = metastrings[1] file_info["module_path"] = metastrings[2] file_info["process_name"] = file_info["process_path"].split( "\\")[-1] file_info["pid"] = meta[file_path]["pids"][0] type_strings = file_info["type"].split() if len(type_strings) < 3: continue if type_strings[0] == "MS-DOS": file_info["cape_type"] = "DOS MZ image: executable" else: file_info["cape_type"] = "PE image" if type_strings[0] == ("PE32+"): file_info["cape_type"] += ": 64-bit " elif type_strings[0] == ("PE32"): file_info["cape_type"] += ": 32-bit " if type_strings[2] == ("(DLL)"): file_info["cape_type"] += "DLL" else: file_info["cape_type"] += "executable" texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] readit = False for texttype in texttypes: if texttype in file_info["type"]: readit = True break if readit: with open(file_info["path"], "r") as drop_open: filedata = drop_open.read(buf + 1) if len(filedata) > buf: file_info["data"] = convert_to_printable(filedata[:buf] + " <truncated>") else: file_info["data"] = convert_to_printable(filedata) procdump_files.append(file_info) return procdump_files
def reversing_labs_lookup(target: str, is_hash: bool = False): _headers = { "User-Agent": "CAPE Sandbox", "Content-Type": "application/json", "Authorization": f"Token {processing_conf.reversinglabs.key}", } report_fields = [ "id", "sha1", "sha256", "sha512", "md5", "category", "file_type", "file_subtype", "identification_name", "identification_version", "file_size", "extracted_file_count", "local_first_seen", "local_last_seen", "classification_origin", "classification_reason", "classification_source", "classification", "riskscore", "classification_result", "ticore", "tags", "summary", "discussion", "ticloud", "aliases", ] if not is_hash: sha256 = target if len(target) == 64 else File(target).get_sha256() else: sha256 = target full_report_lookup = { "hash_values": [sha256], "report_fields": report_fields } try: r = requests.post( url=processing_conf.reversinglabs.url + "/api/samples/v2/list/details/", headers=_headers, data=json.dumps(full_report_lookup), ) except requests.exceptions.RequestException as e: return { "error": True, "msg": f"Unable to complete connection to Reversing Labs: {e}", } reversing_labs_response = r.json() if r.status_code != 200: return { "error": True, "msg": f"Unable to complete lookup to Reversing Labs: {r.json().get('message')}", } if not reversing_labs_response.get("results"): return {"error": True, "msg": "No results found."} results = reversing_labs_response["results"][0] # most_recent_scan_engines = results["av_scanners"][-1] scanner_summary = results["av_scanners_summary"] sample_summary = results["sample_summary"] ticloud = results["ticloud"] ticore = results["ticore"] scanner_total = scanner_summary["scanner_count"] scanner_evil = scanner_summary["scanner_match"] classification = sample_summary["classification"] classification_result = sample_summary["classification_result"] file = ticore["info"]["file"] malicious = classification in [ "malicious", "suspicious" ] and sample_summary["goodware_override"] is False md5 = sample_summary["md5"] sha1 = sample_summary["sha1"] sha256 = sample_summary["sha256"] riskscore = ticloud["riskscore"] name = file["proposed_filename"] entropy = file["entropy"] story = ticore["story"] reversing_labs = { "name": name, "md5": md5, "sha1": sha1, "sha256": sha256, "malicious": malicious, "classification": classification, "classification_result": classification_result, "riskscore": riskscore, "detected": scanner_evil, "total": scanner_total, "story": story, "permalink": os.path.join(processing_conf.reversinglabs.url, sha256), } return reversing_labs
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one( )["version"] != self.SCHEMA_VERSION: CuckooReportError( "Mongo schema version not expected, check data migration tool" ) else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually # created. # Returns None if the index already exists. # TODO: This is not optimal because it run each analysis. Need to run # only one time at startup. self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique") # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) # Store the sample in GridFS. if results["info"]["category"] == "file": sample = File(self.file_path) if sample.valid(): fname = results["target"]["file"]["name"] sample_id = self.store_file(sample, filename=fname) report["target"] = {"file_id": sample_id} report["target"].update(results["target"]) # Store the PCAP file in GridFS and reference it back in the report. pcap_path = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_path) if pcap.valid(): pcap_id = self.store_file(pcap) report["network"] = {"pcap_id": pcap_id} report["network"].update(results["network"]) # Store the process memory dump file in GridFS and reference it back in the report. if "procmemory" in report and self.options.get("store_memdump", False): for idx, procmem in enumerate(report["procmemory"]): procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"])) procmem_file = File(procmem_path) if procmem_file.valid(): procmem_id = self.store_file(procmem_file) report["procmemory"][idx].update( {"procmem_id": procmem_id}) # Walk through the dropped files, store them in GridFS and update the # report with the ObjectIds. new_dropped = [] if "dropped" in report: for dropped in report["dropped"]: new_drop = dict(dropped) drop = File(dropped["path"]) if drop.valid(): dropped_id = self.store_file(drop, filename=dropped["name"]) new_drop["object_id"] = dropped_id new_dropped.append(new_drop) report["dropped"] = new_dropped # Add screenshots. report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): # Walk through the files and select the JPGs. shots = [ shot for shot in os.listdir(shots_path) if shot.endswith(".jpg") ] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) # If the screenshot path is a valid file, store it and # reference it back in the report. if shot.valid(): shot_id = self.store_file(shot) report["shots"].append(shot_id) # Store chunks of API calls in a different collection and reference # those chunks back in the report. 
In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Store the report and retrieve its object id. self.db.analysis.save(report) self.conn.disconnect()
def run(self): """Run Google play unofficial python api the get the google play information @return: list of google play features """ self.key = "googleplay" googleplay = {} if not HAVE_GOOGLEPLAY: log.error("Unable to import the GooglePlay library, has it been " "installed properly?") return if not HAVE_ANDROGUARD: log.error("Could not find the Androguard library, please install " "it. (`pip install androguard`)") if ("file" not in self.task["category"]): return f = File(self.task["target"]) if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) android_id = self.options.get("android_id") google_login = self.options.get("google_login") google_password = self.options.get("google_password") # auth_token = self.options.get("auth_token", None) if not android_id and not google_login and not google_password: raise CuckooProcessingError("Google Play Credentials not configured, skip") try: a = APK(self.file_path) if a.is_valid_APK(): package = a.get_package() # Connect api = GooglePlayAPI(android_id) api.login(google_login, google_password, None) # Get the version code and the offer type from the app details app_data = api.details(package) app_detail = app_data.docV2.details.appDetails if not app_detail.installationSize: return googleplay googleplay["title"] = app_detail.title googleplay["app_category"] = app_detail.appCategory._values googleplay["version_code"] = app_detail.versionCode googleplay["app_type"] = app_detail.appType googleplay["content_rating"] = app_detail.contentRating googleplay["developer_email"] = app_detail.developerEmail googleplay["developer_name"] = app_detail.developerName googleplay["developer_website"] = app_detail.developerWebsite googleplay["installation_size"] = app_detail.installationSize googleplay["num_downloads"] = app_detail.numDownloads googleplay["upload_date"] = app_detail.uploadDate googleplay["permissions"] = app_detail.permission._values except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return googleplay
def build_options(self): """Generate analysis options. @return: options dict. """ options = {} options["id"] = self.task.id options["ip"] = self.machine.resultserver_ip options["port"] = self.machine.resultserver_port options["category"] = self.task.category options["target"] = self.task.target options["package"] = self.task.package options["options"] = self.task.options options["enforce_timeout"] = self.task.enforce_timeout options["clock"] = self.task.clock options["terminate_processes"] = self.cfg.cuckoo.terminate_processes options["upload_max_size"] = self.cfg.resultserver.upload_max_size options["do_upload_max_size"] = int( self.cfg.resultserver.do_upload_max_size) if not self.task.timeout or self.task.timeout == 0: options["timeout"] = self.cfg.timeouts.default else: options["timeout"] = self.task.timeout if self.task.category == "file": options["file_name"] = File(self.task.target).get_name() options["file_type"] = File(self.task.target).get_type() # if it's a PE file, collect export information to use in more smartly determining the right # package to use options["exports"] = "" if HAVE_PEFILE and ("PE32" in options["file_type"] or "MS-DOS executable" in options["file_type"]): try: pe = pefile.PE(self.task.target) if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"): exports = [] for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols: try: if not exported_symbol.name: continue if isinstance(exported_symbol.name, bytes): exports.append( re.sub(b"[^A-Za-z0-9_?@-]", b"", exported_symbol.name).decode( "utf-8")) else: exports.append( re.sub("[^A-Za-z0-9_?@-]", "", exported_symbol.name)) except Exception as e: log.error(e, exc_info=True) options["exports"] = ",".join(exports) except Exception as e: log.error("PE type not recognised") log.error(e, exc_info=True) # options from auxiliar.conf for plugin in self.aux_cfg.auxiliar_modules.keys(): options[plugin] = self.aux_cfg.auxiliar_modules[plugin] return options
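# Editor's sketch: enumerating and sanitizing PE export names with pefile, as the
# options builder above does before joining them into a comma-separated string.
# `sample_path` is a placeholder and is assumed to point at a valid PE file.
import re
import pefile

def pe_export_names(sample_path):
    pe = pefile.PE(sample_path)
    export_dir = getattr(pe, "DIRECTORY_ENTRY_EXPORT", None)
    if export_dir is None:
        return []
    names = []
    for symbol in export_dir.symbols:
        if not symbol.name:
            continue
        name = symbol.name.decode("utf-8", errors="ignore") if isinstance(symbol.name, bytes) else symbol.name
        names.append(re.sub(r"[^A-Za-z0-9_?@-]", "", name))
    return names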
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION: CuckooReportError("Mongo schema version not expected, check data migration tool") else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if not "network" in report: report["network"] = {} # Add screenshot paths report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) screenshot = File(shot_path) if screenshot.valid(): # Strip the extension as it's added later # in the Django view report["shots"].append(shot_file.replace(".jpg", "")) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. 
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Calculate the mlist_cnt for display if present to reduce db load if "signatures" in results: for entry in results["signatures"]: if entry["name"] == "ie_martian_children": report["mlist_cnt"] = len(entry["data"]) if entry["name"] == "office_martian_children": report["f_mlist_cnt"] = len(entry["data"]) #Other info we want Quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"]) if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len(results["suricata"]["alerts"]) if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) if results["suricata"].has_key("ssh") and len(results["suricata"]["ssh"]) > 0: report["suri_ssh_cnt"] = len(results["suricata"]["ssh"]) if results["suricata"].has_key("dns") and len(results["suricata"]["dns"]) > 0: report["suri_dns_cnt"] = len(results["suricata"]["dns"]) # Create an index based on the info.id dict key. Increases overall scalability # with large amounts of data. # Note: Silently ignores the creation if the index already exists. self.db.analysis.create_index("info.id", background=True) # Store the report and retrieve its object id. try: self.db.analysis.save(report) except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size(report[parent_key])[0] if not self.options.get("fix_large_docs", False): # Just log the error and problem keys log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576)) else: # Delete the problem keys and check for more error_saved = True while error_saved: log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" % (parent_key, child_key, int(psize) / 1048576)) del report[parent_key][child_key] try: self.db.analysis.save(report) error_saved = False except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size(report[parent_key])[0] log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576)) self.conn.close()
def run(self, results): self.noinject = self.options.get("noinject", False) filesdict = {} self.task_options_stack = [] self.task_options = None self.task_custom = None report = dict(results) if report["info"].has_key( "options") and "resubmitjob=true" in report["info"]["options"]: return else: self.task_options_stack.append("resubmitjob=true") if self.noinject: self.task_options_stack.append("free=true") if self.task_options_stack: self.task_options = ','.join(self.task_options_stack) report = dict(results) for dropped in report["dropped"]: if os.path.isfile(dropped["path"]): if ("PE32" in dropped["type"] or "MS-DOS" in dropped["type"]) and "DLL" not in dropped["type"]: if not filesdict.has_key(dropped['sha256']): filesdict[dropped['sha256']] = dropped['path'] if report.has_key("suricata") and report["suricata"]: if report["suricata"].has_key( "files") and report["suricata"]["files"]: for suricata_file_e in results["suricata"]["files"]: if suricata_file_e.has_key("file_info"): tmp_suricata_file_d = dict(suricata_file_e) if os.path.isfile( suricata_file_e["file_info"]["path"]): ftype = suricata_file_e["file_info"]["type"] if ("PE32" in ftype or "MS-DOS" in ftype) and "DLL" not in ftype: if not filesdict.has_key( suricata_file_e["file_info"] ["sha256"]): filesdict[suricata_file_e["file_info"] ["sha256"]] = suricata_file_e[ "file_info"]["path"] db = Database() for e in filesdict: if not File(filesdict[e]).get_size(): continue if not db.find_sample(sha256=e) is None: continue self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"] if report["info"].has_key("custom") and report["info"]["custom"]: self.task_custom = "%s Parent_Custom:%s" % ( self.task_custom, report["info"]["custom"]) task_id = db.add_path(file_path=filesdict[e], package='exe', timeout=200, options=self.task_options, priority=1, machine=None, platform=None, custom=self.task_custom, memory=False, enforce_timeout=False, clock=None, tags=None) if task_id: log.info(u"Resubmitexe file \"{0}\" added as task with ID {1}". format(filesdict[e], task_id)) else: log.warn("Error adding resubmitexe task to database")
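The selection logic above (executables only, no DLLs, de-duplicated by sha256) can be sketched stand-alone; the sample records below are made up.

def resubmission_candidates(dropped):
    # Keep PE32/MS-DOS files that are not DLLs, first path wins per sha256.
    candidates = {}
    for entry in dropped:
        ftype = entry.get("type", "")
        if ("PE32" in ftype or "MS-DOS" in ftype) and "DLL" not in ftype:
            candidates.setdefault(entry["sha256"], entry["path"])
    return candidates

dropped = [
    {"sha256": "aa" * 32, "path": "/tmp/a.exe", "type": "PE32 executable (GUI) Intel 80386"},
    {"sha256": "bb" * 32, "path": "/tmp/b.dll", "type": "PE32 executable (DLL) (GUI) Intel 80386"},
    {"sha256": "aa" * 32, "path": "/tmp/a_copy.exe", "type": "PE32 executable (GUI) Intel 80386"},
]
print(resubmission_candidates(dropped))  # only /tmp/a.exe survives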
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] do_strings = self.options.get("strings", False) nulltermonly = self.options.get("nullterminated_only", True) minchars = self.options.get("minchars", 5) if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): # if we're re-processing this task, this means if zips are enabled, we won't do any reprocessing on the # process dumps (only matters for now for Yara) if not dmp.endswith(".dmp"): continue dmp_path = os.path.join(self.pmemory_path, dmp) dmp_file = File(dmp_path) process_name = "" process_path = "" process_id = int(os.path.splitext(os.path.basename(dmp_path))[0]) if "behavior" in self.results and "processes" in self.results["behavior"]: for process in self.results["behavior"]["processes"]: if process_id == process["process_id"]: process_name = process["process_name"] process_path = process["module_path"] procdump = ProcDump(dmp_path, pretty=True) proc = dict( file=dmp_path, pid=process_id, name=process_name, path=process_path, yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")), address_space=procdump.pretty_print(), ) endlimit = "" if not HAVE_RE2: endlimit = "8192" if do_strings: if nulltermonly: apat = "([\x20-\x7e]{" + str(minchars) + "," + endlimit + "})\x00" upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "})\x00\x00" else: apat = "[\x20-\x7e]{" + str(minchars) + "," + endlimit + "}" upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "}" matchdict = procdump.search(apat, all=True) strings = matchdict["matches"] matchdict = procdump.search(upat, all=True) ustrings = matchdict["matches"] for ws in ustrings: strings.append(str(ws.decode("utf-16le"))) proc["strings_path"] = dmp_path + ".strings" f=open(proc["strings_path"], "w") f.write("\n".join(strings)) f.close() procdump.close() # Deduplicate configs if proc["yara"]: for match in proc["yara"]: # Dyre if match["name"] == "DyreCfgInjectsList": output = list() buf = "" recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("<litem>"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("</litem>"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output) elif match["name"] == "DyreCfgRedirectList": output = list() buf = "" recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("<rpcgroup>"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("</rpcgroup>"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output) # DarkComet elif match["name"] == "DarkCometConfig": output = list() recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("#BEGIN DARKCOMET"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("#EOF DARKCOMET"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] results.append(proc) return results
class TestFile: def setUp(self): self.tmp = tempfile.mkstemp() self.file = File(self.tmp[1]) def test_get_name(self): assert_equal(self.tmp[1].split("/")[-1], self.file.get_name()) def test_get_data(self): assert_equal("", self.file.get_data()) def test_get_size(self): assert_equal(0, self.file.get_size()) def test_get_crc32(self): assert_equal("00000000", self.file.get_crc32()) def test_get_md5(self): assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5()) def test_get_sha1(self): assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709", self.file.get_sha1()) def test_get_sha256(self): assert_equal("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", self.file.get_sha256()) def test_get_sha512(self): assert_equal("cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", self.file.get_sha512()) def test_get_ssdeep(self): try: import pydeep assert_not_equal(None, self.file.get_ssdeep()) except ImportError: assert_equal(None, self.file.get_ssdeep()) def test_get_type(self): assert_equal("empty", self.file.get_type()) def test_get_all_type(self): assert isinstance(self.file.get_all(), dict) def test_get_all_keys(self): for key in ["name", "size", "crc32", "md5", "sha1", "sha256", "sha512", "ssdeep", "type"]: assert key in self.file.get_all() def tearDown(self): os.remove(self.tmp[1])
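The digests asserted in TestFile are simply the hashes of zero bytes; a quick standard-library check reproduces every constant.

import hashlib
import zlib

empty = b""
print("%08x" % (zlib.crc32(empty) & 0xffffffff))  # 00000000
print(hashlib.md5(empty).hexdigest())             # d41d8cd98f00b204e9800998ecf8427e
print(hashlib.sha1(empty).hexdigest())            # da39a3ee5e6b4b0d3255bfef95601890afd80709
print(hashlib.sha256(empty).hexdigest())
print(hashlib.sha512(empty).hexdigest())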
def run(self): """Run analysis. @return: structured results. """ self.key = "procmemory" results = [] if os.path.exists(self.pmemory_path): for dmp in os.listdir(self.pmemory_path): dmp_path = os.path.join(self.pmemory_path, dmp) dmp_file = File(dmp_path) process_name = "" process_path = "" process_id = int(os.path.splitext(os.path.basename(dmp_path))[0]) if "behavior" in self.results and "processes" in self.results["behavior"]: for process in self.results["behavior"]["processes"]: if process_id == process["process_id"]: process_name = process["process_name"] process_path = process["module_path"] proc = dict( file=dmp_path, pid=process_id, name=process_name, path=process_path, yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")), address_space=self.parse_dump(dmp_path) ) # Deduplicate configs if proc["yara"]: for match in proc["yara"]: # Dyre if match["name"] == "DyreCfgInjectsList": output = list() buf = "" recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("<litem>"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("</litem>"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output) elif match["name"] == "DyreCfgRedirectList": output = list() buf = "" recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("<rpcgroup>"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("</rpcgroup>"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output) # DarkComet elif match["name"] == "DarkCometConfig": output = list() recline = False for ystring in match["strings"]: for line in ystring.splitlines(): if line.startswith("#BEGIN DARKCOMET"): buf = "" recline = True if recline: buf += line.strip() + "\n" if line.startswith("#EOF DARKCOMET"): recline = False if buf not in output: output.append(buf) match["strings"] = ["".join(output)] results.append(proc) return results
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION: CuckooReportError("Mongo schema version not expected, check data migration tool") else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually # created. # Returns None if the index already exists. # TODO: This is not optimal because it run each analysis. Need to run # only one time at startup. self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique") # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if "network" not in report: report["network"] = {} # Store the sample in GridFS. if results["info"]["category"] == "file" and "target" in results: sample = File(self.file_path) if sample.valid(): fname = results["target"]["file"]["name"] sample_id = self.store_file(sample, filename=fname) report["target"] = {"file_id": sample_id} report["target"].update(results["target"]) # Store the PCAP file in GridFS and reference it back in the report. pcap_path = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_path) if pcap.valid(): pcap_id = self.store_file(pcap) report["network"]["pcap_id"] = pcap_id sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap") spcap = File(sorted_pcap_path) if spcap.valid(): spcap_id = self.store_file(spcap) report["network"]["sorted_pcap_id"] = spcap_id mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm") mitmpr = File(mitmproxy_path) if mitmpr.valid(): mitmpr_id = self.store_file(mitmpr) report["network"]["mitmproxy_id"] = mitmpr_id # Store the process memory dump file in GridFS and reference it back in the report. if "procmemory" in report and self.options.get("store_memdump", False): for idx, procmem in enumerate(report["procmemory"]): procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"])) procmem_file = File(procmem_path) if procmem_file.valid(): procmem_id = self.store_file(procmem_file) report["procmemory"][idx].update({"procmem_id": procmem_id}) # Walk through the dropped files, store them in GridFS and update the # report with the ObjectIds. new_dropped = [] if "dropped" in report: for dropped in report["dropped"]: new_drop = dict(dropped) drop = File(dropped["path"]) if drop.valid(): dropped_id = self.store_file(drop, filename=dropped["name"]) new_drop["object_id"] = dropped_id new_dropped.append(new_drop) report["dropped"] = new_dropped # Add screenshots. report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): # Walk through the files and select the JPGs. 
for shot_file in sorted(os.listdir(shots_path)): if not shot_file.endswith(".jpg"): continue shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) # If the screenshot path is a valid file, store it and # reference it back in the report. if shot.valid(): shot_id = self.store_file(shot) report["shots"].append(shot_id) paginate = self.options.get("paginate", 100) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is paginate or if the loop is # completed then store the chunk in MongoDB. if len(chunk) == paginate: to_insert = {"pid": process["pid"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["pid"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Store the report and retrieve its object id. self.db.analysis.save(report) self.conn.close()
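store_file() is not shown above, but the sha256_unique index makes the intent clear: store each artifact in GridFS once and reuse the existing id on duplicates. A minimal sketch under that assumption, using pymongo and gridfs; the database name is a placeholder.

import hashlib
import gridfs
from pymongo import MongoClient

db = MongoClient("localhost", 27017)["cuckoo_example"]  # placeholder database name
fs = gridfs.GridFS(db)
db.fs.files.create_index("sha256", unique=True, sparse=True, name="sha256_unique")

def store_file(data, filename=None):
    # Reuse the stored file if this sha256 is already in GridFS, otherwise insert it.
    sha256 = hashlib.sha256(data).hexdigest()
    existing = db.fs.files.find_one({"sha256": sha256})
    if existing:
        return existing["_id"]
    return fs.put(data, filename=filename, sha256=sha256)

first = store_file(b"\x00" * 16, filename="dump.pcap")
print(first == store_file(b"\x00" * 16))  # True: second call reuses the stored file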
def run(self): """Runs VirusTotal processing @return: full VirusTotal report. """ self.key = "virustotal" virustotal = [] key = self.options.get("key", None) timeout = self.options.get("timeout", 60) urlscrub = self.options.get("urlscrub", None) do_file_lookup = self.getbool(self.options.get("do_file_lookup", False)) do_url_lookup = self.getbool(self.options.get("do_url_lookup", False)) if not key: raise CuckooProcessingError("VirusTotal API key not " "configured, skip") if self.task["category"] == "file" and do_file_lookup: if not os.path.exists(self.file_path): raise CuckooProcessingError("File {0} not found, skipping it".format(self.file_path)) resource = File(self.file_path).get_sha256() url = VIRUSTOTAL_FILE_URL elif self.task["category"] == "url" and do_url_lookup: resource = self.task["target"] if urlscrub: urlscrub_compiled_re = None try: urlscrub_compiled_re = re.compile(urlscrub) except Exception as e: raise CuckooProcessingError("Failed to compile urlscrub regex" % (e)) try: resource = re.sub(urlscrub_compiled_re,"",resource) except Exception as e: raise CuckooProcessingError("Failed to scrub url" % (e)) # normalize the URL the way VT appears to if not resource.lower().startswith("http://") and not resource.lower().startswith("https://"): resource = "http://" + resource slashsplit = resource.split('/') slashsplit[0] = slashsplit[0].lower() slashsplit[2] = slashsplit[2].lower() if len(slashsplit) == 3: slashsplit.append("") resource = "/".join(slashsplit) resource = hashlib.sha256(resource).hexdigest() url = VIRUSTOTAL_URL_URL else: # Not supported type, exit. return virustotal data = {"resource": resource, "apikey": key} try: r = requests.get(url, params=data, verify=True, timeout=int(timeout)) response_data = r.content except requests.exceptions.RequestException as e: raise CuckooProcessingError("Unable to complete connection " "to VirusTotal: {0}".format(e)) try: virustotal = json.loads(response_data) except ValueError as e: raise CuckooProcessingError("Unable to convert response to " "JSON: {0}".format(e)) # Work around VT brain-damage if isinstance(virustotal, list) and len(virustotal): virustotal = virustotal[0] if "scans" in virustotal: items = virustotal["scans"].items() virustotal["scans"] = dict((engine.replace(".", "_"), signature) for engine, signature in items) virustotal["resource"] = resource virustotal["results"]=list(({"vendor":engine.replace(".", "_"),"sig": signature["result"]}) for engine, signature in items) return virustotal
def run(self): """Runs VirusTotal processing @return: full VirusTotal report. """ self.key = "virustotal" virustotal = [] key = self.options.get("key", None) timeout = self.options.get("timeout", 60) urlscrub = self.options.get("urlscrub", None) do_file_lookup = self.getbool(self.options.get("do_file_lookup", False)) do_url_lookup = self.getbool(self.options.get("do_url_lookup", False)) if not key: raise CuckooProcessingError("VirusTotal API key not " "configured, skip") if self.task["category"] == "file" and do_file_lookup: if not os.path.exists(self.file_path): raise CuckooProcessingError( "File {0} not found, skipping it".format(self.file_path)) resource = File(self.file_path).get_md5() url = VIRUSTOTAL_FILE_URL elif self.task["category"] == "url" and do_url_lookup: resource = self.task["target"] if urlscrub: urlscrub_compiled_re = None try: urlscrub_compiled_re = re.compile(urlscrub) except Exception as e: raise CuckooProcessingError( "Failed to compile urlscrub regex" % (e)) try: resource = re.sub(urlscrub_compiled_re, "", resource) except Exception as e: raise CuckooProcessingError("Failed to scrub url" % (e)) url = VIRUSTOTAL_URL_URL else: # Not supported type, exit. return virustotal data = {"resource": resource, "apikey": key} try: r = requests.get(url, params=data, verify=True, timeout=int(timeout)) response_data = r.content except requests.exceptions.RequestException as e: raise CuckooProcessingError("Unable to complete connection " "to VirusTotal: {0}".format(e)) try: virustotal = json.loads(response_data) except ValueError as e: raise CuckooProcessingError("Unable to convert response to " "JSON: {0}".format(e)) if "scans" in virustotal: items = virustotal["scans"].items() virustotal["scans"] = dict((engine.replace(".", "_"), signature) for engine, signature in items) virustotal["results"] = list(({ "vendor": engine.replace(".", "_"), "sig": signature["result"] }) for engine, signature in items) return virustotal
def run(self, results): """Writes report. @param results: Cuckoo results dict. @raise CuckooReportError: if fails to connect or write to MongoDB. """ self._connect() # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually created. # Returns None if the index already exists. self._db.fs.files.ensure_index("md5", unique=True, name="md5_unique") # Add pcap file, check for dups and in case add only reference. pcap_file = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_file) if pcap.valid(): pcap_id = self.store_file(pcap) # Preventive key check. if "network" in results and isinstance(results["network"], dict): results["network"]["pcap_id"] = pcap_id else: results["network"] = {"pcap_id": pcap_id} # Add dropped files, check for dups and in case add only reference. dropped_files = {} for dir_name, dir_names, file_names in os.walk(os.path.join(self.analysis_path, "files")): for file_name in file_names: file_path = os.path.join(dir_name, file_name) drop = File(file_path) dropped_files[drop.get_md5()] = drop result_files = dict((dropped.get("md5", None), dropped) for dropped in results["dropped"]) # hopefully the md5s in dropped_files and result_files should be the same if set(dropped_files.keys()) - set(result_files.keys()): log.warning("Dropped files in result dict are different from those in storage.") # store files in gridfs for md5, fileobj in dropped_files.items(): # only store in db if we have a filename for it in results (should be all) resultsdrop = result_files.get(md5, None) if resultsdrop and fileobj.valid(): drop_id = self.store_file(fileobj, filename=resultsdrop["name"]) resultsdrop["dropped_id"] = drop_id # Add screenshots. results["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [f for f in os.listdir(shots_path) if f.endswith(".jpg")] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) if shot.valid(): shot_id = self.store_file(shot) results["shots"].append(shot_id) # Save all remaining results. try: self._db.analysis.save(results, manipulate=False) except InvalidDocument: # The document is too big, we need to shrink it and re-save it. results["behavior"]["processes"] = "" # Let's add an error message to the debug block. error = ("The analysis results were too big to be stored, " + "the detailed behavioral analysis has been stripped out.") results["debug"]["errors"].append(error) # Try again to store, if it fails, just abort. try: self._db.analysis.save(results) except Exception as e: raise CuckooReportError("Failed to store the document into MongoDB: %s" % e)
class TestFile: def setUp(self): self.tmp = tempfile.mkstemp() self.file = File(self.tmp[1]) def test_get_name(self): assert_equal(self.tmp[1].split("/")[-1], self.file.get_name()) def test_get_data(self): assert_equal("", self.file.get_data()) def test_get_size(self): assert_equal(0, self.file.get_size()) def test_get_crc32(self): assert_equal("00000000", self.file.get_crc32()) def test_get_md5(self): assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5()) def test_get_sha1(self): assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709", self.file.get_sha1()) def test_get_sha256(self): assert_equal( "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", self.file.get_sha256()) def test_get_sha512(self): assert_equal( "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", self.file.get_sha512()) def test_get_ssdeep(self): try: import pydeep assert_not_equal(None, self.file.get_ssdeep()) except ImportError: assert_equal(None, self.file.get_ssdeep()) def test_get_type(self): assert_equal("empty", self.file.get_type()) def test_get_all_type(self): assert isinstance(self.file.get_all(), dict) def test_get_all_keys(self): for key in [ "name", "size", "crc32", "md5", "sha1", "sha256", "sha512", "ssdeep", "type" ]: assert key in self.file.get_all() def tearDown(self): os.remove(self.tmp[1])
def run(self): """Run Suricata. @return: hash with alerts """ self.key = "suricata" # General SURICATA_CONF = self.options.get("conf", None) SURICATA_EVE_LOG = self.options.get("evelog", None) SURICATA_ALERT_LOG = self.options.get("alertlog", None) SURICATA_TLS_LOG = self.options.get("tlslog", None) SURICATA_HTTP_LOG = self.options.get("httplog", None) SURICATA_SSH_LOG = self.options.get("sshlog", None) SURICATA_DNS_LOG = self.options.get("dnslog", None) SURICATA_FILE_LOG = self.options.get("fileslog", None) SURICATA_FILES_DIR = self.options.get("filesdir", None) SURICATA_RUNMODE = self.options.get("runmode", None) SURICATA_FILE_BUFFER = self.options.get("buffer", 8192) Z7_PATH = self.options.get("7zbin", None) FILES_ZIP_PASS = self.options.get("zippass", None) # Socket SURICATA_SOCKET_PATH = self.options.get("socket_file", None) # Command Line SURICATA_BIN = self.options.get("bin", None) suricata = dict() suricata["alerts"] = [] suricata["tls"] = [] suricata["perf"] = [] suricata["files"] = [] suricata["http"] = [] suricata["dns"] = [] suricata["ssh"] = [] suricata["fileinfo"] = [] suricata["eve_log_full_path"] = None suricata["alert_log_full_path"] = None suricata["tls_log_full_path"] = None suricata["http_log_full_path"] = None suricata["file_log_full_path"] = None suricata["ssh_log_full_path"] = None suricata["dns_log_full_path"] = None tls_items = [ "fingerprint", "issuer", "version", "subject", "sni", "ja3", "serial" ] SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_ALERT_LOG) SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_TLS_LOG) SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_HTTP_LOG) SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_SSH_LOG) SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_DNS_LOG) SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_EVE_LOG) SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILE_LOG) SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILES_DIR) separate_log_paths = [ ("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH), ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH), ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH), ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH), ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH) ] # handle reprocessing all_log_paths = [x[1] for x in separate_log_paths] + \ [SURICATA_EVE_LOG_FULL_PATH, SURICATA_FILE_LOG_FULL_PATH] for log_path in all_log_paths: if os.path.exists(log_path): try: os.unlink(log_path) except: pass if os.path.isdir(SURICATA_FILES_DIR_FULL_PATH): try: shutil.rmtree(SURICATA_FILES_DIR_FULL_PATH, ignore_errors=True) except: pass if not os.path.exists(SURICATA_CONF): log.warning( "Unable to Run Suricata: Conf File {} Does Not Exist".format( SURICATA_CONF)) return suricata["alerts"] if not os.path.exists(self.pcap_path): log.warning( "Unable to Run Suricata: Pcap file {} Does Not Exist".format( self.pcap_path)) return suricata["alerts"] # Add to this if you wish to ignore any SIDs for the suricata alert logs # Useful for ignoring SIDs without disabling them. Ex: surpress an alert for # a SID which is a dependent of another. 
(Bad TCP data for HTTP(S) alert) sid_blacklist = [ # SURICATA FRAG IPv6 Fragmentation overlap 2200074, # ET INFO InetSim Response from External Source Possible SinkHole 2017363, # SURICATA UDPv4 invalid checksum 2200075, # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack 2019416, ] if SURICATA_RUNMODE == "socket": try: #from suricatasc import SuricataSC from lib.cuckoo.common.suricatasc import SuricataSC except Exception as e: log.warning("Failed to import suricatasc lib {}".format(e)) return suricata loopcnt = 0 maxloops = 24 loopsleep = 5 args = dict() args["filename"] = self.pcap_path args["output-dir"] = self.logs_path suris = SuricataSC(SURICATA_SOCKET_PATH) try: suris.connect() suris.send_command("pcap-file", args) except Exception as e: log.warning( "Failed to connect to socket and send command {}: {}". format(SURICATA_SOCKET_PATH, e)) return suricata while loopcnt < maxloops: try: pcap_flist = suris.send_command("pcap-file-list") current_pcap = suris.send_command("pcap-current") log.debug("pcapfile list: {} current pcap: {}".format( pcap_flist, current_pcap)) if self.pcap_path not in pcap_flist["message"]["files"] and \ current_pcap["message"] != self.pcap_path: log.debug( "Pcap not in list and not current pcap lets assume it's processed" ) break else: loopcnt = loopcnt + 1 time.sleep(loopsleep) except Exception as e: log.warning( "Failed to get pcap status breaking out of loop {}". format(e)) break if loopcnt == maxloops: logstr = "Loop timeout of {} sec occurred waiting for file {} to finish processing" log.warning(logstr.format(maxloops * loopsleep, current_pcap)) return suricata elif SURICATA_RUNMODE == "cli": if not os.path.exists(SURICATA_BIN): log.warning( "Unable to Run Suricata: Bin File {} Does Not Exist". 
format(SURICATA_CONF)) return suricata["alerts"] cmdstr = "{} -c {} -k none -l {} -r {}" cmd = cmdstr.format(SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path) ret, stdout, stderr = self.cmd_wrapper(cmd) if ret != 0: log.warning( "Suricata returned a Exit Value Other than Zero {}".format( stderr)) return suricata else: log.warning("Unknown Suricata Runmode") return suricata datalist = [] if os.path.exists(SURICATA_EVE_LOG_FULL_PATH): suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log: datalist.append(eve_log.read()) else: for path in separate_log_paths: if os.path.exists(path[1]): suricata[path[0]] = path[1] with open(path[1], "rb") as the_log: datalist.append(the_log.read()) if not datalist: log.warning("Suricata: Failed to find usable Suricata log file") parsed_files = [] for data in datalist: for line in data.splitlines(): try: parsed = json.loads(line) except: log.warning( "Suricata: Failed to parse line {} as json".format( line)) continue if 'event_type' in parsed: if parsed["event_type"] == "alert": if (parsed["alert"]["signature_id"] not in sid_blacklist and not parsed["alert"] ["signature"].startswith("SURICATA STREAM")): alog = dict() if parsed["alert"]["gid"] == '': alog["gid"] = "None" else: alog["gid"] = parsed["alert"]["gid"] if parsed["alert"]["rev"] == '': alog["rev"] = "None" else: alog["rev"] = parsed["alert"]["rev"] if parsed["alert"]["severity"] == '': alog["severity"] = "None" else: alog["severity"] = parsed["alert"]["severity"] alog["sid"] = parsed["alert"]["signature_id"] try: alog["srcport"] = parsed["src_port"] except: alog["srcport"] = "None" alog["srcip"] = parsed["src_ip"] try: alog["dstport"] = parsed["dest_port"] except: alog["dstport"] = "None" alog["dstip"] = parsed["dest_ip"] alog["protocol"] = parsed["proto"] alog["timestamp"] = parsed["timestamp"].replace( "T", " ") if parsed["alert"]["category"] == '': alog["category"] = "None" else: alog["category"] = parsed["alert"]["category"] alog["signature"] = parsed["alert"]["signature"] suricata["alerts"].append(alog) elif parsed["event_type"] == "http": hlog = dict() hlog["srcport"] = parsed["src_port"] hlog["srcip"] = parsed["src_ip"] hlog["dstport"] = parsed["dest_port"] hlog["dstip"] = parsed["dest_ip"] hlog["timestamp"] = parsed["timestamp"].replace( "T", " ") keyword = ("uri", "length", "hostname", "status", "http_method", "contenttype", "ua", "referrer") keyword_suri = ("url", "length", "hostname", "status", "http_method", "http_content_type", "http_user_agent", "http_refer") for key, key_s in zip(keyword, keyword_suri): try: hlog[key] = parsed["http"].get(key_s, "None") except: hlog[key] = "None" suricata["http"].append(hlog) elif parsed["event_type"] == "tls": tlog = dict() tlog["srcport"] = parsed["src_port"] tlog["srcip"] = parsed["src_ip"] tlog["dstport"] = parsed["dest_port"] tlog["dstip"] = parsed["dest_ip"] tlog["timestamp"] = parsed["timestamp"].replace( "T", " ") for key in tls_items: if key in parsed["tls"]: tlog[key] = parsed["tls"][key] suricata["tls"].append(tlog) elif parsed["event_type"] == "ssh": suricata["ssh"].append(parsed) elif parsed["event_type"] == "dns": suricata["dns"].append(parsed) elif parsed["event_type"] == "fileinfo": flog = dict() flog["http_host"] = parsed.get("http", {}).get("hostname", "") flog["http_uri"] = parsed.get("http", {}).get("url", "") flog["http_referer"] = parsed.get("http", {}).get( "referer", "") flog["http_user_agent"] = parsed.get("http", {}).get( "http_user_agent", "") 
flog["protocol"] = parsed.get("proto", "") flog["magic"] = parsed.get("fileinfo", {}).get("magic", "") flog["size"] = parsed.get("fileinfo", {}).get("size", "") flog["stored"] = parsed.get("fileinfo", {}).get("stored", "") flog["sha256"] = parsed.get("fileinfo", {}).get("sha256", "") flog["md5"] = parsed.get("fileinfo", {}).get("md5", "") flog["filename"] = parsed.get("fileinfo", {}).get("filename", "") flog["file_info"] = dict() if "/" in flog["filename"]: flog["filename"] = flog["filename"].split("/")[-1] parsed_files.append(flog) if parsed_files: for sfile in parsed_files: if sfile.get("stored", False): filename = sfile["sha256"] src_file = "{}/{}/{}".format(SURICATA_FILES_DIR_FULL_PATH, filename[0:2], filename) dst_file = "{}/{}".format(SURICATA_FILES_DIR_FULL_PATH, filename) if os.path.exists(src_file): try: shutil.move(src_file, dst_file) except OSError as e: log.warning( "Unable to move suricata file: {}".format(e)) break file_info = File(file_path=dst_file).get_all() try: with open(file_info["path"], "r") as drop_open: filedata = drop_open.read( SURICATA_FILE_BUFFER + 1) if len(filedata) > SURICATA_FILE_BUFFER: file_info["data"] = convert_to_printable( filedata[:SURICATA_FILE_BUFFER] + " <truncated>") else: file_info["data"] = convert_to_printable( filedata) except UnicodeDecodeError as e: pass if file_info: sfile["file_info"] = file_info suricata["files"].append(sfile) with open(SURICATA_FILE_LOG_FULL_PATH, "w") as drop_log: drop_log.write(json.dumps(suricata["files"], indent=4)) # Cleanup file subdirectories left behind by messy Suricata for d in [ dirpath for (dirpath, dirnames, filenames) in os.walk(SURICATA_FILES_DIR_FULL_PATH) if len(dirnames) == 0 and len(filenames) == 0 ]: try: shutil.rmtree(d) except OSError as e: log.warning( "Unable to delete suricata file subdirectories: {}". format(e)) if SURICATA_FILES_DIR_FULL_PATH and os.path.exists(SURICATA_FILES_DIR_FULL_PATH) and Z7_PATH \ and os.path.exists(Z7_PATH): # /usr/bin/7z a -pinfected -y files.zip files-json.log files cmdstr = "cd {} && {} a -p{} -y files.zip {} {}" cmd = cmdstr.format(self.logs_path, Z7_PATH, FILES_ZIP_PASS, SURICATA_FILE_LOG, SURICATA_FILES_DIR) ret, stdout, stderr = self.cmd_wrapper(cmd) if ret > 1: log.warning( "Suricata: Failed to create {}/files.zip - Error {}". format(self.logs_path, ret)) suricata["alerts"] = self.sort_by_timestamp(suricata["alerts"]) suricata["http"] = self.sort_by_timestamp(suricata["http"]) suricata["tls"] = self.sort_by_timestamp(suricata["tls"]) return suricata
def setUp(self): self.tmp = tempfile.mkstemp() self.file = File(self.tmp[1])
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to S3. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. self.s3_region = self.options.get("region", "us-west-2") self.s3_access_key = self.options.get("access_key", "") self.s3_secret_key = self.options.get("secret_key", "") s3_reports_bucket_name = self.options.get("reports_bucket", "") s3_shots_bucket_name = self.options.get("shots_bucket", "") s3_samples_bucket_name = self.options.get("samples_bucket", "") s3_files_bucket_name = self.options.get("files_bucket", "") s3_aux_bucket_name = self.options.get("aux_bucket", "") s3_logs_bucket_name = self.options.get("logs_bucket", "") s3_pcap_bucket_name = self.options.get("pcap_bucket", "") s3_md5_bucket_name = self.options.get("md5_bucket", "") cleanup = self.options.get("cleanup", False) # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if not "network" in report: report["network"] = {} # Add screenshot paths report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [ shot for shot in os.listdir(shots_path) if shot.endswith(".jpg") ] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) screenshot = File(shot_path) if screenshot.valid(): #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file)) report["shots"].append(shot_file.replace(".jpg", "")) # Store chunks of API calls in a different collection and reference # those chunks back in the report. # Also allows paging of the reports. if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] chunk_count = 0 # Using this type of prefix is useful because you can always re-construct it from # the original results #chunk_prefix = str(results['info']['id']) + '/' + process['process_name'] chunk_prefix = str(results['info']['id']) + '/' + str( process['process_id']) # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in S1. if len(chunk) == 100: chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count) #log.debug("INFO TIME!") #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix)) #log.debug(chunk_prefix) err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk)) if err != '': log.error( "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}" .format(chunk_name, err)) else: chunks_ids.append("{0}.{1}".format( chunk_prefix, chunk_count)) chunk_count += 1 chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count) #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix)) err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk)) if err != '': log.error( "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}" .format(chunk_name, err)) else: chunks_ids.append("{0}.{1}".format( chunk_prefix, chunk_count)) # Add list of chunks. 
new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes #Other info we want Quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results[ "virustotal"].has_key( "positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("tls") and len( results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"].has_key("alerts") and len( results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len(results["suricata"]["alerts"]) if results["suricata"].has_key("files") and len( results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if results["suricata"].has_key("http") and len( results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) if results["suricata"].has_key("ssh") and len( results["suricata"]["ssh"]) > 0: report["suri_ssh_cnt"] = len(results["suricata"]["ssh"]) if results["suricata"].has_key("dns") and len( results["suricata"]["dns"]) > 0: report["suri_dns_cnt"] = len(results["suricata"]["dns"]) # Store the report (it's 'object id' is simply the analysis id) # First make sure it's not too big (5gb limit) data = json.dumps(report) if len(data) < 5000000000: err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data) if err != '': log.error( "Non-size related issue saving analysis JSON to S3 for report {0} - {1}" .format(results['info']['id'], err)) else: log.error("JSON for analysis id {0} is greater than 5GB".format( results['info']['id'])) #processes the rest of the analysis files and put them in S3 if s3_shots_bucket_name != '': shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots") self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name) if s3_pcap_bucket_name != '': if os.path.isfile(self.analysis_path + '/dump.pcap'): with open(self.analysis_path + '/dump.pcap', 'rb') as infile: self.save_to_s3( s3_pcap_bucket_name, "{0}/dump.pcap".format(results['info']['id']), infile.read()) if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'): with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile: self.save_to_s3( s3_pcap_bucket_name, "{0}/dump_sorted.pcap".format(results['info']['id']), infile.read()) if s3_aux_bucket_name != '': aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux") self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name) if s3_logs_bucket_name != '': logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs") self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name) if s3_samples_bucket_name != '': sample = os.path.realpath(self.analysis_path + '/binary') with open(sample, 'rb') as infile: self.save_to_s3(s3_samples_bucket_name, results['target']['file']['sha256'], infile.read()) #log.debug(s3_files_bucket_name) if s3_files_bucket_name != '': #log.debug(self.analysis_path) for root, dirnames, filenames in os.walk(self.analysis_path + '/files'): #log.debug(filenames) for filename in filenames: key_name = str(results['info']['id']) + '/' + root.split( os.sep)[-1] + '/' + filename if not filename.endswith('_info.txt'): key_name = str( 
results['info']['id']) + '/' + root.split( os.sep)[-1] #log.debug(key_name) with open(os.path.join(root, filename), 'rb') as infile: self.save_to_s3(s3_files_bucket_name, key_name, infile.read()) if s3_md5_bucket_name != '': info = {} info.update(report['info']) info.update(report['target']) self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'], json.dumps(info)) if cleanup: shutil.rmtree(self.analysis_path)
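save_to_s3() is referenced but not shown, so the following is an assumption of what the helper could look like: a put_object call that returns an empty string on success, matching how the return value is checked above. Bucket name, region and payload are placeholders.

import json
import boto3

s3 = boto3.client("s3", region_name="us-west-2")  # region mirrors the default above

def save_to_s3(bucket, key, body):
    # Hypothetical helper: "" on success, the error text otherwise (as the callers expect).
    try:
        s3.put_object(Bucket=bucket, Key=key, Body=body)
        return ""
    except Exception as e:
        return str(e)

task_id, pid, chunk_count = 42, 1234, 0
chunk_name = "{0}/{1}.{2}".format(task_id, pid, chunk_count)  # same key scheme as above
err = save_to_s3("example-reports-bucket", chunk_name,
                 json.dumps([{"api": "NtClose"}]).encode("utf-8"))
if err != "":
    print("upload failed: %s" % err)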
def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one( )["version"] != self.SCHEMA_VERSION: CuckooReportError( "Mongo schema version not expected, check data migration tool" ) else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if "network" not in report: report["network"] = {} # Add screenshot paths report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [ shot for shot in os.listdir(shots_path) if shot.endswith(".jpg") ] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) screenshot = File(shot_path) if screenshot.valid(): # Strip the extension as it's added later # in the Django view report["shots"].append(shot_file.replace(".jpg", "")) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. new_processes = [] for process in report.get("behavior", {}).get("processes", []) or []: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. 
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Calculate the mlist_cnt for display if present to reduce db load if "signatures" in results: for entry in results["signatures"]: if entry["name"] == "ie_martian_children": report["mlist_cnt"] = len(entry["data"]) if entry["name"] == "office_martian_children": report["f_mlist_cnt"] = len(entry["data"]) # Other info we want quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results[ "virustotal"].has_key( "positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) if results.get("suricata", False): keywords = ("tls", "alerts", "files", "http", "ssh", "dns") keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt", "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt") for keyword, keyword_value in zip(keywords, keywords_dict): if results["suricata"].get(keyword, 0): report[keyword_value] = len(results["suricata"][keyword]) # Create an index based on the info.id dict key. Increases overall scalability # with large amounts of data. # Note: Silently ignores the creation if the index already exists. self.db.analysis.create_index("info.id", background=True) #trick for distributed api if results.get("info", {}).get("options", {}).get("main_task_id", ""): report["info"]["id"] = int( results["info"]["options"]["main_task_id"]) analyses = self.db.analysis.find( {"info.id": int(report["info"]["id"])}) if analyses.count() > 0: log.debug("Deleting analysis data for Task %s" % report["info"]["id"]) for analysis in analyses: for process in analysis["behavior"]["processes"]: for call in process["calls"]: self.db.calls.remove({"_id": ObjectId(call)}) self.db.analysis.remove({"_id": ObjectId(analysis["_id"])}) log.debug("Deleted previous MongoDB data for Task %s" % report["info"]["id"]) self.ensure_valid_utf8(report) # Store the report and retrieve its object id. 
try: self.db.analysis.save(report, check_keys=False) except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] if not self.options.get("fix_large_docs", False): # Just log the error and problem keys log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE)) else: # Delete the problem keys and check for more error_saved = True size_filter = MONGOSIZELIMIT while error_saved: if type(report) == list: report = report[0] try: if type(report[parent_key]) == list: for j, parent_dict in enumerate( report[parent_key]): child_key, csize = self.debug_dict_size( parent_dict, parent_key)[0] if csize > size_filter: if parent_key == child_key: log.warn( "results['%s'] deleted due to size: %s" % (parent_key, csize)) del report[parent_key] break else: log.warn( "results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize)) del report[parent_key][j][child_key] else: child_key, csize = self.debug_dict_size( report[parent_key], parent_key)[0] if csize > size_filter: log.warn( "else - results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize)) del report[parent_key][child_key] try: self.db.analysis.save(report, check_keys=False) error_saved = False except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE)) size_filter = size_filter - MEGABYTE except Exception as e: log.error("Failed to delete child key: %s" % str(e)) error_saved = False self.conn.close()
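debug_dict_size() is used above but not shown; to drive the trimming loop it has to rank keys by serialised size, largest first. A sketch under that assumption (the real helper also accepts a parent-key argument that this simplified version omits):

import json

def debug_dict_size(d):
    # Approximate each top-level value's size by its JSON serialisation and
    # return (key, size) pairs, largest first.
    sizes = []
    for key, value in d.items():
        try:
            size = len(json.dumps(value, default=str))
        except (TypeError, ValueError):
            size = 0
        sizes.append((key, size))
    return sorted(sizes, key=lambda item: item[1], reverse=True)

report = {"info": {"id": 1}, "behavior": {"processes": ["call"] * 10000}}
print(debug_dict_size(report)[0])  # ("behavior", <approximate size in bytes>)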
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("target", type=str, help="URL, path to the file or folder to analyze")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug logging")
    parser.add_argument("--remote", type=str, action="store", default=None, help="Specify IP:port of a Cuckoo API server to submit remotely", required=False)
    parser.add_argument("--user", type=str, action="store", default=None, help="Username for Basic Auth", required=False)
    parser.add_argument("--password", type=str, action="store", default=None, help="Password for Basic Auth", required=False)
    parser.add_argument("--sslnoverify", action="store_true", default=False, help="Do not validate SSL cert", required=False)
    parser.add_argument("--ssl", action="store_true", default=False, help="Use SSL/TLS for remote", required=False)
    parser.add_argument("--url", action="store_true", default=False, help="Specify whether the target is a URL", required=False)
    parser.add_argument("--package", type=str, action="store", default="", help="Specify an analysis package", required=False)
    parser.add_argument("--custom", type=str, action="store", default="", help="Specify any custom value", required=False)
    parser.add_argument("--timeout", type=int, action="store", default=0, help="Specify an analysis timeout", required=False)
    parser.add_argument("--options", type=str, action="store", default="", help="Specify options for the analysis package (e.g. \"name=value,name2=value2\")", required=False)
    parser.add_argument("--priority", type=int, action="store", default=1, help="Specify a priority for the analysis represented by an integer", required=False)
    parser.add_argument("--machine", type=str, action="store", default="", help="Specify the identifier of a machine you want to use", required=False)
    parser.add_argument("--platform", type=str, action="store", default="", help="Specify the operating system platform you want to use (windows/darwin/linux)", required=False)
    parser.add_argument("--memory", action="store_true", default=False, help="Enable to take a memory dump of the analysis machine", required=False)
    parser.add_argument("--enforce-timeout", action="store_true", default=False, help="Enable to force the analysis to run for the full timeout period", required=False)
    parser.add_argument("--clock", type=str, action="store", default=None, help="Set virtual machine clock", required=False)
    parser.add_argument("--tags", type=str, action="store", default=None, help="Specify the tags identifier of a machine you want to use", required=False)
    parser.add_argument("--max", type=int, action="store", default=None, help="Maximum samples to add in a row", required=False)
    parser.add_argument("--pattern", type=str, action="store", default=None, help="Pattern of files to submit", required=False)
    parser.add_argument("--shuffle", action="store_true", default=False, help="Shuffle samples before submitting them", required=False)
    parser.add_argument("--unique", action="store_true", default=False, help="Only submit new samples, ignore duplicates", required=False)
    parser.add_argument("--quiet", action="store_true", default=False, help="Only print text on failure", required=False)
    parser.add_argument("--gateway", type=str, action="store", default=None, help="Set the default gateway for the task", required=False)

    try:
        args = parser.parse_args()
    except IOError as e:
        parser.error(e)
        return False

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig()

    # If the quiet flag has been set, also disable the "warning" level of the
    # logging module. (E.g., when pydeep has not been installed, there will be
    # a warning message, because Cuckoo can't compute the ssdeep hash of this
    # particular sample.)
    if args.quiet:
        logging.disable(logging.WARNING)

    db = Database()
    target = to_unicode(args.target)

    # Cap the analysis timeout at 24 hours.
    sane_timeout = min(args.timeout, 60 * 60 * 24)

    gateways = Config("auxiliary").gateways
    if args.gateway and args.gateway in gateways:
        if "," in gateways[args.gateway]:
            # A comma-separated entry lists other gateway names; pick one at random.
            tgateway = random.choice(gateways[args.gateway].split(","))
            ngateway = gateways[tgateway]
        else:
            ngateway = gateways[args.gateway]
        if args.options:
            args.options += ","
        args.options += "setgw=%s" % ngateway

    if args.url:
        if args.remote:
            if not HAVE_REQUESTS:
                print(bold(red("Error")) + ": you need to install python-requests (`pip install requests`)")
                return False

            if args.ssl:
                url = "https://{0}/tasks/create/url".format(args.remote)
            else:
                url = "http://{0}/tasks/create/url".format(args.remote)

            data = dict(
                url=target, package=args.package, timeout=sane_timeout, options=args.options,
                priority=args.priority, machine=args.machine, platform=args.platform,
                memory=args.memory, enforce_timeout=args.enforce_timeout,
                custom=args.custom, tags=args.tags,
            )

            try:
                # requests treats auth=None the same as omitting the argument.
                auth = (args.user, args.password) if args.user and args.password else None
                if args.ssl:
                    verify = not args.sslnoverify
                    response = requests.post(url, auth=auth, data=data, verify=verify)
                else:
                    response = requests.post(url, auth=auth, data=data)
            except Exception as e:
                print(bold(red("Error")) + ": unable to send URL: {0}".format(e))
                return False

            # Renamed from "json" to avoid shadowing the json module.
            resp_json = response.json()
            task_id = resp_json["task_id"]
        else:
            task_id = db.add_url(
                target, package=args.package, timeout=sane_timeout, options=args.options,
                priority=args.priority, machine=args.machine, platform=args.platform,
                custom=args.custom, memory=args.memory, enforce_timeout=args.enforce_timeout,
                clock=args.clock, tags=args.tags,
            )

        if task_id:
            if not args.quiet:
                print(bold(green("Success")) + u": URL \"{0}\" added as task with ID {1}".format(target, task_id))
        else:
            print(bold(red("Error")) + ": adding task to database")
    else:
        # Get the absolute path to deal with relative paths.
        path = to_unicode(os.path.abspath(target))
        if not os.path.exists(path):
            print(bold(red("Error")) + u": the specified file/folder does not exist at path \"{0}\"".format(path))
            return False

        files = []
        if os.path.isdir(path):
            for dirname, dirnames, filenames in os.walk(path):
                for file_name in filenames:
                    file_path = os.path.join(dirname, file_name)
                    if not os.path.isfile(file_path):
                        continue
                    if args.pattern:
                        if fnmatch.fnmatch(file_name, args.pattern):
                            files.append(to_unicode(file_path))
                    else:
                        files.append(to_unicode(file_path))
        else:
            files.append(path)

        if args.shuffle:
            random.shuffle(files)
        else:
            files = sorted(files)

        for file_path in files:
            if not File(file_path).get_size():
                if not args.quiet:
                    print(bold(yellow("Empty") + ": sample {0} (skipping file)".format(file_path)))
                continue

            if args.max is not None:
                # Break if the maximum number of samples has been reached.
                if not args.max:
                    break
                args.max -= 1

            if args.remote:
                if not HAVE_REQUESTS:
                    print(bold(red("Error")) + ": you need to install python-requests (`pip install requests`)")
                    return False

                if args.ssl:
                    url = "https://{0}/tasks/create/file".format(args.remote)
                else:
                    url = "http://{0}/tasks/create/file".format(args.remote)

                # Renamed from "files" to avoid rebinding the list being iterated.
                files_payload = dict(file=open(file_path, "rb"), filename=os.path.basename(file_path))
                data = dict(
                    package=args.package, timeout=sane_timeout, options=args.options,
                    priority=args.priority, machine=args.machine, platform=args.platform,
                    memory=args.memory, enforce_timeout=args.enforce_timeout,
                    custom=args.custom, tags=args.tags,
                )

                try:
                    auth = (args.user, args.password) if args.user and args.password else None
                    if args.ssl:
                        verify = not args.sslnoverify
                        response = requests.post(url, auth=auth, files=files_payload, data=data, verify=verify)
                    else:
                        response = requests.post(url, auth=auth, files=files_payload, data=data)
                except Exception as e:
                    print(bold(red("Error")) + ": unable to send file: {0}".format(e))
                    return False

                resp_json = response.json()
                task_ids = [resp_json.get("task_ids", None)]
            else:
                if args.unique:
                    sha256 = File(file_path).get_sha256()
                    if db.find_sample(sha256=sha256) is not None:
                        msg = ": Sample {0} (skipping file)".format(file_path)
                        if not args.quiet:
                            print(bold(yellow("Duplicate")) + msg)
                        continue

                task_ids = db.demux_sample_and_add_to_db(
                    file_path=file_path, package=args.package, timeout=sane_timeout,
                    options=args.options, priority=args.priority, machine=args.machine,
                    platform=args.platform, memory=args.memory, custom=args.custom,
                    enforce_timeout=args.enforce_timeout, clock=args.clock, tags=args.tags,
                )

            tasks_count = len(task_ids)
            if tasks_count > 1:
                if not args.quiet:
                    print(bold(green("Success")) + u": File \"{0}\" added as task with IDs {1}".format(file_path, task_ids))
            elif tasks_count > 0:
                if not args.quiet:
                    print(bold(green("Success")) + u": File \"{0}\" added as task with ID {1}".format(file_path, task_ids[0]))
            else:
                print(bold(red("Error")) + ": adding task to database")
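# Minimal, hypothetical sketch (not part of submit.py) of what the --remote code path
# above does for a single file: POST the sample plus its task settings to the REST API
# and read the assigned task IDs from the JSON response. The host/port, credentials and
# the submit_file_remote name are placeholders; the endpoint and field names are taken
# from the function above. Assumes python-requests and a reachable Cuckoo API server.
import os
import requests

def submit_file_remote(file_path, remote="192.168.56.1:8090", user=None, password=None):
    url = "http://{0}/tasks/create/file".format(remote)
    auth = (user, password) if user and password else None
    files = dict(file=open(file_path, "rb"), filename=os.path.basename(file_path))
    data = dict(package="", timeout=120, options="", priority=1,
                machine="", platform="", memory=False, enforce_timeout=False,
                custom="", tags="")
    response = requests.post(url, auth=auth, files=files, data=data)
    return response.json().get("task_ids")

# Usage (hypothetical): submit_file_remote("/tmp/sample.exe") -> e.g. [42]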
def run(self):
    """Run analysis.
    @return: structured results.
    """
    self.key = "procmemory"
    results = []

    zipdump = self.options.get("zipdump", False)
    zipstrings = self.options.get("zipstrings", False)
    do_strings = self.options.get("strings", False)
    nulltermonly = self.options.get("nullterminated_only", True)
    minchars = self.options.get("minchars", 5)

    if os.path.exists(self.pmemory_path):
        for dmp in os.listdir(self.pmemory_path):
            # If we are re-processing this task and zips are enabled, the dumps have
            # already been compressed, so only handle raw .dmp files (for now this
            # only matters for Yara).
            if not dmp.endswith(".dmp"):
                continue

            dmp_path = os.path.join(self.pmemory_path, dmp)
            dmp_file = File(dmp_path)

            process_name = ""
            process_path = ""
            process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
            if "behavior" in self.results and "processes" in self.results["behavior"]:
                for process in self.results["behavior"]["processes"]:
                    if process_id == process["process_id"]:
                        process_name = process["process_name"]
                        process_path = process["module_path"]

            proc = dict(
                file=dmp_path,
                pid=process_id,
                name=process_name,
                path=process_path,
                yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                address_space=self.parse_dump(dmp_path),
                zipdump=zipdump,
                zipstrings=zipstrings,
            )

            if do_strings:
                try:
                    # Open the dump in binary mode; the patterns below operate on raw bytes.
                    data = open(dmp_path, "rb").read()
                except (IOError, OSError) as e:
                    raise CuckooProcessingError("Error opening file %s" % e)

                if nulltermonly:
                    apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                    upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
                else:
                    # Non-null-terminated variants: same character classes without the
                    # trailing NUL anchors.
                    apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                    upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

                strings = re.findall(apat, data)
                strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]

                f = open(dmp_path + ".strings", "w")
                f.write("\n".join(strings))
                f.close()
                proc["strings_path"] = dmp_path + ".strings"

                if zipstrings:
                    try:
                        f = zipfile.ZipFile("%s.zip" % proc["strings_path"], "w")
                        f.write(proc["strings_path"], os.path.basename(proc["strings_path"]), zipfile.ZIP_DEFLATED)
                        f.close()
                        os.remove(proc["strings_path"])
                        proc["strings_path"] = "%s.zip" % proc["strings_path"]
                    except Exception as e:
                        raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)

            # Deduplicate configs extracted by the Yara signatures.
            if proc["yara"]:
                for match in proc["yara"]:
                    # Dyre
                    if match["name"] == "DyreCfgInjectsList":
                        output = []
                        buf = ""
                        recline = False
                        for ystring in match["strings"]:
                            for line in ystring.splitlines():
                                if line.startswith("<litem>"):
                                    buf = ""
                                    recline = True
                                if recline:
                                    buf += line.strip() + "\n"
                                if line.startswith("</litem>"):
                                    recline = False
                                    if buf not in output:
                                        output.append(buf)
                        match["strings"] = ["".join(output)]
                        match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)
                    elif match["name"] == "DyreCfgRedirectList":
                        output = []
                        buf = ""
                        recline = False
                        for ystring in match["strings"]:
                            for line in ystring.splitlines():
                                if line.startswith("<rpcgroup>"):
                                    buf = ""
                                    recline = True
                                if recline:
                                    buf += line.strip() + "\n"
                                if line.startswith("</rpcgroup>"):
                                    recline = False
                                    if buf not in output:
                                        output.append(buf)
                        match["strings"] = ["".join(output)]
                        match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)
                    # DarkComet
                    elif match["name"] == "DarkCometConfig":
                        output = []
                        # Initialize buf like the Dyre handlers to avoid a NameError if
                        # the end marker appears before the begin marker.
                        buf = ""
                        recline = False
                        for ystring in match["strings"]:
                            for line in ystring.splitlines():
                                if line.startswith("#BEGIN DARKCOMET"):
                                    buf = ""
                                    recline = True
                                if recline:
                                    buf += line.strip() + "\n"
                                if line.startswith("#EOF DARKCOMET"):
                                    recline = False
                                    if buf not in output:
                                        output.append(buf)
                        match["strings"] = ["".join(output)]

            if zipdump:
                try:
                    f = zipfile.ZipFile("%s.zip" % dmp_path, "w")
                    f.write(dmp_path, os.path.basename(dmp_path), zipfile.ZIP_DEFLATED)
                    f.close()
                    os.remove(dmp_path)
                    proc["file"] = "%s.zip" % dmp_path
                except Exception as e:
                    raise CuckooProcessingError("Error creating Process Memory Zip File %s" % e)

            results.append(proc)

    return results
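# Minimal, standalone sketch (not part of the module above) showing the string-extraction
# patterns it builds, applied to a small in-memory buffer: runs of printable ASCII
# terminated by a NUL, and UTF-16LE runs terminated by a double NUL. The buffer contents
# are made up for illustration.
import re

minchars = 5
buf = b"\x00junk\x00http://example.com\x00\x01\x02"
buf += "configuration".encode("utf-16le") + b"\x00\x00"

apat = b"([\x20-\x7e]{" + str(minchars).encode() + b",})\x00"
upat = b"((?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b",})\x00\x00"

ascii_strings = re.findall(apat, buf)
wide_strings = [ws.decode("utf-16le") for ws in re.findall(upat, buf)]
print(ascii_strings)  # [b'http://example.com']  ("junk" is shorter than minchars)
print(wide_strings)   # ['configuration']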
def run(self):
    """Run analysis.
    @return: structured results.
    """
    self.key = "procmemory"
    results = []

    do_strings = self.options.get("strings", False)
    nulltermonly = self.options.get("nullterminated_only", True)
    minchars = str(self.options.get("minchars", 5)).encode()

    if os.path.exists(self.pmemory_path):
        for dmp in os.listdir(self.pmemory_path):
            # If we are re-processing this task and zips are enabled, the dumps have
            # already been compressed, so only handle raw .dmp files (for now this
            # only matters for Yara).
            if not dmp.endswith(".dmp"):
                continue

            dmp_path = os.path.join(self.pmemory_path, dmp)
            if os.path.getsize(dmp_path) == 0:
                continue

            dmp_file = File(dmp_path)
            process_name = ""
            process_path = ""
            process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])

            for process in self.results.get("behavior", {}).get("processes", []) or []:
                if process_id == process["process_id"]:
                    process_name = process["process_name"]
                    process_path = process["module_path"]

            procdump = ProcDump(dmp_path, pretty=True)

            proc = dict(
                file=dmp_path,
                pid=process_id,
                name=process_name,
                path=process_path,
                yara=dmp_file.get_yara(category="memory"),
                cape_yara=dmp_file.get_yara(category="CAPE"),
                address_space=procdump.pretty_print(),
            )

            for hit in proc["cape_yara"]:
                hit["memblocks"] = {}
                for item in hit["addresses"]:
                    memblock = self.get_yara_memblock(proc["address_space"], hit["addresses"][item])
                    if memblock:
                        hit["memblocks"][item] = memblock

            # if self.options.get("extract_pe", False)
            extracted_pes = self.get_procmemory_pe(proc)

            # Without re2, cap the quantifier so the stdlib re module does not
            # build arbitrarily long matches.
            endlimit = b""
            if not HAVE_RE2:
                endlimit = b"8192"

            if do_strings:
                if nulltermonly:
                    apat = b"([\x20-\x7e]{" + minchars + b"," + endlimit + b"})\x00"
                    upat = b"((?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"})\x00\x00"
                else:
                    apat = b"[\x20-\x7e]{" + minchars + b"," + endlimit + b"}"
                    upat = b"(?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"}"

                matchdict = procdump.search(apat, all=True)
                strings = matchdict["matches"]
                matchdict = procdump.search(upat, all=True)
                ustrings = matchdict["matches"]
                for ws in ustrings:
                    strings.append(ws.decode("utf-16le").encode())

                proc["strings_path"] = f"{dmp_path}.strings"
                proc["extracted_pe"] = extracted_pes
                f = open(proc["strings_path"], "wb")
                f.write(b"\n".join(strings))
                f.close()

            procdump.close()
            results.append(proc)

            cape_name = cape_name_from_yara(proc, process_id, self.results)
            if cape_name and "detections" not in self.results:
                self.results["detections"] = cape_name

    return results
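# Minimal sketch (not part of the module above) of the endlimit logic it uses: when
# google-re2 is unavailable the repetition is capped at 8192 characters, otherwise the
# quantifier is left open-ended. Here both cases are emulated against plain re on a
# made-up buffer; the resulting matches are identical, only the upper bound differs.
import re

minchars = b"5"
buf = b"\x01hello world\x00\xff"
for have_re2 in (True, False):
    endlimit = b"" if have_re2 else b"8192"
    apat = b"([\x20-\x7e]{" + minchars + b"," + endlimit + b"})\x00"
    print(have_re2, apat, re.findall(apat, buf))  # both print [b'hello world']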