def process(task_id, target=None, copy_path=None, report=False, auto=False):
    assert isinstance(task_id, int)

    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    db = Database()
    if os.path.exists(os.path.join(REPORT_ROOT, "storage", "analyses",
                                   str(task_id), "logs")):
        GetFeeds(results=results).run()
        RunProcessing(task_id=task_id, results=results).run()
        RunSignatures(task_id=task_id, results=results).run()

    # Fetch the task up front; the sub-task handling below needs it even
    # when no report is requested.
    task = db.view_task(task_id)

    if report:
        try:
            results = generate_result(task, results)
            RunReporting(task_id=task_id, results=results).run()
            db.set_status(task_id, TASK_REPORTED)
        except Exception as e:
            log.error("Task #%d: reports generation failed: %s", task_id, e)
            db.set_status(task_id, TASK_FAILED_REPORTING)
        finally:
            del results

    if auto:
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)

    if task_id < 0 and task.mode < 2:
        json_reports = []
        targets = []
        started = []
        completed = []
        sub_tasks = db.list_subtasks(task_id)
        for sub_task in sub_tasks:
            if sub_task.status not in [
                    TASK_REPORTED, TASK_FAILED_REPORTING,
                    TASK_FAILED_ANALYSIS, TASK_FAILED_PROCESSING]:
                return
            json_path = path.join(REPORT_ROOT, "storage", "analyses",
                                  str(sub_task.id), "reports", "report.json")
            if path.exists(json_path):
                with open(json_path) as f:
                    json_reports.append(json.load(f))
                targets.append(sub_task.target)
                started.append(sub_task.started_on)
                completed.append(sub_task.completed_on)

        # Pick the report with the highest score as the base assessment.
        scores = [jr["scores"] for jr in json_reports]
        base_report = json_reports[scores.index(max(scores))]
        base_assessment_result = {
            "scores": base_report["scores"],
            "severity": base_report["severity"],
            "summary": base_report["summary"],
            "details": base_report["details"],
            "description": base_report["description"],
        }

        # Get parent task details.
        parent_task = db.view_parent_task(task_id)
        log.debug("#%d: sub tasks reported, starting to generate the final report.",
                  parent_task.id)

        # Get parent task start and completion time.
        started = min(started)
        completed = max(completed)
        db.set_time(parent_task.id, "started_on", started)
        db.set_time(parent_task.id, "completed_on", completed)
        duration = (completed - started).seconds

        targetdetail = {}
        if os.path.exists(parent_task.target):
            filedetail = File(parent_task.target).get_all()
            fmt_file = pd_fmt_file(parent_task.target.encode("utf-8"))
            targetdetail = {
                "target": filedetail["name"],
                "size": filedetail["size"],
                "extnomatch": 1 - fmt_file[1],
                "type": fmt_file[2],
                "md5": filedetail["md5"],
                "sha1": filedetail["sha1"],
            }

        report_result = {
            "category": parent_task.category,
            "targetdetail": targetdetail,
            "reporttime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "duration": duration,
            "started": started.strftime("%Y-%m-%d %H:%M:%S"),
            "ended": completed.strftime("%Y-%m-%d %H:%M:%S"),
        }
        report_result.update(base_assessment_result)
        report_result["file_reports"] = json_reports

        try:
            reports_path = os.path.join(REPORT_ROOT, "storage", "analyses",
                                        str(parent_task.id), "reports")
            if not os.path.exists(reports_path):
                os.makedirs(reports_path)
            RunReporting(task_id=parent_task.id, results=report_result).run()
            db.set_status(parent_task.id, TASK_REPORTED)
            log.info("Task #%d: reports generation completed (path=%s)",
                     parent_task.id, reports_path)
        except Exception as e:
            log.error("#%s: report generation failed, msg: %s", parent_task.id, e)
            db.set_status(parent_task.id, TASK_FAILED_REPORTING)
        finally:
            del report_result

        # Remove the uncompressed dir, then delete all sub tasks and their
        # storage if they exist.
        _tail = "_z1p2d1r"
        uncompressed_dir = parent_task.target + _tail
        if path.exists(uncompressed_dir):
            shutil.rmtree(uncompressed_dir, ignore_errors=True)
        try:
            for sub_task in sub_tasks:
                db.delete_task(sub_task.id)
                db.delete_result(sub_task.id)
                db.delete_sub_task(sub_task.id)
                task_path = path.join(REPORT_ROOT, "storage", "analyses",
                                      str(sub_task.id))
                if path.exists(task_path):
                    shutil.rmtree(task_path, ignore_errors=True)
            log.info("Deleted submitted sub tasks successfully")
        except Exception as e:
            log.info("Deleting submitted sub tasks failed, msg: %s", e)

    if task_id < 0 and task.mode == 2:
        json_reports = []
        targets = []
        report_path = []
        sub_tasks = db.list_subtasks(task_id)
        for sub_task in sub_tasks:
            if sub_task.status not in [
                    TASK_REPORTED, TASK_FAILED_REPORTING,
                    TASK_FAILED_ANALYSIS, TASK_FAILED_PROCESSING]:
                return
            json_path = path.join(REPORT_ROOT, "storage", "analyses",
                                  str(sub_task.id), "reports", "report.json")
            if path.exists(json_path):
                with open(json_path) as f:
                    json_reports.append(json.load(f))
                targets.append(sub_task.target)
                report_path.append(path.join(REPORT_ROOT, "storage", "analyses",
                                             str(sub_task.id)))

        max_malscore_index = max(enumerate(json_reports),
                                 key=lambda x: x[1]["scores"])[0]
        parent_task = db.view_parent_task(task_id)
        db.set_time(parent_task.id, "started_on",
                    json_reports[max_malscore_index]["started"])
        db.set_time(parent_task.id, "completed_on",
                    json_reports[max_malscore_index]["completed"])
        reports_path = path.join(REPORT_ROOT, "storage", "analyses",
                                 str(parent_task.id))
        if not path.exists(reports_path):
            shutil.copytree(report_path[max_malscore_index], reports_path)
        db.set_status(parent_task.id, TASK_REPORTED)
        log.info("Task #%d: reports generation completed (path=%s)",
                 parent_task.id, reports_path)
        try:
            for sub_task in sub_tasks:
                # TODO: delete negative task of mode==2
                db.delete_task(sub_task.id)
                db.delete_result(sub_task.id)
                db.delete_sub_task(sub_task.id)
                task_path = path.join(REPORT_ROOT, "storage", "analyses",
                                      str(sub_task.id))
                if path.exists(task_path):
                    shutil.rmtree(task_path, ignore_errors=True)
            log.info("Deleted submitted sub tasks successfully")
        except Exception as e:
            log.info("Deleting submitted sub tasks failed, msg: %s", e)

    gc.collect()
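# Hedged usage sketch (not part of the original module): driving the
# process() above once for a single finished task. The task id and the
# report/auto flags are illustrative assumptions.
def _example_process_once():
    # Task #1 is assumed to be a completed analysis with logs on disk.
    process(1, report=True, auto=False)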
def cuckoo_clean():
    """Clean up cuckoo setup.
    It deletes logs, all stored data from the file system and the configured
    databases (SQL and MongoDB).
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Initialize the database connection.
    try:
        db = Database()
    except CuckooDatabaseError as e:
        # If something is screwed due to incorrect database migrations or a
        # bad database, SQLAlchemy would be unable to connect and operate.
        log.warning("Error connecting to database: it is suggested to check "
                    "the connectivity, apply all migrations if needed or purge "
                    "it manually. Error description: %s", e)
    else:
        # Drop all tables.
        db.drop()

    # Check if MongoDB reporting is enabled and drop the database if it is.
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        try:
            conn = MongoClient(host, port)
            conn.drop_database(mdb)
            conn.close()
        except Exception:
            log.warning("Unable to drop MongoDB database: %s", mdb)

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    # Note: dirpath from os.walk() already includes CUCKOO_ROOT.
    for dirpath, dirnames, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue
            path = os.path.join(dirpath, fname)
            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
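# Hedged sketch (assumed helper, not in the original): cuckoo_clean() above
# is destructive, so a confirmation wrapper is a sensible safety net before
# wiping the databases and the storage tree.
def _example_confirmed_clean():
    answer = input("Really wipe db/log/storage and drop databases? [y/N] ")
    if answer.strip().lower() == "y":
        cuckoo_clean()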
def cuckoo_clean_before_day(args):
    """Clean up old tasks.
    It deletes all stored data from the file system and the configured
    databases (SQL and MongoDB) for tasks completed before now - days.
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    if not args.delete_older_than_days:
        print("No days argument provided, bailing")
        return
    days = args.delete_older_than_days

    create_structure()
    init_console_logging()
    id_arr = []

    # Initialize the database connection.
    db = Database()

    # Check if MongoDB reporting is enabled; we need it to clean up results.
    results_db = None
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        try:
            results_db = MongoClient(host, port)[mdb]
        except Exception:
            log.warning("Unable to connect to MongoDB database: %s", mdb)
            return

    added_before = datetime.now() - timedelta(days=int(days))
    if args.files_only_filter:
        print("file filter applied")
        old_tasks = db.list_tasks(added_before=added_before, category="file")
    elif args.urls_only_filter:
        print("url filter applied")
        old_tasks = db.list_tasks(added_before=added_before, category="url")
    else:
        old_tasks = db.list_tasks(added_before=added_before)

    for e in old_tasks:
        new = e.to_dict()
        print(int(new["id"]))
        id_arr.append({"info.id": int(new["id"])})

    print("number of matching records %s before suri/custom filter" % len(id_arr))
    if id_arr and args.suricata_zero_alert_filter:
        result = list(results_db.analysis.find(
            {"suricata.alerts.alert": {"$exists": False}, "$or": id_arr},
            {"info.id": 1}))
        id_arr = [entry["info"]["id"] for entry in result]
    if id_arr and args.custom_include_filter:
        result = list(results_db.analysis.find(
            {"info.custom": {"$regex": args.custom_include_filter}, "$or": id_arr},
            {"info.id": 1}))
        id_arr = [entry["info"]["id"] for entry in result]
    print("number of matching records %s" % len(id_arr))

    for e in id_arr:
        # Entries are {"info.id": N} dicts unless a filter above replaced
        # them with plain ids; normalize before use.
        tid = e["info.id"] if isinstance(e, dict) else e
        if results_db is not None:
            try:
                print("removing %s from analysis db" % tid)
                results_db.analysis.remove({"info.id": tid})
            except Exception:
                print("failed to remove analysis info (may not exist) %s" % tid)
        if db.delete_task(tid):
            delete_folder(os.path.join(CUCKOO_ROOT, "storage", "analyses",
                                       "%s" % tid))
        else:
            print("failed to remove failed task %s from DB" % tid)
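# Hedged usage sketch: cuckoo_clean_before_day() above expects an
# argparse-style namespace; the attribute names mirror the fields the
# function reads, and the values below are illustrative assumptions.
def _example_clean_older_than_week():
    import argparse
    args = argparse.Namespace(
        delete_older_than_days=7,
        files_only_filter=False,
        urls_only_filter=False,
        suricata_zero_alert_filter=False,
        custom_include_filter=None,
    )
    cuckoo_clean_before_day(args)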
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("target", type=str,
                        help="URL, path to the file or folder to analyze")
    parser.add_argument("-d", "--debug", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("--remote", type=str, action="store", default=None,
                        help="Specify IP:port to a Cuckoo API server to submit remotely",
                        required=False)
    parser.add_argument("--user", type=str, action="store", default=None,
                        help="Username for Basic Auth", required=False)
    parser.add_argument("--password", type=str, action="store", default=None,
                        help="Password for Basic Auth", required=False)
    parser.add_argument("--sslnoverify", action="store_true", default=False,
                        help="Do not validate SSL cert", required=False)
    parser.add_argument("--ssl", action="store_true", default=False,
                        help="Use SSL/TLS for remote", required=False)
    parser.add_argument("--url", action="store_true", default=False,
                        help="Specify whether the target is an URL", required=False)
    parser.add_argument("--package", type=str, action="store", default="",
                        help="Specify an analysis package", required=False)
    parser.add_argument("--custom", type=str, action="store", default="",
                        help="Specify any custom value", required=False)
    parser.add_argument("--timeout", type=int, action="store", default=0,
                        help="Specify an analysis timeout", required=False)
    parser.add_argument("--options", type=str, action="store", default="",
                        help="Specify options for the analysis package "
                             "(e.g. \"name=value,name2=value2\")",
                        required=False)
    parser.add_argument("--priority", type=int, action="store", default=1,
                        help="Specify a priority for the analysis represented by an integer",
                        required=False)
    parser.add_argument("--machine", type=str, action="store", default="",
                        help="Specify the identifier of a machine you want to use",
                        required=False)
    parser.add_argument("--platform", type=str, action="store", default="",
                        help="Specify the operating system platform you want to use (windows/darwin/linux)",
                        required=False)
    parser.add_argument("--memory", action="store_true", default=False,
                        help="Enable to take a memory dump of the analysis machine",
                        required=False)
    parser.add_argument("--enforce-timeout", action="store_true", default=False,
                        help="Enable to force the analysis to run for the full timeout period",
                        required=False)
    parser.add_argument("--clock", type=str, action="store", default=None,
                        help="Set virtual machine clock", required=False)
    parser.add_argument("--tags", type=str, action="store", default=None,
                        help="Specify tags identifier of a machine you want to use",
                        required=False)
    parser.add_argument("--max", type=int, action="store", default=None,
                        help="Maximum samples to add in a row", required=False)
    parser.add_argument("--pattern", type=str, action="store", default=None,
                        help="Pattern of files to submit", required=False)
    parser.add_argument("--shuffle", action="store_true", default=False,
                        help="Shuffle samples before submitting them", required=False)
    parser.add_argument("--unique", action="store_true", default=False,
                        help="Only submit new samples, ignore duplicates", required=False)
    parser.add_argument("--quiet", action="store_true", default=False,
                        help="Only print text on failure", required=False)

    try:
        args = parser.parse_args()
    except IOError as e:
        parser.error(e)
        return False

    # If the quiet flag has been set, then we also disable the "warning"
    # level of the logging module. (E.g., when pydeep has not been installed,
    # there will be a warning message, because Cuckoo can't resolve the
    # ssdeep hash of this particular sample.)
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig()
    if args.quiet:
        logging.disable(logging.WARNING)

    db = Database()

    target = to_unicode(args.target)

    # Cap the timeout at 24 hours.
    sane_timeout = min(args.timeout, 60 * 60 * 24)

    if args.url:
        if args.remote:
            if not HAVE_REQUESTS:
                print(bold(red("Error")) +
                      ": you need to install python-requests (`pip install requests`)")
                return False
            if args.ssl:
                url = "https://{0}/tasks/create/url".format(args.remote)
            else:
                url = "http://{0}/tasks/create/url".format(args.remote)
            data = dict(url=target,
                        package=args.package,
                        timeout=sane_timeout,
                        options=args.options,
                        priority=args.priority,
                        machine=args.machine,
                        platform=args.platform,
                        memory=args.memory,
                        enforce_timeout=args.enforce_timeout,
                        custom=args.custom,
                        tags=args.tags)
            try:
                # Only verify certificates when SSL is requested and
                # --sslnoverify has not been passed.
                verify = not args.sslnoverify if args.ssl else True
                auth = (args.user, args.password) if args.user and args.password else None
                response = requests.post(url, auth=auth, data=data, verify=verify)
            except Exception as e:
                print(bold(red("Error")) + ": unable to send URL: {0}".format(e))
                return False
            json_response = response.json()
            task_id = json_response["task_id"]
        else:
            task_id = db.add_url(target,
                                 package=args.package,
                                 timeout=sane_timeout,
                                 options=args.options,
                                 priority=args.priority,
                                 machine=args.machine,
                                 platform=args.platform,
                                 custom=args.custom,
                                 memory=args.memory,
                                 enforce_timeout=args.enforce_timeout,
                                 clock=args.clock,
                                 tags=args.tags)
        if task_id:
            if not args.quiet:
                print(bold(green("Success")) +
                      u": URL \"{0}\" added as task with ID {1}".format(target, task_id))
        else:
            print(bold(red("Error")) + ": adding task to database")
    else:
        # Get absolute path to deal with relative.
        path = to_unicode(os.path.abspath(target))
        if not os.path.exists(path):
            print(bold(red("Error")) +
                  u": the specified file/folder does not exist at path \"{0}\"".format(path))
            return False

        files = []
        if os.path.isdir(path):
            for dirname, dirnames, filenames in os.walk(path):
                for file_name in filenames:
                    file_path = os.path.join(dirname, file_name)
                    if not os.path.isfile(file_path):
                        continue
                    if args.pattern:
                        if fnmatch.fnmatch(file_name, args.pattern):
                            files.append(to_unicode(file_path))
                    else:
                        files.append(to_unicode(file_path))
        else:
            files.append(path)

        if args.shuffle:
            random.shuffle(files)
        else:
            files = sorted(files)

        for file_path in files:
            if not File(file_path).get_size():
                if not args.quiet:
                    print(bold(yellow("Empty")) +
                          ": sample {0} (skipping file)".format(file_path))
                continue

            if args.max is not None:
                # Break if the maximum number of samples has been reached.
                if not args.max:
                    break
                args.max -= 1

            if args.remote:
                if not HAVE_REQUESTS:
                    print(bold(red("Error")) +
                          ": you need to install python-requests (`pip install requests`)")
                    return False
                if args.ssl:
                    url = "https://{0}/tasks/create/file".format(args.remote)
                else:
                    url = "http://{0}/tasks/create/file".format(args.remote)
                # Don't shadow the list we're iterating over.
                upload = dict(file=open(file_path, "rb"),
                              filename=os.path.basename(file_path))
                data = dict(package=args.package,
                            timeout=sane_timeout,
                            options=args.options,
                            priority=args.priority,
                            machine=args.machine,
                            platform=args.platform,
                            memory=args.memory,
                            enforce_timeout=args.enforce_timeout,
                            custom=args.custom,
                            tags=args.tags)
                try:
                    verify = not args.sslnoverify if args.ssl else True
                    auth = (args.user, args.password) if args.user and args.password else None
                    response = requests.post(url, auth=auth, files=upload,
                                             data=data, verify=verify)
                except Exception as e:
                    print(bold(red("Error")) + ": unable to send file: {0}".format(e))
                    return False
                json_response = response.json()
                task_ids = [json_response["task_id"]]
            else:
                if args.unique:
                    sha256 = File(file_path).get_sha256()
                    if db.find_sample(sha256=sha256) is not None:
                        msg = ": Sample {0} (skipping file)".format(file_path)
                        if not args.quiet:
                            print(bold(yellow("Duplicate")) + msg)
                        continue
                task_ids = db.demux_sample_and_add_to_db(file_path=file_path,
                                                         package=args.package,
                                                         timeout=sane_timeout,
                                                         options=args.options,
                                                         priority=args.priority,
                                                         machine=args.machine,
                                                         platform=args.platform,
                                                         memory=args.memory,
                                                         custom=args.custom,
                                                         enforce_timeout=args.enforce_timeout,
                                                         clock=args.clock,
                                                         tags=args.tags)
            tasks_count = len(task_ids)
            if tasks_count > 1:
                if not args.quiet:
                    print(bold(green("Success")) +
                          u": File \"{0}\" added as task with IDs {1}".format(
                              file_path, task_ids))
            elif tasks_count > 0:
                if not args.quiet:
                    print(bold(green("Success")) +
                          u": File \"{0}\" added as task with ID {1}".format(
                              file_path, task_ids[0]))
            else:
                print(bold(red("Error")) + ": adding task to database")
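# Hedged shell examples for the submitter above; the script name (submit.py)
# and the paths/addresses are illustrative assumptions.
#
#   python submit.py /tmp/sample.exe --timeout 120 --priority 2
#   python submit.py /tmp/samples/ --pattern "*.dll" --unique --max 10
#   python submit.py http://example.com --url --remote 192.168.1.10:8090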
def cuckoo_clean_sorted_pcap_dump():
    """Clean up sorted PCAP dumps.
    It removes the sorted PCAP reference from MongoDB and deletes the
    dump_sorted.pcap files from the file system.
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Initialize the database connection.
    db = Database()

    # Check if MongoDB reporting is enabled; we need it to find the dumps.
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        try:
            results_db = MongoClient(host, port)[mdb]
        except Exception:
            log.warning("Unable to connect to MongoDB database: %s", mdb)
            return

        done = False
        while not done:
            rtmp = results_db.analysis.find(
                {"network.sorted_pcap_id": {"$exists": True}},
                {"info.id": 1},
                sort=[("_id", -1)]).limit(100)
            if rtmp and rtmp.count() > 0:
                for e in rtmp:
                    if not e["info"]["id"]:
                        done = True
                        continue
                    print(e["info"]["id"])
                    try:
                        results_db.analysis.update(
                            {"info.id": int(e["info"]["id"])},
                            {"$unset": {"network.sorted_pcap_id": ""}})
                    except Exception:
                        print("failed to remove sorted pcap from db for id %s" %
                              e["info"]["id"])
                    try:
                        pcap_path = os.path.join(
                            CUCKOO_ROOT, "storage", "analyses",
                            "%s" % e["info"]["id"], "dump_sorted.pcap")
                        os.remove(pcap_path)
                    except OSError as err:
                        print("failed to remove sorted_pcap from disk %s" % err)
            else:
                done = True
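# Hedged sketch: a dry-run counterpart of the cleanup above that only counts
# how many analyses still carry a sorted PCAP reference. Connection settings
# mirror the reporting config the function itself reads; count_documents()
# assumes pymongo 3.7+.
def _example_count_sorted_pcaps():
    from pymongo import MongoClient
    cfg = Config("reporting")
    if not (cfg.mongodb and cfg.mongodb.enabled):
        return
    conn = MongoClient(cfg.mongodb.get("host", "127.0.0.1"),
                       cfg.mongodb.get("port", 27017))
    print(conn[cfg.mongodb.get("db", "cuckoo")].analysis.count_documents(
        {"network.sorted_pcap_id": {"$exists": True}}))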
def cuckoo_clean():
    """Clean up cuckoo setup.
    It deletes logs, all stored data from the file system and the configured
    databases (SQL, MongoDB and ElasticSearch).
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Initialize the database connection.
    db = Database()

    # Drop all tables.
    db.drop()

    # Check if MongoDB reporting is enabled and drop the database if it is.
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        try:
            conn = MongoClient(host, port)
            conn.drop_database(mdb)
            conn.close()
        except Exception:
            log.warning("Unable to drop MongoDB database: %s", mdb)

    # Check if ElasticSearch is enabled and delete that data if it is.
    if cfg.elasticsearchdb and cfg.elasticsearchdb.enabled and not cfg.elasticsearchdb.searchonly:
        from elasticsearch import Elasticsearch
        delidx = cfg.elasticsearchdb.index + "-*"
        es = None
        try:
            es = Elasticsearch(
                hosts=[{
                    "host": cfg.elasticsearchdb.host,
                    "port": cfg.elasticsearchdb.port,
                }],
                timeout=60,
            )
        except Exception:
            log.warning("Unable to connect to ElasticSearch")

        if es:
            analyses = es.search(index=delidx, doc_type="analysis",
                                 q="*")["hits"]["hits"]
            for analysis in analyses:
                esidx = analysis["_index"]
                esid = analysis["_id"]
                # Check if behavior exists.
                if analysis["_source"].get("behavior"):
                    for process in analysis["_source"]["behavior"]["processes"]:
                        for call in process["calls"]:
                            es.delete(index=esidx, doc_type="calls", id=call)
                # Delete the analysis results.
                es.delete(index=esidx, doc_type="analysis", id=esid)

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    # Note: dirpath from os.walk() already includes CUCKOO_ROOT.
    for dirpath, dirnames, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue
            path = os.path.join(dirpath, fname)
            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
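# Hedged sketch (assumed helper): listing what the clean-up above would
# remove from disk, without deleting anything.
def _example_clean_dry_run():
    for name in ("db", "log", "storage"):
        p = os.path.join(CUCKOO_ROOT, name)
        print(p, "would be removed" if os.path.isdir(p) else "missing")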
def process(target=None, copy_path=None, task=None, report=False, auto=False,
            capeproc=False):
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    results["statistics"] = {}
    results["statistics"]["processing"] = list()
    results["statistics"]["signatures"] = list()
    results["statistics"]["reporting"] = list()
    GetFeeds(results=results).run()
    RunProcessing(task=task.to_dict(), results=results).run()
    RunSignatures(task=task.to_dict(), results=results).run()
    task_id = task.to_dict()["id"]

    if report:
        if repconf.mongodb.enabled:
            host = repconf.mongodb.host
            port = repconf.mongodb.port
            db = repconf.mongodb.db
            conn = MongoClient(host, port)
            mdata = conn[db]
            analyses = mdata.analysis.find({"info.id": int(task_id)})
            if analyses.count() > 0:
                log.debug("Deleting analysis data for Task %s" % task_id)
                for analysis in analyses:
                    for process in analysis["behavior"].get("processes", []):
                        for call in process["calls"]:
                            mdata.calls.remove({"_id": ObjectId(call)})
                    mdata.analysis.remove({"_id": ObjectId(analysis["_id"])})
            conn.close()
            log.debug("Deleted previous MongoDB data for Task %s" % task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            analyses = es.search(index=fullidx, doc_type="analysis",
                                 q="info.id: \"%s\"" % task_id)["hits"]["hits"]
            for analysis in analyses:
                esidx = analysis["_index"]
                esid = analysis["_id"]
                # Check if behavior exists.
                if analysis["_source"]["behavior"]:
                    for process in analysis["_source"]["behavior"]["processes"]:
                        for call in process["calls"]:
                            es.delete(index=esidx, doc_type="calls", id=call)
                # Delete the analysis results.
                es.delete(index=esidx, doc_type="analysis", id=esid)

        if auto or capeproc:
            reprocess = False
        else:
            reprocess = report
        RunReporting(task=task.to_dict(), results=results,
                     reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)
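# Hedged usage sketch: unlike the task-id variant earlier, the process()
# above takes a task ORM object; task #1 is an illustrative assumption
# (the same view_task() pattern appears in the entry point further below).
def _example_process_task_object():
    task = Database().view_task(1)
    process(task=task, report=True)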
def import_analysis(request):
    if request.method == "GET":
        return render(request, "analysis/import.html")

    db = Database()
    task_ids = []

    analyses = request.FILES.getlist("sample")
    for analysis in analyses:
        if not analysis.size:
            return render(request, "error.html", {
                "error": "You uploaded an empty analysis.",
            })

        # if analysis.size > settings.MAX_UPLOAD_SIZE:
        #     return render(request, "error.html", {
        #         "error": "You uploaded a file that exceeds the maximum allowed upload size.",
        #     })

        if not analysis.name.endswith(".zip"):
            return render(request, "error.html", {
                "error": "You uploaded an analysis that wasn't a .zip.",
            })

        zf = zipfile.ZipFile(analysis)

        # As per Python documentation we have to make sure there are no
        # incorrect filenames.
        for filename in zf.namelist():
            if filename.startswith("/") or ".." in filename or ":" in filename:
                return render(request, "error.html", {
                    "error": "The zip file contains incorrect filenames, "
                             "please provide a legitimate .zip file.",
                })

        if "analysis.json" in zf.namelist():
            analysis_info = json.loads(zf.read("analysis.json"))
        elif "binary" in zf.namelist():
            analysis_info = {
                "target": {
                    "category": "file",
                },
            }
        else:
            analysis_info = {
                "target": {
                    "category": "url",
                    "url": "unknown",
                },
            }

        category = analysis_info["target"]["category"]
        task_id = None
        if category == "file":
            binary = store_temp_file(zf.read("binary"), "binary")
            if os.path.isfile(binary):
                task_id = db.add_path(file_path=binary,
                                      package="",
                                      timeout=0,
                                      options="",
                                      priority=0,
                                      machine="",
                                      custom="",
                                      memory=False,
                                      enforce_timeout=False,
                                      tags=None)
                if task_id:
                    task_ids.append(task_id)
        elif category == "url":
            url = analysis_info["target"]["url"]
            if not url:
                return render(request, "error.html", {
                    "error": "You specified an invalid URL!",
                })
            task_id = db.add_url(url=url,
                                 package="",
                                 timeout=0,
                                 options="",
                                 priority=0,
                                 machine="",
                                 custom="",
                                 memory=False,
                                 enforce_timeout=False,
                                 tags=None)
            if task_id:
                task_ids.append(task_id)

        if not task_id:
            continue

        # Extract all of the files related to this analysis. This probably
        # requires some hacks depending on the user/group the Web
        # Interface is running under.
        analysis_path = os.path.join(CUCKOO_ROOT, "storage", "analyses",
                                     "%d" % task_id)
        if not os.path.exists(analysis_path):
            os.mkdir(analysis_path)
        zf.extractall(analysis_path)

        # We set this analysis as completed so that it will be processed
        # automatically (assuming process.py / process2.py is running).
        db.set_status(task_id, TASK_COMPLETED)

    if task_ids:
        return render(request, "submission/complete.html", {
            "tasks": task_ids,
            "baseurl": request.build_absolute_uri("/")[:-1],
        })
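# Hedged sketch: building a .zip that the import_analysis() view above would
# accept for a file-category analysis. The archive layout (analysis.json plus
# the sample stored as "binary") follows the checks in the view; the output
# path and sample bytes are illustrative assumptions.
def _example_build_import_zip(out_path="/tmp/import_me.zip"):
    with zipfile.ZipFile(out_path, "w") as zf:
        zf.writestr("analysis.json", json.dumps({"target": {"category": "file"}}))
        zf.writestr("binary", b"MZ placeholder sample bytes")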
def run(self, results):
    self.noinject = self.options.get("noinject", False)
    self.resublimit = int(self.options.get("resublimit", 5))
    filesdict = {}
    self.task_options_stack = []
    self.task_options = None
    self.task_custom = None
    self.machine = None
    self.resubcnt = 0
    report = dict(results)

    # Do not recurse into jobs that we resubmitted ourselves.
    if (report["info"].get("options") or {}).get("resubmitjob"):
        return

    # Copy all the options from the current task.
    if report["info"].get("options"):
        for key, val in report["info"]["options"].items():
            self.task_options_stack.append(key + "=" + str(val))

    # Copy the machine label from the current task.
    if report["info"].get("machine"):
        self.machine = report["info"]["machine"]["label"]

    self.task_options_stack.append("resubmitjob=true")
    if self.noinject:
        self.task_options_stack.append("free=true")

    if self.task_options_stack:
        self.task_options = ",".join(self.task_options_stack)

    for dropped in report["dropped"]:
        if self.resubcnt >= self.resublimit:
            break
        if os.path.isfile(dropped["path"]):
            if (("PE32" in dropped["type"] or "MS-DOS" in dropped["type"])
                    and "DLL" not in dropped["type"]):
                if dropped["sha256"] not in filesdict:
                    filesdict[dropped["sha256"]] = dropped["path"]
                    self.resubcnt += 1

    if report.get("suricata", {}).get("files"):
        for suricata_file_e in results["suricata"]["files"]:
            if self.resubcnt >= self.resublimit:
                break
            if "file_info" in suricata_file_e:
                if os.path.isfile(suricata_file_e["file_info"]["path"]):
                    ftype = suricata_file_e["file_info"]["type"]
                    if ("PE32" in ftype or "MS-DOS" in ftype) and "DLL" not in ftype:
                        sha256 = suricata_file_e["file_info"]["sha256"]
                        if sha256 not in filesdict:
                            filesdict[sha256] = suricata_file_e["file_info"]["path"]
                            self.resubcnt += 1

    db = Database()
    for e in filesdict:
        if not File(filesdict[e]).get_size():
            continue
        if db.find_sample(sha256=e) is not None:
            continue
        self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"]
        if report["info"].get("custom"):
            self.task_custom = "%s Parent_Custom:%s" % (self.task_custom,
                                                        report["info"]["custom"])
        task_id = db.add_path(file_path=filesdict[e],
                              package="exe",
                              timeout=200,
                              options=self.task_options,
                              priority=1,
                              machine=self.machine,
                              platform=None,
                              custom=self.task_custom,
                              memory=False,
                              enforce_timeout=False,
                              clock=None,
                              tags=None,
                              parent_id=int(report["info"]["id"]))
        if task_id:
            log.info(u"Resubmitexe file \"{0}\" added as task with ID {1}".format(
                filesdict[e], task_id))
        else:
            log.warn("Error adding resubmitexe task to database")
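# Illustration of the option string built above: for a parent task whose
# options were {"procdump": 1}, the stack serializes to
# "procdump=1,resubmitjob=true" (plus ",free=true" when noinject is set).
# The resubmitjob=true marker is what stops a resubmitted job from
# resubmitting its own dropped files again.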
def process(target=None, copy_path=None, task=None, report=False, auto=False,
            capeproc=False, memory_debugging=False):
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    results = {"statistics": {"processing": [], "signatures": [], "reporting": []}}

    if memory_debugging:
        gc.collect()
        log.info("[%s] (1) GC object counts: %d, %d", task_id,
                 len(gc.get_objects()), len(gc.garbage))
    if memory_debugging:
        gc.collect()
        log.info("[%s] (2) GC object counts: %d, %d", task_id,
                 len(gc.get_objects()), len(gc.garbage))

    RunProcessing(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (3) GC object counts: %d, %d", task_id,
                 len(gc.get_objects()), len(gc.garbage))

    RunSignatures(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (4) GC object counts: %d, %d", task_id,
                 len(gc.get_objects()), len(gc.garbage))

    if report:
        if repconf.mongodb.enabled:
            conn, mdata, analyses = _load_mongo_report(task_id)
            if analyses:
                log.debug("Deleting analysis data for Task %s" % task_id)
                for analysis in analyses:
                    for process in analysis.get("behavior", {}).get("processes", []):
                        calls = [ObjectId(call) for call in process["calls"]]
                        mdata.calls.delete_many({"_id": {"$in": calls}})
                    mdata.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
            conn.close()
            log.debug("Deleted previous MongoDB data for Task %s" % task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            try:
                analyses = es.search(index=get_analysis_index(),
                                     query=get_query_by_info_id(task_id))["hits"]["hits"]
                for analysis in analyses:
                    delete_analysis_and_related_calls(analysis["_id"])
            except ESRequestError as e:
                print(e)

        if auto or capeproc:
            reprocess = False
        else:
            reprocess = report
        RunReporting(task=task.to_dict(), results=results,
                     reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if copy_path is not None and cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)

    if memory_debugging:
        gc.collect()
        log.info("[%s] (5) GC object counts: %d, %d", task_id,
                 len(gc.get_objects()), len(gc.garbage))
        for i, obj in enumerate(gc.garbage):
            log.info("[%s] (garbage) GC object #%d: type=%s", task_id, i,
                     type(obj).__name__)
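# Hedged usage sketch: invoking the process() variant above with GC
# diagnostics enabled, which logs object counts at each numbered stage;
# task #1 is an illustrative assumption.
def _example_process_with_gc_logging():
    task = Database().view_task(1)
    process(task=task, report=True, memory_debugging=True)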
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("id", type=str,
                        help="ID of the analysis to process (auto for continuous processing of unprocessed tasks).")
    parser.add_argument("-c", "--caperesubmit", help="Allow CAPE resubmit processing.",
                        action="store_true", required=False)
    parser.add_argument("-d", "--debug", help="Display debug messages",
                        action="store_true", required=False)
    parser.add_argument("-r", "--report", help="Re-generate report",
                        action="store_true", required=False)
    parser.add_argument("-p", "--parallel",
                        help="Number of parallel threads to use (auto mode only).",
                        type=int, required=False, default=1)
    parser.add_argument("-fp", "--failed-processing",
                        help="Reprocess failed processing",
                        action="store_true", required=False, default=False)
    parser.add_argument("-mc", "--maxtasksperchild",
                        help="Max children tasks per worker",
                        action="store", type=int, required=False, default=7)
    parser.add_argument("-md", "--memory-debugging",
                        help="Enable logging garbage collection related info",
                        action="store_true", required=False, default=False)
    parser.add_argument("-pt", "--processing-timeout",
                        help="Max amount of time spent in processing before we fail a task",
                        action="store", type=int, required=False, default=300)
    testing_args = parser.add_argument_group("Signature testing options")
    testing_args.add_argument("-sig", "--signatures",
                              help="Re-execute signatures on the report; doesn't work for signatures using self.get_raw_argument, use self.get_argument",
                              action="store_true", default=False, required=False)
    testing_args.add_argument("-sn", "--signature-name",
                              help="Run only one signature. To be used with --signatures. Example: -sig -sn cape_detected_threat",
                              action="store", default=False, required=False)
    testing_args.add_argument("-jr", "--json-report",
                              help="Path to JSON report, only if data is not in mongo/default report location",
                              action="store", default=False, required=False)
    args = parser.parse_args()

    init_yara()
    init_modules()

    if args.id == "auto":
        init_logging(auto=True, debug=args.debug)
        autoprocess(parallel=args.parallel,
                    failed_processing=args.failed_processing,
                    maxtasksperchild=args.maxtasksperchild,
                    memory_debugging=args.memory_debugging,
                    processing_timeout=args.processing_timeout)
    else:
        if not os.path.exists(os.path.join(CUCKOO_ROOT, "storage", "analyses", args.id)):
            sys.exit(red("\n[-] Analysis folder doesn't exist anymore\n"))
        init_logging(tid=args.id, debug=args.debug)
        task = Database().view_task(int(args.id))

        if args.signatures:
            conn = False
            report = False
            results = None
            # Check Mongo first.
            if repconf.mongodb.enabled:
                conn, _, results = _load_mongo_report(int(args.id), return_one=True)
            if not results:
                # Fall back to the JSON report on disk.
                report = os.path.join(CUCKOO_ROOT, "storage", "analyses",
                                      args.id, "reports", "report.json")
                if not os.path.exists(report):
                    if args.json_report and os.path.exists(args.json_report):
                        report = args.json_report
                    else:
                        sys.exit("File {} doesn't exist".format(report))
                if report:
                    with open(report) as f:
                        results = json.load(f)
            if results is not None:
                RunSignatures(task=task.to_dict(), results=results).run(args.signature_name)
        else:
            process(task=task, report=args.report, capeproc=args.caperesubmit,
                    memory_debugging=args.memory_debugging)
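# Hedged shell examples for the entry point above; the script name
# (process.py) is the conventional one but is an assumption here.
#
#   python process.py auto -p 4 -md                         # continuous mode, 4 workers, GC logging
#   python process.py 1234 -r -d                            # re-generate the report for task 1234
#   python process.py 1234 -sig -sn cape_detected_threat    # re-run a single signature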
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7,
                memory_debugging=False, processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # pool = multiprocessing.Pool(parallel, init_worker)
    try:
        memory_limit()
        log.info("Processing analysis data")
        with pebble.ProcessPool(max_workers=parallel,
                                max_tasks=maxtasksperchild,
                                initializer=init_worker) as pool:
            # CAUTION - big ugly loop ahead.
            while count < maxcount or not maxcount:
                # If not enough free disk space is available, then we print an
                # error message and wait another round (this check is ignored
                # when the freespace configuration variable is set to zero).
                if cfg.cuckoo.freespace:
                    # Resolve the full base path to the analysis folder, just
                    # in case somebody decides to make a symbolic link out of it.
                    dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                    need_space, space_available = free_space_monitor(
                        dir_path, return_value=True, processing=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! (Only %d MB!). You can change the limit in cuckoo.conf -> freespace",
                            space_available)
                        time.sleep(60)
                        continue

                # If still full, don't add more (necessary despite pool).
                if len(pending_task_id_map) >= parallel:
                    time.sleep(5)
                    continue

                if failed_processing:
                    tasks = db.list_tasks(status=TASK_FAILED_PROCESSING,
                                          limit=parallel,
                                          order_by=Task.completed_on.asc())
                else:
                    tasks = db.list_tasks(status=TASK_COMPLETED,
                                          limit=parallel,
                                          order_by=Task.completed_on.asc())
                added = False
                # For loop that adds only one task per round; the reason is
                # that we shouldn't overshoot maxcount.
                for task in tasks:
                    # Not-so-efficient lock.
                    if pending_task_id_map.get(task.id):
                        continue
                    log.info("Processing analysis data for Task #%d", task.id)
                    if task.category != "url":
                        sample = db.view_sample(task.sample_id)
                        copy_path = os.path.join(CUCKOO_ROOT, "storage",
                                                 "binaries", str(task.id),
                                                 sample.sha256)
                    else:
                        sample = None
                        copy_path = None

                    args = task.target, copy_path
                    kwargs = dict(report=True, auto=True, task=task,
                                  memory_debugging=memory_debugging)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (before) GC object counts: %d, %d",
                                 task.id, len(gc.get_objects()), len(gc.garbage))
                    # result = pool.apply_async(process, args, kwargs)
                    future = pool.schedule(process, args, kwargs,
                                           timeout=processing_timeout)
                    pending_future_map[future] = task.id
                    pending_task_id_map[task.id] = future
                    future.add_done_callback(processing_finished)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (after) GC object counts: %d, %d",
                                 task.id, len(gc.get_objects()), len(gc.garbage))
                    count += 1
                    added = True

                    # URL tasks have no sample, so guard the binary cleanup.
                    if sample is not None:
                        copy_origin_path = os.path.join(CUCKOO_ROOT, "storage",
                                                        "binaries", sample.sha256)
                        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_origin_path):
                            os.unlink(copy_origin_path)
                    break

                if not added:
                    # Don't hog the CPU.
                    time.sleep(5)
    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except MemoryError:
        mem = get_memory() / 1024 / 1024
        print("Remaining: %.2f GB" % mem)
        sys.stderr.write("\n\nERROR: Memory Exception\n")
        sys.exit(1)
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
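# Hedged usage sketch: calling autoprocess() above directly with the same
# defaults main() passes in auto mode; the worker count is an illustrative
# choice.
def _example_autoprocess():
    autoprocess(parallel=2, failed_processing=False, maxtasksperchild=7,
                memory_debugging=False, processing_timeout=300)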
def run(self, results):
    self.task_options_stack = []
    self.task_options = None
    self.task_custom = None
    filesdict = {}
    report = dict(results)
    db = Database()
    detections = set()

    self.task_options = self.task["options"]
    if self.task_options and "disable_cape=1" in self.task_options:
        return

    parent_package = report["info"].get("package")

    ##### Initial static hits from CAPE's yara signatures #####
    if "target" in results:
        target = results["target"]
        if "file" in target:
            file = target["file"]
            if "cape_yara" in file:
                for entry in file["cape_yara"]:
                    self.process_cape_yara(entry, detections)

    # procdump, CAPE and dropped results all carry the same file structure.
    for block in ("procdump", "CAPE", "dropped"):
        if results.get(block) is not None:
            for file in results[block]:
                if "cape_yara" in file:
                    for entry in file["cape_yara"]:
                        self.process_cape_yara(entry, detections)

    ##### Dynamic CAPE hits #####
    ##### Packers, injection or other generic dumping #####
    if "signatures" in results:
        for entry in results["signatures"]:
            if entry["name"] in ("InjectionCreateRemoteThread",
                                 "InjectionProcessHollowing",
                                 "InjectionSetWindowLong",
                                 "InjectionInterProcess"):
                if "package" in report["info"]:
                    if parent_package == "doc":
                        detections.add("Injection_doc")
                        continue
                    if parent_package in ("dll", "regsvr"):
                        detections.add("Injection_dll")
                        continue
                    if parent_package == "zip":
                        detections.add("Injection_zip")
                        continue
                    if parent_package == "pdf":
                        detections.add("Injection_pdf")
                        continue
                    detections.add("Injection")
            elif entry["name"] == "Extraction":
                if "package" in report["info"]:
                    if parent_package == "doc":
                        # detections.add("Extraction_doc")
                        # Word triggers this so removed
                        continue
                    if parent_package == "zip":
                        detections.add("Extraction_zip")
                        continue
                    if parent_package == "ps1":
                        detections.add("Extraction_ps1")
                        continue
                    if parent_package == "dll":
                        detections.add("Extraction_dll")
                        continue
                    if parent_package == "regsvr":
                        detections.add("Extraction_regsvr")
                        continue
                    if parent_package == "jar":
                        detections.add("Extraction_jar")
                        continue
                    detections.add("Extraction")
            elif entry["name"] == "Compression":
                if "package" in report["info"]:
                    if parent_package == "zip":
                        detections.add("Compression_zip")
                        continue
                    if parent_package in ("dll", "regsvr"):
                        detections.add("Compression_dll")
                        continue
                    if parent_package == "doc":
                        detections.add("Compression_doc")
                        continue
                    detections.add("Compression")
            elif entry["name"] == "Doppelganging":
                if "package" in report["info"]:
                    detections.add("Doppelganging")

            ##### Specific malware family packages #####
            elif entry["name"] == "PlugX":
                if "package" in report["info"]:
                    if parent_package == "PlugXPayload":
                        detections.add("PlugXPayload")
                        continue
                    if parent_package == "zip":
                        detections.add("PlugX_zip")
                        continue
                    if parent_package == "doc":
                        detections.add("PlugX_doc")
                        continue
                    if parent_package == "dll":
                        detections.add("PlugX_dll")
                        continue
                    detections.add("PlugX")
            elif entry["name"] == "PlugX fuzzy":
                if "package" in report["info"]:
                    if parent_package == "PlugXPayload":
                        detections.add("PlugXPayload_fuzzy")
                        continue
                    if parent_package == "zip":
                        detections.add("PlugX_fuzzy_zip")
                        continue
                    if parent_package == "doc":
                        detections.add("PlugX_fuzzy_doc")
                        continue
                    if parent_package == "dll":
                        detections.add("PlugX_fuzzy_dll")
                        continue
                    detections.add("PlugX_fuzzy")
            elif entry["name"] == "EvilGrab":
                if "package" in report["info"]:
                    detections.add("EvilGrab")

    # We only want to submit a single job if we have a malware detection.
    # A given package should do everything we need for its respective family.
    package = None

    if "PlugX_fuzzy" in detections:
        package = "PlugX_fuzzy"
    elif "PlugXPayload_fuzzy" in detections:
        package = "PlugXPayload_fuzzy"
    elif "PlugX_fuzzy_zip" in detections:
        package = "PlugX_fuzzy_zip"
    elif "PlugX_fuzzy_doc" in detections:
        package = "PlugX_fuzzy_doc"
    elif "PlugX_fuzzy_dll" in detections:
        package = "PlugX_fuzzy_dll"

    # We may have both 'fuzzy' and non-'fuzzy' hits,
    # but only want to submit the non-fuzzy package.
    if "PlugX" in detections:
        package = "PlugX"
    elif "PlugXPayload" in detections:
        package = "PlugXPayload"
    elif "PlugX_zip" in detections:
        package = "PlugX_zip"
    elif "PlugX_doc" in detections:
        package = "PlugX_doc"
    elif "PlugX_dll" in detections:
        package = "PlugX_dll"

    if "EvilGrab" in detections:
        package = "EvilGrab"

    if "Sedreco" in detections:
        if parent_package == "dll":
            package = "Sedreco_dll"
        else:
            package = "Sedreco"

    if "Cerber" in detections:
        package = "Cerber"

    if "TrickBot" in detections:
        package = "TrickBot"

    if "Ursnif" in detections:
        package = "Ursnif"

    # We want to switch off automatic process dumps in CAPE submissions.
    if self.task_options and "procdump=1" in self.task_options:
        self.task_options = self.task_options.replace(u"procdump=1",
                                                      u"procdump=0", 1)
    if self.task_options_stack:
        self.task_options = ",".join(self.task_options_stack)

    if package and package != parent_package:
        self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"]
        if report["info"].get("custom"):
            self.task_custom = "%s Parent_Custom:%s" % (self.task_custom,
                                                        report["info"]["custom"])
        task_id = db.add_path(file_path=self.task["target"],
                              package=package,
                              timeout=self.task["timeout"],
                              options=self.task_options,
                              # Increase priority to expedite the related submission.
                              priority=self.task["priority"] + 1,
                              machine=self.task["machine"],
                              platform=self.task["platform"],
                              memory=self.task["memory"],
                              enforce_timeout=self.task["enforce_timeout"],
                              clock=None,
                              tags=None,
                              parent_id=int(report["info"]["id"]))
        if task_id:
            log.info(u"CAPE detection on file \"{0}\": {1} - added as CAPE task with ID {2}".format(
                self.task["target"], package, task_id))
        else:
            log.warn("Error adding CAPE task to database: {0}".format(package))
    else:
        # Nothing submitted, only 'dumpers' left.
        if parent_package in cape_package_list:
            return
        self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"]
        if report["info"].get("custom"):
            self.task_custom = "%s Parent_Custom:%s" % (self.task_custom,
                                                        report["info"]["custom"])
        for dumper in detections:
            task_id = db.add_path(file_path=self.task["target"],
                                  package=dumper,
                                  timeout=self.task["timeout"],
                                  options=self.task_options,
                                  # Increase priority to expedite the related submission.
                                  priority=self.task["priority"] + 1,
                                  machine=self.task["machine"],
                                  platform=self.task["platform"],
                                  memory=self.task["memory"],
                                  enforce_timeout=self.task["enforce_timeout"],
                                  clock=None,
                                  tags=None,
                                  parent_id=int(report["info"]["id"]))
            if task_id:
                log.info(u"CAPE detection on file \"{0}\": {1} - added as CAPE task with ID {2}".format(
                    self.task["target"], dumper, task_id))
            else:
                log.warn("Error adding CAPE task to database: {0}".format(dumper))
    return
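# Hedged illustration of the package-selection precedence above: a fuzzy
# PlugX hit is overridden by a non-fuzzy one, and the later family checks
# (EvilGrab etc.) override both, since each subsequent `if` reassigns package.
def _example_package_precedence():
    detections = {"PlugX_fuzzy", "PlugX", "EvilGrab"}
    package = None
    if "PlugX_fuzzy" in detections:
        package = "PlugX_fuzzy"
    if "PlugX" in detections:
        package = "PlugX"
    if "EvilGrab" in detections:
        package = "EvilGrab"
    assert package == "EvilGrab"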