def process_results(self):
    """Process the analysis results and generate the enabled reports."""
    results = RunProcessing(task=self.task.to_dict()).run()
    RunSignatures(results=results).run()
    RunReporting(task=self.task.to_dict(), results=results).run()

    is_file_task = self.task.category == "file"

    # Optionally remove the original sample once reporting is done.
    if is_file_task and self.cfg.cuckoo.delete_original:
        if os.path.exists(self.task.target):
            try:
                os.remove(self.task.target)
            except OSError as e:
                log.error("Unable to delete original file at path "
                          "\"%s\": %s", self.task.target, e)
        else:
            log.warning("Original file does not exist anymore: \"%s\": "
                        "File not found.", self.task.target)

    # Optionally remove the working copy of the sample as well.
    if is_file_task and self.cfg.cuckoo.delete_bin_copy:
        if os.path.exists(self.binary):
            try:
                os.remove(self.binary)
            except OSError as e:
                log.error("Unable to delete the copy of the original file at path \"%s\": %s", self.binary, e)
        else:
            log.warning("Copy of the original file does not exist anymore: \"%s\": File not found", self.binary)

    log.info("Task #%d: reports generation completed (path=%s)",
             self.task.id, self.storage)
    return True
def process(target=None, copy_path=None, task=None, report=False, auto=False):
    """Run processing and signature modules for *task*, optionally report.

    When *auto* is set, the original sample and/or its working copy are
    deleted afterwards according to the cuckoo configuration.
    """
    task_id = task["id"]
    # The logging framework already timestamps every record, so the manual
    # "str(datetime.datetime.now()) + ..." prefixes were removed; lazy
    # %-style arguments also avoid building the message when DEBUG is off.
    log.debug("WORKER - Khaled: process(task=%s) running proces", task_id)
    log.debug("WORKER - Khaled: process(task=%s) RunProcessing started", task_id)
    results = RunProcessing(task=task).run()
    log.debug("WORKER - Khaled: process(task=%s) done RunProcessing, starting RunSignatures", task_id)
    RunSignatures(results=results).run()
    log.debug("WORKER - Khaled: process(task=%s) Done RunSignatures, moving into reporting", task_id)
    if report:
        RunReporting(task=task, results=results).run()
    if auto:
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and copy_path and \
                os.path.exists(copy_path):
            os.unlink(copy_path)
    log.debug("WORKER - Khaled: process(task=%s) task is DONE", task_id)
def do(aid, report=False):
    """Run processing and signatures for analysis *aid*; report if asked."""
    analysis_results = RunProcessing(task_id=aid).run()
    RunSignatures(results=analysis_results).run()
    if not report:
        return
    RunReporting(task_id=aid, results=analysis_results).run()
    Database().set_status(aid, TASK_REPORTED)
def process(target=None, copy_path=None, task=None):
    """Process, run signatures and report for *task*, then clean up files."""
    analysis_results = RunProcessing(task=task).run()
    RunSignatures(results=analysis_results).run()
    RunReporting(task=task, results=analysis_results).run()

    # Honour the configured cleanup options for the sample files.
    delete_original = cfg.cuckoo.delete_original
    if delete_original and os.path.exists(target):
        os.unlink(target)
    delete_copy = cfg.cuckoo.delete_bin_copy
    if delete_copy and copy_path and os.path.exists(copy_path):
        os.unlink(copy_path)
def process_results(self):
    """Process the analysis results and generate the enabled reports."""
    # Shared results container consumed by every processing and reporting
    # module; its structure can be inspected through the JSON dump in the
    # analysis' reports folder (if jsondump is enabled).
    results = {
        "statistics": {
            "processing": [],
            "signatures": [],
            "reporting": [],
        },
    }

    GetFeeds(results=results).run()
    RunProcessing(task=self.task.to_dict(), results=results).run()
    RunSignatures(task=self.task.to_dict(), results=results).run()
    RunReporting(task=self.task.to_dict(), results=results).run()

    file_task = self.task.category == "file"

    # Optionally delete the original sample.
    if file_task and self.cfg.cuckoo.delete_original:
        if os.path.exists(self.task.target):
            try:
                os.remove(self.task.target)
            except OSError as e:
                log.error("Task #{0}: Unable to delete original file at path '{1}': {2}".format(self.task.id, self.task.target, e))
        else:
            log.warning("Task #{0}: Original file does not exist anymore: '{1}': File not found.".format(self.task.id, self.task.target))

    # Optionally delete the working copy of the sample.
    if file_task and self.cfg.cuckoo.delete_bin_copy:
        if os.path.exists(self.binary):
            try:
                os.remove(self.binary)
            except OSError as e:
                log.error("Task #{0}: Unable to delete the copy of the original file at path '{1}': {2}".format(self.task.id, self.binary, e))
        else:
            log.warning("Task #{0}: Copy of the original file does not exist anymore: '{1}': File not found".format(self.task.id, self.binary))

    log.info("Task #{0}: reports generation completed (path={1})".format(
        self.task.id, self.storage))
    return True
def process(aid, target=None, copy_path=None, report=False, auto=False):
    """Process analysis *aid*, optionally report, optionally delete files.

    :param aid: analysis/task id.
    :param target: path of the original sample (may be None).
    :param copy_path: path of the binary copy (may be None).
    :param report: run the reporting modules and mark the task reported.
    :param auto: perform the configured file cleanup afterwards.
    """
    results = RunProcessing(task_id=aid).run()
    RunSignatures(results=results).run()
    if report:
        RunReporting(task_id=aid, results=results).run()
        Database().set_status(aid, TASK_REPORTED)
    if auto:
        # BUG FIX: target/copy_path default to None and os.path.exists(None)
        # raises TypeError (exists() only swallows OSError/ValueError), so
        # guard both paths before testing them.
        if cfg.cuckoo.delete_original and target and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and copy_path and os.path.exists(copy_path):
            os.unlink(copy_path)
def process_results(self):
    """Process the analysis results and generate the enabled reports.

    Runs feeds, processing modules and signatures into a shared results
    container, then attempts report generation; a failure there flags the
    task as TASK_FAILED_REPORTING.  Optionally deletes the original sample
    and its working copy.  Returns True.
    """
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    GetFeeds(results=results).run()
    RunProcessing(task_id=self.task.id, results=results).run()
    RunSignatures(task_id=self.task.id, results=results).run()
    try:
        log.info("Task #%d: start to generate report" % self.task.id)
        report_result = generate_result(self.task, results)
        RunReporting(task_id=self.task.id, results=report_result).run()
        Database().set_status(self.task.id, TASK_REPORTED)
    except Exception as e:
        log.error("#%s generate report failed, msg:%s" % (self.task.id, e))
        self.db.set_status(self.task.id, TASK_FAILED_REPORTING)

    # If the target is a file and the user enabled the option,
    # delete the original copy.
    if self.task.category == "file" and self.cfg.cuckoo.delete_original and self.task.id > 0:
        if not os.path.exists(self.task.target):
            log.warning("Original file does not exist anymore: \"%s\": "
                        "File not found.", self.task.target)
        else:
            try:
                os.remove(self.task.target)
            except OSError as e:
                log.error("Unable to delete original file at path "
                          "\"%s\": %s", self.task.target, e)

    # If the target is a file and the user enabled the delete copy of
    # the binary option, then delete the copy.
    # BUG FIX: this branch compared the Task object itself ("self.task > 0"),
    # which raises TypeError on Python 3; compare the task id instead,
    # mirroring the delete_original branch above.
    if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy and self.task.id > 0:
        if not os.path.exists(self.binary):
            log.warning("Copy of the original file does not exist anymore: \"%s\": File not found", self.binary)
        else:
            try:
                os.remove(self.binary)
            except OSError as e:
                log.error("Unable to delete the copy of the original file at path \"%s\": %s", self.binary, e)

    log.info("Task #%d: reports generation completed (path=%s)",
             self.task.id, self.storage)
    return True
def process(task_id, target=None, copy_path=None, report=False, auto=False):
    """Process one analysis, optionally report and clean up sample files.

    :param task_id: integer task id (asserted).
    :param target: path of the original sample (may be None).
    :param copy_path: path of the binary copy (may be None).
    :param report: run reporting modules and mark the task reported.
    :param auto: perform the configured file cleanup afterwards.
    """
    assert isinstance(task_id, int)

    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    GetFeeds(results=results).run()
    RunProcessing(task_id=task_id, results=results).run()
    RunSignatures(task_id=task_id, results=results).run()
    if report:
        RunReporting(task_id=task_id, results=results).run()
        Database().set_status(task_id, TASK_REPORTED)
    if auto:
        # BUG FIX: both paths default to None and os.path.exists(None)
        # raises TypeError, so check truthiness before testing the path.
        if cfg.cuckoo.delete_original and target and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and copy_path and os.path.exists(copy_path):
            os.unlink(copy_path)
def process(target=None, copy_path=None, task=None, report=False, auto=False):
    """Process *task*, purge stale DB report data, report, and clean up.

    Before reporting, any previous MongoDB/Elasticsearch analysis data for
    the same task id is deleted so the new report replaces it cleanly.
    """
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    results["statistics"] = {}
    results["statistics"]["processing"] = list()
    results["statistics"]["signatures"] = list()
    results["statistics"]["reporting"] = list()
    GetFeeds(results=results).run()
    RunProcessing(task=task, results=results).run()
    RunSignatures(task=task, results=results).run()
    task_id = task["id"]
    if report:
        if repconf.mongodb.enabled:
            host = repconf.mongodb.host
            port = repconf.mongodb.port
            db = repconf.mongodb.db
            conn = MongoClient(host, port)
            mdata = conn[db]
            analyses = mdata.analysis.find({"info.id": int(task_id)})
            if analyses.count() > 0:
                log.debug("Deleting analysis data for Task %s" % task_id)
                for analysis in analyses:
                    # Remove the per-process call documents first, then the
                    # analysis document itself.
                    for process in analysis["behavior"]["processes"]:
                        for call in process["calls"]:
                            mdata.calls.remove({"_id": ObjectId(call)})
                    mdata.analysis.remove({"_id": ObjectId(analysis["_id"])})
                conn.close()
                log.debug("Deleted previous MongoDB data for Task %s" % task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            analyses = es.search(index=fullidx, doc_type="analysis",
                                 q="info.id: \"%s\"" % task_id)["hits"]["hits"]
            if analyses:
                for analysis in analyses:
                    esidx = analysis["_index"]
                    esid = analysis["_id"]
                    # Check if behavior exists
                    if analysis["_source"]["behavior"]:
                        for process in analysis["_source"]["behavior"]["processes"]:
                            for call in process["calls"]:
                                es.delete(
                                    index=esidx,
                                    doc_type="calls",
                                    id=call,
                                )
                    # Delete the analysis results
                    es.delete(
                        index=esidx,
                        doc_type="analysis",
                        id=esid,
                    )

        RunReporting(task=task, results=results).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        # BUG FIX: target/copy_path default to None and os.path.exists(None)
        # raises TypeError, so guard both before testing the path.
        if cfg.cuckoo.delete_original and target and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and copy_path and os.path.exists(copy_path):
            os.unlink(copy_path)
def process(target=None, copy_path=None, task=None, report=False, auto=False,
            capeproc=False, memory_debugging=False):
    """Process *task*, purge stale DB data, report, and clean up samples.

    :param memory_debugging: when True, force GC collections at each stage
        and log live/garbage object counts to track down leaks.
    """
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    results = {
        "statistics": {
            "processing": [],
            "signatures": [],
            "reporting": []
        }
    }
    if memory_debugging:
        gc.collect()
        log.info("[%s] (1) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    if memory_debugging:
        gc.collect()
        log.info("[%s] (2) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunProcessing(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (3) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunSignatures(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (4) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))

    if report:
        if repconf.mongodb.enabled:
            host = repconf.mongodb.host
            port = repconf.mongodb.port
            db = repconf.mongodb.db
            conn = MongoClient(host, port=port,
                               username=repconf.mongodb.get("username", None),
                               password=repconf.mongodb.get("password", None),
                               authSource=db)
            mdata = conn[db]
            analyses = mdata.analysis.find({"info.id": int(task_id)})
            if analyses.count() > 0:
                log.debug("Deleting analysis data for Task %s" % task_id)
                for analysis in analyses:
                    # Remove the per-process call documents first, then the
                    # analysis document itself.
                    for process in analysis["behavior"].get("processes", []):
                        for call in process["calls"]:
                            mdata.calls.remove({"_id": ObjectId(call)})
                    mdata.analysis.remove({"_id": ObjectId(analysis["_id"])})
                conn.close()
                log.debug("Deleted previous MongoDB data for Task %s" % task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            analyses = es.search(index=fullidx, doc_type="analysis",
                                 q="info.id: \"%s\"" % task_id)["hits"]["hits"]
            if analyses:
                for analysis in analyses:
                    esidx = analysis["_index"]
                    esid = analysis["_id"]
                    # Check if behavior exists
                    if analysis["_source"]["behavior"]:
                        for process in analysis["_source"]["behavior"]["processes"]:
                            for call in process["calls"]:
                                es.delete(
                                    index=esidx,
                                    doc_type="calls",
                                    id=call,
                                )
                    # Delete the analysis results
                    es.delete(
                        index=esidx,
                        doc_type="analysis",
                        id=esid,
                    )

        # When invoked from the scheduler (auto) or the CAPE processor the
        # data is fresh, so reporting must not run in "reprocess" mode.
        if auto or capeproc:
            reprocess = False
        else:
            reprocess = report
        RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        # BUG FIX: target/copy_path default to None and os.path.exists(None)
        # raises TypeError, so guard both before testing the path (the later
        # revision of this function added the copy_path guard; target needs
        # the same treatment).
        if cfg.cuckoo.delete_original and target and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and copy_path and os.path.exists(copy_path):
            os.unlink(copy_path)

    if memory_debugging:
        gc.collect()
        log.info("[%s] (5) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
        for i, obj in enumerate(gc.garbage):
            log.info("[%s] (garbage) GC object #%d: type=%s", task_id, i, type(obj).__name__)
def process(task_id, target=None, copy_path=None, report=False, auto=False):
    """Process one analysis and, for negative (parent) task ids, merge the
    reports of all finished sub tasks into a single parent report.

    :param task_id: integer task id; negative ids denote a parent task
        whose sub tasks are aggregated below.
    :param target: path of the original sample (may be None).
    :param copy_path: path of the binary copy (may be None).
    :param report: run reporting and update the task status.
    :param auto: perform the configured sample-file cleanup.
    """
    assert isinstance(task_id, int)
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    results = {}
    db = Database()
    # Only run processing when the analysis actually produced logs.
    if os.path.exists(
            os.path.join(REPORT_ROOT, "storage", "analyses", str(task_id),
                         "logs")):
        GetFeeds(results=results).run()
        RunProcessing(task_id=task_id, results=results).run()
        RunSignatures(task_id=task_id, results=results).run()
    if report:
        try:
            task = db.view_task(task_id)
            results = generate_result(task, results)
            RunReporting(task_id=task_id, results=results).run()
            db.set_status(task_id, TASK_REPORTED)
        except Exception as e:
            log.error("Task #%d: reports generation failed: %s", task_id, e)
            db.set_status(task_id, TASK_FAILED_REPORTING)
        finally:
            # Drop the (potentially large) results payload eagerly.
            del results
    if auto:
        # NOTE(review): target/copy_path may be None here, in which case
        # os.path.exists() raises TypeError — confirm callers always pass
        # real paths when auto=True.
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)
    # NOTE(review): `task` is only bound inside the `if report:` branch
    # above; reaching the aggregation paths below with report=False would
    # raise NameError — presumably parent tasks are always processed with
    # report=True; verify against callers.
    # Parent task, "mode < 2": merge all sub task reports into one final
    # report built around the highest-scoring sub task.
    if task_id < 0 and task.mode < 2:
        json_reports = []
        targets = []
        started = []
        completed = []
        sub_tasks = db.list_subtasks(task_id)
        for sub_task in sub_tasks:
            # Bail out (and retry later) until every sub task reached a
            # terminal state.
            if sub_task.status not in [
                    TASK_REPORTED, TASK_FAILED_REPORTING,
                    TASK_FAILED_ANALYSIS, TASK_FAILED_PROCESSING
            ]:
                return
            json_path = path.join(REPORT_ROOT, "storage", "analyses",
                                  str(sub_task.id), "reports", "report.json")
            if path.exists(json_path):
                json_report = json.load(open(json_path))
                json_reports.append(json_report)
                targets.append(sub_task.target)
                started.append(sub_task.started_on)
                completed.append(sub_task.completed_on)
        scores = [report["scores"] for report in json_reports]
        # get the highest scores report
        base_report = json_reports[scores.index(max(scores))]
        base_assessment_result = {
            "scores": base_report["scores"],
            "severity": base_report["severity"],
            "summary": base_report["summary"],
            "details": base_report["details"],
            "description": base_report["description"]
        }
        # get parent_task details
        parent_task = db.view_parent_task(task_id)
        log.debug(
            "#%d: sub tasks reported, start to generate the final report." %
            parent_task.id)
        # get parent_task start and complete time
        started = min(started)
        completed = max(completed)
        db.set_time(parent_task.id, "started_on", started)
        db.set_time(parent_task.id, "completed_on", completed)
        duration = (completed - started).seconds
        targetdetail = {}
        if os.path.exists(parent_task.target):
            filedetail = File(parent_task.target).get_all()
            fmt_file = pd_fmt_file(parent_task.target.encode("utf-8"))
            targetdetail = {
                "target": filedetail["name"],
                "size": filedetail["size"],
                # pd_fmt_file appears to return (..., match-ratio, type) —
                # TODO confirm the tuple layout.
                "extnomatch": 1 - fmt_file[1],
                "type": fmt_file[2],
                "md5": filedetail["md5"],
                "sha1": filedetail["sha1"]
            }
        report_result = {
            "category": parent_task.category,
            "targetdetail": targetdetail,
            "reporttime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "duration": duration,
            "started": started.strftime("%Y-%m-%d %H:%M:%S"),
            "ended": completed.strftime("%Y-%m-%d %H:%M:%S"),
        }
        report_result.update(base_assessment_result)
        report_result["file_reports"] = json_reports
        try:
            reports_path = os.path.join(REPORT_ROOT, "storage", "analyses",
                                        str(parent_task.id), "reports")
            if not os.path.exists(reports_path):
                os.makedirs(reports_path)
            RunReporting(task_id=parent_task.id, results=report_result).run()
            db.set_status(parent_task.id, TASK_REPORTED)
            log.info("Task #%d: reports generation completed (path=%s)",
                     parent_task.id, reports_path)
        except Exception as e:
            log.error("#%s generate report failed, msg:%s" %
                      (parent_task.id, e))
            db.set_status(parent_task.id, TASK_FAILED_REPORTING)
        finally:
            del report_result
        # remove uncompressed dir and delete all sub tasks and their storage if they exist
        _tail = "_z1p2d1r"
        uncompressed_dir = parent_task.target + _tail
        if path.exists(uncompressed_dir):
            shutil.rmtree(uncompressed_dir, ignore_errors=True)
        try:
            for sub_task in sub_tasks:
                db.delete_task(sub_task.id)
                db.delete_result(sub_task.id)
                db.delete_sub_task(sub_task.id)
                task_path = path.join(REPORT_ROOT, "storage", "analyses",
                                      str(sub_task.id))
                if path.exists(task_path):
                    shutil.rmtree(task_path, True)
            log.info("Delete submitted tasks successfully")
        except Exception as e:
            log.info("Delete submitted tasks failed, msg: %s" % e)
    # Parent task, "mode == 2": promote the highest-scoring sub task's whole
    # report directory to be the parent's report instead of merging.
    if task_id < 0 and task.mode == 2:
        json_reports = []
        targets = []
        report_path = []
        sub_tasks = db.list_subtasks(task_id)
        for sub_task in sub_tasks:
            if sub_task.status not in [
                    TASK_REPORTED, TASK_FAILED_REPORTING,
                    TASK_FAILED_ANALYSIS, TASK_FAILED_PROCESSING
            ]:
                return
            json_path = path.join(REPORT_ROOT, "storage", "analyses",
                                  str(sub_task.id), "reports", "report.json")
            if path.exists(json_path):
                json_report = json.load(open(json_path))
                json_reports.append(json_report)
                targets.append(sub_task.target)
                report_path.append(
                    path.join(REPORT_ROOT, "storage", "analyses",
                              str(sub_task.id)))
        # Index of the sub task with the highest malscore.
        max_malscore_index = max(enumerate(json_reports),
                                 key=lambda x: x[1]["scores"])[0]
        parent_task = db.view_parent_task(task_id)
        db.set_time(parent_task.id, "started_on",
                    json_reports[max_malscore_index]["started"])
        db.set_time(parent_task.id, "completed_on",
                    json_reports[max_malscore_index]["completed"])
        reports_path = path.join(REPORT_ROOT, "storage", "analyses",
                                 str(parent_task.id))
        if not path.exists(reports_path):
            shutil.copytree(report_path[max_malscore_index], reports_path)
        db.set_status(parent_task.id, TASK_REPORTED)
        log.info("Task #%d: reports generation completed (path=%s)",
                 parent_task.id, reports_path)
        try:
            for sub_task in sub_tasks:
                # TODO: delete negative task of mode==2
                db.delete_task(sub_task.id)
                db.delete_result(sub_task.id)
                db.delete_sub_task(sub_task.id)
                task_path = path.join(REPORT_ROOT, "storage", "analyses",
                                      str(sub_task.id))
                if path.exists(task_path):
                    shutil.rmtree(task_path, True)
            log.info("Delete submitted tasks successfully")
        except Exception as e:
            log.info("Delete submitted tasks failed, msg: %s" % e)
    gc.collect()
def process(target=None, copy_path=None, task=None, report=False, auto=False,
            capeproc=False, memory_debugging=False):
    """Process *task*, purge stale DB data, report, and clean up samples.

    :param memory_debugging: when True, force GC collections at each stage
        and log live/garbage object counts to track down leaks.
    """
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    results = {"statistics": {"processing": [], "signatures": [], "reporting": []}}
    if memory_debugging:
        gc.collect()
        log.info("[%s] (1) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    if memory_debugging:
        gc.collect()
        log.info("[%s] (2) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunProcessing(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (3) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunSignatures(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (4) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))

    if report:
        if repconf.mongodb.enabled:
            conn, mdata, analyses = _load_mongo_report(task_id)
            if analyses:
                log.debug("Deleting analysis data for Task %s" % task_id)
                for analysis in analyses:
                    for process in analysis.get("behavior", {}).get("processes", []):
                        # Batch-delete the call documents for this process.
                        calls = [ObjectId(call) for call in process["calls"]]
                        mdata.calls.delete_many({"_id": {"$in": calls}})
                    mdata.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
                conn.close()
                log.debug("Deleted previous MongoDB data for Task %s" % task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            try:
                analyses = es.search(
                    index=get_analysis_index(),
                    query=get_query_by_info_id(task_id)
                )["hits"]["hits"]
                if analyses:
                    for analysis in analyses:
                        delete_analysis_and_related_calls(analysis["_id"])
            except ESRequestError as e:
                # BUG FIX: this used print(e); route the failure through the
                # module logger so it lands in the log files with context.
                log.error("Failed deleting ElasticSearch data for Task %s: %s",
                          task_id, e)

        # Fresh data from the scheduler/CAPE processor must not run
        # reporting in "reprocess" mode.
        if auto or capeproc:
            reprocess = False
        else:
            reprocess = report
        RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        # BUG FIX: target defaults to None and os.path.exists(None) raises
        # TypeError — guard it the same way copy_path already is.
        if cfg.cuckoo.delete_original and target is not None and os.path.exists(target):
            os.unlink(target)
        if copy_path is not None and cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)

    if memory_debugging:
        gc.collect()
        log.info("[%s] (5) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
        for i, obj in enumerate(gc.garbage):
            log.info("[%s] (garbage) GC object #%d: type=%s", task_id, i, type(obj).__name__)
def process(target=None, copy_path=None, task=None, report=False, auto=False,
            capeproc=False, memory_debugging=False):
    """Run processing, signatures and (optionally) reporting for *task*.

    With memory_debugging enabled, a forced GC collection plus object
    counts are logged at every stage; with auto, the configured sample
    cleanup runs at the end.
    """
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    set_formatter_fmt(task_id)

    # Shared container filled by the processing/signature modules and
    # consumed by the reporting modules.
    results = {"statistics": {"processing": [], "signatures": [], "reporting": []}}

    if memory_debugging:
        gc.collect()
        log.info("(1) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage))
    if memory_debugging:
        gc.collect()
        log.info("(2) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage))

    RunProcessing(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("(3) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage))

    RunSignatures(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("(4) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage))

    if report:
        # Fresh data (scheduler / CAPE processor) is never reprocessed.
        reprocess = False if auto or capeproc else report
        RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

    if auto:
        if cfg.cuckoo.delete_original and os.path.exists(target):
            os.unlink(target)
        if copy_path is not None and cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
            os.unlink(copy_path)

    if memory_debugging:
        gc.collect()
        log.info("(5) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage))
        for idx, leftover in enumerate(gc.garbage):
            log.info("(garbage) GC object #%d: type=%s", idx, type(leftover).__name__)