def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    self.db.fs.files.ensure_index("sha256", unique=True,
                                  sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Store the sample in GridFS.
    if results["info"]["category"] == "file":
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    #new_dropped = []
    #for dropped in report["dropped"]:
    #    new_drop = dict(dropped)
    #    drop = File(dropped["path"])
    #    if drop.valid():
    #        dropped_id = self.store_file(drop, filename=dropped["name"])
    #        new_drop["object_id"] = dropped_id
    #    new_dropped.append(new_drop)
    #report["dropped"] = new_dropped

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.disconnect()

def get_json_document(results, analysis_path):
    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths
    report["shots"] = []
    shots_path = os.path.join(analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # Strip the extension as it's added later
                # in the Django view
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Calculate the mlist_cnt for display if present to reduce db load
    if "signatures" in results:
        for entry in results["signatures"]:
            if entry["name"] == "ie_martian_children":
                report["mlist_cnt"] = len(entry["data"])
            if entry["name"] == "office_martian_children":
                report["f_mlist_cnt"] = len(entry["data"])

    # Other info we want quick access to from the web UI
    if results.get("virustotal", {}).get("positive") and results.get("virustotal", {}).get("total"):
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positive"],
                                                  results["virustotal"]["total"])

    if results.get("suricata", False):
        keywords = ("tls", "alerts", "files", "http", "ssh", "dns")
        keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt",
                         "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt")
        for keyword, keyword_value in zip(keywords, keywords_dict):
            if results["suricata"].get(keyword, 0):
                report[keyword_value] = len(results["suricata"][keyword])

    return report

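# Hedged usage sketch (assumption, not from the original modules): because
# get_json_document() above is a pure helper over the results dict, a
# reporting backend could build the base document once and then add its own
# storage-specific fields before saving. The _example_run wrapper and the
# self attributes below are illustrative placeholders only.
def _example_run(self, results):
    report = get_json_document(results, self.analysis_path)
    # Backend-specific additions (GridFS ids, call chunks, ...) would go here.
    self.db.analysis.save(report)
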
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip3 install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # Strip the extension as it's added later
                # in the Django view
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report.get("behavior", {}).get("processes", []) or []:
        new_process = dict(process)

        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for _, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []

            # Append call to the chunk.
            chunk.append(call)

        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)

        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Calculate the mlist_cnt for display if present to reduce db load
    if "signatures" in results:
        for entry in results["signatures"]:
            if entry["name"] == "ie_martian_children":
                report["mlist_cnt"] = len(entry["data"])
            if entry["name"] == "office_martian_children":
                report["f_mlist_cnt"] = len(entry["data"])

    # Other info we want quick access to from the web UI
    if results.get("virustotal", False) and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    if results.get("suricata", False):
        keywords = ("tls", "alerts", "files", "http", "ssh", "dns")
        keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt",
                         "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt")
        for keyword, keyword_value in zip(keywords, keywords_dict):
            if results["suricata"].get(keyword, 0):
                report[keyword_value] = len(results["suricata"][keyword])

    # Create an index based on the info.id dict key. Increases overall
    # scalability with large amounts of data.
    # Note: Silently ignores the creation if the index already exists.
    self.db.analysis.create_index("info.id", background=True)

    # Trick for the distributed API: reuse the main task id when it is set
    # in the task options.
    if results.get("info", {}).get("options", {}).get("main_task_id", ""):
        report["info"]["id"] = int(results["info"]["options"]["main_task_id"])

    analyses = self.db.analysis.find({"info.id": int(report["info"]["id"])})
    if analyses.count() > 0:
        log.debug("Deleting analysis data for Task %s" % report["info"]["id"])
        for analysis in analyses:
            for process in analysis["behavior"]["processes"]:
                for call in process["calls"]:
                    self.db.calls.remove({"_id": ObjectId(call)})
            self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
        log.debug("Deleted previous MongoDB data for Task %s" % report["info"]["id"])

    self.ensure_valid_utf8(report)
    gc.collect()

    # Store the report and retrieve its object id.
    try:
        self.db.analysis.save(report, check_keys=False)
    except InvalidDocument as e:
        parent_key, psize = self.debug_dict_size(report)[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and problem keys
            #log.error(str(e))
            log.warning("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE))
        else:
            # Delete the problem keys and check for more
            error_saved = True
            size_filter = MONGOSIZELIMIT
            while error_saved:
                if type(report) == list:
                    report = report[0]
                try:
                    if type(report[parent_key]) == list:
                        for j, parent_dict in enumerate(report[parent_key]):
                            child_key, csize = self.debug_dict_size(parent_dict)[0]
                            if csize > size_filter:
                                log.warn("results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize))
                                del report[parent_key][j][child_key]
                    else:
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        if csize > size_filter:
                            log.warn("results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize))
                            del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report, check_keys=False)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        #log.error(str(e))
                        log.warning("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE))
                        size_filter = size_filter - MEGABYTE
                except Exception as e:
                    log.error("Failed to delete child key: %s" % str(e))
                    error_saved = False

    self.conn.close()

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to Elasticsearch.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_ELASTICSEARCH:
        raise CuckooDependencyError("Unable to import elasticsearch "
                                    "(install with `pip install elasticsearch`)")

    self.connect()
    index_prefix = self.options.get("index", "cuckoo")
    search_only = self.options.get("searchonly", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    idxdate = report["info"]["started"].split(" ")[0]
    self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

    if not search_only:
        if "network" not in report:
            report["network"] = {}

        # Store API calls in chunks for pagination in Django
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed
                    # then store the chunk in Elasticsearch.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                    chunk_id = pchunk['_id']
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots", shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        if results.get("suricata"):
            if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
    else:
        report = {}
        report["task_id"] = results["info"]["id"]
        report["info"] = results.get("info")
        report["target"] = results.get("target")
        report["summary"] = results.get("behavior", {}).get("summary")
        report["network"] = results.get("network")
        report["virustotal"] = results.get("virustotal")

    # Other info we want quick access to from the web UI
    if results.get("virustotal") and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    # Store the report and retrieve its object id.
    self.es.index(index=self.index_name, doc_type="analysis",
                  id=results["info"]["id"], body=report)

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    if not HAVE_MONGO:
        raise CuckooDependencyError(
            "Unable to import pymongo (install with "
            "`pip install pymongo`)"
        )

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    self.db.fs.files.ensure_index("sha256", unique=True,
                                  sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results.get("info", {}).get("category") == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
    mitmpr = File(mitmproxy_path)
    if mitmpr.valid():
        mitmpr_id = self.store_file(mitmpr)
        report["network"]["mitmproxy_id"] = mitmpr_id

    # Store the process memory dump files in GridFS and reference them back
    # in the report.
    if "procmemory" in report and self.options.get("store_memdump", False):
        for idx, procmem in enumerate(report["procmemory"]):
            procmem_path = os.path.join(self.analysis_path, "memory",
                                        "{0}.dmp".format(procmem["pid"]))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    if os.path.exists(self.shots_path):
        # Walk through the files and select the JPGs.
        for shot_file in sorted(os.listdir(self.shots_path)):
            if not shot_file.endswith(".jpg"):
                continue

            shot_path = os.path.join(self.shots_path, shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    paginate = self.options.get("paginate", 100)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for call in process["calls"]:
                # If the chunk size is paginate or if the loop is
                # completed then store the chunk in MongoDB.
                if len(chunk) == paginate:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["pid"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # FIXME: Crude workaround to coerce byte strings to unicode.
    def convert2unicode(mydict):
        for k, v in mydict.iteritems():
            if isinstance(v, str):
                mydict[k] = unicode(v, errors='replace')
            elif isinstance(v, dict):
                convert2unicode(v)

    convert2unicode(report)

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.close()

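# Hedged sketch (assumption): convert2unicode() above is Python 2 only, since
# it relies on iteritems() and the unicode() builtin. A rough Python 3
# counterpart would decode byte strings instead; descending into lists is an
# addition that the original helper does not have.
def convert_to_text(mydict):
    for k, v in mydict.items():
        if isinstance(v, bytes):
            mydict[k] = v.decode("utf-8", errors="replace")
        elif isinstance(v, dict):
            convert_to_text(v)
        elif isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    convert_to_text(item)
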
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to Elasticsearch.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_ELASTICSEARCH:
        raise CuckooDependencyError("Unable to import elasticsearch "
                                    "(install with `pip install elasticsearch`)")

    self.connect()
    index_prefix = self.options.get("index", "cuckoo")
    search_only = self.options.get("searchonly", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    idxdate = report["info"]["started"].split(" ")[0]
    call_index_name = '{0}-calls-{1}'.format(index_prefix, idxdate)
    analysis_index_name = '{0}-analysis-{1}'.format(index_prefix, idxdate)

    if not search_only:
        if "network" not in report:
            report["network"] = {}

        # Store API calls in chunks for pagination in Django
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed
                    # then store the chunk in Elasticsearch.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        try:
                            pchunk = self.es.index(index=call_index_name, doc_type="calls", body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                        except Exception as error:
                            logging.debug("Failed to save API call chunk: {0}\n\n{1}".format(error, json.dumps(to_insert, indent=2)))
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    try:
                        pchunk = self.es.index(index=call_index_name, doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)
                    except Exception as error:
                        logging.debug("Failed to save API call chunk: {0}\n\n{1}".format(error, json.dumps(to_insert, indent=2)))

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots", shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        if results.get("suricata"):
            if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
    else:
        report = {}
        report["task_id"] = results["info"]["id"]
        report["info"] = results.get("info")
        report["target"] = results.get("target")
        report["summary"] = results.get("behavior", {}).get("summary")
        report["network"] = results.get("network")
        report["virustotal"] = results.get("virustotal")

    # Other info we want quick access to from the web UI
    if results.get("virustotal") and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    # Store the report and retrieve its object id.
    self.es.index(index=analysis_index_name, doc_type="analysis",
                  id=results["info"]["id"], body=report)

def run(self, results):
    """Writes report.
    @param results: Cuckoo results dict.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    self._connect()

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    self._db.fs.files.ensure_index("md5", unique=True, name="md5_unique")

    # Add pcap file, check for dups and in case add only reference.
    pcap_file = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_file)
    if pcap.valid():
        pcap_id = self.store_file(pcap)

        # Preventive key check.
        if "network" in results and isinstance(results["network"], dict):
            results["network"]["pcap_id"] = pcap_id
        else:
            results["network"] = {"pcap_id": pcap_id}

    # Add dropped files, check for dups and in case add only reference.
    dropped_files = {}
    for dir_name, dir_names, file_names in os.walk(os.path.join(self.analysis_path, "files")):
        for file_name in file_names:
            file_path = os.path.join(dir_name, file_name)
            drop = File(file_path)
            dropped_files[drop.get_md5()] = drop

    result_files = dict((dropped.get("md5", None), dropped) for dropped in results["dropped"])

    # Hopefully the md5s in dropped_files and result_files are the same.
    if set(dropped_files.keys()) - set(result_files.keys()):
        log.warning("Dropped files in result dict are different from those in storage.")

    # Store files in GridFS.
    for md5, fileobj in dropped_files.items():
        # Only store in the db if we have a filename for it in results
        # (should be all of them).
        resultsdrop = result_files.get(md5, None)
        if resultsdrop and fileobj.valid():
            drop_id = self.store_file(fileobj, filename=resultsdrop["name"])
            resultsdrop["dropped_id"] = drop_id

    # Add screenshots.
    results["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [f for f in os.listdir(shots_path) if f.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            if shot.valid():
                shot_id = self.store_file(shot)
                results["shots"].append(shot_id)

    # Save all remaining results.
    try:
        self._db.analysis.save(results, manipulate=False)
    except InvalidDocument:
        # The document is too big, we need to shrink it and re-save it.
        results["behavior"]["processes"] = ""

        # Let's add an error message to the debug block.
        error = ("The analysis results were too big to be stored, " +
                 "the detailed behavioral analysis has been stripped out.")
        results["debug"]["errors"].append(error)

        # Try again to store, if it fails, just abort.
        try:
            self._db.analysis.save(results)
        except Exception as e:
            raise CuckooReportError("Failed to store the document into MongoDB: %s" % e)

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to S3.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    self.s3_region = self.options.get("region", "us-west-2")
    self.s3_access_key = self.options.get("access_key", "")
    self.s3_secret_key = self.options.get("secret_key", "")
    s3_reports_bucket_name = self.options.get("reports_bucket", "")
    s3_shots_bucket_name = self.options.get("shots_bucket", "")
    s3_samples_bucket_name = self.options.get("samples_bucket", "")
    s3_files_bucket_name = self.options.get("files_bucket", "")
    s3_aux_bucket_name = self.options.get("aux_bucket", "")
    s3_logs_bucket_name = self.options.get("logs_bucket", "")
    s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
    s3_md5_bucket_name = self.options.get("md5_bucket", "")
    cleanup = self.options.get("cleanup", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            chunk_count = 0
            # Using this type of prefix is useful because you can always
            # re-construct it from the original results.
            #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
            chunk_prefix = str(results['info']['id']) + '/' + str(process['process_id'])

            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in S3.
                if len(chunk) == 100:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    #log.debug("INFO TIME!")
                    #log.debug("%s %s %s" % (s3_reports_bucket_name, chunk_name, chunk_prefix))
                    #log.debug(chunk_prefix)
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                    if err != '':
                        log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
                        chunk_count += 1
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                #log.debug("%s %s %s" % (s3_reports_bucket_name, chunk_name, chunk_prefix))
                err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                if err != '':
                    log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                else:
                    chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI
    if results.get("virustotal") and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    if results.get("suricata"):
        if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
            report["suri_http_cnt"] = len(results["suricata"]["http"])
        if "ssh" in results["suricata"] and len(results["suricata"]["ssh"]) > 0:
            report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
        if "dns" in results["suricata"] and len(results["suricata"]["dns"]) > 0:
            report["suri_dns_cnt"] = len(results["suricata"]["dns"])

    # Store the report (its 'object id' is simply the analysis id).
    # First make sure it's not too big (5 GB limit).
    data = json.dumps(report)
    if len(data) < 5000000000:
        err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data)
        if err != '':
            log.error("Non-size related issue saving analysis JSON to S3 for report {0} - {1}".format(results['info']['id'], err))
    else:
        log.error("JSON for analysis id {0} is greater than 5GB".format(results['info']['id']))

    # Process the rest of the analysis files and put them in S3.
    if s3_shots_bucket_name != '':
        shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots")
        self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name)
    if s3_pcap_bucket_name != '':
        if os.path.isfile(self.analysis_path + '/dump.pcap'):
            with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name, "{0}/dump.pcap".format(results['info']['id']), infile.read())
        if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
            with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name, "{0}/dump_sorted.pcap".format(results['info']['id']), infile.read())
    if s3_aux_bucket_name != '':
        aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux")
        self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name)
    if s3_logs_bucket_name != '':
        logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs")
        self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name)
    if s3_samples_bucket_name != '':
        sample = os.path.realpath(self.analysis_path + '/binary')
        with open(sample, 'rb') as infile:
            self.save_to_s3(s3_samples_bucket_name, results['target']['file']['sha256'], infile.read())
    #log.debug(s3_files_bucket_name)
    if s3_files_bucket_name != '':
        #log.debug(self.analysis_path)
        for root, dirnames, filenames in os.walk(self.analysis_path + '/files'):
            #log.debug(filenames)
            for filename in filenames:
                key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1] + '/' + filename
                if not filename.endswith('_info.txt'):
                    key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1]
                #log.debug(key_name)
                with open(os.path.join(root, filename), 'rb') as infile:
                    self.save_to_s3(s3_files_bucket_name, key_name, infile.read())
    if s3_md5_bucket_name != '':
        info = {}
        info.update(report['info'])
        info.update(report['target'])
        self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'], json.dumps(info))

    if cleanup:
        shutil.rmtree(self.analysis_path)

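# Hedged sketch (assumption): the run() above relies on self.save_to_s3(bucket,
# key, data) returning an empty string on success and an error message on
# failure. A minimal boto3-based helper consistent with that contract might
# look like this; the project's real implementation may differ.
import boto3

def save_to_s3_sketch(region, access_key, secret_key, bucket, key, data):
    try:
        client = boto3.client(
            "s3",
            region_name=region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        )
        # Keys are coerced to strings because the caller sometimes passes the
        # integer analysis id directly.
        client.put_object(Bucket=bucket, Key=str(key), Body=data)
        return ""
    except Exception as exc:
        return str(exc)
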
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Store the sample in GridFS.
    if results["info"]["category"] == "file":
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"] = {"pcap_id": pcap_id}
        report["network"].update(results["network"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    for dropped in report["dropped"]:
        new_drop = dict(dropped)
        drop = File(dropped["path"])
        if drop.valid():
            dropped_id = self.store_file(drop, filename=dropped["name"])
            new_drop["object_id"] = dropped_id
        new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Store the Zipped Droppings file in GridFS and reference it back in the report.
    #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
    #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
    #if cuckoo_dropped_zip.valid():
    #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
    #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
    #    report["zippeddroppings"].update(results["zippeddroppings"])

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report["behavior"]["processes"]:
        new_process = dict(process)

        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []

            # Append call to the chunk.
            chunk.append(call)

        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)

        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI
    if report.get("virustotal") and "positives" in report["virustotal"] and "total" in report["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (report["virustotal"]["positives"],
                                                  report["virustotal"]["total"])

    new_suricata_files = []
    if report.get("suricata"):
        suricata = {}
        suricata["info"] = {}
        suricata["info"]["id"] = report["info"]["id"]

        # Walk through the suricata extracted files, store them in GridFS and
        # update the report with the ObjectIds.
        # Store the suri extracted files zip in GridFS and reference it back
        # in the report.
        suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
        suri_extracted_zip = File(suri_extracted_zip_path)
        if suri_extracted_zip.valid():
            suri_extracted_zip_id = self.store_file(suri_extracted_zip)
            suricata["suri_extracted_zip"] = suri_extracted_zip_id

        if "files" in report["suricata"] and len(report["suricata"]["files"]) > 0:
            suricata["file_cnt"] = len(report["suricata"]["files"])
            for suricata_file_e in report["suricata"]["files"]:
                if "file_info" in suricata_file_e:
                    tmp_suricata_file_d = dict(suricata_file_e)
                    suricata_file = File(suricata_file_e["file_info"]["path"])
                    if suricata_file.valid():
                        suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                        tmp_suricata_file_d["object_id"] = suricata_file_id
                    new_suricata_files.append(tmp_suricata_file_d)
            suricata["files"] = new_suricata_files

        if report["suricata"].get("alert_log_full_path"):
            suricata_alert_log = File(report["suricata"]["alert_log_full_path"])
            if suricata_alert_log.valid():
                suricata_alert_log_id = self.store_file(suricata_alert_log)
                suricata["alert_log_id"] = suricata_alert_log_id

        if report["suricata"].get("tls_log_full_path"):
            tls_log = File(report["suricata"]["tls_log_full_path"])
            if tls_log.valid():
                tls_log_id = self.store_file(tls_log)
                suricata["tls_log_id"] = tls_log_id

        if report["suricata"].get("http_log_full_path"):
            http_log = File(report["suricata"]["http_log_full_path"])
            if http_log.valid():
                http_log_id = self.store_file(http_log)
                suricata["http_log_id"] = http_log_id

        if report["suricata"].get("file_log_full_path"):
            file_log = File(report["suricata"]["file_log_full_path"])
            if file_log.valid():
                file_log_id = self.store_file(file_log)
                suricata["file_log_id"] = file_log_id

        if report["suricata"].get("dns_log_full_path"):
            dns_log = File(report["suricata"]["dns_log_full_path"])
            if dns_log.valid():
                dns_log_id = self.store_file(dns_log)
                suricata["dns_log_id"] = dns_log_id

        if report["suricata"].get("ssh_log_full_path"):
            ssh_log = File(report["suricata"]["ssh_log_full_path"])
            if ssh_log.valid():
                ssh_log_id = self.store_file(ssh_log)
                suricata["ssh_log_id"] = ssh_log_id

        if "tls" in report["suricata"] and len(report["suricata"]["tls"]) > 0:
            suricata["tls_cnt"] = len(report["suricata"]["tls"])
            suricata["tls"] = report["suricata"]["tls"]
        if "alerts" in report["suricata"] and len(report["suricata"]["alerts"]) > 0:
            suricata["alert_cnt"] = len(report["suricata"]["alerts"])
            suricata["alerts"] = report["suricata"]["alerts"]
        if "http" in report["suricata"] and len(report["suricata"]["http"]) > 0:
            suricata["http_cnt"] = len(report["suricata"]["http"])
            suricata["http"] = report["suricata"]["http"]

        self.db.suricata.save(suricata)
        # Do not store this in the analysis collection.
        del report["suricata"]

    if results.get("behavior", {}).get("martianlist") and len(results["behavior"]["martianlist"]) > 0:
        report["mlist_cnt"] = len(results["behavior"]["martianlist"])

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.disconnect()

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results["info"]["category"] == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    if "procmemory" in report:
        # Store the process memory dump files in GridFS and reference them
        # back in the report.
        for idx, procmem in enumerate(report['procmemory']):
            procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem['pid']))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Store the suri extracted files zip in GridFS and reference it back in the report.
    suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
    suri_extracted_zip = File(suri_extracted_zip_path)
    if suri_extracted_zip.valid():
        suri_extracted_zip_id = self.store_file(suri_extracted_zip)
        report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
        report["suricata"].update(results["suricata"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Store the Zipped Droppings file in GridFS and reference it back in the report.
    #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
    #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
    #if cuckoo_dropped_zip.valid():
    #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
    #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
    #    report["zippeddroppings"].update(results["zippeddroppings"])

    # Walk through the suricata extracted files, store them in GridFS and
    # update the report with the ObjectIds.
    new_suricata_files = []
    if results.get("suricata"):
        if results["suricata"].get("files"):
            for suricata_file_e in results["suricata"]["files"]:
                if "file_info" in suricata_file_e:
                    tmp_suricata_file_d = dict(suricata_file_e)
                    suricata_file = File(suricata_file_e["file_info"]["path"])
                    if suricata_file.valid():
                        suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                        tmp_suricata_file_d["object_id"] = suricata_file_id
                    new_suricata_files.append(tmp_suricata_file_d)
            report["suricata"]["files"] = new_suricata_files

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI
    if results.get("virustotal") and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    if results.get("suricata"):
        if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
            report["suri_http_cnt"] = len(results["suricata"]["http"])

    # Store the report and retrieve its object id.
    try:
        self.db.analysis.save(report)
    except InvalidDocument as e:
        parent_key, psize = self.debug_dict_size(report)[0]
        child_key, csize = self.debug_dict_size(report[parent_key])[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and problem keys
            log.error(str(e))
            log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
            log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
        else:
            # Delete the problem keys and check for more
            error_saved = True
            while error_saved:
                log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                         (parent_key, child_key, int(psize) / 1048576))
                del report[parent_key][child_key]
                try:
                    self.db.analysis.save(report)
                    error_saved = False
                except InvalidDocument as e:
                    parent_key, psize = self.debug_dict_size(report)[0]
                    child_key, csize = self.debug_dict_size(report[parent_key])[0]
                    log.error(str(e))
                    log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                    log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))

    self.conn.close()

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #  Returns the name of the created index if an index is actually created.
    #  Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Store the sample in GridFS.
    if results["info"]["category"] == "file":
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"] = {"pcap_id": pcap_id}
        report["network"].update(results["network"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    for dropped in report["dropped"]:
        new_drop = dict(dropped)
        drop = File(dropped["path"])
        if drop.valid():
            dropped_id = self.store_file(drop, filename=dropped["name"])
            new_drop["object_id"] = dropped_id
        new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report["behavior"]["processes"]:
        new_process = dict(process)

        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []

            # Append call to the chunk.
            chunk.append(call)

        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)

        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.disconnect()

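# Hedged sketch (assumption, not part of the module): the chunk ObjectIds
# stored in new_process["calls"] are meant to be resolved later, one page at
# a time, for example by the web UI. A minimal reader for a single page could
# look like this, given a pymongo database handle.
def load_call_page(db, process, page):
    chunk_id = process["calls"][page]
    chunk = db.calls.find_one({"_id": chunk_id})
    return chunk["calls"] if chunk else []
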
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. Need to run
    # only one time at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # Strip the extension as it's added later
                # in the Django view
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Calculate the mlist_cnt for display if present to reduce db load
    if "signatures" in results:
        for entry in results["signatures"]:
            if entry["name"] == "ie_martian_children":
                report["mlist_cnt"] = len(entry["data"])
            if entry["name"] == "office_martian_children":
                report["f_mlist_cnt"] = len(entry["data"])

    # Other info we want quick access to from the web UI
    if results.get("virustotal") and "positives" in results["virustotal"] and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

    if results.get("suricata"):
        if "tls" in results["suricata"] and len(results["suricata"]["tls"]) > 0:
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if "alerts" in results["suricata"] and len(results["suricata"]["alerts"]) > 0:
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if "files" in results["suricata"] and len(results["suricata"]["files"]) > 0:
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if "http" in results["suricata"] and len(results["suricata"]["http"]) > 0:
            report["suri_http_cnt"] = len(results["suricata"]["http"])
        if "ssh" in results["suricata"] and len(results["suricata"]["ssh"]) > 0:
            report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
        if "dns" in results["suricata"] and len(results["suricata"]["dns"]) > 0:
            report["suri_dns_cnt"] = len(results["suricata"]["dns"])

    # Create an index based on the info.id dict key. Increases overall
    # scalability with large amounts of data.
    # Note: Silently ignores the creation if the index already exists.
    self.db.analysis.create_index("info.id", background=True)

    # Store the report and retrieve its object id.
    try:
        self.db.analysis.save(report)
    except InvalidDocument as e:
        parent_key, psize = self.debug_dict_size(report)[0]
        child_key, csize = self.debug_dict_size(report[parent_key])[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and problem keys
            log.error(str(e))
            log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
            log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
        else:
            # Delete the problem keys and check for more
            error_saved = True
            while error_saved:
                log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                         (parent_key, child_key, int(psize) / 1048576))
                del report[parent_key][child_key]
                try:
                    self.db.analysis.save(report)
                    error_saved = False
                except InvalidDocument as e:
                    parent_key, psize = self.debug_dict_size(report)[0]
                    child_key, csize = self.debug_dict_size(report[parent_key])[0]
                    log.error(str(e))
                    log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                    log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))

    self.conn.close()

def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to S3.
    """
    self.s3_region = self.options.get("region", "us-west-2")
    self.s3_access_key = self.options.get("access_key", "")
    self.s3_secret_key = self.options.get("secret_key", "")
    s3_reports_bucket_name = self.options.get("reports_bucket", "")
    s3_shots_bucket_name = self.options.get("shots_bucket", "")
    s3_samples_bucket_name = self.options.get("samples_bucket", "")
    s3_files_bucket_name = self.options.get("files_bucket", "")
    s3_aux_bucket_name = self.options.get("aux_bucket", "")
    s3_logs_bucket_name = self.options.get("logs_bucket", "")
    s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
    s3_md5_bucket_name = self.options.get("md5_bucket", "")
    cleanup = self.options.get("cleanup", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path)
                 if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            chunk_count = 0
            # Using this type of prefix is useful because you can always
            # re-construct it from the original results.
            #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
            chunk_prefix = str(results['info']['id']) + '/' + str(process['process_id'])
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in S3.
                if len(chunk) == 100:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name,
                                          json.dumps(chunk))
                    if err != '':
                        log.error("Non-size related issue saving analysis JSON "
                                  "to S3 for chunk {0} - {1}".format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
                        chunk_count += 1
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                err = self.save_to_s3(s3_reports_bucket_name, chunk_name,
                                      json.dumps(chunk))
                if err != '':
                    log.error("Non-size related issue saving analysis JSON "
                              "to S3 for chunk {0} - {1}".format(chunk_name, err))
                else:
                    chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI.
    if results.get("virustotal") and "positives" in results["virustotal"] \
            and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (
            results["virustotal"]["positives"],
            results["virustotal"]["total"])

    if results.get("suricata"):
        if results["suricata"].get("tls"):
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if results["suricata"].get("alerts"):
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if results["suricata"].get("files"):
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if results["suricata"].get("http"):
            report["suri_http_cnt"] = len(results["suricata"]["http"])
        if results["suricata"].get("ssh"):
            report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
        if results["suricata"].get("dns"):
            report["suri_dns_cnt"] = len(results["suricata"]["dns"])

    # Store the report (its 'object id' is simply the analysis id).
    # First make sure it's not too big (5GB limit).
    data = json.dumps(report)
    if len(data) < 5000000000:
        err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data)
        if err != '':
            log.error("Non-size related issue saving analysis JSON to S3 "
                      "for report {0} - {1}".format(results['info']['id'], err))
    else:
        log.error("JSON for analysis id {0} is greater than 5GB".format(results['info']['id']))

    # Process the rest of the analysis files and put them in S3.
    if s3_shots_bucket_name != '':
        shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots")
        self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name)
    if s3_pcap_bucket_name != '':
        if os.path.isfile(self.analysis_path + '/dump.pcap'):
            with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name,
                                "{0}/dump.pcap".format(results['info']['id']),
                                infile.read())
        if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
            with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile:
                self.save_to_s3(s3_pcap_bucket_name,
                                "{0}/dump_sorted.pcap".format(results['info']['id']),
                                infile.read())
    if s3_aux_bucket_name != '':
        aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux")
        self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name)
    if s3_logs_bucket_name != '':
        logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs")
        self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name)
    if s3_samples_bucket_name != '':
        sample = os.path.realpath(self.analysis_path + '/binary')
        with open(sample, 'rb') as infile:
            self.save_to_s3(s3_samples_bucket_name,
                            results['target']['file']['sha256'], infile.read())
    if s3_files_bucket_name != '':
        for root, dirnames, filenames in os.walk(self.analysis_path + '/files'):
            for filename in filenames:
                key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1] + '/' + filename
                if not filename.endswith('_info.txt'):
                    key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1]
                with open(os.path.join(root, filename), 'rb') as infile:
                    self.save_to_s3(s3_files_bucket_name, key_name, infile.read())
    if s3_md5_bucket_name != '':
        info = {}
        info.update(report['info'])
        info.update(report['target'])
        self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'],
                        json.dumps(info))

    if cleanup:
        shutil.rmtree(self.analysis_path)
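# --- Illustrative sketch (not part of the original module) ---
# The S3 reporter above leans on a save_to_s3() helper whose body is not
# included in this excerpt. A minimal sketch of what such a method could look
# like with boto3, matching the calling convention used above (returns "" on
# success, an error string otherwise). The self.s3_* attributes mirror the
# options read at the top of run(); the use of boto3 itself is an assumption,
# since the original may use a different S3 client.
import boto3
from botocore.exceptions import BotoCoreError, ClientError

def save_to_s3(self, bucket_name, key_name, data):
    """Upload a blob to S3 and report any failure as a string."""
    try:
        client = boto3.client(
            "s3",
            region_name=self.s3_region,
            aws_access_key_id=self.s3_access_key,
            aws_secret_access_key=self.s3_secret_key,
        )
        client.put_object(Bucket=bucket_name, Key=str(key_name), Body=data)
        return ""
    except (BotoCoreError, ClientError) as e:
        return str(e)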
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually
    #   created.
    #   Returns None if the index already exists.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                  name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Store the sample in GridFS.
    if results["info"]["category"] == "file":
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"] = {"pcap_id": pcap_id}
        report["network"].update(results["network"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    for dropped in report["dropped"]:
        new_drop = dict(dropped)
        drop = File(dropped["path"])
        if drop.valid():
            dropped_id = self.store_file(drop, filename=dropped["name"])
            new_drop["object_id"] = dropped_id
        new_dropped.append(new_drop)
    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path)
                 if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report["behavior"]["processes"]:
        new_process = dict(process)
        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []
            # Append call to the chunk.
            chunk.append(call)
        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)
        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.disconnect()
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set the mongo schema version.
    # TODO: This is not optimal because it runs on every analysis. It
    # should run only once, at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, "
                                    "check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually
    #   created.
    #   Returns None if the index already exists.
    # TODO: This is not optimal because it runs on every analysis. It
    # should run only once, at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                  name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results["info"]["category"] == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
    mitmpr = File(mitmproxy_path)
    if mitmpr.valid():
        mitmpr_id = self.store_file(mitmpr)
        report["network"]["mitmproxy_id"] = mitmpr_id

    # Store the process memory dump files in GridFS and reference them back
    # in the report.
    if "procmemory" in report and self.options.get("store_memdump", False):
        for idx, procmem in enumerate(report["procmemory"]):
            procmem_path = os.path.join(self.analysis_path, "memory",
                                        "{0}.dmp".format(procmem["pid"]))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)
    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        for shot_file in sorted(os.listdir(shots_path)):
            if not shot_file.endswith(".jpg"):
                continue
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    paginate = self.options.get("paginate", 100)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is paginate or if the loop is
                # completed then store the chunk in MongoDB.
                if len(chunk) == paginate:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["pid"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.close()
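# --- Illustrative sketch (not part of the original module) ---
# Every MongoDB variant above starts with self.connect(), whose body is not
# shown in this excerpt. A minimal sketch of what it could look like with
# pymongo's MongoClient; the option names (host, port, db) and their defaults
# are assumptions, mirroring the self.options.get() style used elsewhere.
from pymongo import MongoClient
from pymongo.errors import PyMongoError

def connect(self):
    """Open the MongoDB connection used by this reporting module."""
    host = self.options.get("host", "127.0.0.1")
    port = int(self.options.get("port", 27017))
    db_name = self.options.get("db", "cuckoo")
    try:
        self.conn = MongoClient(host, port)
        self.db = self.conn[db_name]
    except PyMongoError as e:
        raise CuckooReportError("Cannot connect to MongoDB: %s" % e)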
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set the mongo schema version.
    # TODO: This is not optimal because it runs on every analysis. It
    # should run only once, at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, "
                                    "check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually
    #   created.
    #   Returns None if the index already exists.
    # TODO: This is not optimal because it runs on every analysis. It
    # should run only once, at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                  name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results["info"]["category"] == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    # Store the process memory dump files in GridFS and reference them back
    # in the report.
    if "procmemory" in report:
        for idx, procmem in enumerate(report['procmemory']):
            procmem_path = os.path.join(self.analysis_path, "memory",
                                        "{0}.dmp".format(procmem['pid']))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Store the suri extracted files archive in GridFS and reference it back
    # in the report.
    suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
    suri_extracted_zip = File(suri_extracted_zip_path)
    if suri_extracted_zip.valid():
        suri_extracted_zip_id = self.store_file(suri_extracted_zip)
        report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
        report["suricata"].update(results["suricata"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)
    report["dropped"] = new_dropped

    # Store the Zipped Droppings file in GridFS and reference it back in
    # the report.
    #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
    #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
    #if cuckoo_dropped_zip.valid():
    #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
    #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
    #    report["zippeddroppings"].update(results["zippeddroppings"])

    # Walk through the suricata extracted files, store them in GridFS and
    # update the report with the ObjectIds.
    new_suricata_files = []
    if results.get("suricata") and results["suricata"].get("files"):
        for suricata_file_e in results["suricata"]["files"]:
            if "file_info" in suricata_file_e:
                tmp_suricata_file_d = dict(suricata_file_e)
                suricata_file = File(suricata_file_e["file_info"]["path"])
                if suricata_file.valid():
                    suricata_file_id = self.store_file(
                        suricata_file,
                        filename=suricata_file_e["file_info"]["name"])
                    tmp_suricata_file_d["object_id"] = suricata_file_id
                new_suricata_files.append(tmp_suricata_file_d)
        report["suricata"]["files"] = new_suricata_files

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path)
                 if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI.
    if results.get("virustotal") and "positives" in results["virustotal"] \
            and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (
            results["virustotal"]["positives"],
            results["virustotal"]["total"])

    if results.get("suricata"):
        if results["suricata"].get("tls"):
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if results["suricata"].get("alerts"):
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if results["suricata"].get("files"):
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if results["suricata"].get("http"):
            report["suri_http_cnt"] = len(results["suricata"]["http"])

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    self.conn.close()
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set the mongo schema version.
    # TODO: This is not optimal because it runs on every analysis. It
    # should run only once, at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, "
                                    "check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)
    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path)
                 if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # Strip the extension as it's added later
                # in the Django view.
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Other info we want quick access to from the web UI.
    if results.get("virustotal") and "positives" in results["virustotal"] \
            and "total" in results["virustotal"]:
        report["virustotal_summary"] = "%s/%s" % (
            results["virustotal"]["positives"],
            results["virustotal"]["total"])

    if results.get("suricata"):
        if results["suricata"].get("tls"):
            report["suri_tls_cnt"] = len(results["suricata"]["tls"])
        if results["suricata"].get("alerts"):
            report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
        if results["suricata"].get("files"):
            report["suri_file_cnt"] = len(results["suricata"]["files"])
        if results["suricata"].get("http"):
            report["suri_http_cnt"] = len(results["suricata"]["http"])

    # Create an index based on the info.id dict key. Increases overall
    # scalability with large amounts of data.
    # Note: Silently ignores the creation if the index already exists.
    self.db.analysis.create_index("info.id", background=True)

    # Store the report and retrieve its object id.
    try:
        self.db.analysis.save(report)
    except InvalidDocument as e:
        parent_key, psize = self.debug_dict_size(report)[0]
        child_key, csize = self.debug_dict_size(report[parent_key])[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and the problem keys.
            log.error(str(e))
            log.error("Largest parent key: %s (%d MB)"
                      % (parent_key, int(psize) / 1048576))
            log.error("Largest child key: %s (%d MB)"
                      % (child_key, int(csize) / 1048576))
        else:
            # Delete the problem keys and check for more.
            error_saved = True
            while error_saved:
                log.warning("results['%s']['%s'] deleted due to >16MB size "
                            "(%dMB)" % (parent_key, child_key,
                                        int(psize) / 1048576))
                del report[parent_key][child_key]
                try:
                    self.db.analysis.save(report)
                    error_saved = False
                except InvalidDocument as e:
                    parent_key, psize = self.debug_dict_size(report)[0]
                    child_key, csize = self.debug_dict_size(report[parent_key])[0]
                    log.error(str(e))
                    log.error("Largest parent key: %s (%d MB)"
                              % (parent_key, int(psize) / 1048576))
                    log.error("Largest child key: %s (%d MB)"
                              % (child_key, int(csize) / 1048576))

    self.conn.close()
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    self.connect()

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually created.
    #   Returns None if the index already exists.
    self.db.fs.files.ensure_index("sha256", unique=True,
                                  name="sha256_unique")

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"] = {"pcap_id": pcap_id}
        report["network"].update(results["network"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    for dropped in report["dropped"]:
        new_drop = dict(dropped)
        drop = File(dropped["path"])
        if drop.valid():
            dropped_id = self.store_file(drop, filename=dropped["name"])
            new_drop["object_id"] = dropped_id
        new_dropped.append(new_drop)
    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path)
                 if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report["behavior"]["processes"]:
        new_process = dict(process)
        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                chunk_id = self.db.calls.insert({"pid": process["process_id"],
                                                 "calls": chunk})
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []
            # Append call to the chunk.
            chunk.append(call)
        # Store leftovers.
        if chunk:
            chunk_id = self.db.calls.insert({"pid": process["process_id"],
                                             "calls": chunk})
            chunks_ids.append(chunk_id)
        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.insert(report)
    self.conn.disconnect()
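# --- Illustrative sketch (not part of the original module) ---
# Every variant above replaces each process's "calls" list with a list of ids
# pointing into the separate "calls" collection. A minimal sketch of the
# reader side: rebuilding (or paging through) one process's call list from
# those chunk ids. The db handle and collection name mirror the writers
# above; the function name and paging parameters are our own illustration.
def load_calls(db, chunk_ids, page=None, per_page=1):
    """Fetch stored API-call chunks back from the calls collection.

    With page=None all chunks are concatenated; otherwise only the chunks
    belonging to that zero-based page are read, which is what keeps paging
    in the web UI cheap.
    """
    if page is not None:
        chunk_ids = chunk_ids[page * per_page:(page + 1) * per_page]
    calls = []
    for chunk_id in chunk_ids:
        chunk = db.calls.find_one({"_id": chunk_id})
        if chunk:
            calls.extend(chunk.get("calls", []))
    return calls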