def retrieval_context(self):
    """Override ParallelBackendBase.retrieval_context to avoid deadlocks.

    While the caller's block runs, the current thread is taken out of the
    worker's thread pool (via 'secede'), which avoids deadlocks in nested
    parallelism settings. When the caller resumes this generator the thread
    rejoins the pool.
    """
    # See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how
    # this is used.

    def running_in_worker():
        # 'execution_state' is only present when we are in a worker.
        return hasattr(thread_state, 'execution_state')

    if running_in_worker():
        # Secede to avoid deadlock.
        secede()

    yield

    # Checked again (not cached) so the test reflects the state on resume.
    if running_in_worker():
        rejoin()
def unzip_then_run(dump_pk, user_pk, es_url):
    """Unpack an uploaded dump if it is zipped, then run its plugins via Dask.

    A zip archive is accepted when it holds exactly one file, or when it
    holds at least one ``.vmem`` file (matched case-insensitively). Any
    other archive marks the dump with status 4 and stops processing.

    Args:
        dump_pk: primary key of the ``Dump`` to process.
        user_pk: not used by this implementation; kept for the callers.
        es_url: forwarded unchanged to every ``run_plugin`` task.

    Returns:
        None.
    """
    dump = Dump.objects.get(pk=dump_pk)
    upload_path = dump.upload.path

    # Unzip file if zipped
    if is_zipfile(upload_path):
        with ZipFile(upload_path, "r") as zip_obj:
            members = zip_obj.namelist()
            extract_path = pathlib.Path(upload_path).parent
            # Detection and selection share one case-insensitive test. The
            # selection used to be case-sensitive, so an archive containing
            # only "X.VMEM" passed detection but never assigned the path.
            vmem_members = [m for m in members if m.lower().endswith(".vmem")]

            # zip must contain one file with a memory dump
            if len(members) == 1:
                newpath = zip_obj.extract(members[0], extract_path)
            # or a vmem + vmss + vmsn
            elif vmem_members:
                zip_obj.extractall(extract_path)
                # Last match wins, as before.
                newpath = os.path.join(extract_path, vmem_members[-1])
            else:
                # zip is invalid
                dump.status = 4
                dump.save()
                return
    else:
        newpath = upload_path

    dump.upload.name = newpath
    dump.save()

    dask_client = get_client()
    # Leave the worker's thread pool while waiting on the sub-tasks.
    secede()
    tasks = [
        dask_client.submit(run_plugin, dump, result.plugin, es_url)
        for result in dump.result_set.all()
        # presumably 5 marks a disabled plugin - TODO confirm
        if result.result != 5
    ]
    dask_client.gather(tasks)
    rejoin()

    dump.status = 2
    dump.save()
def unzip_then_run(dump_pk, user_pk, password):
    """Move, optionally extract, fingerprint and analyse an uploaded dump.

    Steps:
      1. Move the upload into ``MEDIA_ROOT/<dump.index>``.
      2. If it is a supported archive, extract it with ``7z`` (using
         ``password`` when given) and pick the memory image: the only
         extracted file, or a ``.vmem`` file when there are several.
         If no image is found the dump gets status 4 and processing stops.
      3. Store size and checksums on the dump.
      4. For Linux/Mac dumps run the ``banners.Banners`` plugin first and
         store the guessed banner.
      5. If ``check_runnable`` accepts the dump, run every plugin whose
         result is not 5 as Dask sub-tasks; otherwise set every plugin
         result to 5 and notify over the websocket.

    Args:
        dump_pk: primary key of the ``Dump`` to process.
        user_pk: forwarded to ``run_plugin`` for each submitted task.
        password: archive password; falsy means no password option is passed.

    Returns:
        None.
    """
    dump = Dump.objects.get(pk=dump_pk)
    logging.debug("[dump {}] Processing".format(dump_pk))

    # COPY EACH FILE IN THEIR FOLDER BEFORE UNZIP/RUN PLUGIN
    extract_path = f"{settings.MEDIA_ROOT}/{dump.index}"
    filepath = shutil.move(dump.upload.path, extract_path)

    archive_mime_types = (
        "application/zip",
        "application/x-7z-compressed",
        "application/x-rar",
        "application/gzip",
        "application/x-tar",
    )
    if magic.from_file(filepath, mime=True) in archive_mime_types:
        # Argument list (no shell), password option only when provided.
        command = ["7z", "e", f"{filepath}", f"-o{extract_path}"]
        if password:
            command.append(f"-p{password}")
        command.append("-y")
        subprocess.call(command)

        os.unlink(filepath)

        extracted_files = [
            str(x) for x in Path(extract_path).glob("**/*") if x.is_file()
        ]
        vmem_files = [x for x in extracted_files if x.lower().endswith(".vmem")]

        newpath = None
        if len(extracted_files) == 1:
            newpath = extracted_files[0]
        elif vmem_files:
            # The globbed entries already include extract_path. Joining it
            # again (as was done before) doubled the prefix for a relative
            # MEDIA_ROOT and produced a Path instead of a str.
            newpath = vmem_files[-1]

        if not newpath:
            # archive is invalid
            logging.error("[dump {}] Invalid archive dump data".format(dump_pk))
            dump.status = 4
            dump.save()
            return
    else:
        newpath = filepath

    dump.upload.name = newpath
    dump.size = os.path.getsize(newpath)
    sha256, md5 = hash_checksum(newpath)
    dump.sha256 = sha256
    dump.md5 = md5
    dump.save()

    banner = False

    # check symbols using banners
    if dump.operating_system in ("Linux", "Mac"):
        # results already exists because all plugin results are created when dump is created
        banner = dump.result_set.get(plugin__name="banners.Banners")
        if banner:
            banner.result = 0
            banner.save()
            run_plugin(dump, banner.plugin)
            time.sleep(1)
            banner_result = get_banner(banner)
            dump.banner = banner_result.strip("\"'")
            logging.error(
                "[dump {}] guessed banner '{}'".format(dump_pk, dump.banner)
            )
            dump.save()

    # For Linux the banners plugin has already been run above, so it is
    # left out of the bulk run / bulk disable below.
    tasks_list = (
        dump.result_set.all()
        if dump.operating_system != "Linux"
        else dump.result_set.exclude(plugin__name="banners.Banners")
    )

    if check_runnable(dump.pk, dump.operating_system, dump.banner):
        dask_client = get_client()
        # Leave the worker's thread pool while waiting on the sub-tasks.
        secede()
        tasks = [
            dask_client.submit(run_plugin, dump, result.plugin, None, user_pk)
            for result in tasks_list
            if result.result != 5
        ]
        dask_client.gather(tasks)
        logging.debug("[dump {}] tasks submitted".format(dump_pk))
        rejoin()
        dump.status = 2
        dump.save()
        logging.debug("[dump {}] processing terminated".format(dump_pk))
    else:
        # This takes time so we do this one time only
        if dump.banner:
            dump.suggested_symbols_path = get_path_from_banner(dump.banner)
        dump.missing_symbols = True
        dump.status = 2
        dump.save()
        logging.error(
            "[dump {}] symbols non available. Disabling all plugins".format(
                dump_pk
            )
        )
        for result in tasks_list:
            result.result = 5
            result.save()
        send_to_ws(dump, message="Missing symbols all plugin are disabled", color=4)
def run_plugin(dump_obj, plugin_obj, params=None, user_pk=None):
    """
    Execute a single plugin on a dump with optional params.
    If success data are sent to elastic.

    The outcome is stored on the matching ``Result`` row:
    1 = ran but produced no data, 2 = data indexed, 3 = unsatisfied
    requirements, 4 = error (description holds the traceback).

    Args:
        dump_obj: the dump to analyse; its upload path is the input file.
        plugin_obj: the plugin record (name, local_dump and check flags).
        params: optional mapping of plugin option name to value; entries
            whose value is the empty string are not written to the config.
        user_pk: used to look up the user's default ``CustomRule`` when a
            YARA plugin is run without any rule parameter.

    Returns:
        Always 0; failures are reported through the ``Result`` row.
    """
    logging.info("[dump {} - plugin {}] start".format(dump_obj.pk, plugin_obj.pk))

    def _store_result(code, description):
        # Reload the row each time, then persist the outcome and return it.
        result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
        result.result = code
        result.description = description
        result.save()
        return result

    def _store_failure(excp):
        # Record a generic error (code 4) with the full chained traceback.
        fulltrace = traceback.TracebackException.from_exception(excp).format(
            chain=True
        )
        result = _store_result(4, "\n".join(fulltrace))
        send_to_ws(dump_obj, result, plugin_obj.name)
        logging.error(
            "[dump {} - plugin {}] generic error".format(dump_obj.pk, plugin_obj.pk)
        )

    try:
        ctx = contexts.Context()
        constants.PARALLELISM = constants.Parallelism.Off
        framework.import_files(volatility3.plugins, True)
        automagics = automagic.available(ctx)
        plugin_list = framework.list_plugins()
        json_renderer = ReturnJsonRenderer

        plugin = plugin_list.get(plugin_obj.name)
        base_config_path = "plugins"
        file_name = os.path.abspath(dump_obj.upload.path)
        single_location = "file:" + pathname2url(file_name)
        ctx.config["automagic.LayerStacker.single_location"] = single_location
        automagics = automagic.choose_automagic(automagics, plugin)
        if ctx.config.get("automagic.LayerStacker.stackers", None) is None:
            ctx.config["automagic.LayerStacker.stackers"] = (
                stacker.choose_os_stackers(plugin)
            )

        # LOCAL DUMPS REQUIRES FILES
        local_dump = plugin_obj.local_dump

        # ADD PARAMETERS, AND IF LOCAL DUMP ENABLE ADD DUMP TRUE BY DEFAULT
        plugin_config_path = interfaces.configuration.path_join(
            base_config_path, plugin.__name__
        )
        if params:
            # ADD PARAMETERS TO PLUGIN CONF
            for k, v in params.items():
                if v != "":
                    extended_path = interfaces.configuration.path_join(
                        plugin_config_path, k
                    )
                    ctx.config[extended_path] = v
                if k == "dump" and v:
                    # IF DUMP TRUE HAS BEEN PASS IT'LL DUMP LOCALLY
                    local_dump = True

        if not params and local_dump:
            # IF ADMIN SET LOCAL DUMP ADD DUMP TRUE AS PARAMETER
            extended_path = interfaces.configuration.path_join(
                plugin_config_path, "dump"
            )
            ctx.config[extended_path] = True

        logging.debug(
            "[dump {} - plugin {}] params: {}".format(
                dump_obj.pk, plugin_obj.pk, ctx.config
            )
        )

        file_list = []
        if local_dump:
            # IF PARAM/ADMIN DUMP CREATE FILECONSUMER
            local_path = "{}/{}/{}".format(
                settings.MEDIA_ROOT, dump_obj.index, plugin_obj.name
            )
            # exist_ok avoids the check-then-create race between tasks.
            os.makedirs(local_path, exist_ok=True)
        else:
            local_path = None
        file_handler = file_handler_class_factory(
            output_dir=local_path, file_list=file_list
        )

        # #####################
        # ## YARA
        # if not file or rule selected and exists default use that
        if plugin_obj.name in [
            "yarascan.YaraScan",
            "windows.vadyarascan.VadYaraScan",
        ]:
            rule_keys = ("yara_file", "yara_compiled_file", "yara_rules")
            has_file = any(
                k in rule_keys and v is not None and v != ""
                for k, v in (params or {}).items()
            )

            if not has_file:
                # NOTE(review): with Django's default manager, get() raises
                # when no default rule exists, which would land in the outer
                # handler below - confirm this is the intended behaviour.
                rule = CustomRule.objects.get(user__pk=user_pk, default=True)
                if rule:
                    extended_path = interfaces.configuration.path_join(
                        plugin_config_path, "yara_compiled_file"
                    )
                    ctx.config[extended_path] = "file:{}".format(rule.path)

            logging.error(
                "[dump {} - plugin {}] params: {}".format(
                    dump_obj.pk, plugin_obj.pk, ctx.config
                )
            )

        try:
            # RUN PLUGIN
            constructed = plugins.construct_plugin(
                ctx,
                automagics,
                plugin,
                base_config_path,
                MuteProgress(),
                file_handler,
            )
        except exceptions.UnsatisfiedException as excp:
            # LOG UNSATISFIED ERROR
            result = _store_result(
                3,
                "\n".join(
                    [
                        excp.unsatisfied[config_path].description
                        for config_path in excp.unsatisfied
                    ]
                ),
            )
            send_to_ws(dump_obj, result, plugin_obj.name)
            logging.error(
                "[dump {} - plugin {}] unsatisfied".format(
                    dump_obj.pk, plugin_obj.pk
                )
            )
            return 0

        try:
            runned_plugin = constructed.run()
        except Exception as excp:
            # LOG GENERIC ERROR [VOLATILITY]
            _store_failure(excp)
            return 0

        # RENDER OUTPUT IN JSON AND PUT IT IN ELASTIC
        json_data, error = json_renderer().render(runned_plugin)

        logging.debug("DATA: {}".format(json_data))
        logging.debug("ERROR: {}".format(error))
        logging.debug("CONFIG: {}".format(ctx.config))

        if len(json_data) > 0:
            # IF DUMP STORE FILE ON DISK
            if local_dump and file_list:
                # One output path per dumped file, reused by every step below.
                file_paths = [
                    "{}/{}".format(local_path, file_id.preferred_filename)
                    for file_id in file_list
                ]
                for file_id, output_path in zip(file_list, file_paths):
                    with open(output_path, "wb") as f:
                        f.write(file_id.getvalue())

                # RUN CLAMAV ON ALL FOLDER
                if plugin_obj.clamav_check:
                    cd = pyclamd.ClamdUnixSocket()
                    match = cd.multiscan_file(local_path)
                    match = {} if not match else match
                else:
                    match = {}

                result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)

                # BULK CREATE EXTRACTED DUMP FOR EACH DUMPED FILE
                extracted_dumps = []
                for output_path in file_paths:
                    # Hash each file once; item 0 feeds sha256, item 1 md5.
                    checksums = hash_checksum(output_path)
                    extracted_dumps.append(
                        ExtractedDump(
                            result=result,
                            path=output_path,
                            sha256=checksums[0],
                            md5=checksums[1],
                            clamav=(
                                match[output_path][1]
                                if output_path in match
                                else None
                            ),
                        )
                    )
                ExtractedDump.objects.bulk_create(extracted_dumps)

                # RUN VT AND REGIPY AS DASK SUBTASKS
                if plugin_obj.vt_check or plugin_obj.regipy_check:
                    dask_client = get_client()
                    # Leave the worker's thread pool while waiting.
                    secede()
                    tasks = []
                    for output_path in file_paths:
                        if plugin_obj.vt_check:
                            tasks.append(
                                dask_client.submit(run_vt, result.pk, output_path)
                            )
                        if plugin_obj.regipy_check:
                            tasks.append(
                                dask_client.submit(
                                    run_regipy, result.pk, output_path
                                )
                            )
                    dask_client.gather(tasks)
                    rejoin()

            index_name = "{}_{}".format(dump_obj.index, plugin_obj.name.lower())
            es = Elasticsearch(
                [settings.ELASTICSEARCH_URL],
                request_timeout=60,
                timeout=60,
                max_retries=10,
                retry_on_timeout=True,
            )
            helpers.bulk(
                es,
                gendata(
                    index_name,
                    json_data,
                    {
                        "orochi_dump": dump_obj.name,
                        "orochi_plugin": plugin_obj.name.lower(),
                        "orochi_os": dump_obj.get_operating_system_display(),
                        "orochi_createdAt": datetime.datetime.now()
                        .replace(microsecond=0)
                        .isoformat(),
                    },
                ),
            )
            # set max_windows_size on new created index
            es.indices.put_settings(
                index=index_name,
                body={
                    "index": {
                        "max_result_window": settings.MAX_ELASTIC_WINDOWS_SIZE
                    }
                },
            )

            # EVERYTHING OK
            result = _store_result(2, error)
            logging.debug(
                "[dump {} - plugin {}] sent to elastic".format(
                    dump_obj.pk, plugin_obj.pk
                )
            )
        else:
            # OK BUT EMPTY
            result = _store_result(1, error)
            logging.debug(
                "[dump {} - plugin {}] empty".format(dump_obj.pk, plugin_obj.pk)
            )
        send_to_ws(dump_obj, result, plugin_obj.name)
        return 0

    except Exception as excp:
        # LOG GENERIC ERROR [ELASTIC]
        _store_failure(excp)
        return 0
def unzip_then_run(dump_pk, user_pk):
    """Unpack an uploaded dump if it is zipped, then run its plugins via Dask.

    A zip archive is accepted when it holds exactly one file, or when it
    holds at least one ``.vmem`` file (matched case-insensitively). Any
    other archive marks the dump with status 4 and stops processing.

    For Linux/Mac dumps the ``banners.Banners`` plugin is run first and the
    guessed banner stored. If ``check_runnable`` then accepts the dump,
    every plugin whose result is not 5 is run as a Dask sub-task; otherwise
    every plugin result is set to 5 and a websocket message is sent.

    Args:
        dump_pk: primary key of the ``Dump`` to process.
        user_pk: forwarded to ``run_plugin`` for each submitted task.

    Returns:
        None.
    """
    dump = Dump.objects.get(pk=dump_pk)
    logging.debug("[dump {}] Processing".format(dump_pk))

    upload_path = dump.upload.path
    newpath = upload_path

    # Unzip file if zipped
    if is_zipfile(upload_path):
        with ZipFile(upload_path, "r") as zip_obj:
            members = zip_obj.namelist()
            extract_path = pathlib.Path(upload_path).parent
            # Detection and selection share one case-insensitive test. The
            # selection used to be case-sensitive, so "X.VMEM" passed
            # detection but left newpath pointing at the zip itself.
            vmem_members = [m for m in members if m.lower().endswith(".vmem")]

            # zip must contain one file with a memory dump
            if len(members) == 1:
                newpath = zip_obj.extract(members[0], extract_path)
            # or a vmem + vmss + vmsn
            elif vmem_members:
                zip_obj.extractall(extract_path)
                # Last match wins, as before.
                newpath = os.path.join(extract_path, vmem_members[-1])
            else:
                # zip is invalid
                logging.error("[dump {}] Invalid zipped dump data".format(dump_pk))
                dump.status = 4
                dump.save()
                return

    dump.upload.name = newpath
    dump.save()

    banner = False

    # check symbols using banners
    if dump.operating_system in ("Linux", "Mac"):
        # results already exists because all plugin results are created when dump is created
        banner = dump.result_set.get(plugin__name="banners.Banners")
        if banner:
            banner.result = 0
            banner.save()
            run_plugin(dump, banner.plugin)
            time.sleep(1)
            banner_result = get_banner(banner)
            dump.banner = banner_result
            logging.error(
                "[dump {}] guessed banner '{}'".format(dump_pk, banner_result)
            )
            dump.save()

    if check_runnable(dump.pk, dump.operating_system, dump.banner):
        dask_client = get_client()
        # Leave the worker's thread pool while waiting on the sub-tasks.
        secede()
        tasks_list = (
            dump.result_set.all()
            if dump.operating_system != "Linux"
            else dump.result_set.exclude(plugin__name="banners.Banners")
        )
        tasks = [
            dask_client.submit(run_plugin, dump, result.plugin, None, user_pk)
            for result in tasks_list
            if result.result != 5
        ]
        dask_client.gather(tasks)
        logging.debug("[dump {}] tasks submitted".format(dump_pk))
        rejoin()
        dump.status = 2
        dump.save()
        logging.debug("[dump {}] processing terminated".format(dump_pk))
    else:
        # This takes time so we do this one time only
        if dump.banner:
            dump.suggested_symbols_path = get_path_from_banner(dump.banner)
        dump.missing_symbols = True
        dump.status = 2
        dump.save()
        logging.error(
            "[dump {}] symbols non available. Disabling all plugins".format(dump_pk)
        )
        tasks_list = (
            dump.result_set.all()
            if dump.operating_system != "Linux"
            else dump.result_set.exclude(plugin__name="banners.Banners")
        )
        for result in tasks_list:
            result.result = 5
            result.save()
        send_to_ws(dump, message="Missing symbols all plugin are disabled", color=4)
def _compute_long_running_tasks(self, neoantigen, patient, sequential=True):
    """Run the external predictors for one neoantigen/patient pair.

    NetMHCpan runs when the patient has MHC-I data and NetMHCIIpan when the
    patient has MHC-II data. MixMHCpred, MixMHC2pred and PRIME run only for
    Homo sapiens, when the matching tool is configured and the matching MHC
    data is present. A predictor that does not run yields ``None``.

    Args:
        neoantigen: the neoantigen handed to every predictor.
        patient: the patient; ``mhc1`` / ``mhc2`` decide which tools run.
        sequential: when true, call the predictors one after another in
            this thread; otherwise submit them to the Dask client and wait
            for the results with this thread seceded from the worker pool.

    Returns:
        Tuple ``(mixmhc2pred_annotations, mixmhcpred_annotations,
        netmhc2pan, netmhcpan, prime_annotations)``.
    """
    has_mhc1 = patient.mhc1 is not None and len(patient.mhc1) > 0
    has_mhc2 = patient.mhc2 is not None and len(patient.mhc2) > 0

    # avoids running MixMHCpred and PRIME for non human organisms
    is_human = self.organism == ORGANISM_HOMO_SAPIENS
    use_mixmhc2pred = (
        is_human and self.configuration.mix_mhc2_pred is not None and has_mhc2
    )
    use_mixmhcpred = (
        is_human and self.configuration.mix_mhc_pred is not None and has_mhc1
    )
    # PRIME is gated by exactly the same condition as MixMHCpred.
    use_prime = use_mixmhcpred

    # One argument tuple per predictor family, shared by both execution
    # modes so they cannot drift apart. The parallel NetMHCpan submission
    # used to pass an extra ``self.references`` argument that the
    # sequential call (and every other predictor call) does not take.
    netmhc_args = (
        self.runner,
        self.configuration,
        self.available_alleles,
        self.mhc_parser,
        neoantigen,
        patient,
    )
    mixmhc_args = (
        self.runner,
        self.configuration,
        self.mhc_parser,
        neoantigen,
        patient,
    )

    netmhcpan = None
    netmhc2pan = None
    mixmhcpred_annotations = None
    mixmhc2pred_annotations = None
    prime_annotations = None

    if sequential:
        if has_mhc1:
            netmhcpan = self.run_netmhcpan(*netmhc_args)
        if has_mhc2:
            netmhc2pan = self.run_netmhc2pan(*netmhc_args)
        if use_mixmhc2pred:
            mixmhc2pred_annotations = self.run_mixmhc2pred(*mixmhc_args)
        if use_mixmhcpred:
            mixmhcpred_annotations = self.run_mixmhcpred(*mixmhc_args)
        if use_prime:
            prime_annotations = self.run_prime(*mixmhc_args)
    else:
        dask_client = get_client()

        def submit_if(enabled, func, args):
            # Submit the predictor only when enabled; None marks "not run".
            return dask_client.submit(func, *args) if enabled else None

        def resolve(future):
            # Wait for one future; a predictor that was not run stays None.
            if future is None:
                return None
            return dask_client.gather([future])[0]

        netmhcpan_future = submit_if(has_mhc1, self.run_netmhcpan, netmhc_args)
        netmhc2pan_future = submit_if(has_mhc2, self.run_netmhc2pan, netmhc_args)
        mixmhc2pred_future = submit_if(
            use_mixmhc2pred, self.run_mixmhc2pred, mixmhc_args
        )
        mixmhcpred_future = submit_if(
            use_mixmhcpred, self.run_mixmhcpred, mixmhc_args
        )
        prime_future = submit_if(use_prime, self.run_prime, mixmhc_args)

        # Leave the worker's thread pool while blocked on the sub-tasks.
        secede()
        netmhcpan = resolve(netmhcpan_future)
        netmhc2pan = resolve(netmhc2pan_future)
        mixmhcpred_annotations = resolve(mixmhcpred_future)
        mixmhc2pred_annotations = resolve(mixmhc2pred_future)
        prime_annotations = resolve(prime_future)
        rejoin()

    return (
        mixmhc2pred_annotations,
        mixmhcpred_annotations,
        netmhc2pan,
        netmhcpan,
        prime_annotations,
    )
def parse_identifiers_task(job_id: str, physical_data_source: str, table_name: str):
    """Find the identifier columns of one table and persist the job outcome.

    The job is stored once when work starts and once more when it ends,
    with status 'COMPLETED' (and the identifiers found) or 'FAILED'. Any
    exception is re-raised after the status has been recorded.
    """
    # Estimated maximum / pessimistic processing time of a single table is around 10 minutes,
    # therefore we need to ensure that the semaphore isn't automatically released (via timeout) if the task is idle.
    # See: https://docs.dask.org/en/latest/configuration-reference.html#distributed.scheduler.locks.lease-timeout
    lease_timeout = parse_timedelta(
        dask.config.get('distributed.scheduler.locks.lease-timeout', default='0s'))
    assert lease_timeout >= 600
    assert dask.config.get('distributed.comm.retry.count', default=0) > 0

    # detach the task from worker's thread-pool as we're mostly just waiting for Dremio to return the sampled dataset,
    # also we don't want to take up "scheduling slots" from other (compute-heavy) tasks as this might lead to deadlocks.
    secede()

    started_at = time.perf_counter()
    parser_job = ParserJob(job_id=job_id)
    try:
        ParserJobStore.store(parser_job)
        params = ParameterStore().get_config()

        columns = list(
            _extract_explorable_columns(physical_data_source, table_name, params))

        record_column_count = Measure.histogram(
            'idparser_num_columns_per_table',
            tags={
                'physical_data_source': physical_data_source,
                'table_name': table_name
            },
        )
        record_column_count(len(columns))

        if len(columns) > params.max_allowed_dimensions:
            strategy = params.column_overflow_strategy
            if strategy == ColumnOverflowStrategy.RAISE_EXCEPTION:
                raise TooManyColumns(physical_data_source, table_name, columns)
            if strategy != ColumnOverflowStrategy.SLICE_COLUMNS:
                raise UnknownStrategy(params.column_overflow_strategy,
                                      physical_data_source, table_name,
                                      columns)
            logger.warning(
                f'Source {physical_data_source}.{table_name} has too many columns. '
                f'Slicing first {params.max_allowed_dimensions}')
            columns = columns[:params.max_allowed_dimensions]

        # trivial case: zero or one column needs no search
        if len(columns) > 1:
            identifiers = _find_identifiers(physical_data_source, table_name,
                                            columns, params)
        else:
            identifiers = set(columns)

        parser_job.identifiers = identifiers
        parser_job.status = 'COMPLETED'
        logger.info(
            f'Success! Finished running the identifier script on {physical_data_source}.{table_name} '
            f'in {time.perf_counter() - started_at:0.3f} (identifiers: {identifiers})'
        )
    except Exception as exc:
        parser_job.status = 'FAILED'
        raise exc  # Re-raise exception so it's handled and reported via our Dask plugin
    finally:
        ParserJobStore.store(parser_job)
        # reuse the existing thread by returning it to the worker's thread-pool
        rejoin()
def run_plugin(dump_obj, plugin_obj, es_url, params=None):
    """
    Execute a single plugin on a dump with optional params.
    If success data are sent to elastic.

    The outcome is stored on the matching ``Result`` row:
    1 = ran but produced no data, 2 = data indexed, 3 = unsatisfied
    requirements, 4 = error (description holds the traceback).

    Args:
        dump_obj: the dump to analyse; its upload path is the input file.
        plugin_obj: the plugin record (name, local_dump and check flags).
        es_url: Elasticsearch URL the rendered data is indexed into.
        params: optional mapping of plugin option name to value.

    Returns:
        Always 0; failures are reported through the ``Result`` row.
    """

    def _store_result(code, description):
        # Reload the row each time, then persist the outcome and return it.
        result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
        result.result = code
        result.description = description
        result.save()
        return result

    def _store_failure(excp):
        # Record a generic error (code 4) with the full chained traceback.
        fulltrace = traceback.TracebackException.from_exception(excp).format(
            chain=True
        )
        result = _store_result(4, "\n".join(fulltrace))
        send_to_ws(dump_obj, result, plugin_obj.name)

    try:
        ctx = contexts.Context()
        constants.PARALLELISM = constants.Parallelism.Off
        framework.import_files(volatility.plugins, True)
        automagics = automagic.available(ctx)
        plugin_list = framework.list_plugins()
        json_renderer = ReturnJsonRenderer

        plugin = plugin_list.get(plugin_obj.name)
        base_config_path = "plugins"
        file_name = os.path.abspath(dump_obj.upload.path)
        single_location = "file:" + pathname2url(file_name)
        ctx.config["automagic.LayerStacker.single_location"] = single_location
        automagics = automagic.choose_automagic(automagics, plugin)

        # LOCAL DUMPS REQUIRES FILES
        local_dump = plugin_obj.local_dump

        # ADD PARAMETERS, AND IF LOCAL DUMP ENABLE ADD DUMP TRUE BY DEFAULT
        plugin_config_path = interfaces.configuration.path_join(
            base_config_path, plugin.__name__
        )
        if params:
            # ADD PARAMETERS TO PLUGIN CONF
            for k, v in params.items():
                extended_path = interfaces.configuration.path_join(
                    plugin_config_path, k
                )
                ctx.config[extended_path] = v
                # NOTE(review): equality with True (not truthiness) is kept
                # as-is, so e.g. a non-empty string does not enable dumping
                # - confirm this is intended.
                if k == "dump" and v == True:  # noqa: E712
                    # IF DUMP TRUE HAS BEEN PASS IT'LL DUMP LOCALLY
                    local_dump = True

        if not params and local_dump:
            # IF ADMIN SET LOCAL DUMP ADD DUMP TRUE AS PARAMETER
            extended_path = interfaces.configuration.path_join(
                plugin_config_path, "dump"
            )
            ctx.config[extended_path] = True

        if local_dump:
            # IF PARAM/ADMIN DUMP CREATE FILECONSUMER
            consumer = FileConsumer()
            local_path = "{}/{}/{}".format(
                settings.MEDIA_ROOT, dump_obj.index, plugin_obj.name
            )
            # exist_ok avoids the check-then-create race between tasks.
            os.makedirs(local_path, exist_ok=True)
        else:
            consumer = None

        try:
            # RUN PLUGIN
            constructed = plugins.construct_plugin(
                ctx, automagics, plugin, base_config_path, MuteProgress(), consumer
            )
        except exceptions.UnsatisfiedException as excp:
            # LOG UNSATISFIED ERROR
            result = _store_result(
                3,
                "\n".join(
                    [
                        excp.unsatisfied[config_path].description
                        for config_path in excp.unsatisfied
                    ]
                ),
            )
            send_to_ws(dump_obj, result, plugin_obj.name)
            return 0

        try:
            runned_plugin = constructed.run()
        except Exception as excp:
            # LOG GENERIC ERROR [VOLATILITY]
            _store_failure(excp)
            return 0

        # RENDER OUTPUT IN JSON AND PUT IT IN ELASTIC
        json_data, error = json_renderer().render(runned_plugin)

        if len(json_data) > 0:
            # IF DUMP STORE FILE ON DISK
            if consumer and consumer.files:
                # One output path per dumped file, reused by every step below.
                file_paths = [
                    "{}/{}".format(local_path, filedata.preferred_filename)
                    for filedata in consumer.files
                ]
                for filedata, output_path in zip(consumer.files, file_paths):
                    with open(output_path, "wb") as f:
                        f.write(filedata.data.getvalue())

                # RUN CLAMAV ON ALL FOLDER
                if plugin_obj.clamav_check:
                    cd = pyclamd.ClamdUnixSocket()
                    match = cd.multiscan_file(local_path)
                    match = {} if not match else match
                else:
                    match = {}

                result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)

                # BULK CREATE EXTRACTED DUMP FOR EACH DUMPED FILE
                ExtractedDump.objects.bulk_create(
                    [
                        ExtractedDump(
                            result=result,
                            path=output_path,
                            sha256=sha256_checksum(output_path),
                            clamav=(
                                match[output_path][1]
                                if output_path in match
                                else None
                            ),
                        )
                        for output_path in file_paths
                    ]
                )

                # RUN VT AND REGIPY AS DASK SUBTASKS
                if plugin_obj.vt_check or plugin_obj.regipy_check:
                    dask_client = get_client()
                    # Leave the worker's thread pool while waiting.
                    secede()
                    tasks = []
                    for output_path in file_paths:
                        # Each enabled check gets its own task. Previously a
                        # single task was submitted per file, so regipy was
                        # skipped whenever the VT check was also enabled.
                        if plugin_obj.vt_check:
                            tasks.append(
                                dask_client.submit(run_vt, result.pk, output_path)
                            )
                        if plugin_obj.regipy_check:
                            tasks.append(
                                dask_client.submit(
                                    run_regipy, result.pk, output_path
                                )
                            )
                    dask_client.gather(tasks)
                    rejoin()

            es = Elasticsearch(
                [es_url],
                request_timeout=60,
                timeout=60,
                max_retries=10,
                retry_on_timeout=True,
            )
            helpers.bulk(
                es,
                gendata(
                    "{}_{}".format(dump_obj.index, plugin_obj.name.lower()),
                    plugin_obj.name,
                    json_data,
                ),
            )

            # EVERYTHING OK
            result = _store_result(2, error)
        else:
            # OK BUT EMPTY
            result = _store_result(1, error)
        send_to_ws(dump_obj, result, plugin_obj.name)
        return 0

    except Exception as excp:
        # LOG GENERIC ERROR [ELASTIC]
        _store_failure(excp)
        return 0