Exemplos de rejoin em Python, exemplos de distributed.rejoin em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: _dask.py Projeto: joblib/joblib

    def retrieval_context(self):
        """Override ParallelBackendBase.retrieval_context to avoid deadlocks.

        This removes thread from the worker's thread pool (using 'secede').
        Seceding avoids deadlock in nested parallelism settings.

        The thread is handed back to the pool (using 'rejoin') once control
        returns from the ``yield``, whether the surrounding code finished
        normally or raised.
        NOTE(review): presumably wrapped by ``contextlib.contextmanager``;
        the decorator is not visible in this excerpt.
        """
        # See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how
        # this is used.
        if hasattr(thread_state, 'execution_state'):
            # we are in a worker. Secede to avoid deadlock.
            secede()

        try:
            yield
        finally:
            # Rejoin even when an exception is thrown in at the yield point;
            # otherwise the seceded thread would never return to the pool.
            if hasattr(thread_state, 'execution_state'):
                rejoin()

Exemplo n.º 2

0

Exibir arquivo

    def retrieval_context(self):
        """Override ParallelBackendBase.retrieval_context to avoid deadlocks.

        This removes thread from the worker's thread pool (using 'secede').
        Seceding avoids deadlock in nested parallelism settings.

        The thread is handed back to the pool (using 'rejoin') once control
        returns from the ``yield``, whether the surrounding code finished
        normally or raised.
        NOTE(review): presumably wrapped by ``contextlib.contextmanager``;
        the decorator is not visible in this excerpt.
        """
        # See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how
        # this is used.
        if hasattr(thread_state, 'execution_state'):
            # we are in a worker. Secede to avoid deadlock.
            secede()

        try:
            yield
        finally:
            # Rejoin even when an exception is thrown in at the yield point;
            # otherwise the seceded thread would never return to the pool.
            if hasattr(thread_state, 'execution_state'):
                rejoin()

Exemplo n.º 3

0

Exibir arquivo

Arquivo: volatility_dask_elk.py Projeto: N4rr34n6/orochi

def unzip_then_run(dump_pk, user_pk, es_url):
    """Unpack an uploaded dump if it is zipped, then run its plugins via Dask.

    Args:
        dump_pk: primary key of the ``Dump`` row to process.
        user_pk: not used by this function.
        es_url: forwarded unchanged to every ``run_plugin`` task.

    Returns:
        None. The dump's ``upload.name`` and ``status`` are saved as a side
        effect: status 4 when the zip layout is not accepted, status 2 once
        all submitted tasks have been gathered.
    """
    dump = Dump.objects.get(pk=dump_pk)

    # Unzip the upload when it is a zip archive
    if is_zipfile(dump.upload.path):
        with ZipFile(dump.upload.path, "r") as zipObj:
            objs = zipObj.namelist()
            extract_path = pathlib.Path(dump.upload.path).parent

            # Match the extension case-insensitively in ONE place, so that the
            # "does the archive hold a .vmem" check and the member selection
            # cannot disagree (an upper-case ".VMEM" used to pass the check
            # but leave ``newpath`` unbound).
            vmem_members = [x for x in objs if x.lower().endswith(".vmem")]

            # zip must contain one file with a memory dump
            if len(objs) == 1:
                newpath = zipObj.extract(objs[0], extract_path)

            # or a vmem + vmss + vmsn
            elif vmem_members:
                zipObj.extractall(extract_path)
                # keep the last match, as the original loop did
                newpath = os.path.join(extract_path, vmem_members[-1])

            else:
                # zip is invalid
                dump.status = 4
                dump.save()
                return
    else:
        newpath = dump.upload.path

    dump.upload.name = newpath
    dump.save()

    dask_client = get_client()
    # Leave the worker's thread pool while waiting on the sub-tasks.
    secede()
    try:
        # Results with value 5 are skipped
        # (NOTE(review): 5 presumably means "disabled" - confirm).
        tasks = [
            dask_client.submit(run_plugin, dump, result.plugin, es_url)
            for result in dump.result_set.all()
            if result.result != 5
        ]
        dask_client.gather(tasks)
    finally:
        # Always return the thread to the pool, even if a task failed.
        rejoin()
    dump.status = 2
    dump.save()

Exemplo n.º 4

0

Exibir arquivo

def unzip_then_run(dump_pk, user_pk, password):
    """Move, unpack and fingerprint an uploaded dump, then run its plugins.

    Steps, in order:
      1. Move the upload into ``<MEDIA_ROOT>/<dump.index>``.
      2. If its MIME type is one of the listed archive types, extract it with
         ``7z`` (using ``password`` when given) and pick the dump file.
      3. Store path, size and checksums on the dump.
      4. For Linux/Mac dumps, run the banners plugin first and store the
         banner on the dump.
      5. If ``check_runnable`` accepts the dump, fan the remaining plugins out
         as Dask tasks; otherwise mark every plugin result with 5 and notify
         through ``send_to_ws``.

    Args:
        dump_pk: primary key of the ``Dump`` row to process.
        user_pk: forwarded to each ``run_plugin`` task.
        password: archive password; falsy means "no password".

    Returns:
        None. All outcomes are persisted on the dump / result rows.
    """
    dump = Dump.objects.get(pk=dump_pk)
    logging.debug("[dump {}] Processing".format(dump_pk))

    # COPY EACH FILE IN THEIR FOLDER BEFORE UNZIP/RUN PLUGIN
    extract_path = f"{settings.MEDIA_ROOT}/{dump.index}"
    filepath = shutil.move(dump.upload.path, extract_path)

    filetype = magic.from_file(filepath, mime=True)
    if filetype in [
            "application/zip",
            "application/x-7z-compressed",
            "application/x-rar",
            "application/gzip",
            "application/x-tar",
    ]:
        # Build the argument list once; the password switch is the only
        # difference between the two invocations.
        command = ["7z", "e", f"{filepath}", f"-o{extract_path}"]
        if password:
            command.append(f"-p{password}")
        command.append("-y")
        subprocess.call(command)

        os.unlink(filepath)
        extracted_files = [
            str(x) for x in Path(extract_path).glob("**/*") if x.is_file()
        ]
        newpath = None
        if len(extracted_files) == 1:
            newpath = extracted_files[0]
        elif len(extracted_files) > 1:
            for x in extracted_files:
                if x.lower().endswith(".vmem"):
                    # Entries already start with extract_path (they come from
                    # the glob above), so use them as-is instead of joining
                    # the prefix a second time; this also keeps ``newpath`` a
                    # str like in every other branch.
                    newpath = x
        if not newpath:
            # archive is invalid
            logging.error(
                "[dump {}] Invalid archive dump data".format(dump_pk))
            dump.status = 4
            dump.save()
            return
    else:
        newpath = filepath

    dump.upload.name = newpath
    dump.size = os.path.getsize(newpath)
    sha256, md5 = hash_checksum(newpath)
    dump.sha256 = sha256
    dump.md5 = md5
    dump.save()
    banner = False

    # check symbols using banners
    if dump.operating_system in ("Linux", "Mac"):
        # results already exists because all plugin results are created when dump is created
        banner = dump.result_set.get(plugin__name="banners.Banners")
        if banner:
            banner.result = 0
            banner.save()
            run_plugin(dump, banner.plugin)
            time.sleep(1)
            banner_result = get_banner(banner)
            dump.banner = banner_result.strip("\"'")
            logging.error("[dump {}] guessed banner '{}'".format(
                dump_pk, dump.banner))
            dump.save()

    if check_runnable(dump.pk, dump.operating_system, dump.banner):
        dask_client = get_client()
        # Leave the worker's thread pool while waiting on the sub-tasks.
        secede()
        try:
            tasks = []
            # On Linux the banners plugin is excluded from the fan-out.
            tasks_list = (dump.result_set.all()
                          if dump.operating_system != "Linux" else
                          dump.result_set.exclude(
                              plugin__name="banners.Banners"))
            for result in tasks_list:
                if result.result != 5:
                    task = dask_client.submit(run_plugin, dump, result.plugin,
                                              None, user_pk)
                    tasks.append(task)
            dask_client.gather(tasks)
            logging.debug("[dump {}] tasks submitted".format(dump_pk))
        finally:
            # Always return the thread to the pool, even if a task failed.
            rejoin()
        dump.status = 2
        dump.save()
        logging.debug("[dump {}] processing terminated".format(dump_pk))
    else:
        # This takes time so we do this one time only
        if dump.banner:
            dump.suggested_symbols_path = get_path_from_banner(dump.banner)
        dump.missing_symbols = True
        dump.status = 2
        dump.save()
        logging.error(
            "[dump {}] symbols non available. Disabling all plugins".format(
                dump_pk))
        tasks_list = (dump.result_set.all() if dump.operating_system != "Linux"
                      else dump.result_set.exclude(
                          plugin__name="banners.Banners"))
        for result in tasks_list:
            result.result = 5
            result.save()
        send_to_ws(dump,
                   message="Missing symbols all plugin are disabled",
                   color=4)

Exemplo n.º 5

0

Exibir arquivo

def run_plugin(dump_obj, plugin_obj, params=None, user_pk=None):
    """
    Execute a single plugin on a dump with optional params.
    If success data are sent to elastic.

    Args:
        dump_obj: dump to analyse; ``pk``, ``upload.path``, ``index``,
            ``name`` and ``get_operating_system_display()`` are read.
        plugin_obj: plugin to run; ``pk``, ``name``, ``local_dump``,
            ``clamav_check``, ``vt_check`` and ``regipy_check`` are read.
        params: optional mapping of plugin option name to value. Empty-string
            values are not written to the plugin configuration.
        user_pk: used only to look up the user's default ``CustomRule`` for
            the YARA plugins.

    Returns:
        Always 0. The outcome is stored on the matching ``Result`` row
        (``result`` = 1 empty output, 2 data sent to elastic, 3 unsatisfied
        requirements, 4 error) and pushed through ``send_to_ws``.
    """
    logging.info("[dump {} - plugin {}] start".format(dump_obj.pk,
                                                      plugin_obj.pk))
    try:
        ctx = contexts.Context()
        constants.PARALLELISM = constants.Parallelism.Off
        _ = framework.import_files(volatility3.plugins, True)
        automagics = automagic.available(ctx)
        plugin_list = framework.list_plugins()
        json_renderer = ReturnJsonRenderer
        # NOTE(review): this loop only fills ``seen_automagics``, which is not
        # read again anywhere in this function.
        seen_automagics = set()
        for amagic in automagics:
            if amagic in seen_automagics:
                continue
            seen_automagics.add(amagic)
        plugin = plugin_list.get(plugin_obj.name)
        base_config_path = "plugins"
        # Point the layer stacker at the dump file through a file: URL.
        file_name = os.path.abspath(dump_obj.upload.path)
        single_location = "file:" + pathname2url(file_name)
        ctx.config["automagic.LayerStacker.single_location"] = single_location
        automagics = automagic.choose_automagic(automagics, plugin)
        # Only set the stackers when the configuration has none yet.
        if ctx.config.get("automagic.LayerStacker.stackers", None) is None:
            ctx.config[
                "automagic.LayerStacker.stackers"] = stacker.choose_os_stackers(
                    plugin)
        # LOCAL DUMPS REQUIRES FILES
        local_dump = plugin_obj.local_dump

        # ADD PARAMETERS, AND IF LOCAL DUMP ENABLE ADD DUMP TRUE BY DEFAULT
        plugin_config_path = interfaces.configuration.path_join(
            base_config_path, plugin.__name__)
        if params:
            # ADD PARAMETERS TO PLUGIN CONF
            for k, v in params.items():
                # empty strings are treated as "not provided"
                if v != "":
                    extended_path = interfaces.configuration.path_join(
                        plugin_config_path, k)
                    ctx.config[extended_path] = v

                if k == "dump" and v:
                    # IF DUMP TRUE HAS BEEN PASSED IT'LL DUMP LOCALLY
                    local_dump = True

        if not params and local_dump:
            # IF ADMIN SET LOCAL DUMP ADD DUMP TRUE AS PARAMETER
            extended_path = interfaces.configuration.path_join(
                plugin_config_path, "dump")
            ctx.config[extended_path] = True

        logging.debug("[dump {} - plugin {}] params: {}".format(
            dump_obj.pk, plugin_obj.pk, ctx.config))

        # The same list object is handed to the file handler factory and read
        # back after the plugin has run.
        file_list = []
        if local_dump:
            # IF PARAM/ADMIN DUMP CREATE FILECONSUMER
            local_path = "{}/{}/{}".format(settings.MEDIA_ROOT, dump_obj.index,
                                           plugin_obj.name)
            if not os.path.exists(local_path):
                os.mkdir(local_path)
            file_handler = file_handler_class_factory(output_dir=local_path,
                                                      file_list=file_list)
        else:
            local_path = None
            file_handler = file_handler_class_factory(output_dir=None,
                                                      file_list=file_list)

        # #####################
        # ## YARA
        # if no file or rule was selected and a default one exists, use that
        if plugin_obj.name in [
                "yarascan.YaraScan", "windows.vadyarascan.VadYaraScan"
        ]:
            if not params:
                has_file = False
            else:
                has_file = False
                # any non-empty yara option counts as "rule supplied"
                for k, v in params.items():
                    if k in ["yara_file", "yara_compiled_file", "yara_rules"]:
                        if v is not None and v != "":
                            has_file = True

            if not has_file:
                # NOTE(review): if this is Django's ``Manager.get`` it raises
                # when no row matches instead of returning None, in which case
                # the ``if rule`` guard below never sees a falsy value and the
                # outer ``except Exception`` records a generic error - confirm.
                rule = CustomRule.objects.get(user__pk=user_pk, default=True)
                if rule:
                    extended_path = interfaces.configuration.path_join(
                        plugin_config_path, "yara_compiled_file")
                    ctx.config[extended_path] = "file:{}".format(rule.path)

            # NOTE(review): logged at ERROR level although the message is the
            # same params dump that is logged at DEBUG level above.
            logging.error("[dump {} - plugin {}] params: {}".format(
                dump_obj.pk, plugin_obj.pk, ctx.config))

        try:
            # RUN PLUGIN
            constructed = plugins.construct_plugin(
                ctx,
                automagics,
                plugin,
                base_config_path,
                MuteProgress(),
                file_handler,
            )
        except exceptions.UnsatisfiedException as excp:
            # LOG UNSATISFIED ERROR
            result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
            result.result = 3
            # one line per unsatisfied requirement
            result.description = "\n".join([
                excp.unsatisfied[config_path].description
                for config_path in excp.unsatisfied
            ])
            result.save()
            send_to_ws(dump_obj, result, plugin_obj.name)

            logging.error("[dump {} - plugin {}] unsatisfied".format(
                dump_obj.pk, plugin_obj.pk))

            return 0
        try:
            runned_plugin = constructed.run()
        except Exception as excp:
            # LOG GENERIC ERROR [VOLATILITY]
            fulltrace = traceback.TracebackException.from_exception(
                excp).format(chain=True)
            result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
            result.result = 4
            # the formatted traceback is stored as the result description
            result.description = "\n".join(fulltrace)
            result.save()
            send_to_ws(dump_obj, result, plugin_obj.name)
            logging.error("[dump {} - plugin {}] generic error".format(
                dump_obj.pk, plugin_obj.pk))
            return 0

        # RENDER OUTPUT IN JSON AND PUT IT IN ELASTIC
        json_data, error = json_renderer().render(runned_plugin)

        logging.debug("DATA: {}".format(json_data))
        logging.debug("ERROR: {}".format(error))
        logging.debug("CONFIG: {}".format(ctx.config))

        if len(json_data) > 0:

            # IF DUMP STORE FILE ON DISK
            if local_dump and file_list:
                for file_id in file_list:
                    output_path = "{}/{}".format(local_path,
                                                 file_id.preferred_filename)
                    with open(output_path, "wb") as f:
                        f.write(file_id.getvalue())

                # RUN CLAMAV ON ALL FOLDER
                if plugin_obj.clamav_check:
                    cd = pyclamd.ClamdUnixSocket()
                    match = cd.multiscan_file(local_path)
                    # normalise a falsy scan result to an empty dict
                    match = {} if not match else match
                else:
                    match = {}

                result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)

                # BULK CREATE EXTRACTED DUMP FOR EACH DUMPED FILE
                # NOTE(review): hash_checksum is called twice per file (index
                # 0 for sha256, index 1 for md5); ``match`` is looked up by
                # the file's full path and element [1] of the entry is stored.
                ExtractedDump.objects.bulk_create([
                    ExtractedDump(
                        result=result,
                        path="{}/{}".format(local_path,
                                            file_id.preferred_filename),
                        sha256=hash_checksum("{}/{}".format(
                            local_path, file_id.preferred_filename))[0],
                        md5=hash_checksum("{}/{}".format(
                            local_path, file_id.preferred_filename))[1],
                        clamav=(match["{}/{}".format(
                            local_path,
                            file_id.preferred_filename,
                        )][1] if "{}/{}".format(local_path,
                                                file_id.preferred_filename)
                                in match.keys() else None),
                    ) for file_id in file_list
                ])

                # RUN VT AND REGIPY AS DASK SUBTASKS
                if plugin_obj.vt_check or plugin_obj.regipy_check:
                    dask_client = get_client()
                    # Leave the worker's thread pool while waiting on the
                    # sub-tasks.
                    # NOTE(review): rejoin() below is not in a ``finally``, so
                    # it is skipped when gather() raises.
                    secede()
                    tasks = []
                    for file_id in file_list:
                        if plugin_obj.vt_check:
                            task = dask_client.submit(
                                run_vt,
                                result.pk,
                                "{}/{}".format(local_path,
                                               file_id.preferred_filename),
                            )
                            tasks.append(task)
                        if plugin_obj.regipy_check:
                            task = dask_client.submit(
                                run_regipy,
                                result.pk,
                                "{}/{}".format(local_path,
                                               file_id.preferred_filename),
                            )
                            tasks.append(task)
                    _ = dask_client.gather(tasks)
                    rejoin()

            es = Elasticsearch(
                [settings.ELASTICSEARCH_URL],
                request_timeout=60,
                timeout=60,
                max_retries=10,
                retry_on_timeout=True,
            )
            # Index name is "<dump index>_<lower-cased plugin name>"; the dict
            # below is passed to gendata along with the rendered rows.
            helpers.bulk(
                es,
                gendata(
                    "{}_{}".format(dump_obj.index, plugin_obj.name.lower()),
                    json_data,
                    {
                        "orochi_dump":
                        dump_obj.name,
                        "orochi_plugin":
                        plugin_obj.name.lower(),
                        "orochi_os":
                        dump_obj.get_operating_system_display(),
                        "orochi_createdAt":
                        datetime.datetime.now().replace(
                            microsecond=0).isoformat(),
                    },
                ),
            )

            # set max_result_window on the newly created index
            es.indices.put_settings(
                index="{}_{}".format(dump_obj.index, plugin_obj.name.lower()),
                body={
                    "index": {
                        "max_result_window": settings.MAX_ELASTIC_WINDOWS_SIZE
                    }
                },
            )

            # EVERYTHING OK
            result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
            result.result = 2
            result.description = error
            result.save()

            logging.debug("[dump {} - plugin {}] sent to elastic".format(
                dump_obj.pk, plugin_obj.pk))
        else:
            # OK BUT EMPTY
            result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
            result.result = 1
            result.description = error
            result.save()

            logging.debug("[dump {} - plugin {}] empty".format(
                dump_obj.pk, plugin_obj.pk))
        send_to_ws(dump_obj, result, plugin_obj.name)
        return 0

    except Exception as excp:
        # LOG GENERIC ERROR [ELASTIC]
        # Catch-all for anything not handled above: record the traceback on
        # the Result row, notify, and still return 0.
        fulltrace = traceback.TracebackException.from_exception(excp).format(
            chain=True)
        result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
        result.result = 4
        result.description = "\n".join(fulltrace)
        result.save()
        send_to_ws(dump_obj, result, plugin_obj.name)
        logging.error("[dump {} - plugin {}] generic error".format(
            dump_obj.pk, plugin_obj.pk))
        return 0

Exemplo n.º 6

0

Exibir arquivo

def unzip_then_run(dump_pk, user_pk):
    """Unpack an uploaded dump if it is zipped, then run its plugins via Dask.

    For Linux/Mac dumps the banners plugin is run first and its output is
    stored as the dump's banner. If ``check_runnable`` accepts the dump, the
    remaining plugins are submitted as Dask tasks; otherwise every plugin
    result is set to 5 and a notification is sent through ``send_to_ws``.

    Args:
        dump_pk: primary key of the ``Dump`` row to process.
        user_pk: forwarded to each ``run_plugin`` task.

    Returns:
        None. All outcomes are persisted on the dump / result rows
        (dump status 4 for a rejected zip, 2 otherwise).
    """
    dump = Dump.objects.get(pk=dump_pk)
    logging.debug("[dump {}] Processing".format(dump_pk))

    newpath = dump.upload.path

    # Unzip the upload when it is a zip archive
    if is_zipfile(dump.upload.path):
        with ZipFile(dump.upload.path, "r") as zipObj:
            objs = zipObj.namelist()
            extract_path = pathlib.Path(dump.upload.path).parent

            # Match the extension case-insensitively in ONE place, so that the
            # "does the archive hold a .vmem" check and the member selection
            # cannot disagree (an upper-case ".VMEM" used to pass the check
            # and then silently leave ``newpath`` pointing at the zip itself).
            vmem_members = [x for x in objs if x.lower().endswith(".vmem")]

            # zip must contain one file with a memory dump
            if len(objs) == 1:
                newpath = zipObj.extract(objs[0], extract_path)

            # or a vmem + vmss + vmsn
            elif vmem_members:
                zipObj.extractall(extract_path)
                # keep the last match, as the original loop did
                newpath = os.path.join(extract_path, vmem_members[-1])

            else:
                # zip is invalid
                logging.error("[dump {}] Invalid zipped dump data".format(dump_pk))
                dump.status = 4
                dump.save()
                return

    dump.upload.name = newpath
    dump.save()
    banner = False

    # check symbols using banners
    if dump.operating_system in ("Linux", "Mac"):
        # results already exist because all plugin results are created when dump is created
        banner = dump.result_set.get(plugin__name="banners.Banners")
        if banner:
            banner.result = 0
            banner.save()
            run_plugin(dump, banner.plugin)
            time.sleep(1)
            banner_result = get_banner(banner)
            dump.banner = banner_result
            logging.error(
                "[dump {}] guessed banner '{}'".format(dump_pk, banner_result)
            )
            dump.save()

    if check_runnable(dump.pk, dump.operating_system, dump.banner):
        dask_client = get_client()
        # Leave the worker's thread pool while waiting on the sub-tasks.
        secede()
        try:
            # On Linux the banners plugin is excluded from the fan-out.
            tasks_list = (
                dump.result_set.all()
                if dump.operating_system != "Linux"
                else dump.result_set.exclude(plugin__name="banners.Banners")
            )
            tasks = [
                dask_client.submit(run_plugin, dump, result.plugin, None, user_pk)
                for result in tasks_list
                if result.result != 5
            ]
            dask_client.gather(tasks)
            logging.debug("[dump {}] tasks submitted".format(dump_pk))
        finally:
            # Always return the thread to the pool, even if a task failed.
            rejoin()
        dump.status = 2
        dump.save()
        logging.debug("[dump {}] processing terminated".format(dump_pk))
    else:
        # This takes time so we do this one time only
        if dump.banner:
            dump.suggested_symbols_path = get_path_from_banner(dump.banner)
        dump.missing_symbols = True
        dump.status = 2
        dump.save()
        logging.error(
            "[dump {}] symbols non available. Disabling all plugins".format(dump_pk)
        )
        tasks_list = (
            dump.result_set.all()
            if dump.operating_system != "Linux"
            else dump.result_set.exclude(plugin__name="banners.Banners")
        )
        for result in tasks_list:
            result.result = 5
            result.save()
        send_to_ws(dump, message="Missing symbols all plugin are disabled", color=4)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: annotator.py Projeto: TRON-Bioinformatics/neofox

    def _compute_long_running_tasks(self, neoantigen, patient, sequential=True):
        """Run the ``run_*`` predictors for one neoantigen/patient pair.

        Which predictors run depends on the patient's ``mhc1`` / ``mhc2``
        being non-empty, on ``self.organism`` and on the ``mix_mhc_pred`` /
        ``mix_mhc2_pred`` configuration entries being set.

        Args:
            neoantigen: passed through to every ``run_*`` call.
            patient: passed through to every ``run_*`` call; ``mhc1`` and
                ``mhc2`` are read here.
            sequential: when true, call the predictors in-process one after
                another; otherwise submit them to the Dask client obtained
                from ``get_client()`` and wait for the results.

        Returns:
            Tuple ``(mixmhc2pred_annotations, mixmhcpred_annotations,
            netmhc2pan, netmhcpan, prime_annotations)``; an entry is ``None``
            when its predictor was not run.
        """
        has_mhc1 = patient.mhc1 is not None and len(patient.mhc1) > 0
        has_mhc2 = patient.mhc2 is not None and len(patient.mhc2) > 0

        netmhcpan = None
        netmhc2pan = None
        mixmhcpred_annotations = None
        mixmhc2pred_annotations = None
        prime_annotations = None

        if sequential:
            if has_mhc1:
                netmhcpan = self.run_netmhcpan(
                    self.runner,
                    self.configuration,
                    self.available_alleles,
                    self.mhc_parser,
                    neoantigen,
                    patient)
            if has_mhc2:
                netmhc2pan = self.run_netmhc2pan(
                    self.runner,
                    self.configuration,
                    self.available_alleles,
                    self.mhc_parser,
                    neoantigen,
                    patient
                )
            # avoids running MixMHCpred and PRIME for non human organisms
            if self.organism == ORGANISM_HOMO_SAPIENS:
                if self.configuration.mix_mhc2_pred is not None and has_mhc2:
                    mixmhc2pred_annotations = self.run_mixmhc2pred(
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )
                if self.configuration.mix_mhc_pred is not None and has_mhc1:
                    mixmhcpred_annotations = self.run_mixmhcpred(
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )
                # NOTE(review): PRIME is gated on ``mix_mhc_pred`` (same
                # condition as MixMHCpred above) - confirm this is intended.
                if self.configuration.mix_mhc_pred is not None and has_mhc1:
                    prime_annotations = self.run_prime(
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )
        else:
            dask_client = get_client()

            netmhcpan_future = None
            if has_mhc1:
                # NOTE(review): this submission passes ``self.references`` as
                # an extra second argument that the sequential call above does
                # not pass - one of the two call sites is presumably stale;
                # verify against run_netmhcpan's signature.
                netmhcpan_future = dask_client.submit(
                    self.run_netmhcpan,
                    self.runner,
                    self.references,
                    self.configuration,
                    self.available_alleles,
                    self.mhc_parser,
                    neoantigen,
                    patient,
                )
            netmhc2pan_future = None
            if has_mhc2:
                netmhc2pan_future = dask_client.submit(
                    self.run_netmhc2pan,
                    self.runner,
                    self.configuration,
                    self.available_alleles,
                    self.mhc_parser,
                    neoantigen,
                    patient,
                )
            # avoids running MixMHCpred and PRIME for non human organisms
            mixmhc2pred_future = None
            mixmhcpred_future = None
            prime_future = None
            if self.organism == ORGANISM_HOMO_SAPIENS:
                if self.configuration.mix_mhc2_pred is not None and has_mhc2:
                    mixmhc2pred_future = dask_client.submit(
                        self.run_mixmhc2pred,
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )
                if self.configuration.mix_mhc_pred is not None and has_mhc1:
                    mixmhcpred_future = dask_client.submit(
                        self.run_mixmhcpred,
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )
                # NOTE(review): same ``mix_mhc_pred`` gate as in the
                # sequential branch - confirm.
                if self.configuration.mix_mhc_pred is not None and has_mhc1:
                    prime_future = dask_client.submit(
                        self.run_prime,
                        self.runner,
                        self.configuration,
                        self.mhc_parser,
                        neoantigen,
                        patient,
                    )

            # Leave the worker's thread pool while blocked on the futures.
            secede()
            try:
                if netmhcpan_future:
                    netmhcpan = dask_client.gather([netmhcpan_future])[0]
                if netmhc2pan_future:
                    netmhc2pan = dask_client.gather([netmhc2pan_future])[0]

                if self.organism == ORGANISM_HOMO_SAPIENS:
                    if mixmhcpred_future:
                        mixmhcpred_annotations = dask_client.gather([mixmhcpred_future])[0]
                    if mixmhc2pred_future:
                        mixmhc2pred_annotations = dask_client.gather([mixmhc2pred_future])[0]
                    if prime_future:
                        prime_annotations = dask_client.gather([prime_future])[0]
            finally:
                # Always return the thread to the pool, even when one of the
                # gathered tasks raised.
                rejoin()

        return mixmhc2pred_annotations, mixmhcpred_annotations, netmhc2pan, netmhcpan, prime_annotations

Exemplo n.º 8

0

Exibir arquivo

def parse_identifiers_task(job_id: str, physical_data_source: str,
                           table_name: str):
    """Find the identifier columns of one table and record them on a ParserJob.

    The task secedes from the worker's thread pool for its whole duration and
    rejoins on the way out, whatever happens in between.

    Args:
        job_id: id of the ``ParserJob`` that tracks this run.
        physical_data_source: source the table belongs to.
        table_name: table to analyse.

    Returns:
        None. The job (status ``'COMPLETED'`` with its identifiers, or
        ``'FAILED'``) is persisted through ``ParserJobStore.store``.

    Raises:
        AssertionError: if the Dask lease timeout is below 600 seconds or the
            comm retry count is not positive.
        TooManyColumns: if the table exceeds ``max_allowed_dimensions`` and
            the overflow strategy is ``RAISE_EXCEPTION``.
        UnknownStrategy: if the overflow strategy is not a handled one.
        Exception: anything raised while parsing is re-raised after the job
            has been marked ``'FAILED'``.
    """
    # Estimated maximum / pessimistic processing time of a single table is around 10 minutes,
    # therefore we need to ensure that the semaphore isn't automatically released (via timeout) if the task is idle.
    # See: https://docs.dask.org/en/latest/configuration-reference.html#distributed.scheduler.locks.lease-timeout
    # NOTE(review): these asserts are stripped when Python runs with -O.
    assert parse_timedelta(
        dask.config.get('distributed.scheduler.locks.lease-timeout',
                        default='0s')) >= 600
    assert dask.config.get('distributed.comm.retry.count', default=0) > 0

    # detach the task from worker's thread-pool as we're mostly just waiting for Dremio to return the sampled dataset,
    # also we don't want to take up "scheduling slots" from other (compute-heavy) tasks as this might lead to deadlocks.
    secede()
    try:
        start_time = time.perf_counter()

        parser_job = ParserJob(job_id=job_id)

        try:
            ParserJobStore.store(parser_job)
            params = ParameterStore().get_config()

            columns_to_consider = list(
                _extract_explorable_columns(physical_data_source, table_name,
                                            params))

            Measure.histogram(
                'idparser_num_columns_per_table',
                tags={
                    'physical_data_source': physical_data_source,
                    'table_name': table_name
                },
            )(len(columns_to_consider))

            if len(columns_to_consider) > params.max_allowed_dimensions:
                if params.column_overflow_strategy == ColumnOverflowStrategy.RAISE_EXCEPTION:
                    raise TooManyColumns(physical_data_source, table_name,
                                         columns_to_consider)
                elif params.column_overflow_strategy == ColumnOverflowStrategy.SLICE_COLUMNS:
                    logger.warning(
                        f'Source {physical_data_source}.{table_name} has too many columns. '
                        f'Slicing first {params.max_allowed_dimensions}')
                    columns_to_consider = columns_to_consider[
                        0:params.max_allowed_dimensions]
                else:
                    raise UnknownStrategy(params.column_overflow_strategy,
                                          physical_data_source, table_name,
                                          columns_to_consider)

            # shortcut for the trivial case: zero or one column needs no search
            if len(columns_to_consider) <= 1:
                identifiers = set(columns_to_consider)
            else:
                identifiers = _find_identifiers(physical_data_source,
                                                table_name,
                                                columns_to_consider, params)

            parser_job.identifiers = identifiers
            parser_job.status = 'COMPLETED'

            logger.info(
                f'Success! Finished running the identifier script on {physical_data_source}.{table_name} '
                f'in {time.perf_counter() - start_time:0.3f} (identifiers: {identifiers})'
            )
        except Exception:
            parser_job.status = 'FAILED'
            raise  # Re-raise exception so it's handled and reported via our Dask plugin
        finally:
            ParserJobStore.store(parser_job)
    finally:
        # reuse the existing thread by returning it to the worker's thread-pool;
        # kept in its own ``finally`` so a failing store (or a failing
        # ParserJob construction) can no longer skip it.
        rejoin()

Exemplo n.º 9

0

Exibir arquivo

Arquivo: volatility_dask_elk.py Projeto: N4rr34n6/orochi

def run_plugin(dump_obj, plugin_obj, es_url, params=None):
    """
    Execute a single plugin on a dump with optional params.
    If success data are sent to elastic.

    The outcome is written to the ``Result`` row matching
    ``(plugin_obj, dump_obj)`` using these codes (taken from the branches
    below): 1 = ran but produced no data, 2 = data indexed, 3 = plugin
    requirements unsatisfied, 4 = any other error (description holds the
    traceback). Except for an error raised while recording a failure, no
    exception escapes: failures are recorded on the ``Result`` row instead.

    Args:
        dump_obj: dump record; ``upload.path`` and ``index`` are read here.
        plugin_obj: plugin record; ``name``, ``local_dump``, ``clamav_check``,
            ``vt_check`` and ``regipy_check`` are read here.
        es_url: URL handed to the Elasticsearch client.
        params: optional mapping of plugin option name to value. A ``dump``
            entry equal to ``True`` turns on local dumping of files.

    Returns:
        Always ``0``.
    """
    try:
        ctx = contexts.Context()
        constants.PARALLELISM = constants.Parallelism.Off
        # The return value was never used; the call itself is kept.
        framework.import_files(volatility.plugins, True)
        automagics = automagic.available(ctx)
        plugin = framework.list_plugins().get(plugin_obj.name)
        base_config_path = "plugins"
        file_name = os.path.abspath(dump_obj.upload.path)
        single_location = "file:" + pathname2url(file_name)
        ctx.config["automagic.LayerStacker.single_location"] = single_location
        automagics = automagic.choose_automagic(automagics, plugin)

        # Local dumps require files to be written to disk.
        local_dump = plugin_obj.local_dump

        # Add parameters; a local dump implies "dump" = True by default.
        plugin_config_path = interfaces.configuration.path_join(
            base_config_path, plugin.__name__)
        if params:
            # Copy every user supplied parameter into the plugin config.
            for k, v in params.items():
                extended_path = interfaces.configuration.path_join(
                    plugin_config_path, k)
                ctx.config[extended_path] = v

                # Equality (not identity) is kept so that a value of 1 still
                # enables the dump exactly as before.
                if k == "dump" and v == True:
                    local_dump = True

        if not params and local_dump:
            # The admin enabled local dump: pass dump=True to the plugin.
            extended_path = interfaces.configuration.path_join(
                plugin_config_path, "dump")
            ctx.config[extended_path] = True

        if local_dump:
            # A file consumer collects the files produced by the plugin.
            consumer = FileConsumer()
            local_path = "{}/{}/{}".format(settings.MEDIA_ROOT, dump_obj.index,
                                           plugin_obj.name)
            # exist_ok avoids the check-then-create race between concurrent
            # tasks and also creates a missing parent directory.
            os.makedirs(local_path, exist_ok=True)
        else:
            consumer = None

        try:
            # Build the plugin instance.
            constructed = plugins.construct_plugin(ctx, automagics, plugin,
                                                   base_config_path,
                                                   MuteProgress(), consumer)
        except exceptions.UnsatisfiedException as excp:
            # Record which requirements could not be satisfied.
            description = "\n".join([
                excp.unsatisfied[config_path].description
                for config_path in excp.unsatisfied
            ])
            result = _store_result(dump_obj, plugin_obj, 3, description)
            send_to_ws(dump_obj, result, plugin_obj.name)
            return 0
        try:
            runned_plugin = constructed.run()
        except Exception as excp:
            # Generic error raised while running the plugin.
            result = _store_result(dump_obj, plugin_obj, 4,
                                   _format_trace(excp))
            send_to_ws(dump_obj, result, plugin_obj.name)
            return 0

        # Render the output as JSON before sending it to Elasticsearch.
        json_data, error = ReturnJsonRenderer().render(runned_plugin)

        if len(json_data) > 0:

            # Store dumped files on disk, if any were produced.
            if consumer and consumer.files:
                dumped_paths = []
                for filedata in consumer.files:
                    output_path = "{}/{}".format(local_path,
                                                 filedata.preferred_filename)
                    with open(output_path, "wb") as f:
                        f.write(filedata.data.getvalue())
                    dumped_paths.append(output_path)

                # Optionally scan the whole output folder with ClamAV.
                if plugin_obj.clamav_check:
                    cd = pyclamd.ClamdUnixSocket()
                    match = cd.multiscan_file(local_path) or {}
                else:
                    match = {}

                result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)

                # One ExtractedDump row per dumped file.
                ExtractedDump.objects.bulk_create([
                    ExtractedDump(
                        result=result,
                        path=path,
                        sha256=sha256_checksum(path),
                        clamav=match[path][1] if path in match else None,
                    ) for path in dumped_paths
                ])

                # Run VT / regipy as Dask subtasks, one per dumped file.
                if plugin_obj.vt_check or plugin_obj.regipy_check:
                    dask_client = get_client()
                    # NOTE(review): when both checks are enabled only run_vt
                    # is submitted - confirm this precedence is intended.
                    subtask = run_vt if plugin_obj.vt_check else run_regipy
                    secede()
                    try:
                        tasks = [
                            dask_client.submit(subtask, result.pk, path)
                            for path in dumped_paths
                        ]
                        dask_client.gather(tasks)
                    finally:
                        # Always hand the thread back to the worker's pool,
                        # even if a subtask failed.
                        rejoin()

            es = Elasticsearch(
                [es_url],
                request_timeout=60,
                timeout=60,
                max_retries=10,
                retry_on_timeout=True,
            )
            helpers.bulk(
                es,
                gendata(
                    "{}_{}".format(dump_obj.index, plugin_obj.name.lower()),
                    plugin_obj.name,
                    json_data,
                ),
            )
            # Everything went fine.
            result = _store_result(dump_obj, plugin_obj, 2, error)
        else:
            # The plugin ran but returned no data.
            result = _store_result(dump_obj, plugin_obj, 1, error)
        send_to_ws(dump_obj, result, plugin_obj.name)
        return 0

    except Exception as excp:
        # Any other failure (setup, disk, Dask subtasks, Elasticsearch, ...).
        result = _store_result(dump_obj, plugin_obj, 4, _format_trace(excp))
        send_to_ws(dump_obj, result, plugin_obj.name)
        return 0


def _store_result(dump_obj, plugin_obj, code, description):
    """Save ``code``/``description`` on the matching Result row; return it."""
    result = Result.objects.get(plugin=plugin_obj, dump=dump_obj)
    result.result = code
    result.description = description
    result.save()
    return result


def _format_trace(excp):
    """Return the chained traceback of ``excp`` as one newline-joined string."""
    return "\n".join(
        traceback.TracebackException.from_exception(excp).format(chain=True))