Example #1
    def execute(self):
        check.inst(self.executor_config, MultiprocessExecutorConfig)
        pipeline_def = self.executor_config.load_pipeline(self.pipeline_run)
        instance = DagsterInstance.from_ref(self.instance_ref)

        # Watch the shared termination event so the parent process can
        # interrupt this child cleanly.
        start_termination_thread(self.term_event)

        # Rebuild the full execution plan in the child, then narrow it to the
        # single step this subprocess owns.
        execution_plan = create_execution_plan(
            pipeline_def, self.environment_dict, self.pipeline_run
        ).build_subset_plan([self.step_key])

        # Record the hand-off into this subprocess; DELEGATE_MARKER closes the
        # timing span the parent opened when delegating this step.
        yield instance.report_engine_event(
            MultiprocessEngine,
            'Executing step {} in subprocess'.format(self.step_key),
            self.pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(str(os.getpid()), 'pid'),
                    EventMetadataEntry.text(self.step_key, 'step_key'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            self.step_key,
        )

        # Stream step events back to the parent as they are produced.
        for step_event in execute_plan_iterator(
                execution_plan,
                self.pipeline_run,
                environment_dict=self.environment_dict,
                retries=self.executor_config.retries.for_inner_plan(),
                instance=instance,
        ):
            yield step_event
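
Every variant on this page begins with start_termination_thread(term_event). Dagster's implementation is not shown in these excerpts, but the underlying pattern is a daemon thread that blocks on a shared multiprocessing.Event and interrupts the main thread once the parent sets it. A minimal sketch of that pattern (the helper name and details are illustrative, not Dagster's actual code):

import multiprocessing
import threading
import _thread


def start_termination_thread_sketch(term_event):
    """Interrupt the main thread when term_event is set by the parent."""

    def watch():
        # Block until the parent process signals termination.
        term_event.wait()
        # Raise KeyboardInterrupt in the main thread; the execution loop
        # treats it as a request to shut down.
        _thread.interrupt_main()

    # A daemon thread will not keep the subprocess alive after the main
    # thread exits normally.
    threading.Thread(target=watch, daemon=True).start()


if __name__ == "__main__":
    start_termination_thread_sketch(multiprocessing.Event())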
Example #2
    def execute(self):
        pipeline = self.recon_pipeline
        # Using the instance as a context manager guarantees it is disposed on exit.
        with DagsterInstance.from_ref(self.instance_ref) as instance:
            start_termination_thread(self.term_event)

            execution_plan = create_execution_plan(
                pipeline=pipeline,
                run_config=self.run_config,
                mode=self.pipeline_run.mode,
                step_keys_to_execute=self.pipeline_run.step_keys_to_execute,
            ).build_subset_plan([self.step_key])

            yield instance.report_engine_event(
                "Executing step {} in subprocess".format(self.step_key),
                self.pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(str(os.getpid()), "pid"),
                        EventMetadataEntry.text(self.step_key, "step_key"),
                    ],
                    marker_end=DELEGATE_MARKER,
                ),
                MultiprocessExecutor,
                self.step_key,
            )

            for step_event in execute_plan_iterator(
                    execution_plan,
                    self.pipeline_run,
                    run_config=self.run_config,
                    retries=self.retries.for_inner_plan(),
                    instance=instance,
            ):
                yield step_event
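
Each execute() above is the child-process half of the story; the parent (not shown in these excerpts) spawns one subprocess per step and drains the events it sends back. A rough sketch of that parent-side plumbing using only the standard library (every name here is hypothetical, not Dagster's API):

import multiprocessing


def run_step(step_key, event_queue, term_event):
    # Stand-in for the per-step child: a real child would build the
    # execution plan and forward Dagster step events instead.
    if not term_event.is_set():
        event_queue.put(("STEP_SUCCESS", step_key))
    event_queue.put(None)  # sentinel: this child is done


def execute_steps(step_keys):
    event_queue = multiprocessing.Queue()
    term_event = multiprocessing.Event()
    procs = [
        multiprocessing.Process(target=run_step, args=(key, event_queue, term_event))
        for key in step_keys
    ]
    for proc in procs:
        proc.start()
    finished = 0
    while finished < len(procs):
        event = event_queue.get()
        if event is None:
            finished += 1
        else:
            yield event
    for proc in procs:
        proc.join()


if __name__ == "__main__":
    for event in execute_steps(["load", "transform", "store"]):
        print(event)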
Example #3
    def execute(self):
        pipeline = self.recon_pipeline
        with DagsterInstance.from_ref(self.instance_ref) as instance:
            start_termination_thread(self.term_event)
            execution_plan = create_execution_plan(
                pipeline=pipeline,
                run_config=self.run_config,
                mode=self.pipeline_run.mode,
                step_keys_to_execute=[self.step_key],
                # known_state carries state from steps that already ran (outputs,
                # retry counts), so this single-step plan can resolve its inputs.
                known_state=self.known_state,
            )

            yield instance.report_engine_event(
                "Executing step {} in subprocess".format(self.step_key),
                self.pipeline_run,
                EngineEventData(
                    [
                        MetadataEntry("pid", value=str(os.getpid())),
                        MetadataEntry("step_key", value=self.step_key),
                    ],
                    marker_end=DELEGATE_MARKER,
                ),
                MultiprocessExecutor,
                self.step_key,
            )

            yield from execute_plan_iterator(
                execution_plan,
                pipeline,
                self.pipeline_run,
                run_config=self.run_config,
                retry_mode=self.retry_mode.for_inner_plan(),
                instance=instance,
            )
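
Besides the metadata and retry API renames (EventMetadataEntry to MetadataEntry, retries to retry_mode), Example #3 replaces the explicit re-yield loop of the earlier versions with yield from, which also forwards send(), throw(), and close() to the inner generator. The two forms compare like this (toy generators for illustration):

def inner():
    yield 1
    yield 2


def forwarding_loop():
    # Examples #1 and #2: re-yield each item explicitly.
    for item in inner():
        yield item


def delegating():
    # Example #3: delegate to the sub-generator; unlike the plain loop,
    # this also proxies send(), throw(), and close() through to inner().
    yield from inner()


assert list(forwarding_loop()) == list(delegating()) == [1, 2]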
Example #4
def _in_mp_process(handle, pipeline_run, instance_ref, term_event):
    """
    Execute pipeline using message queue as a transport
    """
    run_id = pipeline_run.run_id
    pipeline_name = pipeline_run.pipeline_name

    instance = DagsterInstance.from_ref(instance_ref)
    instance.handle_new_event(
        build_process_started_event(run_id, pipeline_name, os.getpid()))

    start_termination_thread(term_event)

    try:
        handle.build_repository_definition()
        pipeline_def = handle.with_pipeline_name(
            pipeline_name).build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        repo_error = sys.exc_info()
        instance.handle_new_event(
            build_synthetic_pipeline_error_record(
                run_id, serializable_error_info_from_exc_info(repo_error),
                pipeline_name))
        return

    try:
        event_list = []
        for event in execute_run_iterator(
                pipeline_def.build_sub_pipeline(
                    pipeline_run.selector.solid_subset),
                pipeline_run,
                instance,
        ):
            event_list.append(event)
        return PipelineExecutionResult(pipeline_def, run_id, event_list,
                                       lambda: None)

    # Add a DagsterEvent for unexpected exceptions
    # Explicitly ignore KeyboardInterrupts since they are used for termination
    except DagsterSubprocessError as err:
        if not all(
                err_info.cls_name == 'KeyboardInterrupt'
                for err_info in err.subprocess_error_infos
        ):
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            instance.handle_new_event(
                build_synthetic_pipeline_error_record(run_id, error_info,
                                                      pipeline_name))
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        instance.handle_new_event(
            build_synthetic_pipeline_error_record(run_id, error_info,
                                                  pipeline_name))
    finally:
        instance.handle_new_event(
            build_process_exited_event(run_id, pipeline_name, os.getpid()))
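
Example #4 (and every later example) calls serializable_error_info_from_exc_info(sys.exc_info()) before shipping an error across the process boundary: live traceback objects do not pickle, so they must be flattened into plain data first. A minimal sketch of that idea (the tuple shape is illustrative, not Dagster's actual SerializableErrorInfo):

import sys
import traceback
from typing import List, NamedTuple


class ErrorInfoSketch(NamedTuple):
    message: str
    cls_name: str
    stack: List[str]


def error_info_from_exc_info(exc_info):
    exc_type, exc_value, exc_tb = exc_info
    return ErrorInfoSketch(
        message=str(exc_value),
        cls_name=exc_type.__name__,
        # format_tb returns plain strings, which pickle cleanly.
        stack=traceback.format_tb(exc_tb),
    )


try:
    raise ValueError("boom")
except ValueError:
    info = error_info_from_exc_info(sys.exc_info())
    assert info.cls_name == "ValueError"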
Example #5
    def execute(self):
        check.inst(self.executor_config, MultiprocessExecutorConfig)
        pipeline_def = self.executor_config.handle.build_pipeline_definition()
        # Pin the in-process executor so the child does not recursively spawn
        # its own subprocesses.
        environment_dict = dict(self.environment_dict, execution={'in_process': {}})

        start_termination_thread(self.term_event)

        execution_plan = create_execution_plan(
            pipeline_def, environment_dict, self.pipeline_run
        ).build_subset_plan([self.step_key])

        for step_event in execute_plan_iterator(
            execution_plan,
            self.pipeline_run,
            environment_dict=environment_dict,
            instance=DagsterInstance.from_ref(self.instance_ref),
        ):
            yield step_event
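
The interesting line in Example #5 is the environment_dict override: the child pins execution={'in_process': {}} over whatever executor the run was configured with, so a multiprocess run never forks again inside a step process. The merge itself is just a shallow dict update:

def force_in_process(environment_dict):
    # Keep the run's config but replace the executor section, so the
    # child cannot spawn grandchildren of its own.
    return dict(environment_dict, execution={'in_process': {}})


run_env = {'solids': {'my_solid': {'config': {'n': 3}}}, 'execution': {'multiprocess': {}}}
child_env = force_in_process(run_env)
assert child_env['execution'] == {'in_process': {}}
assert child_env['solids'] == run_env['solids']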
Example #6
    def execute(self):
        check.inst(self.executor_config, MultiprocessExecutorConfig)
        pipeline_def = self.executor_config.load_pipeline(self.pipeline_run)

        start_termination_thread(self.term_event)

        execution_plan = create_execution_plan(
            pipeline_def, self.environment_dict, self.pipeline_run
        ).build_subset_plan([self.step_key])

        for step_event in execute_plan_iterator(
            execution_plan,
            self.pipeline_run,
            environment_dict=override_env_for_inner_executor(
                self.environment_dict, self.executor_config.retries, self.step_key, DELEGATE_MARKER,
            ),
            instance=DagsterInstance.from_ref(self.instance_ref),
        ):
            yield step_event
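
Example #6 moves that override behind a helper, override_env_for_inner_executor, which also threads the retry policy and the DELEGATE_MARKER through to the inner run. Its internals are not shown on this page; a plausible shape, purely illustrative, would be:

def override_env_for_inner_executor_sketch(environment_dict, retries, step_key, marker):
    # Illustrative guess at the helper: pin the in-process executor and
    # hand it the retry config and the marker to close, so the inner run
    # reports timing consistently with the parent. step_key is accepted
    # only to mirror the call site; a real helper might use it for tagging.
    return dict(
        environment_dict,
        execution={'in_process': {'config': {'retries': retries, 'marker_to_close': marker}}},
    )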
Example #7
    def in_mp_process(cls, handle, pipeline_run, instance_ref, term_event):
        """
        Execute pipeline using message queue as a transport
        """
        run_id = pipeline_run.run_id
        pipeline_name = pipeline_run.pipeline_name

        instance = DagsterInstance.from_ref(instance_ref)
        pid = os.getpid()
        instance.report_engine_event(
            'Started process for pipeline (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end='dagit_subprocess_init'),
            cls,
        )

        start_termination_thread(term_event)

        try:
            handle.build_repository_definition()
            pipeline_def = handle.with_pipeline_name(
                pipeline_name).build_pipeline_definition()
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'Failed attempting to load pipeline "{}"'.format(
                    pipeline_name),
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
            return

        try:
            event_list = []
            for event in execute_run_iterator(
                    pipeline_def.build_sub_pipeline(
                        pipeline_run.selector.solid_subset),
                    pipeline_run,
                    instance,
            ):
                event_list.append(event)
            return PipelineExecutionResult(pipeline_def, run_id, event_list,
                                           lambda: None)

        # Add a DagsterEvent for unexpected exceptions
        # Explicitly ignore KeyboardInterrupts since they are used for termination
        except DagsterSubprocessError as err:
            if not all(
                    err_info.cls_name == 'KeyboardInterrupt'
                    for err_info in err.subprocess_error_infos
            ):
                instance.report_engine_event(
                    'An exception was thrown during execution that is likely a framework error, '
                    'rather than an error in user code.',
                    pipeline_run,
                    EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info())),
                    cls,
                )
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'An exception was thrown during execution that is likely a framework error, '
                'rather than an error in user code.',
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
        finally:
            instance.report_engine_event(
                'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
                pipeline_run,
                cls=cls,
            )
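
Examples #4 and #7 share an error-handling policy worth noting: a DagsterSubprocessError whose nested errors are all KeyboardInterrupts is treated as an ordered termination and suppressed, while anything else is reported as a likely framework error. Extracted as a tiny predicate (the helper name is hypothetical):

from collections import namedtuple

ErrInfo = namedtuple('ErrInfo', 'cls_name')


def is_interrupt_only(subprocess_error_infos):
    # True when every nested error came from a KeyboardInterrupt, i.e. the
    # subprocesses died because termination was requested, not because
    # something actually went wrong.
    return all(
        err_info.cls_name == 'KeyboardInterrupt'
        for err_info in subprocess_error_infos
    )


assert is_interrupt_only([ErrInfo('KeyboardInterrupt')])
assert not is_interrupt_only([ErrInfo('KeyboardInterrupt'), ErrInfo('ValueError')])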
Example #8
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):

    start_termination_thread(termination_event)
    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(
            serialized_execute_run_args)
        check.inst_param(execute_run_args, 'execute_run_args', ExecuteRunArgs)

        instance = DagsterInstance.from_ref(execute_run_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)

        pid = os.getpid()

    except:  # pylint: disable=bare-except
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info_from_exc_info(
                sys.exc_info()),
            message='Error during RPC setup for ExecuteRun',
        )
        subprocess_status_handler(event)
        subprocess_status_handler(RunInSubprocessComplete())
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    run_event_handler(
        instance.report_engine_event(
            'Started process for pipeline (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end='cli_api_subprocess_init'),
        ))

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    closed = False  # see the cleanup sketch after this example
    try:
        for event in _core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except KeyboardInterrupt:
        run_event_handler(
            instance.report_engine_event(
                message='Pipeline execution terminated by interrupt',
                pipeline_run=pipeline_run,
            ))
        raise
    except GeneratorExit:
        closed = True
        raise
    finally:
        if not closed:
            run_event_handler(
                instance.report_engine_event(
                    'Process for pipeline exited (pid: {pid}).'.format(
                        pid=pid),
                    pipeline_run,
                ))
        subprocess_status_handler(RunInSubprocessComplete())
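
The closed flag in Examples #8 and #9 exists because of the generator-cleanup trap the linked post describes: if the consumer abandons the event stream, GeneratorExit tears through the loop, and the finally block must not do more streaming work in a context that is being shut down. A stripped-down reproduction of the pattern:

def event_stream():
    closed = False
    try:
        yield 'STEP_START'
        yield 'STEP_SUCCESS'
    except GeneratorExit:
        # The consumer called close() or abandoned the iterator; do no
        # further work on its behalf past this point.
        closed = True
        raise
    finally:
        if not closed:
            # Safe: the stream ran to completion (or failed on its own).
            print('emitting final engine event')


stream = event_stream()
next(stream)    # consume only the first event...
stream.close()  # ...then abandon it: the final event is skipped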
Example #9
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):

    start_termination_thread(termination_event)
    instance = None  # bound up front so the except block below can test it safely
    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(
            serialized_execute_run_args)
        check.inst_param(execute_run_args, "execute_run_args",
                         ExecuteExternalPipelineArgs)

        instance = DagsterInstance.from_ref(execute_run_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)

        if not pipeline_run:
            raise DagsterRunNotFoundError(
                "gRPC server could not load run {run_id} in order to execute it. Make sure that the gRPC server has access to your run storage."
                .format(run_id=execute_run_args.pipeline_run_id),
                invalid_run_id=execute_run_args.pipeline_run_id,
            )

        pid = os.getpid()

    except:  # pylint: disable=bare-except
        serializable_error_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info,
            message="Error during RPC setup for executing run: {message}".
            format(message=serializable_error_info.message),
        )
        subprocess_status_handler(event)
        subprocess_status_handler(RunInSubprocessComplete())
        if instance:
            instance.dispose()
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    run_event_handler(
        instance.report_engine_event(
            "Started process for pipeline (pid: {pid}).".format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end="cli_api_subprocess_init"),
        ))

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    closed = False
    try:
        for event in _core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except GeneratorExit:
        closed = True
        raise
    finally:
        if not closed:
            run_event_handler(
                instance.report_engine_event(
                    "Process for pipeline exited (pid: {pid}).".format(
                        pid=pid),
                    pipeline_run,
                ))
        subprocess_status_handler(RunInSubprocessComplete())
        instance.dispose()
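
Example #9 hardens Example #8 in three ways: it fails fast with DagsterRunNotFoundError when the run id is unknown to run storage, it disposes the instance on every exit path, and it surfaces the setup error message in the IPC payload. The disposal guarantee is the same one the with-statement gives Examples #2 and #3; a stand-in sketch of both styles (FakeInstance is hypothetical):

from contextlib import ExitStack


class FakeInstance:
    """Stand-in for DagsterInstance: holds resources until disposed."""

    def __init__(self):
        self.disposed = False

    def dispose(self):
        self.disposed = True


# Example #9 style: manual dispose on every exit path.
instance = FakeInstance()
try:
    pass  # ... execute the run ...
finally:
    instance.dispose()

# Examples #2/#3 style: a context manager makes the same guarantee
# impossible to forget; ExitStack adapts dispose() to the protocol.
with ExitStack() as stack:
    managed = FakeInstance()
    stack.callback(managed.dispose)
    # ... execute the run ...

assert instance.disposed and managed.disposed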