Пример #1
0
    def get_external_execution_plan(
        self,
        external_pipeline: ExternalPipeline,
        run_config: Dict[str, Any],
        mode: str,
        step_keys_to_execute: Optional[List[str]],
        known_state: Optional[KnownExecutionState],
    ) -> ExternalExecutionPlan:
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_list_param(step_keys_to_execute,
                             "step_keys_to_execute",
                             of_type=str)
        check.opt_inst_param(known_state, "known_state", KnownExecutionState)

        execution_plan_snapshot_or_error = sync_get_external_execution_plan_grpc(
            api_client=self.client,
            pipeline_origin=external_pipeline.get_external_origin(),
            run_config=run_config,
            mode=mode,
            pipeline_snapshot_id=external_pipeline.
            identifying_pipeline_snapshot_id,
            solid_selection=external_pipeline.solid_selection,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=execution_plan_snapshot_or_error,
            represented_pipeline=external_pipeline,
        )
Пример #2
0
    def get_subset_external_pipeline(self, selector):
        from ..schema.pipelines.pipeline_errors import GrapheneInvalidSubsetError
        from ..schema.pipelines.pipeline import GraphenePipeline
        from .utils import UserFacingGraphQLError

        check.inst_param(selector, "selector", PipelineSelector)
        # We have to grab the pipeline from the location instead of the repository directly
        # since we may have to request a subset we don't have in memory yet

        repository_location = self.repository_locations_dict[
            selector.location_name]
        external_repository = repository_location.get_repository(
            selector.repository_name)

        subset_result = repository_location.get_subset_external_pipeline_result(
            selector)
        if not subset_result.success:
            error_info = subset_result.error
            raise UserFacingGraphQLError(
                GrapheneInvalidSubsetError(
                    message="{message}{cause_message}".format(
                        message=error_info.message,
                        cause_message="\n{}".format(error_info.cause.message)
                        if error_info.cause else "",
                    ),
                    pipeline=GraphenePipeline(
                        self.get_full_external_pipeline(selector)),
                ))

        return ExternalPipeline(
            subset_result.external_pipeline_data,
            repository_handle=external_repository.handle,
        )
Пример #3
0
def _launch_scheduled_executions(instance, repo_location, external_repo,
                                 external_schedule, tick_context):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=
        None,  # No way to know this in general for this scheduler
    )

    if not schedule_execution_data.run_requests:
        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        tick_context.stream.send(ScheduledExecutionSkipped())
        return

    for run_request in schedule_execution_data.run_requests:
        _launch_run(instance, repo_location, external_schedule,
                    external_pipeline, tick_context, run_request)

    tick_context.update_state(JobTickStatus.SUCCESS)
Пример #4
0
    def get_external_pipeline(self,
                              selector: PipelineSelector) -> ExternalPipeline:
        """Return the ExternalPipeline for a specific pipeline. Subclasses only
        need to implement get_subset_external_pipeline_result to handle the case where
        a solid selection is specified, which requires access to the underlying PipelineDefinition
        to generate the subsetted pipeline snapshot."""
        if not selector.solid_selection:
            return self.get_repository(
                selector.repository_name).get_full_external_pipeline(
                    selector.pipeline_name)

        repo_handle = self.get_repository(selector.repository_name).handle

        return ExternalPipeline(
            self.get_subset_external_pipeline_result(
                selector).external_pipeline_data, repo_handle)
Пример #5
0
def _launch_scheduled_execution(instance, repo_location, external_repo,
                                external_schedule, tick, stream):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        schedule_execution_data_mode=ScheduleExecutionDataMode.
        LAUNCH_SCHEDULED_EXECUTION,
        scheduled_execution_time=
        None,  # No way to know this in general for this scheduler
    )

    run_config = {}
    schedule_tags = {}
    execution_plan_snapshot = None
    errors = []

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        tick.update_with_status(ScheduleTickStatus.FAILURE, error=error)
        stream.send(ScheduledExecutionFailed(run_id=None, errors=[error]))
        return
    elif not schedule_execution_data.should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return
    else:
        run_config = schedule_execution_data.run_config
        schedule_tags = schedule_execution_data.tags
        try:
            external_execution_plan = repo_location.get_external_execution_plan(
                external_pipeline,
                run_config,
                external_schedule.mode,
                step_keys_to_execute=None,
            )
            execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
        except DagsterSubprocessError as e:
            errors.extend(e.subprocess_error_infos)
        except Exception as e:  # pylint: disable=broad-except
            errors.append(serializable_error_info_from_exc_info(
                sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=errors))
        return

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline)
    except Exception:  # pylint: disable=broad-except
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=[error]))
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return