Пример #1
0
def get_external_schedule_execution(
    recon_repo,
    instance_ref,
    schedule_name,
    scheduled_execution_timestamp,
    scheduled_execution_timezone,
):
    """Evaluate a schedule's execution data and wrap it for external use.

    Args:
        recon_repo: A ``ReconstructableRepository`` from which the repository
            definition (and then the schedule definition) is obtained.
        instance_ref: Passed straight through as the first argument of
            ``ScheduleExecutionContext``.
        schedule_name: Name used to look up the schedule definition.
        scheduled_execution_timestamp: Timestamp handed to
            ``pendulum.from_timestamp``; any falsy value results in a
            scheduled execution time of ``None``.
        scheduled_execution_timezone: Forwarded to ``pendulum.from_timestamp``
            as ``tz``.

    Returns:
        The result of ``ExternalScheduleExecutionData.from_execution_data``,
        or an ``ExternalScheduleExecutionErrorData`` built from the active
        exception info when a ``ScheduleExecutionError`` is caught.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    repository_def = recon_repo.get_definition()
    schedule_def = repository_def.get_schedule_def(schedule_name)

    if scheduled_execution_timestamp:
        scheduled_execution_time = pendulum.from_timestamp(
            scheduled_execution_timestamp,
            tz=scheduled_execution_timezone,
        )
    else:
        scheduled_execution_time = None

    context = ScheduleExecutionContext(instance_ref, scheduled_execution_time)

    def _error_message():
        # Evaluated lazily by the error boundary, only when it needs the text.
        return ("Error occurred during the execution function for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name))

    try:
        with user_code_error_boundary(ScheduleExecutionError, _error_message):
            execution_data = schedule_def.get_execution_data(context)
            return ExternalScheduleExecutionData.from_execution_data(
                execution_data)
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
Пример #2
0
def get_external_schedule_execution(external_schedule_execution_args):
    """Compute the run config for the schedule described by the given args.

    Args:
        external_schedule_execution_args: An ``ExternalScheduleExecutionArgs``
            whose ``repository_origin``, ``schedule_name`` and
            ``instance_ref`` attributes are read here.

    Returns:
        ``ExternalScheduleExecutionData`` carrying the computed run config, or
        an ``ExternalScheduleExecutionErrorData`` built from the active
        exception info when a ``ScheduleExecutionError`` is caught.
    """
    args = external_schedule_execution_args
    check.inst_param(
        args,
        'external_schedule_execution_args',
        ExternalScheduleExecutionArgs,
    )

    repository_def = recon_repository_from_origin(
        args.repository_origin).get_definition()
    schedule_def = repository_def.get_schedule_def(args.schedule_name)
    context = ScheduleExecutionContext(
        DagsterInstance.from_ref(args.instance_ref))

    def _error_message():
        # Evaluated lazily by the error boundary, only when it needs the text.
        return (
            'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name))

    try:
        with user_code_error_boundary(ScheduleExecutionError, _error_message):
            return ExternalScheduleExecutionData(
                run_config=schedule_def.get_run_config(context))
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
Пример #3
0
def get_external_schedule_execution(recon_repo,
                                    external_schedule_execution_args):
    """Evaluate should_execute, run config and tags for a schedule.

    ``should_execute`` is only consulted when the args' execution data mode
    equals ``ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION``; in every
    other mode it is reported as ``None``.

    Args:
        recon_repo: A ``ReconstructableRepository`` providing the repository
            definition.
        external_schedule_execution_args: An ``ExternalScheduleExecutionArgs``
            whose ``schedule_name``, ``instance_ref`` and
            ``schedule_execution_data_mode`` attributes are read here.

    Returns:
        ``ExternalScheduleExecutionData`` (with ``should_execute=False`` and no
        run config or tags when the schedule declines to run), or an
        ``ExternalScheduleExecutionErrorData`` built from the active exception
        info when a ``ScheduleExecutionError`` is caught.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    check.inst_param(
        external_schedule_execution_args,
        "external_schedule_execution_args",
        ExternalScheduleExecutionArgs,
    )
    args = external_schedule_execution_args

    repository_def = recon_repo.get_definition()
    schedule_def = repository_def.get_schedule_def(args.schedule_name)

    # The message builders below are evaluated lazily by the error boundary.
    def _should_execute_message():
        return (
            "Error occurred during the execution of should_execute for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    def _run_config_message():
        return (
            "Error occurred during the execution of run_config_fn for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    def _tags_message():
        return (
            "Error occurred during the execution of tags_fn for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    with DagsterInstance.from_ref(args.instance_ref) as instance:
        context = ScheduleExecutionContext(instance)
        data_mode = args.schedule_execution_data_mode

        try:
            with user_code_error_boundary(ScheduleExecutionError,
                                          _should_execute_message):
                should_execute = None
                launching = (
                    data_mode ==
                    ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION)
                if launching:
                    should_execute = schedule_def.should_execute(context)
                    if not should_execute:
                        return ExternalScheduleExecutionData(
                            should_execute=False, run_config=None, tags=None)

            with user_code_error_boundary(ScheduleExecutionError,
                                          _run_config_message):
                run_config = schedule_def.get_run_config(context)

            with user_code_error_boundary(ScheduleExecutionError,
                                          _tags_message):
                tags = schedule_def.get_tags(context)

            return ExternalScheduleExecutionData(
                run_config=run_config,
                tags=tags,
                should_execute=should_execute,
            )
        except ScheduleExecutionError:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            return ExternalScheduleExecutionErrorData(error_info)
Пример #4
0
def get_external_schedule_execution(
    recon_repo,
    instance_ref,
    schedule_name,
    scheduled_execution_timestamp,
    scheduled_execution_timezone,
):
    """Evaluate should_execute, run config and tags for a schedule.

    Args:
        recon_repo: A ``ReconstructableRepository`` providing the repository
            definition.
        instance_ref: Reference handed to ``DagsterInstance.from_ref``; the
            resulting instance is used as a context manager for the whole
            evaluation.
        schedule_name: Name used to look up the schedule definition.
        scheduled_execution_timestamp: Timestamp handed to
            ``pendulum.from_timestamp``; any falsy value results in a
            scheduled execution time of ``None``.
        scheduled_execution_timezone: Forwarded to ``pendulum.from_timestamp``
            as ``tz``.

    Returns:
        ``ExternalScheduleExecutionData`` (with ``should_execute=False`` and no
        run config or tags when the schedule declines to run), or an
        ``ExternalScheduleExecutionErrorData`` built from the active exception
        info when a ``ScheduleExecutionError`` is caught.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    repository_def = recon_repo.get_definition()
    schedule_def = repository_def.get_schedule_def(schedule_name)

    # The message builders below are evaluated lazily by the error boundary.
    def _should_execute_message():
        return (
            "Error occurred during the execution of should_execute for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    def _run_config_message():
        return (
            "Error occurred during the execution of run_config_fn for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    def _tags_message():
        return (
            "Error occurred during the execution of tags_fn for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name))

    with DagsterInstance.from_ref(instance_ref) as instance:
        if scheduled_execution_timestamp:
            scheduled_execution_time = pendulum.from_timestamp(
                scheduled_execution_timestamp,
                tz=scheduled_execution_timezone,
            )
        else:
            scheduled_execution_time = None

        context = ScheduleExecutionContext(instance, scheduled_execution_time)

        try:
            with user_code_error_boundary(ScheduleExecutionError,
                                          _should_execute_message):
                wants_to_run = schedule_def.should_execute(context)
                if not wants_to_run:
                    return ExternalScheduleExecutionData(
                        should_execute=False,
                        run_config=None,
                        tags=None,
                    )

            with user_code_error_boundary(ScheduleExecutionError,
                                          _run_config_message):
                run_config = schedule_def.get_run_config(context)

            with user_code_error_boundary(ScheduleExecutionError,
                                          _tags_message):
                tags = schedule_def.get_tags(context)

            return ExternalScheduleExecutionData(
                run_config=run_config,
                tags=tags,
                should_execute=True,
            )
        except ScheduleExecutionError:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            return ExternalScheduleExecutionErrorData(error_info)
Пример #5
0
    def __init__(self, graphene_info, schedule_def):
        """Build the GraphQL schedule object from a ``ScheduleDefinition``.

        Args:
            graphene_info: Resolver info; ``graphene_info.context.instance`` is
                used to construct the ``ScheduleExecutionContext``.
            schedule_def: The ``ScheduleDefinition`` being exposed. Its
                environment dict is computed here and embedded in the
                serialized execution params under ``'environmentConfigData'``.
        """
        self._schedule_def = check.inst_param(schedule_def, 'schedule_def', ScheduleDefinition)
        self._schedule_context = ScheduleExecutionContext(graphene_info.context.instance)

        # Work on a shallow copy: writing the key into the mapping returned by
        # ``schedule_def.execution_params`` would otherwise modify that object
        # in place, leaking the computed environment dict back into the
        # schedule definition if the attribute hands out its own dict.
        execution_params = dict(schedule_def.execution_params)
        environment_config = schedule_def.get_environment_dict(self._schedule_context)
        execution_params['environmentConfigData'] = environment_config

        super(DauphinScheduleDefinition, self).__init__(
            name=schedule_def.name,
            cron_schedule=schedule_def.cron_schedule,
            execution_params_string=seven.json.dumps(execution_params),
        )
Пример #6
0
    def resolve_run_config_yaml(self, graphene_info):
        """Return the schedule's environment dict rendered as block-style YAML.

        Args:
            graphene_info: Resolver info; ``graphene_info.context.instance`` is
                used to construct the ``ScheduleExecutionContext``.

        Returns:
            The YAML string, ``''`` when the dump is empty, or ``None`` when a
            ``ScheduleExecutionError`` is caught while computing the
            environment dict.
        """
        schedule_def = self._schedule_def
        context = ScheduleExecutionContext(graphene_info.context.instance)

        def _error_message():
            # Evaluated lazily by the error boundary, only when it needs the text.
            return (
                'Error occurred during the execution of environment_dict_fn for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name)
            )

        try:
            with user_code_error_boundary(ScheduleExecutionError, _error_message):
                environment_dict = schedule_def.get_environment_dict(context)
        except ScheduleExecutionError:
            return None

        return yaml.dump(environment_dict, default_flow_style=False) or ''
Пример #7
0
    def __init__(self, graphene_info, schedule_def):
        """Build the GraphQL schedule object from a ``ScheduleDefinition``.

        Args:
            graphene_info: Resolver info; ``graphene_info.context.instance`` is
                used to construct the ``ScheduleExecutionContext`` kept on
                ``self._schedule_context``.
            schedule_def: The ``ScheduleDefinition`` being exposed; its name,
                cron schedule, selector (pipeline name and solid subset) and
                mode are forwarded to the parent constructor.
        """
        # Validate and store the definition once (the original repeated this
        # identical check-and-assign statement a second time).
        self._schedule_def = check.inst_param(schedule_def, 'schedule_def',
                                              ScheduleDefinition)
        self._schedule_context = ScheduleExecutionContext(
            graphene_info.context.instance)

        super(DauphinScheduleDefinition, self).__init__(
            name=schedule_def.name,
            cron_schedule=schedule_def.cron_schedule,
            pipeline_name=schedule_def.selector.name,
            solid_subset=schedule_def.selector.solid_subset,
            mode=schedule_def.mode,
        )
Пример #8
0
def _launch_scheduled_execution(instance, schedule_def, pipeline, tick,
                                stream):
    """Create and launch a pipeline run for one tick of a schedule.

    Steps, in order:

    1. Call the schedule's ``should_execute``; on a falsy result mark the tick
       ``SKIPPED``, send ``ScheduledExecutionSkipped`` and stop.
    2. Compute run config and tags, and build an execution plan snapshot,
       collecting (rather than raising) the errors caught along the way.
    3. Create the run on the instance with whatever was computed and mark the
       tick ``SUCCESS`` with the new run id.
    4. If any errors were collected, report them against the run, fail it and
       send ``ScheduledExecutionFailed``; otherwise launch the run and send
       ``ScheduledExecutionSuccess`` (or ``ScheduledExecutionFailed`` when a
       ``DagsterLaunchFailedError`` is caught).

    Args:
        instance: Object used to create, launch and report on the run.
        schedule_def: The schedule definition being evaluated.
        pipeline: Reconstructable pipeline; provides the pipeline definition,
            the solid selection and the reconstructable repository.
        tick: Tick record updated with the ``SKIPPED`` / ``SUCCESS`` status.
        stream: Sink whose ``send`` receives the outcome event.

    Returns:
        None. Outcomes are communicated through ``tick`` and ``stream``.
    """
    pipeline_def = pipeline.get_definition()

    # Run should_execute and halt if it returns False. This boundary is not
    # wrapped in a try/except here, so an exception raised out of it
    # propagates to the caller.
    schedule_context = ScheduleExecutionContext(instance)
    with user_code_error_boundary(
            ScheduleExecutionError,
            lambda:
            'Error occurred during the execution of should_execute for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name),
    ):
        should_execute = schedule_def.should_execute(schedule_context)

    if not should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return

    errors = []

    # Defaults used for the run record when the user functions fail.
    run_config = {}
    schedule_tags = {}

    # NOTE(review): the boundaries below raise ScheduleExecutionError while the
    # handlers catch DagsterUserCodeExecutionError -- presumably its base
    # class; confirm against the error definitions.
    try:
        with user_code_error_boundary(
                ScheduleExecutionError,
                lambda:
                'Error occurred during the execution of run_config_fn for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            run_config = schedule_def.get_run_config(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    try:
        with user_code_error_boundary(
                ScheduleExecutionError,
                lambda:
                'Error occurred during the execution of tags_fn for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            schedule_tags = schedule_def.get_tags(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    pipeline_tags = pipeline_def.tags or {}
    check_tags(pipeline_tags, 'pipeline_tags')
    tags = merge_dicts(pipeline_tags, schedule_tags)

    mode = schedule_def.mode

    # Stays None when the config is invalid; the run is still recorded below.
    execution_plan_snapshot = None
    try:
        execution_plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id())
    except DagsterInvalidConfigError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=schedule_def.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=mode,
        solids_to_execute=pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )

    # The tick is marked SUCCESS as soon as the run record exists, before the
    # collected errors are inspected.
    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if errors:
        _report_scheduled_execution_failure(
            instance, possibly_invalid_pipeline_run, errors, stream)
        return

    # Otherwise the run should be valid so lets launch it

    # Need an ExternalPipeline to launch so make one here
    recon_repo = pipeline.get_reconstructable_repository()
    repo_location = InProcessRepositoryLocation(recon_repo)
    external_pipeline = repo_location.get_repository(
        recon_repo.get_definition().name).get_full_external_pipeline(
            pipeline_def.name)

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline)
    except DagsterLaunchFailedError:
        error = serializable_error_info_from_exc_info(sys.exc_info())
        _report_scheduled_execution_failure(
            instance, possibly_invalid_pipeline_run, [error], stream)
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))


def _report_scheduled_execution_failure(instance, pipeline_run, errors,
                                        stream):
    """Log each error as an engine event, fail the run and notify the stream."""
    for error in errors:
        instance.report_engine_event(
            error.message,
            pipeline_run,
            EngineEventData.engine_error(error),
        )
    instance.report_run_failed(pipeline_run)
    stream.send(
        ScheduledExecutionFailed(run_id=pipeline_run.run_id, errors=errors))