Пример #1
0
def _schedule_tick_state(instance, stream, tick_data):
    """Create a schedule tick and yield a holder wrapping it.

    NOTE(review): this is a generator; it is presumably wrapped with
    ``contextlib.contextmanager`` where it is defined -- the decorator is not
    visible here, so confirm before relying on ``with`` semantics.

    Args:
        instance: Object providing ``create_schedule_tick``; also handed to
            the holder.
        stream: Object whose ``send`` receives a ``ScheduledExecutionFailed``
            event when an exception is thrown in at the yield point.
        tick_data: Passed unchanged to ``instance.create_schedule_tick``.

    Yields:
        The ``_ScheduleTickHolder`` built around the newly created tick.
    """
    created_tick = instance.create_schedule_tick(tick_data)
    tick_holder = _ScheduleTickHolder(tick=created_tick, instance=instance)
    try:
        yield tick_holder
    except Exception:  # pylint: disable=broad-except
        # The exception is not re-raised: it is recorded on the holder and
        # reported on the stream instead.
        failure_info = serializable_error_info_from_exc_info(sys.exc_info())
        tick_holder.update_with_status(ScheduleTickStatus.FAILURE,
                                       error=failure_info)
        stream.send(
            ScheduledExecutionFailed(run_id=None, errors=[failure_info]))
    finally:
        # Always call write() on the holder, whether or not the body failed.
        tick_holder.write()
Пример #2
0
def _schedule_tick_context(instance, stream, tick_data):
    """Create a job tick and yield a launch context bound to it.

    NOTE(review): this is a generator; it is presumably wrapped with
    ``contextlib.contextmanager`` where it is defined -- the decorator is not
    visible here, so confirm before relying on ``with`` semantics.

    Args:
        instance: Object providing ``create_job_tick``; also handed to the
            launch context.
        stream: Handed to the launch context, and used directly to send a
            ``ScheduledExecutionFailed`` event when an exception is thrown in
            at the yield point.
        tick_data: Passed unchanged to ``instance.create_job_tick``.

    Yields:
        The ``_ScheduleLaunchContext`` built around the newly created tick.
    """
    job_tick = instance.create_job_tick(tick_data)
    launch_context = _ScheduleLaunchContext(tick=job_tick,
                                            instance=instance,
                                            stream=stream)
    try:
        yield launch_context
    except Exception:  # pylint: disable=broad-except
        # The exception is not re-raised: it is recorded on the context and
        # reported on the stream instead.
        failure_info = serializable_error_info_from_exc_info(sys.exc_info())
        launch_context.update_state(JobTickStatus.FAILURE, error=failure_info)
        stream.send(
            ScheduledExecutionFailed(run_id=None, errors=[failure_info]))
    finally:
        # Always call write() on the context, whether or not the body failed.
        launch_context.write()
Пример #3
0
def _launch_scheduled_executions(instance, repo_location, external_repo,
                                 external_schedule, tick_context):
    """Evaluate an external schedule and launch one run per run request.

    The schedule execution data is fetched from ``repo_location``. Depending
    on what comes back, the tick context is moved to FAILURE (error data),
    SKIPPED (no run requests) or SUCCESS (after ``_launch_run`` has been
    called for every run request).

    Args:
        instance: Forwarded to the schedule evaluation and to ``_launch_run``.
        repo_location: Source of the subset pipeline result and of the
            schedule execution data.
        external_repo: Supplies the repository ``name`` and ``handle``.
        external_schedule: Supplies the schedule ``name``, ``pipeline_name``
            and ``solid_selection``.
        tick_context: Receives ``update_state`` calls; its ``stream`` receives
            the failed / skipped events.
    """
    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )
    subset_result = repo_location.get_subset_external_pipeline_result(selector)
    external_pipeline = ExternalPipeline(subset_result.external_pipeline_data,
                                         external_repo.handle)

    # No way to know the scheduled execution time in general for this
    # scheduler, hence None.
    execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=None,
    )

    if isinstance(execution_data, ExternalScheduleExecutionErrorData):
        # Evaluating the schedule failed: fail the tick and report the error.
        failure = execution_data.error
        tick_context.update_state(JobTickStatus.FAILURE, error=failure)
        tick_context.stream.send(
            ScheduledExecutionFailed(run_id=None, errors=[failure]))
    elif not execution_data.run_requests:
        # Nothing was requested: mark the tick as skipped.
        tick_context.update_state(JobTickStatus.SKIPPED)
        tick_context.stream.send(ScheduledExecutionSkipped())
    else:
        for request in execution_data.run_requests:
            _launch_run(instance, repo_location, external_schedule,
                        external_pipeline, tick_context, request)
        tick_context.update_state(JobTickStatus.SUCCESS)
Пример #4
0
def _launch_scheduled_execution(instance, schedule_def, pipeline, tick,
                                stream):
    """Evaluate a schedule definition, record a run for it and launch it.

    Steps, in order:
      1. Call ``schedule_def.should_execute``; on a falsy result the tick is
         marked SKIPPED, a ``ScheduledExecutionSkipped`` event is sent and
         nothing else happens. Errors raised by this call are not caught here.
      2. Compute the run config, the tags and the execution plan snapshot,
         collecting (instead of raising) the specific errors handled below.
      3. Create the run via ``instance.create_run`` even when errors were
         collected, and mark the tick SUCCESS with the new run id.
      4. If errors were collected, report them on the run, fail the run and
         send ``ScheduledExecutionFailed``; otherwise launch the run and send
         ``ScheduledExecutionSuccess`` (or ``ScheduledExecutionFailed`` when
         ``DagsterLaunchFailedError`` is raised by the launch).

    Args:
        instance: Used to create, launch and report on the run.
        schedule_def: Schedule definition being evaluated.
        pipeline: Provides the pipeline definition, solid selection and the
            reconstructable repository.
        tick: Receives ``update_with_status`` calls.
        stream: Receives the skipped / failed / success events.
    """

    def _should_execute_error():
        return (
            'Error occurred during the execution of should_execute for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name))

    def _run_config_error():
        return (
            'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name))

    def _tags_error():
        return (
            'Error occurred during the execution of tags_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name))

    pipeline_def = pipeline.get_definition()
    schedule_context = ScheduleExecutionContext(instance)

    # Ask the schedule whether it wants to run at all; stop early if not.
    with user_code_error_boundary(ScheduleExecutionError,
                                  _should_execute_error):
        wants_to_run = schedule_def.should_execute(schedule_context)

    if not wants_to_run:
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return

    collected_errors = []

    # Falls back to an empty config when the user function fails.
    run_config = {}
    try:
        with user_code_error_boundary(ScheduleExecutionError,
                                      _run_config_error):
            run_config = schedule_def.get_run_config(schedule_context)
    except DagsterUserCodeExecutionError:
        collected_errors.append(
            serializable_error_info_from_exc_info(sys.exc_info()))

    # Falls back to no schedule tags when the user function fails.
    schedule_tags = {}
    try:
        with user_code_error_boundary(ScheduleExecutionError, _tags_error):
            schedule_tags = schedule_def.get_tags(schedule_context)
    except DagsterUserCodeExecutionError:
        collected_errors.append(
            serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = pipeline_def.tags or {}
    check_tags(pipeline_tags, 'pipeline_tags')
    tags = merge_dicts(pipeline_tags, schedule_tags)

    mode = schedule_def.mode

    # Stays None when the config is invalid for the pipeline.
    plan_snapshot = None
    try:
        plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        plan_snapshot = snapshot_from_execution_plan(
            plan, pipeline_def.get_pipeline_snapshot_id())
    except DagsterInvalidConfigError:
        collected_errors.append(
            serializable_error_info_from_exc_info(sys.exc_info()))

    # The run is entered with whatever information is available, even if
    # errors were collected above.
    pending_run = instance.create_run(
        pipeline_name=schedule_def.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=mode,
        solids_to_execute=pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=pending_run.run_id)

    # Collected errors are attached to the run, which is then failed.
    if collected_errors:
        for collected in collected_errors:
            instance.report_engine_event(
                collected.message,
                pending_run,
                EngineEventData.engine_error(collected),
            )
        instance.report_run_failed(pending_run)
        stream.send(
            ScheduledExecutionFailed(run_id=pending_run.run_id,
                                     errors=collected_errors))
        return

    # Launching needs an ExternalPipeline, so build one in-process.
    recon_repo = pipeline.get_reconstructable_repository()
    repo_location = InProcessRepositoryLocation(recon_repo)
    external_repo = repo_location.get_repository(
        recon_repo.get_definition().name)
    external_pipeline = external_repo.get_full_external_pipeline(
        pipeline_def.name)

    try:
        launched_run = instance.launch_run(pending_run.run_id,
                                           external_pipeline)
    except DagsterLaunchFailedError:
        launch_error = serializable_error_info_from_exc_info(sys.exc_info())
        instance.report_engine_event(
            launch_error.message,
            pending_run,
            EngineEventData.engine_error(launch_error),
        )
        instance.report_run_failed(pending_run)
        stream.send(
            ScheduledExecutionFailed(run_id=pending_run.run_id,
                                     errors=[launch_error]))
    else:
        stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
Пример #5
0
def _launch_run(instance, repo_location, external_schedule, external_pipeline,
                tick_context, run_request):
    """Create a run for one run request and submit it.

    The run is created via ``instance.create_run`` even when fetching the
    execution plan failed; in that case the collected errors are reported on
    the run, the run is failed and ``ScheduledExecutionFailed`` is sent.
    Otherwise the run is submitted and either ``ScheduledExecutionSuccess``
    or (when submission raises) ``ScheduledExecutionFailed`` is sent.

    Args:
        instance: Used to create, submit and report on the run.
        repo_location: Source of the external execution plan.
        external_schedule: Supplies ``mode`` and ``pipeline_name``.
        external_pipeline: Pipeline the run is created for and submitted with.
        tick_context: Receives ``add_run``; its ``stream`` receives events.
        run_request: Supplies ``run_config``, ``tags`` and ``run_key``.
    """
    run_config = run_request.run_config
    schedule_tags = run_request.tags

    # Stays None when the execution plan could not be fetched.
    plan_snapshot = None
    plan_errors = []
    try:
        plan_snapshot = repo_location.get_external_execution_plan(
            external_pipeline,
            run_config,
            external_schedule.mode,
            step_keys_to_execute=None,
        ).execution_plan_snapshot
    except DagsterSubprocessError as subprocess_error:
        plan_errors.extend(subprocess_error.subprocess_error_infos)
    except Exception:  # pylint: disable=broad-except
        plan_errors.append(
            serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # The run is entered with whatever information is available, even if
    # errors were collected above.
    pending_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )

    tick_context.add_run(run_id=pending_run.run_id,
                         run_key=run_request.run_key)

    # Collected errors are attached to the run, which is then failed.
    if plan_errors:
        for plan_error in plan_errors:
            instance.report_engine_event(
                plan_error.message,
                pending_run,
                EngineEventData.engine_error(plan_error),
            )
        instance.report_run_failed(pending_run)
        tick_context.stream.send(
            ScheduledExecutionFailed(run_id=pending_run.run_id,
                                     errors=plan_errors))
        return

    try:
        launched_run = instance.submit_run(pending_run.run_id,
                                           external_pipeline)
    except Exception:  # pylint: disable=broad-except
        submit_error = serializable_error_info_from_exc_info(sys.exc_info())
        tick_context.stream.send(
            ScheduledExecutionFailed(
                run_id=pending_run.run_id,
                errors=[submit_error],
            ))
    else:
        tick_context.stream.send(
            ScheduledExecutionSuccess(run_id=launched_run.run_id))
Пример #6
0
def _launch_scheduled_execution(instance, repo_location, external_repo,
                                external_schedule, tick, stream):
    """Evaluate an external schedule, record a run for it and launch it.

    Outcomes:
      * Schedule evaluation returned error data: the tick is marked FAILURE
        and ``ScheduledExecutionFailed`` (with ``run_id=None``) is sent.
      * ``should_execute`` is falsy: the tick is marked SKIPPED and
        ``ScheduledExecutionSkipped`` is sent.
      * Otherwise a run is created via ``instance.create_run`` (even when
        fetching the execution plan failed) and the tick is marked SUCCESS
        with the run id. If plan errors were collected they are reported on
        the run, the run is failed and ``ScheduledExecutionFailed`` is sent;
        else the run is launched and ``ScheduledExecutionSuccess`` is sent,
        or ``ScheduledExecutionFailed`` when the launch raises.

    Args:
        instance: Used to create, launch and report on the run.
        repo_location: Source of the subset pipeline result, the schedule
            execution data and the external execution plan.
        external_repo: Supplies the repository ``name`` and ``handle``.
        external_schedule: Supplies ``name``, ``pipeline_name``,
            ``solid_selection`` and ``mode``.
        tick: Receives ``update_with_status`` calls.
        stream: Receives the skipped / failed / success events.
    """
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        schedule_execution_data_mode=ScheduleExecutionDataMode.
        LAUNCH_SCHEDULED_EXECUTION,
        scheduled_execution_time=
        None,  # No way to know this in general for this scheduler
    )

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        tick.update_with_status(ScheduleTickStatus.FAILURE, error=error)
        stream.send(ScheduledExecutionFailed(run_id=None, errors=[error]))
        return

    if not schedule_execution_data.should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return

    run_config = schedule_execution_data.run_config
    schedule_tags = schedule_execution_data.tags

    # Stays None when the execution plan could not be fetched.
    execution_plan_snapshot = None
    errors = []
    try:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            run_config,
            external_schedule.mode,
            step_keys_to_execute=None,
        )
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
    except DagsterSubprocessError as e:
        errors.extend(e.subprocess_error_infos)
    except Exception:  # pylint: disable=broad-except
        errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if errors:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=errors))
        return

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline)
    except Exception:  # pylint: disable=broad-except
        # Build the error from the exception being handled. No previously
        # bound local holds it on this path, so referencing one here would
        # raise UnboundLocalError and mask the real launch failure.
        launch_error = serializable_error_info_from_exc_info(sys.exc_info())
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id,
                errors=[launch_error],
            ))
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))