Example No. 1
    def get_external_execution_plan(
        self,
        external_pipeline: ExternalPipeline,
        run_config: Dict[str, Any],
        mode: str,
        step_keys_to_execute: Optional[List[str]],
        known_state: Optional[KnownExecutionState],
        instance: Optional[DagsterInstance] = None,
    ) -> ExternalExecutionPlan:
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_nullable_list_param(step_keys_to_execute,
                                      "step_keys_to_execute",
                                      of_type=str)
        check.opt_inst_param(known_state, "known_state", KnownExecutionState)
        check.opt_inst_param(instance, "instance", DagsterInstance)

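        # Build the plan for the solid subset selected on the external pipeline; an instance ref
        # is only passed along when a persistent instance is available.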
        execution_plan = create_execution_plan(
            pipeline=self.get_reconstructable_pipeline(
                external_pipeline.name
            ).subset_for_execution_from_existing_pipeline(external_pipeline.solids_to_execute),
            run_config=run_config,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
            instance_ref=instance.get_ref() if instance and instance.is_persistent else None,
        )
        return ExternalExecutionPlan(
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                external_pipeline.identifying_pipeline_snapshot_id,
            ))
Example No. 2
def get_external_execution_plan_snapshot(recon_pipeline, args):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(args, "args", ExecutionPlanSnapshotArgs)

    try:
        pipeline = (
            recon_pipeline.subset_for_execution(args.solid_selection)
            if args.solid_selection
            else recon_pipeline
        )

        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
                known_state=args.known_state,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
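        # Return the failure as serializable error data instead of raising, so the caller can
        # surface it.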
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
Example No. 3
def test_create_execution_plan_snapshot():
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(noop_pipeline)

    ep_snapshot = snapshot_from_execution_plan(
        execution_plan, noop_pipeline.get_pipeline_snapshot_id())
    ep_snapshot_id = create_execution_plan_snapshot_id(ep_snapshot)

    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    run = instance.get_run_by_id(result.run_id)

    assert run.execution_plan_snapshot_id == ep_snapshot_id
    assert run.execution_plan_snapshot_id == create_execution_plan_snapshot_id(
        ep_snapshot)
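The round trip in Example No. 3 can be written as a short self-contained sketch. The import paths below follow the ones shown in Example No. 8 and are an assumption; they vary across dagster releases, so treat this as illustrative rather than canonical.

from dagster import pipeline, solid
from dagster.core.execution.api import create_execution_plan
# Assumed module path (per Example No. 8); create_execution_plan_snapshot_id may live elsewhere
# in other dagster versions.
from dagster.core.snap.execution_plan_snapshot import (
    create_execution_plan_snapshot_id,
    snapshot_from_execution_plan,
)


@solid
def noop_solid(_):
    pass


@pipeline
def noop_pipeline():
    noop_solid()


# Build a plan, snapshot it against the pipeline's snapshot id, then derive the plan snapshot id.
execution_plan = create_execution_plan(noop_pipeline)
ep_snapshot = snapshot_from_execution_plan(
    execution_plan, noop_pipeline.get_pipeline_snapshot_id()
)
ep_snapshot_id = create_execution_plan_snapshot_id(ep_snapshot)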
Example No. 4
    def get_external_execution_plan(self, external_pipeline, run_config, mode,
                                    step_keys_to_execute):
        check.inst_param(external_pipeline, 'external_pipeline',
                         ExternalPipeline)
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        return ExternalExecutionPlan(
            execution_plan_snapshot=snapshot_from_execution_plan(
                create_execution_plan(
                    pipeline=self.get_reconstructable_pipeline(
                        external_pipeline.name
                    ).subset_for_execution_from_existing_pipeline(
                        external_pipeline.solids_to_execute),
                    run_config=run_config,
                    mode=mode,
                    step_keys_to_execute=step_keys_to_execute,
                ),
                external_pipeline.identifying_pipeline_snapshot_id,
            ),
            represented_pipeline=external_pipeline,
        )
Example No. 5
    def ExecutionPlanSnapshot(self, request, _context):
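        # gRPC handler: deserialize the snapshot args from the request, build the plan snapshot,
        # and return it serialized in the reply.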
        execution_plan_args = deserialize_json_to_dagster_namedtuple(
            request.serialized_execution_plan_snapshot_args)

        check.inst_param(execution_plan_args, 'execution_plan_args',
                         ExecutionPlanSnapshotArgs)

        recon_pipeline = (
            recon_pipeline_from_origin(
                execution_plan_args.pipeline_origin
            ).subset_for_execution(execution_plan_args.solid_selection)
            if execution_plan_args.solid_selection
            else recon_pipeline_from_origin(execution_plan_args.pipeline_origin)
        )

        execution_plan_snapshot = snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=recon_pipeline,
                run_config=execution_plan_args.run_config,
                mode=execution_plan_args.mode,
                step_keys_to_execute=execution_plan_args.step_keys_to_execute,
            ),
            execution_plan_args.pipeline_snapshot_id,
        )
        return api_pb2.ExecutionPlanSnapshotReply(
            serialized_execution_plan_snapshot=serialize_dagster_namedtuple(
                execution_plan_snapshot))
Example No. 6
    def get_external_execution_plan(
        self,
        external_pipeline: ExternalPipeline,
        run_config: Dict[str, Any],
        mode: str,
        step_keys_to_execute: Optional[List[str]],
        known_state: Optional[KnownExecutionState],
    ) -> ExternalExecutionPlan:
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_list_param(step_keys_to_execute,
                             "step_keys_to_execute",
                             of_type=str)
        check.opt_inst_param(known_state, "known_state", KnownExecutionState)

        return ExternalExecutionPlan(
            execution_plan_snapshot=snapshot_from_execution_plan(
                create_execution_plan(
                    pipeline=self.get_reconstructable_pipeline(
                        external_pipeline.name
                    ).subset_for_execution_from_existing_pipeline(
                        external_pipeline.solids_to_execute),
                    run_config=run_config,
                    mode=mode,
                    step_keys_to_execute=step_keys_to_execute,
                    known_state=known_state,
                ),
                external_pipeline.identifying_pipeline_snapshot_id,
            ),
            represented_pipeline=external_pipeline,
        )
Example No. 7
    def build_flyte_sdk_workflow(self):
        ordered_step_dict = self.execution_plan.execution_deps()
        instance = DagsterInstance.ephemeral()
        pipeline_run = instance.create_run(
            pipeline_name=self.execution_plan.pipeline_def.display_name,
            run_id=self.execution_plan.pipeline_def.display_name,
            run_config=self.run_config,
            mode=None,
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=self.execution_plan.pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                self.execution_plan,
                self.execution_plan.pipeline_def.get_pipeline_snapshot_id(),
            ),
            parent_pipeline_snapshot=self.execution_plan.pipeline_def.get_parent_pipeline_snapshot(),
        )

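        # Prepare a plan-level execution context; it is reused below when each step is turned
        # into a Flyte SDK node.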
        initialization_manager = PlanExecutionContextManager(
            Retries.disabled_mode(),
            self.execution_plan,
            self.run_config,
            instance.get_run_by_id(
                self.execution_plan.pipeline_def.display_name),
            instance,
        )

        list(initialization_manager.prepare_context())
        pipeline_context = initialization_manager.get_context()

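        # One SDK node per solid, visited in dependency order, with optional per-solid resource
        # requests and limits taken from compute_dict.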
        for step_key in ordered_step_dict:
            solid_name = self.execution_plan.get_step_by_key(
                step_key).solid_name
            self.sdk_node_dict[solid_name] = self.get_sdk_node(
                pipeline_context,
                instance,
                pipeline_run,
                step_key,
                storage_request=self.compute_dict[solid_name].get(
                    "storage_request", None),
                cpu_request=self.compute_dict[solid_name].get(
                    "cpu_request", None),
                memory_request=self.compute_dict[solid_name].get(
                    "memory_request", None),
                storage_limit=self.compute_dict[solid_name].get(
                    "storage_limit", None),
                cpu_limit=self.compute_dict[solid_name].get("cpu_limit", None),
                memory_limit=self.compute_dict[solid_name].get(
                    "memory_limit", None),
            )
Example No. 8
    def create_run_for_pipeline(
        self,
        pipeline,
        execution_plan=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap.execution_plan_snapshot import snapshot_from_execution_plan

        check.inst_param(pipeline, 'pipeline', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        if execution_plan is None:
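            # No plan was supplied, so build one here; its snapshot is stored with the run below.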
            execution_plan = create_execution_plan(
                pipeline,
                environment_dict=environment_dict,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        return self.get_or_create_run(
            pipeline_name=pipeline.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=check.opt_str_param(mode,
                                     'mode',
                                     default=pipeline.get_default_mode_name()),
            selector=check.opt_inst_param(
                selector,
                'selector',
                ExecutionSelector,
                default=ExecutionSelector(name=pipeline.name),
            ),
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline.get_pipeline_snapshot_id()),
        )
Example No. 9
def test_execution_plan_snapshot_backcompat():

    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir)
        if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir,
                                         snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline,
                                                 run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id)
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id)

                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline,
                                     run,
                                     instance,
                                     raise_on_error=True)
                assert result.success
Example No. 10
def _launch_pipeline_execution(graphene_info, execution_params, is_reexecuted=False):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        execution_metadata = check.inst_param(
            execution_params.execution_metadata, 'execution_metadata', ExecutionMetadata
        )
        check.str_param(execution_metadata.root_run_id, 'root_run_id')
        check.str_param(execution_metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance
    run_launcher = instance.run_launcher

    if run_launcher is None:
        return graphene_info.schema.type_named('RunLauncherNotDefinedError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info, execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def, execution_params.environment_dict, mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params, execution_plan)

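    # Create the run with both the pipeline snapshot and the execution plan snapshot attached
    # before handing it to the run launcher.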
    pipeline_run = instance.create_run(
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        **pipeline_run_args_from_execution_params(
            execution_params, get_step_keys_to_execute(instance, pipeline_def, execution_params),
        )
    )

    run = instance.launch_run(pipeline_run.run_id)

    return graphene_info.schema.type_named('LaunchPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
Example No. 11
def test_create_noop_execution_plan(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example No. 12
def test_create_noop_execution_plan_with_tags(snapshot):
    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example No. 13
def execution_plan_snapshot_command(args):

    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(args.solid_selection)
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )

    return snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            run_config=args.run_config,
            mode=args.mode,
            step_keys_to_execute=args.step_keys_to_execute,
        ),
        args.pipeline_snapshot_id,
    )
Example No. 14
def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(args.solid_selection)
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )

    try:
        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=recon_pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info()))
Example No. 15
def test_create_execution_plan_with_dep(snapshot):
    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        solid_two(solid_one())

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example No. 16
def test_create_with_composite(snapshot):
    @solid(output_defs=[OutputDefinition(name='out_num', dagster_type=int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_1():
        return add_one(return_one())

    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        add(num_one=comp_1(), num_two=comp_2())

    execution_plan = create_execution_plan(do_comps)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    do_comps.get_pipeline_snapshot()))))
Example No. 17
def execution_plan_snapshot_command(output_file, solid_selection,
                                    environment_dict, mode,
                                    step_keys_to_execute, snapshot_id,
                                    **kwargs):
    recon_pipeline = recon_pipeline_for_cli_args(kwargs)

    environment_dict = json.loads(environment_dict)
    if step_keys_to_execute:
        step_keys_to_execute = json.loads(step_keys_to_execute)
    if solid_selection:
        solid_selection = json.loads(solid_selection)
        recon_pipeline = recon_pipeline.subset_for_execution(solid_selection)

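    # Snapshot the plan and write it back to the caller over the unary IPC channel.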
    execution_plan_snapshot = snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        ),
        snapshot_id,
    )

    ipc_write_unary_response(output_file, execution_plan_snapshot)
Example No. 18
def _execute_schedule(graphene_info, pipeline_def, execution_params):

    instance = graphene_info.context.instance

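    # Only build a plan (and therefore a snapshot) when the schedule's config validates;
    # otherwise the run is created without an execution plan snapshot.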
    execution_plan = None
    if is_config_valid(pipeline_def, execution_params.environment_dict, execution_params.mode):
        execution_plan = create_execution_plan(
            pipeline_def, execution_params.environment_dict, mode=execution_params.mode,
        )

    execution_plan_snapshot = None
    if execution_plan:
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        )

    pipeline_run = instance.get_or_create_run(
        pipeline_name=pipeline_def.name,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        selector=execution_params.selector,
        tags=execution_params.execution_metadata.tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
    )

    # Inject errors into event log at this point

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        result = _launch_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)
    else:
        result = _start_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)

    return result
Example No. 19
def _start_pipeline_execution(graphene_info,
                              execution_params,
                              is_reexecuted=False):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        execution_metadata = check.inst_param(
            execution_params.execution_metadata, 'execution_metadata',
            ExecutionMetadata)
        check.str_param(execution_metadata.root_run_id, 'root_run_id')
        check.str_param(execution_metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance
    execution_manager_settings = instance.dagit_settings.get(
        'execution_manager')
    if execution_manager_settings and execution_manager_settings.get(
            'disabled'):
        return graphene_info.schema.type_named(
            'StartPipelineRunDisabledError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info,
                                                  execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        execution_params.environment_dict,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params,
                                           execution_plan)

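    # Creating the run can fail if the requested run id conflicts with an existing run.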
    try:
        pipeline_run = instance.get_or_create_run(
            pipeline_name=pipeline_def.name,
            run_id=execution_params.execution_metadata.run_id
            if execution_params.execution_metadata.run_id
            else make_new_run_id(),
            selector=execution_params.selector or ExecutionSelector(name=pipeline_def.name),
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            step_keys_to_execute=(
                get_step_keys_to_execute(instance, pipeline_def, execution_params)
                or execution_params.step_keys
            ),
            tags=merge_dicts(pipeline_def.tags,
                             execution_params.execution_metadata.tags),
            status=PipelineRunStatus.NOT_STARTED,
            root_run_id=execution_params.execution_metadata.root_run_id,
            parent_run_id=execution_params.execution_metadata.parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        )
    except DagsterRunConflict as exc:
        return graphene_info.schema.type_named('PipelineRunConflict')(exc)

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        pipeline_def,
        pipeline_run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run))
Example No. 20
def _launch_scheduled_execution(instance, schedule_def, pipeline, tick,
                                stream):
    pipeline_def = pipeline.get_definition()

    # Run should_execute and halt if it returns False
    schedule_context = ScheduleExecutionContext(instance)
    with user_code_error_boundary(
            ScheduleExecutionError,
            lambda:
            'Error occurred during the execution of should_execute for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name),
    ):
        should_execute = schedule_def.should_execute(schedule_context)

    if not should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return

    errors = []

    run_config = {}
    schedule_tags = {}
    try:
        with user_code_error_boundary(
                ScheduleExecutionError,
                lambda:
                'Error occurred during the execution of run_config_fn for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            run_config = schedule_def.get_run_config(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    try:
        with user_code_error_boundary(
                ScheduleExecutionError,
                lambda:
                'Error occurred during the execution of tags_fn for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            schedule_tags = schedule_def.get_tags(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    pipeline_tags = pipeline_def.tags or {}
    check_tags(pipeline_tags, 'pipeline_tags')
    tags = merge_dicts(pipeline_tags, schedule_tags)

    mode = schedule_def.mode

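    # Plan/config errors are collected rather than raised, so the run can still be recorded and
    # then failed with the errors attached to its event log.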
    execution_plan_snapshot = None
    try:
        execution_plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id())
    except DagsterInvalidConfigError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=schedule_def.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=mode,
        solids_to_execute=pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=errors))
        return

    # Otherwise the run should be valid so lets launch it

    # Need an ExternalPipeline to launch so make one here
    recon_repo = pipeline.get_reconstructable_repository()
    repo_location = InProcessRepositoryLocation(recon_repo)
    external_pipeline = repo_location.get_repository(
        recon_repo.get_definition().name).get_full_external_pipeline(
            pipeline_def.name)

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline)
    except DagsterLaunchFailedError:
        error = serializable_error_info_from_exc_info(sys.exc_info())
        instance.report_engine_event(
            error.message,
            possibly_invalid_pipeline_run,
            EngineEventData.engine_error(error),
        )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=[error]))
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return
Example No. 21
def _make_airflow_dag(
    handle,
    pipeline_name,
    environment_dict=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.str_param(pipeline_name, 'pipeline_name')
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    mode = check.opt_str_param(mode, 'mode')
    # Default to use the (persistent) system temp directory rather than a seven.TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.
    instance = (
        check.inst_param(instance, 'instance', DagsterInstance)
        if instance
        else DagsterInstance.get(fallback_storage=seven.get_system_temp_directory())
    )

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, 'dag_id',
                                 _rename_for_airflow(pipeline_name))

    dag_description = check.opt_str_param(dag_description, 'dag_description',
                                          _make_dag_description(pipeline_name))
    check.subclass_param(operator, 'operator', BaseOperator)

    dag_kwargs = dict({'default_args': DEFAULT_ARGS},
                      **check.opt_dict_param(dag_kwargs,
                                             'dag_kwargs',
                                             key_type=str))

    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = handle.build_pipeline_definition()

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline,
                                           environment_dict,
                                           mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

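    # One Airflow operator per coalesced solid; downstream edges are added below from each
    # step's input dependencies.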
    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            handle=handle,
            pipeline_name=pipeline_name,
            environment_dict=environment_dict,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(
                        key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag,
            [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
Example No. 22

@solid
def nonce_solid(_):
    return


@pipeline
def nonce_pipeline():
    return nonce_solid()


nonce_pipeline_snapshot = nonce_pipeline.get_pipeline_snapshot()

nonce_execution_plan_snapshot = snapshot_from_execution_plan(
    create_execution_plan(nonce_pipeline),
    nonce_pipeline.get_pipeline_snapshot_id())


def test_init_modified_docker_operator(dagster_docker_image, ):  # pylint: disable=redefined-outer-name
    dagster_operator_parameters = DagsterOperatorParameters(
        task_id='nonce',
        environment_dict={'storage': {
            'filesystem': {}
        }},
        pipeline_name='',
        mode='default',
        op_kwargs={
            'image': dagster_docker_image,
            'api_version': 'auto',
        },