Пример #1
0
def test_fan_out_should_skip_step():
    """Fan-out over optional outputs: only the branch fed by a yielded output runs.

    ``foo`` declares three optional outputs but yields only ``out_1``. Once the
    ``foo`` step has been executed, ``should_skip_step`` must be falsy for
    ``bar_1`` and truthy for ``bar_2`` and ``bar_3``.
    """
    optional_int_outputs = [
        OutputDefinition(Int, output_name, is_required=False)
        for output_name in ("out_1", "out_2", "out_3")
    ]

    @solid(output_defs=optional_int_outputs)
    def foo(_):
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        fanned = foo()
        # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=fanned.out_1)
        bar.alias("bar_2")(input_arg=fanned.out_2)
        bar.alias("bar_3")(input_arg=fanned.out_3)

    def plan_for(step_key):
        # Build a fresh execution plan restricted to a single step.
        return create_execution_plan(optional_outputs,
                                     step_keys_to_execute=[step_key])

    ephemeral_instance = DagsterInstance.ephemeral()
    run = PipelineRun(pipeline_name="optional_outputs",
                      run_id=make_new_run_id())

    # Execute only the upstream ``foo`` step before querying the downstream ones.
    execute_plan(
        plan_for("foo"),
        InMemoryPipeline(optional_outputs),
        ephemeral_instance,
        run,
    )

    # ``bar_1`` consumes the one output that was yielded.
    assert not should_skip_step(plan_for("bar_1"), ephemeral_instance,
                                run.run_id)
    # ``bar_2`` and ``bar_3`` consume outputs that were never yielded.
    for unfed_step in ("bar_2", "bar_3"):
        assert should_skip_step(plan_for(unfed_step), ephemeral_instance,
                                run.run_id)
Пример #2
0
def execute_step_command(input_json):
    """Execute the steps described by ``input_json`` and echo the resulting events.

    ``input_json`` is deserialized and checked to be an ``ExecuteStepArgs``.
    The run is looked up by ``args.pipeline_run_id`` on the instance built from
    ``args.instance_ref`` (or ``DagsterInstance.get()`` when no ref is given).

    Output is written with ``click.echo``; the function itself returns ``None``:

    * nothing is echoed when ``args.should_verify_step`` is set and
      ``verify_step`` reports failure;
    * a single serialized ``StepExecutionSkipped`` is echoed when
      ``should_skip_step`` is truthy for the plan;
    * otherwise every event from ``execute_plan_iterator`` is serialized,
      collected, and echoed once iteration has finished.
    """
    try:
        # Install the current SIGINT handler as the SIGTERM handler as well.
        sigint_handler = signal.getsignal(signal.SIGINT)
        signal.signal(signal.SIGTERM, sigint_handler)
    except ValueError:
        thread_warning = (
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step) from threads "
            "other than the main thread."
        ).format(thread_name=threading.current_thread().name)
        warnings.warn(thread_warning)

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    if args.instance_ref:
        instance_cm = DagsterInstance.from_ref(args.instance_ref)
    else:
        instance_cm = DagsterInstance.get()

    with instance_cm as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        missing_run_message = (
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id))
        check.inst(pipeline_run, PipelineRun, missing_run_message)

        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        # Optional pre-flight verification; bail out quietly when it fails.
        if args.should_verify_step and not verify_step(
                instance, pipeline_run, retries, args.step_keys_to_execute):
            return

        pipeline_subset = (
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute))
        execution_plan = create_execution_plan(
            pipeline_subset,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        # Flag that the step execution is skipped
        if should_skip_step(execution_plan,
                            instance=instance,
                            run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        # Drain the whole iterator first so events are only echoed after
        # execution has completed.
        serialized_events = [
            serialize_dagster_namedtuple(event)
            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retries=retries,
            )
        ]

        for serialized in serialized_events:
            click.echo(serialized)
Пример #3
0
def execute_step_command(input_json):
    """Execute the steps described by ``input_json`` and echo the resulting events.

    All work happens inside ``capture_interrupts()``. ``input_json`` is
    deserialized and checked to be an ``ExecuteStepArgs``; the run is looked up
    by ``args.pipeline_run_id`` on the instance built from
    ``args.instance_ref`` (or ``DagsterInstance.get()`` when no ref is given).

    Output is written with ``click.echo``; the function itself returns ``None``:

    * nothing is echoed when ``args.should_verify_step`` is set and
      ``verify_step`` reports failure;
    * a single serialized ``StepExecutionSkipped`` is echoed when
      ``should_skip_step`` is truthy for the plan;
    * otherwise every event from ``execute_plan_iterator`` is serialized,
      collected, and echoed once iteration has finished.
    """
    with capture_interrupts():
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                          ExecuteStepArgs)

        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            missing_run_message = (
                "Pipeline run with id '{}' not found for step execution".
                format(args.pipeline_run_id))
            check.inst(pipeline_run, PipelineRun, missing_run_message)

            # Optional pre-flight verification; bail out quietly when it fails.
            if args.should_verify_step and not verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
            ):
                return

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            pipeline_subset = (
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute))

            execution_plan = create_execution_plan(
                pipeline_subset,
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )

            # Flag that the step execution is skipped
            if should_skip_step(execution_plan,
                                instance=instance,
                                run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(
                    StepExecutionSkipped()))
                return

            # Drain the whole iterator first so events are only echoed after
            # execution has completed.
            serialized_events = [
                serialize_dagster_namedtuple(event)
                for event in execute_plan_iterator(
                    execution_plan,
                    pipeline_run,
                    instance,
                    run_config=pipeline_run.run_config,
                    retry_mode=args.retry_mode,
                )
            ]

            for serialized in serialized_events:
                click.echo(serialized)
Пример #4
0
def invoke_steps_within_python_operator(invocation_args, ts, dag_run,
                                        **kwargs):  # pylint: disable=unused-argument
    """Execute the Dagster steps named in ``invocation_args`` for ``dag_run``.

    A managed run is registered on the Dagster instance under
    ``dag_run.run_id`` (tagged with ``ts`` when ``ts`` is truthy), an execution
    plan limited to ``invocation_args.step_keys`` is built, and the plan is
    executed.

    Returns:
        The events produced by ``execute_plan``, after they have been passed to
        ``check_events_for_failures`` and ``check_events_for_skips``; ``None``
        when no instance could be built from ``invocation_args.instance_ref``.

    Raises:
        AirflowSkipException: when ``should_skip_step`` is truthy for the plan.
    """
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    run_config = invocation_args.run_config
    recon_repo = invocation_args.recon_repo
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    airflow_run_id = dag_run.run_id

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if not instance:
        # Without an instance there is nothing to execute.
        return None

    with instance:
        run_tags = {}
        if ts:
            run_tags[AIRFLOW_EXECUTION_DATE_STR] = ts

        pipeline_run = instance.register_managed_run(
            pipeline_name=pipeline_name,
            run_id=airflow_run_id,
            run_config=run_config,
            mode=mode,
            solids_to_execute=None,
            step_keys_to_execute=None,
            tags=run_tags,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
            parent_pipeline_snapshot=parent_pipeline_snapshot,
        )

        full_pipeline = recon_repo.get_reconstructable_pipeline(pipeline_name)
        recon_pipeline = full_pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

        execution_plan = create_execution_plan(
            recon_pipeline,
            run_config=run_config,
            step_keys_to_execute=step_keys,
            mode=mode,
        )

        # Surface a Dagster-level skip to Airflow as a skipped task.
        if should_skip_step(execution_plan, instance, pipeline_run.run_id):
            raise AirflowSkipException(
                "Dagster emitted skip event, skipping execution in Airflow"
            )

        events = execute_plan(
            execution_plan,
            recon_pipeline,
            instance,
            pipeline_run,
            run_config=run_config,
        )
        check_events_for_failures(events)
        check_events_for_skips(events)
        return events
Пример #5
0
 def _should_skip(self, pipeline_run):
     """Return the result of ``should_skip_step`` for ``self.step_keys`` in ``pipeline_run``.

     The execution plan is built from ``self.pipeline_name`` (subset to
     ``pipeline_run.solids_to_execute``) using ``self.run_config`` and
     ``self.mode``, then checked against ``self.instance`` for the run's id.
     """
     full_pipeline = self.recon_repo.get_reconstructable_pipeline(self.pipeline_name)
     pipeline_subset = full_pipeline.subset_for_execution_from_existing_pipeline(
         pipeline_run.solids_to_execute
     )
     plan = create_execution_plan(
         pipeline_subset,
         run_config=self.run_config,
         step_keys_to_execute=self.step_keys,
         mode=self.mode,
     )
     return should_skip_step(plan, instance=self.instance, run_id=pipeline_run.run_id)
Пример #6
0
def test_configured_input_should_skip_step():
    """A step with one upstream input and one config-supplied input is not skipped.

    ``solid_should_not_skip`` receives ``input_one`` from the optional output of
    ``one`` (which is yielded) and ``input_two`` from run config. The test first
    checks that ``execute_pipeline`` actually invokes the solid, then checks
    that ``should_skip_step`` is falsy for it after only ``one`` has been
    executed through ``execute_plan``.
    """
    invocations = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        invocations["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    # ``input_two`` has no upstream solid; its value comes from config.
    configured_inputs = {"input_two": {"value": "2"}}
    run_config = {
        "solids": {
            "solid_should_not_skip": {
                "inputs": configured_inputs
            }
        }
    }

    execute_pipeline(my_pipeline, run_config=run_config)
    assert invocations.get("yup")

    def plan_for(step_key):
        # Build a fresh single-step plan that carries the same run config.
        return create_execution_plan(
            my_pipeline,
            step_keys_to_execute=[step_key],
            run_config=run_config,
        )

    # ensure should_skip_step behaves the same as execute_pipeline
    ephemeral_instance = DagsterInstance.ephemeral()
    run = PipelineRun(pipeline_name="my_pipeline", run_id=make_new_run_id())
    execute_plan(
        plan_for("one"),
        InMemoryPipeline(my_pipeline),
        ephemeral_instance,
        run,
        run_config=run_config,
    )

    downstream_plan = plan_for("solid_should_not_skip")
    assert not should_skip_step(downstream_plan, ephemeral_instance,
                                run.run_id)
Пример #7
0
def test_fan_in_should_skip_step():
    """Fan-in is skipped only when none of its upstream sources produced output.

    Two composites wrap the same ``fan_in`` solid: one is fed by two ``skip``
    invocations (which never yield), the other by ``one`` and ``skip``. After
    executing each composite's upstream steps, ``should_skip_step`` must be
    truthy for the first ``fan_in`` and falsy for the second.
    """
    @lambda_solid
    def one():
        return 1

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def skip(_):
        # Generator that finishes without yielding its optional output.
        return
        yield  # pylint: disable=unreachable

    @solid
    def fan_in(_context, items):
        return items

    @composite_solid(output_defs=[OutputDefinition(is_required=False)])
    def composite_all_upstream_skip():
        return fan_in([skip(), skip()])

    @composite_solid(output_defs=[OutputDefinition(is_required=False)])
    def composite_one_upstream_skip():
        return fan_in([one(), skip()])

    @pipeline
    def optional_outputs_composite():
        composite_all_upstream_skip()
        composite_one_upstream_skip()

    def plan_for(*step_keys):
        # Build a fresh execution plan restricted to the given step keys.
        return create_execution_plan(
            optional_outputs_composite, step_keys_to_execute=list(step_keys),
        )

    ephemeral_instance = DagsterInstance.ephemeral()
    run = PipelineRun(pipeline_name="optional_outputs_composite", run_id=make_new_run_id())

    execute_plan(
        plan_for(
            "composite_all_upstream_skip.skip",
            "composite_all_upstream_skip.skip_2",
        ),
        ephemeral_instance,
        run,
    )
    # skip when none of the step's sources were yielded
    all_skipped_plan = plan_for("composite_all_upstream_skip.fan_in")
    assert should_skip_step(all_skipped_plan, ephemeral_instance, run.run_id)

    execute_plan(
        plan_for(
            "composite_one_upstream_skip.one",
            "composite_one_upstream_skip.skip",
        ),
        ephemeral_instance,
        run,
    )
    # do not skip when at least one of the sources exists
    partially_fed_plan = plan_for("composite_one_upstream_skip.fan_in")
    assert not should_skip_step(partially_fed_plan, ephemeral_instance, run.run_id)