Example #1
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))
    pipeline_def = pipeline.get_definition()
    instance = check.opt_inst_param(
        instance, 'instance', DagsterInstance, default=DagsterInstance.ephemeral()
    )

    execution_plan = create_execution_plan(pipeline)

    pipeline_run = instance.create_run(
        pipeline_name='<empty>',
        run_id=run_id,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
    with scoped_pipeline_context(execution_plan, {}, pipeline_run, instance) as context:
        yield context
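A minimal usage sketch, assuming this generator is wrapped with contextlib.contextmanager (as its yield-based body suggests); the pipeline_run attribute on the yielded context is an assumption about this dagster version:

from contextlib import contextmanager

# Hypothetical wrapper; the source module may already apply this decorator.
empty_pipeline_context = contextmanager(yield_empty_pipeline_context)

with empty_pipeline_context() as context:
    # The scoped context carries the run created above (assumed attribute).
    assert context.pipeline_run.pipeline_name == '<empty>'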
Example #2
def create_execution_plan(pipeline,
                          environment_dict=None,
                          mode=None,
                          step_keys_to_execute=None):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    mode = check.opt_str_param(mode,
                               'mode',
                               default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute,
                         'step_keys_to_execute',
                         of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 environment_dict,
                                                 mode=mode)

    return ExecutionPlan.build(pipeline,
                               environment_config,
                               mode=mode,
                               step_keys_to_execute=step_keys_to_execute)
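A hedged usage sketch of create_execution_plan on a trivial pipeline; the solid and pipeline names are illustrative, and topological_steps() is assumed to exist on ExecutionPlan in this dagster version:

from dagster import lambda_solid, pipeline

@lambda_solid
def hello():
    return 'hello'

@pipeline
def hello_pipeline():
    hello()

# The backcompat shim wraps the raw PipelineDefinition for us.
plan = create_execution_plan(hello_pipeline)
# Each solid contributes a '<solid>.compute' step in this version of dagster.
assert [step.key for step in plan.topological_steps()] == ['hello.compute']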
Example #3
def test_subset_for_execution():
    pipeline = InMemoryExecutablePipeline(foo_pipeline)
    sub_pipeline = pipeline.subset_for_execution(['*add_nums'])
    assert sub_pipeline.solid_selection == ['*add_nums']
    assert sub_pipeline.solids_to_execute == {'add_nums', 'return_one', 'return_two'}

    result = execute_pipeline(sub_pipeline)
    assert result.success
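A hedged sketch of a foo_pipeline consistent with the assertions above (the real definition lives elsewhere in the test module); the selection '*add_nums' means add_nums plus all of its upstream ancestors:

from dagster import lambda_solid, pipeline

@lambda_solid
def return_one():
    return 1

@lambda_solid
def return_two():
    return 2

@lambda_solid
def add_nums(num1, num2):
    return num1 + num2

@pipeline
def foo_pipeline():
    add_nums(return_one(), return_two())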
Example #4
def _check_pipeline(pipeline):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()
    return pipeline, pipeline_def
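A sketch of the backcompat contract this helper encodes: a raw PipelineDefinition and an already-wrapped InMemoryExecutablePipeline normalize to the same (pipeline, definition) pair, assuming get_definition() returns the wrapped definition unchanged:

# foo_pipeline is a PipelineDefinition, as in the surrounding examples.
_, def_a = _check_pipeline(foo_pipeline)
_, def_b = _check_pipeline(InMemoryExecutablePipeline(foo_pipeline))
assert def_a is def_b  # both call forms resolve to the same definition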
Example #5
def test_subset_for_execution():
    pipeline = InMemoryExecutablePipeline(foo_pipeline)
    sub_pipeline = pipeline.subset_for_execution(["*add_nums"])
    assert sub_pipeline.solid_selection == ["*add_nums"]
    assert sub_pipeline.solids_to_execute == {
        "add_nums", "return_one", "return_two"
    }

    result = execute_pipeline(sub_pipeline)
    assert result.success
Example #6
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {
            "add_four": {
                "inputs": {
                    "num": {
                        "value": 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
Example #7
def _check_pipeline(pipeline):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, "pipeline", ExecutablePipeline)
    return pipeline
Example #8
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {
            'add_four': {
                'inputs': {
                    'num': {
                        'value': 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
Example #9
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe,
        run_id=run_id,
        environment_dict={'loggers': {
            'callback': {},
            'console': {}
        }})

    result = execute_run(InMemoryExecutablePipeline(a_pipe), pipeline_run,
                         instance)

    assert result.success

    return events, result
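A hedged usage sketch of synthesize_events; the solid is illustrative, and the record attributes are assumed from EventRecord in this dagster version:

from dagster import solid

@solid
def emit(context):
    context.log.info('hello')

def _solids():
    emit()

events, result = synthesize_events(_solids)
assert result.success
# Non-dagster-event records carry user log messages (assumed attributes).
assert any(record.user_message == 'hello'
           for record in events if not record.is_dagster_event)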
Example #10
def test_compile():
    # TODO: remove dependency on legacy_examples
    # https://github.com/dagster-io/dagster/issues/2653
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {
            'add_four': {
                'inputs': {
                    'num': {
                        'value': 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
Example #11
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))
    instance = check.opt_inst_param(instance,
                                    'instance',
                                    DagsterInstance,
                                    default=DagsterInstance.ephemeral())
    pipeline_run = instance.create_run(
        pipeline_name='<empty>',
        run_id=run_id,
        environment_dict=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
        parent_pipeline_snapshot=None,
    )
    with scoped_pipeline_context(
            create_execution_plan(pipeline),
        {},
            pipeline_run,
            instance,
    ) as context:
        yield context
Example #12
def test_single_step_resource_event_logs():
    # Tests that logs are attributed correctly for single-step plans, which are often the
    # representation of sub-plans in a multiprocessing execution environment. This will likely
    # need to be rewritten with the refactor detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = 'I AM A SOLID'
    USER_RESOURCE_MESSAGE = 'I AM A RESOURCE'
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return 'A'

    pipeline = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a},
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline,
        run_config={'loggers': {
            'callback': {}
        }},
        step_keys_to_execute=['resource_solid.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline), pipeline_run,
                         instance)

    assert result.success
    log_messages = [
        event for event in events if isinstance(event, LogMessageRecord)
    ]
    assert len(log_messages) == 2

    resource_log_message = next(
        message for message in log_messages
        if message.user_message == USER_RESOURCE_MESSAGE)
    assert resource_log_message.step_key == 'resource_solid.compute'
Example #13
    def run_one(self, instance):
        assert len(self._queue) > 0
        run = self._queue.pop(0)
        pipeline_def = define_repository().get_pipeline(run.pipeline_name)
        return list(
            execute_run_iterator(
                InMemoryExecutablePipeline(pipeline_def), run, instance))
Example #14
def test_execute_run_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": resource_a,
                    "b": resource_b
                },
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={"loggers": {
            "callback": {}
        }},
        mode="default",
    )

    iterator = execute_run_iterator(InMemoryExecutablePipeline(pipeline_def),
                                    pipeline_run,
                                    instance=instance)

    event_type = None
    while event_type != "STEP_START":
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [
        record.dagster_event for record in records if record.is_dagster_event
    ]
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message
                for message in messages if message == "CLEANING A"]) > 0
    assert len([message
                for message in messages if message == "CLEANING B"]) > 0
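The test above relies on generator semantics: iterator.close() raises GeneratorExit at the suspended yield inside execute_run_iterator, which dagster surfaces as a pipeline failure while resource teardown still runs. A minimal sketch of that mechanism in plain Python:

def noisy_gen():
    try:
        yield 'step'
    finally:
        print('CLEANING')  # mirrors the resource cleanup messages asserted above

g = noisy_gen()
next(g)    # advance to the yield
g.close()  # injects GeneratorExit at the yield; the finally block still runs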
Example #15
def test_execute_run_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'a': resource_a,
                    'b': resource_b
                },
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        environment_dict={'loggers': {
            'callback': {}
        }},
        mode='default',
    )

    iterator = execute_run_iterator(InMemoryExecutablePipeline(pipeline_def),
                                    pipeline_run,
                                    instance=instance)

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [
        record.dagster_event for record in records if record.is_dagster_event
    ]
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message
                for message in messages if message == 'CLEANING A']) > 0
    assert len([message
                for message in messages if message == 'CLEANING B']) > 0
Example #16
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))
    instance = check.opt_inst_param(
        instance, 'instance', DagsterInstance, default=DagsterInstance.ephemeral()
    )
    pipeline_run = instance.create_run(
        run_id=run_id, pipeline_name='<empty>', pipeline_snapshot=None
    )
    with scoped_pipeline_context(
        create_execution_plan(pipeline), {}, pipeline_run, instance,
    ) as context:
        yield context
Example #17
    def execute_pipeline(self, _, pipeline_def, pipeline_run, instance):
        check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
        check.inst_param(instance, 'instance', DagsterInstance)

        event_list = []
        self._active.add(pipeline_run.run_id)
        for event in execute_run_iterator(
                InMemoryExecutablePipeline(pipeline_def), pipeline_run,
                instance):
            event_list.append(event)
        self._active.remove(pipeline_run.run_id)
        return PipelineExecutionResult(pipeline_def, pipeline_run.run_id,
                                       event_list, lambda: None)
Example #18
def test_execution_plan_subset_strict_resources_within_composite():
    resources_initted = {}

    pipeline_def = create_composite_solid_pipeline(resources_initted)

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def, step_keys_to_execute=['wraps_b.consumes_resource_b.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {'b'}
Example #19
def test_run_group():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={
                    PARENT_RUN_ID_TAG: root_run_id,
                    ROOT_RUN_ID_TAG: root_run_id
                },
            )
            execute_run(InMemoryExecutablePipeline(foo_pipeline), run,
                        instance)
            runs.append(run)

        context_at_time_1 = define_context_for_file(__file__,
                                                    'get_repo_at_time_1',
                                                    instance)

        result_one = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': root_run_id},
        )
        assert result_one.data['runGroupOrError']['__typename'] == 'RunGroup'

        assert len(result_one.data['runGroupOrError']['runs']) == 4

        result_two = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': runs[-1].run_id},
        )
        assert result_two.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_two.data['runGroupOrError']['runs']) == 4

        assert (result_one.data['runGroupOrError']['rootRunId'] ==
                result_two.data['runGroupOrError']['rootRunId'])
        assert (result_one.data['runGroupOrError']['runs'] ==
                result_two.data['runGroupOrError']['runs'])
Example #20
def test_execution_plan_subset_with_aliases():
    resources_initted = {}

    @resource
    def resource_a(_):
        resources_initted['a'] = True
        yield 'A'

    @resource
    def resource_b(_):
        resources_initted['b'] = True
        yield 'B'

    @solid(required_resource_keys={'a'})
    def consumes_resource_a(context):
        assert context.resources.a == 'A'

    @solid(required_resource_keys={'b'})
    def consumes_resource_b(context):
        assert context.resources.b == 'B'

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                'a': resource_a,
                'b': resource_b,
            })
        ], )
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias('b_alias')()

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        step_keys_to_execute=['b_alias.compute'],
    )

    result = execute_run(
        InMemoryExecutablePipeline(selective_init_test_pipeline_with_alias),
        pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {'b'}
Example #21
def test_execution_plan_subset_strict_resources():
    resources_initted = {}

    instance = DagsterInstance.ephemeral()

    pipeline_def = get_resource_init_pipeline(resources_initted)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        step_keys_to_execute=["consumes_resource_b.compute"],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline_def),
                         pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {"b"}
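A hedged sketch of a get_resource_init_pipeline consistent with the assertion that only 'b' is initialized (the real helper is defined elsewhere in the test module; it mirrors the inline definitions of Example #20):

def get_resource_init_pipeline(resources_initted):
    @resource
    def resource_a(_):
        resources_initted['a'] = True
        yield 'A'

    @resource
    def resource_b(_):
        resources_initted['b'] = True
        yield 'B'

    @solid(required_resource_keys={'a'})
    def consumes_resource_a(context):
        assert context.resources.a == 'A'

    @solid(required_resource_keys={'b'})
    def consumes_resource_b(context):
        assert context.resources.b == 'B'

    @pipeline(mode_defs=[
        ModeDefinition(resource_defs={
            'a': resource_a,
            'b': resource_b,
        })
    ])
    def selective_init_test_pipeline():
        consumes_resource_a()
        consumes_resource_b()

    return selective_init_test_pipeline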
Example #22
def test_single_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(InMemoryExecutablePipeline(pipeline_def),
                                     reexecution_pipeline_run, instance)

    assert reexecution_result.success
    assert reexecution_result.result_for_solid(
        'return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
Example #23
def test_two_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    environment_dict = {'storage': {'filesystem': {}}}
    pipeline_result = execute_pipeline(two_step_reexec,
                                       environment_dict=environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        two_step_reexec,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(
        InMemoryExecutablePipeline(two_step_reexec),
        reexecution_pipeline_run,
        instance=instance)

    assert reexecution_result.success
    assert reexecution_result.result_for_solid(
        'return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
Example #24
    def in_mp_process(cls, handle, pipeline_run, instance_ref, term_event):
        """
        Execute pipeline using message queue as a transport
        """
        run_id = pipeline_run.run_id
        pipeline_name = pipeline_run.pipeline_name

        instance = DagsterInstance.from_ref(instance_ref)
        pid = os.getpid()
        instance.report_engine_event(
            'Started process for pipeline (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end='dagit_subprocess_init'),
            cls,
        )

        start_termination_thread(term_event)

        try:
            handle.build_repository_definition()
            pipeline_def = handle.with_pipeline_name(
                pipeline_name).build_pipeline_definition()
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'Failed attempting to load pipeline "{}"'.format(
                    pipeline_name),
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
            return

        try:
            event_list = []
            for event in execute_run_iterator(
                    InMemoryExecutablePipeline(
                        pipeline_def.build_sub_pipeline(
                            pipeline_run.selector.solid_subset)),
                    pipeline_run,
                    instance,
            ):
                event_list.append(event)
            return PipelineExecutionResult(pipeline_def, run_id, event_list,
                                           lambda: None)

        # Add a DagsterEvent for unexpected exceptions
        # Explicitly ignore KeyboardInterrupts since they are used for termination
        except DagsterSubprocessError as err:
            if not all([
                    err_info.cls_name == 'KeyboardInterrupt'
                    for err_info in err.subprocess_error_infos
            ]):
                instance.report_engine_event(
                    'An exception was thrown during execution that is likely a framework error, '
                    'rather than an error in user code.',
                    pipeline_run,
                    EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info())),
                    cls,
                )
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'An exception was thrown during execution that is likely a framework error, '
                'rather than an error in user code.',
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
        finally:
            instance.report_engine_event(
                'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
                pipeline_run,
                cls=cls,
            )
Example #25
def test_reexecution_fs_storage():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict=environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(InMemoryExecutablePipeline(pipeline_def),
                                     pipeline_run, instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid(
        'return_one').output_value() == 1
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
    reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
    assert reexecution_run.parent_run_id == pipeline_result.run_id
    assert reexecution_run.root_run_id == pipeline_result.run_id

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    grandchild_result = execute_run(InMemoryExecutablePipeline(pipeline_def),
                                    pipeline_run, instance)

    assert grandchild_result.success
    assert len(grandchild_result.solid_result_list) == 2
    assert grandchild_result.result_for_solid('return_one').output_value() == 1
    assert grandchild_result.result_for_solid('add_one').output_value() == 2
    grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
    assert grandchild_run.parent_run_id == reexecution_result.run_id
    assert grandchild_run.root_run_id == pipeline_result.run_id
Example #26
def test_reexecution_fs_storage_with_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result_no_subset = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run,
        instance)
    assert reexecution_result_no_subset.success
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid(
        'return_one').output_value() == 1

    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_selection=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid(
        'return_one').output_value() == 1

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )

    reexecution_result = execute_run(InMemoryExecutablePipeline(pipeline_def),
                                     reexecution_pipeline_run, instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result.result_for_solid('add_one')
    assert reexecution_result.result_for_solid(
        'return_one').output_value() == 1

    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape(
                'Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )

    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )

    re_reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), re_reexecution_pipeline_run,
        instance)

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid(
        'return_one').output_value() == 1

    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )
Example #27
def _check_execute_pipeline_args(fn_name, pipeline, environment_dict, mode,
                                 preset, tags, run_config, instance):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict')

    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)

    run_config = check.opt_inst_param(run_config,
                                      'run_config',
                                      RunConfig,
                                      default=RunConfig())

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        check.invariant(
            run_config.mode is None or pipeline_preset.mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset,
                preset_mode=pipeline_preset.mode,
                run_config_mode=run_config.mode),
        )

        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict)
                or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(
                    preset=preset),
            )

            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            pipeline = pipeline.build_sub_pipeline(
                pipeline_preset.solid_subset)

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset,
                                         preset_mode=pipeline_preset.mode,
                                         mode=mode),
        )

        mode = pipeline_preset.mode

    if run_config.mode is not None or run_config.tags:
        warnings.warn((
            'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
            'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
            'instead.').format(fn_name=fn_name))

    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(
                    mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name,
                    modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(merge_dicts(pipeline_def.tags, run_config.tags or {}),
                       tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )

    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
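A hedged sketch of the mode/preset mutual-exclusion check above: calling the helper with both arguments set trips check.invariant, which raises CheckError before any preset lookup happens (foo_pipeline is illustrative, and the CheckError import path is assumed for this dagster version):

import pytest
from dagster.check import CheckError

with pytest.raises(CheckError, match='only one of'):
    _check_execute_pipeline_args(
        'execute_pipeline',
        foo_pipeline,
        environment_dict=None,
        mode='default',
        preset='dev',
        tags=None,
        run_config=None,
        instance=None,
    )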
Example #28
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_dict = env_with_fs(
        {'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }})
    result = execute_pipeline(pipeline_def,
                              environment_dict=environment_dict,
                              instance=instance)

    assert result.success

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory,
                                    result.run_id))
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_one.compute')).obj == 4)
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_two.compute')).obj == 6)

    ## re-execute add_two

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_two.compute'],
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    pipeline_reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance)

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory,
                                    result.run_id))
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_one.compute')).obj == 4)
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_two.compute')).obj == 6)

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(DagsterExecutionStepNotFoundError,
                       match='Execution plan does not contain step'):
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            step_keys_to_execute=['nope.compute'],
            parent_run_id=result.run_id,
            root_run_id=result.run_id,
        )