Example #1
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
        recon_pipeline = recon_pipeline_from_pipeline_handle(external_pipeline.handle)
        execute_run(recon_pipeline, run, instance)
        return run
Example #2
def test_execute_run_bad_state():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def, run_config={"loggers": {"callback": {}}}, mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

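        # execute_run only accepts runs in the NOT_STARTED state; forcing the
        # status to SUCCESS above should make it raise a CheckError.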
        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
Example #3
def test_event_callback_logging():
    events = defaultdict(list)

    def _event_callback(record):
        assert isinstance(record, EventRecord)
        if record.is_dagster_event:
            events[record.dagster_event.event_type].append(record)

    pipeline = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository",
        "define_hello_logging_pipeline",
    )
    pipeline_def = pipeline.get_definition()
    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(pipeline_def)

    instance.watch_event_logs(pipeline_run.run_id, -1, _event_callback)

    execute_run(pipeline, pipeline_run, instance)

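    # Event log records arrive asynchronously via watch_event_logs, so poll
    # briefly for a terminal pipeline event instead of asserting immediately.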
    passed_before_timeout = False
    retries = 5
    while retries > 0:
        time.sleep(0.333)
        if DagsterEventType.PIPELINE_FAILURE in events.keys():
            break
        if DagsterEventType.PIPELINE_SUCCESS in events.keys():
            passed_before_timeout = True
            break
        retries -= 1

    assert passed_before_timeout
Example #4
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
        execute_run(recon_pipeline, run, self._instance)
        return run
Example #5
def test_reexecution_fs_storage():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict=environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

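    # Re-execute the full pipeline, recording lineage: parent_run_id points at
    # the immediate parent run and root_run_id at the first run in the group.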
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(pipeline_def, pipeline_run, instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid(
        'return_one').output_value() == 1
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
    reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
    assert reexecution_run.parent_run_id == pipeline_result.run_id
    assert reexecution_run.root_run_id == pipeline_result.run_id

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    grandchild_result = execute_run(pipeline_def, pipeline_run, instance)

    assert grandchild_result.success
    assert len(grandchild_result.solid_result_list) == 2
    assert grandchild_result.result_for_solid('return_one').output_value() == 1
    assert grandchild_result.result_for_solid('add_one').output_value() == 2
    grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
    assert grandchild_run.parent_run_id == reexecution_result.run_id
    assert grandchild_run.root_run_id == pipeline_result.run_id
Example #6
def test_execute_canceled_state():
    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

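        # Executing a run that is already CANCELED should short-circuit: no
        # steps execute, and a single engine event explains why.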
        execute_run(InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=instance)

        logs = instance.all_logs(pipeline_run.run_id)

        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message)

        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 iter_run,
                                 instance=instance))

        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message)
Example #7
def _synchronously_execute_run_within_hosted_user_process(
        graphene_info, run_id):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)

    instance = graphene_info.context.instance

    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named('PipelineRunNotFoundError')(
            run_id)

    external_pipeline = legacy_get_external_pipeline_or_raise(
        graphene_info, pipeline_run.pipeline_name, pipeline_run.solid_subset)

    validated_config = validate_config_from_snap(
        external_pipeline.config_schema_snapshot,
        external_pipeline.root_config_key_for_mode(pipeline_run.mode),
        pipeline_run.environment_dict,
    )

    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid user facing
        # graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(external_pipeline.name),
            validated_config.errors,
            pipeline_run.environment_dict,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    DagsterInvalidConfigError.__class__.__name__,
                    None,
                )),
        )

        instance.report_run_failed(pipeline_run)

        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            external_pipeline, validated_config.errors)

    pipeline_def = pipeline_def_from_pipeline_handle(external_pipeline.handle)
    execute_run(pipeline_def, pipeline_run, instance)

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run))
Example #8
def _synthesize_events(solids_fn, run_id=None, check_success=True):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            a_pipe,
            run_id=run_id,
            run_config={"loggers": {
                "callback": {},
                "console": {}
            }})

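        # InMemoryPipeline wraps the in-memory definition so execute_run can
        # target it without reconstructing it from a file or module.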
        result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

        if check_success:
            assert result.success

        return events, result
Example #9
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe,
        run_id=run_id,
        environment_dict={'loggers': {
            'callback': {},
            'console': {}
        }})

    result = execute_run(InMemoryExecutablePipeline(a_pipe), pipeline_run,
                         instance)

    assert result.success

    return events, result
Example #10
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe,
        run_id=run_id,
        run_config={"loggers": {
            "callback": {},
            "console": {}
        }})

    result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

    assert result.success

    return events, result
Example #11
def test_run_group():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={
                    PARENT_RUN_ID_TAG: root_run_id,
                    ROOT_RUN_ID_TAG: root_run_id
                },
            )
            execute_run(InMemoryExecutablePipeline(foo_pipeline), run,
                        instance)
            runs.append(run)

        context_at_time_1 = define_context_for_file(__file__,
                                                    'get_repo_at_time_1',
                                                    instance)

        result_one = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': root_run_id},
        )
        assert result_one.data['runGroupOrError']['__typename'] == 'RunGroup'

        assert len(result_one.data['runGroupOrError']['runs']) == 4

        result_two = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': runs[-1].run_id},
        )
        assert result_two.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_two.data['runGroupOrError']['runs']) == 4

        assert (result_one.data['runGroupOrError']['rootRunId'] ==
                result_two.data['runGroupOrError']['rootRunId'])
        assert (result_one.data['runGroupOrError']['runs'] ==
                result_two.data['runGroupOrError']['runs'])
Example #12
def test_single_step_resource_event_logs():
    # Tests that logs are attributed correctly for single-step plans, which are
    # often how sub-plans are represented in a multiprocess execution
    # environment. Will most likely need to be rewritten with the refactor
    # detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = 'I AM A SOLID'
    USER_RESOURCE_MESSAGE = 'I AM A RESOURCE'
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return 'A'

    pipeline = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a},
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline,
        run_config={'loggers': {
            'callback': {}
        }},
        step_keys_to_execute=['resource_solid.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline), pipeline_run,
                         instance)

    assert result.success
    log_messages = [
        event for event in events if isinstance(event, LogMessageRecord)
    ]
    assert len(log_messages) == 2

    resource_log_message = next(
        iter([
            message for message in log_messages
            if message.user_message == USER_RESOURCE_MESSAGE
        ]))
    assert resource_log_message.step_key == 'resource_solid.compute'
Example #13
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_dict = env_with_fs(
        {'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }})
    result = execute_pipeline(pipeline_def,
                              environment_dict=environment_dict,
                              instance=instance)

    assert result.success

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        result.run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    ## re-execute add_two

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_two.compute'],
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    pipeline_reexecution_result = execute_run(pipeline_def, pipeline_run,
                                              instance)

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_reexecution_result.run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(DagsterExecutionStepNotFoundError,
                       match='Execution plan does not contain step'):
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            step_keys_to_execute=['nope.compute'],
            parent_run_id=result.run_id,
            root_run_id=result.run_id,
        )
Example #14
def test_single_step_resource_event_logs():
    # Tests that logs are attributed correctly for single-step plans, which are
    # often how sub-plans are represented in a multiprocess execution
    # environment. Most likely will need to be rewritten with the refactor
    # detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = "I AM A SOLID"
    USER_RESOURCE_MESSAGE = "I AM A RESOURCE"
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={"a"})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return "A"

    the_pipeline = PipelineDefinition(
        name="resource_logging_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"a": resource_a},
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            the_pipeline,
            run_config={"loggers": {
                "callback": {}
            }},
            step_keys_to_execute=["resource_solid"],
        )

        result = execute_run(InMemoryPipeline(the_pipeline), pipeline_run,
                             instance)

        assert result.success
        log_messages = [
            event for event in events if isinstance(event, EventRecord)
            and event.level == coerce_valid_log_level("INFO")
        ]
        assert len(log_messages) == 2

        resource_log_message = next(
            iter([
                message for message in log_messages
                if message.user_message == USER_RESOURCE_MESSAGE
            ]))
        assert resource_log_message.step_key == "resource_solid"
Example #15
def _synchronously_execute_run_within_hosted_user_process(
    graphene_info, repository_location_name, repository_name, run_id,
):
    run_info_or_error = get_run_execution_info_for_created_run_or_error(
        graphene_info, repository_location_name, repository_name, run_id
    )

    if not isinstance(run_info_or_error, RunExecutionInfo):
        # if it is not a success the return value is the dauphin error
        return run_info_or_error

    external_pipeline, pipeline_run = run_info_or_error
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    execute_run(recon_pipeline, pipeline_run, graphene_info.context.instance)
    return graphene_info.schema.type_named("ExecuteRunInProcessSuccess")(
        run=graphene_info.schema.type_named("PipelineRun")(pipeline_run)
    )
Example #16
def test_run_group():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={
                    PARENT_RUN_ID_TAG: root_run_id,
                    ROOT_RUN_ID_TAG: root_run_id
                },
            )
            execute_run(InMemoryPipeline(foo_pipeline), run, instance)
            runs.append(run)

        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context_at_time_1:
            result_one = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": root_run_id},
            )
            assert result_one.data["runGroupOrError"][
                "__typename"] == "RunGroup"

            assert len(result_one.data["runGroupOrError"]["runs"]) == 4

            result_two = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": runs[-1].run_id},
            )
            assert result_one.data["runGroupOrError"][
                "__typename"] == "RunGroup"
            assert len(result_two.data["runGroupOrError"]["runs"]) == 4

            assert (result_one.data["runGroupOrError"]["rootRunId"] ==
                    result_two.data["runGroupOrError"]["rootRunId"])
            assert (result_one.data["runGroupOrError"]["runs"] ==
                    result_two.data["runGroupOrError"]["runs"])
Example #17
    def execute_pipeline(self, pipeline, pipeline_run, instance):
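        # Execute the run synchronously in process, tracking its run id in
        # self._active for the duration of the execution.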
        check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
        check.inst_param(instance, 'instance', DagsterInstance)

        self._active.add(pipeline_run.run_id)

        result = execute_run(pipeline, pipeline_run, instance)
        self._active.remove(pipeline_run.run_id)
        return result
Example #18
    def execute_pipeline(
        self, instance, external_pipeline, pipeline_run,
    ):
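        # Resolve the reconstructable pipeline by name and narrow it to the
        # solid subset recorded on the external pipeline before executing.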
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.inst_param(pipeline_run, "pipeline_run", PipelineRun)

        pipeline = self.get_reconstructable_pipeline(
            external_pipeline.name
        ).subset_for_execution_from_existing_pipeline(external_pipeline.solids_to_execute)

        execution_result = execute_run(pipeline, pipeline_run, instance)

        return ExternalPipelineExecutionResult(event_list=execution_result.event_list)
Example #19
def test_execution_plan_subset_with_aliases():
    resources_initted = {}

    @resource
    def resource_a(_):
        resources_initted["a"] = True
        yield "A"

    @resource
    def resource_b(_):
        resources_initted["b"] = True
        yield "B"

    @solid(required_resource_keys={"a"})
    def consumes_resource_a(context):
        assert context.resources.a == "A"

    @solid(required_resource_keys={"b"})
    def consumes_resource_b(context):
        assert context.resources.b == "B"

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                "a": resource_a,
                "b": resource_b,
            })
        ], )
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias("b_alias")()

    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        selective_init_test_pipeline_with_alias,
        step_keys_to_execute=["b_alias"])

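    # Passing the subset execution plan along with the run means only b_alias
    # executes, so only the resources it requires should be initialized.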
    pipeline_run = instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        execution_plan=execution_plan,
    )

    result = execute_run(
        InMemoryPipeline(selective_init_test_pipeline_with_alias),
        pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {"b"}
Example #20
def test_execution_plan_snapshot_backcompat():

    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir)
        if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir,
                                         snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline,
                                                 run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id)
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id)

                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline,
                                     run,
                                     instance,
                                     raise_on_error=True)
                assert result.success
Example #21
def test_execution_plan_subset_strict_resources_within_composite():
    resources_initted = {}

    pipeline_def = create_composite_solid_pipeline(resources_initted)

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def, step_keys_to_execute=['wraps_b.consumes_resource_b.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {'b'}
Example #22
def test_execution_plan_subset_strict_resources():
    resources_initted = {}

    instance = DagsterInstance.ephemeral()

    pipeline_def = get_resource_init_pipeline(resources_initted)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        step_keys_to_execute=['consumes_resource_b.compute'],
    )

    result = execute_run(pipeline_def, pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {'b'}
Example #23
def test_execution_plan_subset_with_aliases():
    resources_initted = {}

    @resource
    def resource_a(_):
        resources_initted['a'] = True
        yield 'A'

    @resource
    def resource_b(_):
        resources_initted['b'] = True
        yield 'B'

    @solid(required_resource_keys={'a'})
    def consumes_resource_a(context):
        assert context.resources.a == 'A'

    @solid(required_resource_keys={'b'})
    def consumes_resource_b(context):
        assert context.resources.b == 'B'

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                'a': resource_a,
                'b': resource_b,
            })
        ], )
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias('b_alias')()

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        step_keys_to_execute=['b_alias.compute'],
    )

    result = execute_run(
        InMemoryExecutablePipeline(selective_init_test_pipeline_with_alias),
        pipeline_run, instance)

    assert result.success

    assert set(resources_initted.keys()) == {'b'}
Example #24
def test_execution_plan_subset_strict_resources():
    resources_initted = {}

    instance = DagsterInstance.ephemeral()

    pipeline_def = get_resource_init_pipeline(resources_initted)

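    # Building the plan with step_keys_to_execute restricts execution to that
    # single step, so only the resources it requires should be initialized.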
    execution_plan = create_execution_plan(
        pipeline_def, step_keys_to_execute=["consumes_resource_b"])

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        execution_plan=execution_plan,
    )

    result = execute_run(InMemoryPipeline(pipeline_def), pipeline_run,
                         instance)

    assert result.success

    assert set(resources_initted.keys()) == {"b"}
Example #25
def test_single_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(pipeline_def, reexecution_pipeline_run,
                                     instance)

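    # Only add_one re-executed; return_one did not run, so it reports no
    # output value while add_one reads its input from intermediate storage.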
    assert reexecution_result.success
    assert reexecution_result.result_for_solid(
        'return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
Example #26
def test_two_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    environment_dict = {'storage': {'filesystem': {}}}
    pipeline_result = execute_pipeline(two_step_reexec,
                                       environment_dict=environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

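    # Re-execute only the two add_one steps; step keys here use the older
    # '<solid>.compute' form of the step-key API.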
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        two_step_reexec,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(
        InMemoryExecutablePipeline(two_step_reexec),
        reexecution_pipeline_run,
        instance=instance)

    assert reexecution_result.success
    assert reexecution_result.result_for_solid(
        'return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
Example #27
    def in_mp_process(cls, pipeline_dict, pipeline_run, instance_ref,
                      term_event):
        """
        Execute pipeline using message queue as a transport
        """
        pipeline_name = pipeline_run.pipeline_name
        instance = DagsterInstance.from_ref(instance_ref)
        pid = os.getpid()
        instance.report_engine_event(
            'Started process for pipeline (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end='dagit_subprocess_init'),
            cls,
        )

        start_termination_thread(term_event)

        try:
            pipeline = InterProcessExecutablePipeline.from_dict(pipeline_dict)
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'Failed attempting to load pipeline "{}"'.format(
                    pipeline_name),
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
            return

        try:
            return execute_run(
                pipeline.subset_for_execution(
                    pipeline_run.selector.solid_subset),
                pipeline_run,
                instance,
            )

        # Add a DagsterEvent for unexpected exceptions
        # Explicitly ignore KeyboardInterrupts since they are used for termination
        except DagsterSubprocessError as err:
            if not all([
                    err_info.cls_name == 'KeyboardInterrupt'
                    for err_info in err.subprocess_error_infos
            ]):
                instance.report_engine_event(
                    'An exception was thrown during execution that is likely a framework error, '
                    'rather than an error in user code.',
                    pipeline_run,
                    EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info())),
                    cls,
                )
        except Exception:  # pylint: disable=broad-except
            instance.report_engine_event(
                'An exception was thrown during execution that is likely a framework error, '
                'rather than an error in user code.',
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())),
                cls,
            )
        finally:
            instance.report_engine_event(
                'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
                pipeline_run,
                cls=cls,
            )
Example #28
    def launch_run(self, context: LaunchRunContext) -> None:
        recon_pipeline = recon_pipeline_from_origin(context.pipeline_code_origin)
        execute_run(recon_pipeline, context.pipeline_run, self._instance)
Example #29
    def test_get_event_records_sqlite(self, storage):
        # test for sqlite only because sqlite requires special logic to handle cross-run queries
        if not isinstance(storage, SqliteEventLogStorage):
            pytest.skip()

        asset_key = AssetKey(["path", "to", "asset_one"])

        events = []

        def _append_event(event):
            events.append(event)

        @solid
        def materialize_one(_):
            yield AssetMaterialization(
                asset_key=asset_key,
                metadata={
                    "text": "hello",
                    "json": {
                        "hello": "world"
                    },
                    "one_float": 1.0,
                    "one_int": 1,
                },
            )
            yield Output(1)

        @pipeline(mode_defs=[_mode_def(_append_event)])
        def a_pipe():
            materialize_one()

        with instance_for_test() as instance:
            if not storage._instance:  # pylint: disable=protected-access
                storage.register_instance(instance)

            # first run
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="1",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )

            for event in events:
                storage.store_event(event)

            run_records = instance.get_run_records()
            assert len(run_records) == 1

            # all logs returned in descending order
            all_event_records = storage.get_event_records()
            assert _event_types([all_event_records[0].event_log_entry
                                 ]) == [DagsterEventType.PIPELINE_SUCCESS]
            assert _event_types([all_event_records[-1].event_log_entry
                                 ]) == [DagsterEventType.PIPELINE_START]

            # second run
            events = []
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="2",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )
            run_records = instance.get_run_records()
            assert len(run_records) == 2
            for event in events:
                storage.store_event(event)

            # third run
            events = []
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="3",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )
            run_records = instance.get_run_records()
            assert len(run_records) == 3
            for event in events:
                storage.store_event(event)

            # filter by event type, using a run-sharded cursor so only events
            # from runs updated after the first run are returned
            filtered_records = storage.get_event_records(
                EventRecordsFilter(
                    event_type=DagsterEventType.PIPELINE_SUCCESS,
                    after_cursor=RunShardedEventsCursor(
                        id=0,
                        run_updated_after=run_records[-1].update_timestamp
                    ),  # events after first run
                ),
                ascending=True,
            )
            assert len(filtered_records) == 2
            assert _event_types([r.event_log_entry
                                 for r in filtered_records]) == [
                                     DagsterEventType.PIPELINE_SUCCESS,
                                     DagsterEventType.PIPELINE_SUCCESS,
                                 ]
            assert [r.event_log_entry.run_id
                    for r in filtered_records] == ["2", "3"]
Example #30
def test_reexecution_fs_storage_with_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result_no_subset = execute_run(pipeline_def,
                                               reexecution_pipeline_run,
                                               instance)
    assert reexecution_result_no_subset.success
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid(
        'return_one').output_value() == 1

    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_subset=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid(
        'return_one').output_value() == 1

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    reexecution_result = execute_run(pipeline_def, reexecution_pipeline_run,
                                     instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result.result_for_solid('add_one')
    assert reexecution_result.result_for_solid(
        'return_one').output_value() == 1

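    # Requesting a step key outside the solid subset should fail at run
    # creation time, before any execution happens.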
    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape(
                'Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )

    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    re_reexecution_result = execute_run(pipeline_def,
                                        re_reexecution_pipeline_run, instance)

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid(
        'return_one').output_value() == 1

    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )