Пример #1
0
def test_compile():
    """Coalesced steps for the nested ``composition`` pipeline match the expected keys."""
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition, {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}}, run_config=None
    )

    plan = ExecutionPlan.build(composition, environment_config, run_config)

    coalesced = coalesce_execution_steps(plan)

    expected_keys = {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }
    assert set(coalesced.keys()) == expected_keys
Пример #2
0
def test_solid_dictionary_some_no_config():
    """A solid without a config schema still gets a default SolidConfig entry."""

    @solid(name='int_config_solid', config=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name='no_config_solid', input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {'solids': {'int_config_solid': {'config': 1}}})

    assert set(env.solids.keys()) == {'int_config_solid', 'no_config_solid'}
    expected = {'int_config_solid': SolidConfig(1), 'no_config_solid': SolidConfig()}
    assert env.solids == expected
Пример #3
0
def test_clean_event_generator_exit():
    ''' Testing for generator cleanup
    (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/)

    Each event generator below is advanced exactly once with next() and then
    closed early with close(); close() raises GeneratorExit inside the
    generator, and a well-behaved generator must exit without raising anything
    else back to the caller.
    '''
    from dagster.core.execution.context.init import InitResourceContext

    pipeline_def = gen_basic_resource_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)
    log_manager = DagsterLogManager(run_id=pipeline_run.run_id,
                                    logging_tags={},
                                    loggers=[])
    environment_config = EnvironmentConfig.build(pipeline_def)
    execution_plan = create_execution_plan(pipeline_def)

    # Grab an arbitrary (name, definition) pair from the default mode's resources.
    resource_name, resource_def = next(
        iter(pipeline_def.get_default_mode().resource_defs.items()))
    resource_context = InitResourceContext(
        pipeline_def=pipeline_def,
        resource_def=resource_def,
        resource_config=None,
        run_id=make_new_run_id(),
    )
    # 1) single-resource generator: advance once, close before exhaustion.
    generator = single_resource_event_generator(resource_context,
                                                resource_name, resource_def)
    next(generator)
    generator.close()

    # 2) resource-initialization generator over the whole plan.
    generator = resource_initialization_event_generator(
        execution_plan, environment_config, pipeline_run, log_manager, {'a'})
    next(generator)
    generator.close()

    # 3) full pipeline-initialization generator.
    generator = pipeline_initialization_event_generator(
        execution_plan,
        {},
        pipeline_run,
        instance,
        resource_initialization_manager,
    )
    next(generator)
    generator.close()
Пример #4
0
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
) -> ExecutionPlan:
    """Validate the arguments and construct an ExecutionPlan for ``pipeline``.

    Args:
        pipeline: The pipeline (or its definition) to plan.
        run_config: Optional run config dict; validated to have str keys.
        mode: Optional mode name; defaults to the pipeline's default mode.
        step_keys_to_execute: Optional subset of step keys to plan for.

    Returns:
        ExecutionPlan: The plan built from the resolved environment config.
    """
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )
Пример #5
0
def test_resolve_step_output_versions_no_external_dependencies():
    """Resolved step output versions equal the expected per-step hashes."""
    versioned_pipeline = versioned_pipeline_factory()
    speculative_execution_plan = create_execution_plan(
        versioned_pipeline, run_config={}, mode="main"
    )
    environment_config = EnvironmentConfig.build(versioned_pipeline, run_config={}, mode="main")

    versions = resolve_step_output_versions(
        versioned_pipeline, speculative_execution_plan, environment_config
    )

    step1_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step2_handle = StepOutputHandle("versioned_solid_takes_input", "result")
    assert versions[step1_handle] == versioned_pipeline_expected_step1_output_version()
    assert versions[step2_handle] == versioned_pipeline_expected_step2_output_version()
Пример #6
0
def test_whole_environment():
    """EnvironmentConfig.build populates both solid and resource config sections."""
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        mode_defs=[
            ModeDefinition(
                name="test_mode",
                resource_defs={
                    "test_resource": ResourceDefinition(
                        resource_fn=lambda _: None, config_schema=Any
                    )
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema=Int,
                input_defs=[],
                output_defs=[OutputDefinition()],
                required_resource_keys={"test_resource"},
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(
                name="no_config_solid",
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *args: None,
            ),
        ],
    )

    env = EnvironmentConfig.build(
        pipeline_def,
        {
            "resources": {"test_resource": {"config": 1}},
            "solids": {"int_config_solid": {"config": 123}},
        },
    )

    assert isinstance(env, EnvironmentConfig)
    expected_solids = {
        "int_config_solid": SolidConfig.from_dict({"config": 123}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
    assert env.solids == expected_solids
    expected_resources = {"test_resource": ResourceConfig(1), "io_manager": ResourceConfig(None)}
    assert env.resources == expected_resources
Пример #7
0
def execute_list_versions_command(instance, kwargs):
    """Print a table of step-output versions and whether each output is already stored.

    Args:
        instance (DagsterInstance): The instance used to look up memoized outputs.
        kwargs (dict): CLI keyword arguments (config/preset/mode/pipeline origin).

    Raises:
        click.UsageError: If both --preset and --config are given.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(), run_config, mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config
    )
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan, pipeline.get_definition(), run_config, instance, environment_config
    )
    # the step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)

    rows = [
        [
            "{key}.{output}".format(key=handle.step_key, output=handle.output_name),
            version,
            "stored" if handle.step_key not in step_keys_not_stored else "to-be-recomputed",
        ]
        for handle, version in step_output_versions.items()
    ]
    click.echo(
        tabulate(rows, headers=["Step Output", "Version", "Status of Output"], tablefmt="github")
    )
Пример #8
0
def test_execute_step_wrong_step_key():
    """Subset plans built from unknown step keys raise DagsterExecutionStepNotFoundError."""
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    # Single unknown step key.
    with pytest.raises(DagsterExecutionStepNotFoundError) as excinfo:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    assert excinfo.value.step_keys == ["nope.compute"]
    assert str(excinfo.value) == "Can not build subset plan from unknown step: nope.compute"

    # Multiple unknown step keys pluralize the message.
    with pytest.raises(DagsterExecutionStepNotFoundError) as excinfo:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline, environment_config
            ),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    assert excinfo.value.step_keys == ["nope.compute", "nuh_uh.compute"]
    assert (
        str(excinfo.value)
        == "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )
Пример #9
0
def test_resolve_step_versions_default_value():
    """A solid input's default value is folded into the step version hash."""
    speculative_execution_plan = create_execution_plan(versioned_pipeline_default_value)
    environment_config = EnvironmentConfig.build(versioned_pipeline_default_value)

    versions = resolve_step_versions(
        versioned_pipeline_default_value, speculative_execution_plan, environment_config
    )

    input_version = join_and_hash(repr("DEFAULTVAL"))
    # Solid version = def version + (null) config version + (empty) resources version.
    solid_version = join_and_hash(
        versioned_solid_default_value.version,
        resolve_config_version(None),
        join_and_hash(),
    )

    expected_step_version = join_and_hash(input_version, solid_version)
    assert versions["versioned_solid_default_value"] == expected_step_version
Пример #10
0
def test_compile():
    """Coalesced steps for ``composition`` (mode taken from RunConfig) match expectations."""
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition, {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}}, run_config=None
    )

    mode_def = composition.get_mode_definition(run_config.mode)
    plan = ExecutionPlan.build(composition, environment_config, mode_def)

    steps_by_key = coalesce_execution_steps(plan)

    expected_keys = {
        'add_four.adder_1.adder_1',
        'add_four.adder_1.adder_2',
        'add_four.adder_2.adder_1',
        'add_four.adder_2.adder_2',
        'div_four.div_1',
        'div_four.div_2',
    }
    assert set(steps_by_key.keys()) == expected_keys
Пример #11
0
def test_compile():
    """Coalesced steps for the in-memory ``composition`` pipeline match expectations."""
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config)

    steps_by_key = coalesce_execution_steps(plan)
    expected_keys = {
        "add_four.add",
        "add_four.emit_two.add",
        "add_four.emit_two.emit_one",
        "add_four.emit_two.emit_one_2",
        "add_four.emit_two_2.add",
        "add_four.emit_two_2.emit_one",
        "add_four.emit_two_2.emit_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
    assert set(steps_by_key.keys()) == expected_keys
def create_context_creation_data(pipeline_def, environment_dict, run_config, instance):
    """Assemble the ContextCreationData bundle needed to construct an execution context.

    Builds the environment config, resolves the mode's storage and executor
    definitions, and captures an execution target handle for the pipeline.
    """
    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, run_config)

    mode_def = pipeline_def.get_mode_definition(run_config.mode)
    execution_target_handle, _ = ExecutionTargetHandle.get_handle(pipeline_def)

    return ContextCreationData(
        pipeline_def=pipeline_def,
        environment_config=environment_config,
        run_config=run_config,
        mode_def=mode_def,
        system_storage_def=system_storage_def_from_config(mode_def, environment_config),
        execution_target_handle=execution_target_handle,
        executor_def=executor_def_from_config(mode_def, environment_config),
        instance=instance,
    )
Пример #13
0
def test_solid_dictionary_some_no_config():
    """Solids lacking a config schema get an empty SolidConfig alongside configured ones."""

    @solid(name="int_config_solid", config_schema=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name="no_config_solid", input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {"solids": {"int_config_solid": {"config": 1}}})

    assert set(env.solids.keys()) == {"int_config_solid", "no_config_solid"}
    expected = {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
    assert env.solids == expected
Пример #14
0
def test_using_intermediates_to_override():
    """`intermediate_storage: in_memory` overrides the legacy filesystem `storage` setting."""
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    subset_plan = execution_plan.build_subset_plan(["return_one"], pipeline, environment_config)
    return_one_step_events = list(
        execute_plan(
            subset_plan,
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    # In-memory intermediate storage won: nothing was persisted to the filesystem store.
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
Пример #15
0
def test_provided_default_on_resources_config():
    """A resource field default propagates through the env type and EnvironmentConfig.build."""

    @solid(
        name="some_solid", input_defs=[], output_defs=[], required_resource_keys={"some_resource"}
    )
    def some_solid(_):
        return None

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="some_mode",
                resource_defs={
                    "some_resource": ResourceDefinition(
                        resource_fn=lambda _: None,
                        config_schema={
                            "with_default_int": Field(Int, is_required=False, default_value=23434)
                        },
                    )
                },
            )
        ]
    )
    def pipeline_def():
        some_solid()

    env_type = create_environment_type(pipeline_def)
    resource_field = env_type.fields["resources"].config_type.fields["some_resource"]
    assert resource_field.is_required is False

    config_field = resource_field.config_type.fields["config"]
    assert config_field.is_required is False
    assert config_field.default_value == {"with_default_int": 23434}

    assert resource_field.default_value == {"config": {"with_default_int": 23434}}

    # Building with an empty dict picks up the defaults.
    env = EnvironmentConfig.build(pipeline_def, {})
    assert env.resources == {
        "some_resource": ResourceConfig({"with_default_int": 23434}),
        "io_manager": ResourceConfig(None),
    }
def create_context_creation_data(execution_plan, run_config, pipeline_run, instance):
    """Assemble the ContextCreationData bundle from the plan and run metadata.

    Resolves the environment config and mode from ``pipeline_run.mode``, then
    derives the storage/executor definitions and required resource keys.
    """
    pipeline_def = execution_plan.pipeline.get_definition()
    mode = pipeline_run.mode

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    mode_def = pipeline_def.get_mode_definition(mode)
    system_storage_def = system_storage_def_from_config(mode_def, environment_config)

    return ContextCreationData(
        pipeline=execution_plan.pipeline,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        executor_def=executor_def_from_config(mode_def, environment_config),
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, system_storage_def
        ),
    )
Пример #17
0
def test_whole_environment():
    """EnvironmentConfig.build populates solid config and passes through resource config."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        mode_defs=[
            ModeDefinition(
                name='test_mode',
                resource_defs={
                    'test_resource': ResourceDefinition(
                        resource_fn=lambda _: None, config_schema=Any
                    )
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema=Int,
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(
                name='no_config_solid',
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *args: None,
            ),
        ],
    )

    env = EnvironmentConfig.build(
        pipeline_def,
        {
            'resources': {'test_resource': {'config': 1}},
            'solids': {'int_config_solid': {'config': 123}},
        },
    )

    assert isinstance(env, EnvironmentConfig)
    expected_solids = {'int_config_solid': SolidConfig(123), 'no_config_solid': SolidConfig()}
    assert env.solids == expected_solids
    assert env.resources == {'test_resource': {'config': 1}}
Пример #18
0
def test_required_solid_with_required_subfield():
    """A required solid config subfield makes 'solids' required; 'execution' stays optional."""
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema={"required_field": String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    solids_field = env_type.fields["solids"]

    assert solids_field.is_required is True
    assert solids_field.config_type

    int_config_solid_field = solids_field.config_type.fields["int_config_solid"]
    assert int_config_solid_field.is_required is True
    assert int_config_solid_field.config_type.fields["config"].is_required is True

    assert env_type.fields["execution"].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {"solids": {"int_config_solid": {"config": {"required_field": "foobar"}}}},
    )
    assert env_obj.solids["int_config_solid"].config["required_field"] == "foobar"

    # Omitting the required solid config must fail validation.
    assert not process_config(env_type, {"solids": {}}).success
    assert not process_config(env_type, {}).success
Пример #19
0
def test_resource_versions():
    """Resource versions hash config + resource version; unversioned resources resolve to None.

    Covers three cases: a versioned resource with config, a resource with no
    version (resolves to None), and a versioned resource with no config
    (hash of the empty config hash plus its version string).
    """
    run_config = {
        "resources": {
            "test_resource": {
                "config": {"input_str": "apple"},
            },
            "test_resource_no_version": {"config": {"input_str": "banana"}},
        }
    }

    environment_config = EnvironmentConfig.build(modes_pipeline, run_config, mode="fakemode")

    resource_versions_by_key = resolve_resource_versions(environment_config, modes_pipeline)

    assert resource_versions_by_key["test_resource"] == join_and_hash(
        resolve_config_version({"input_str": "apple"}), test_resource.version
    )

    # Fixed: compare to None with `is`, not `==` (PEP 8; was `== None`).
    assert resource_versions_by_key["test_resource_no_version"] is None

    assert resource_versions_by_key["test_resource_no_config"] == join_and_hash(
        join_and_hash(), "42"
    )
Пример #20
0
def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    """Validate the arguments and construct an ExecutionPlan for ``pipeline``.

    Args:
        pipeline: Pipeline handle accepted by ``_check_pipeline``.
        environment_dict (Optional[dict]): Environment config dict (str keys).
        mode (Optional[str]): Mode name; defaults to the pipeline's default mode.
        step_keys_to_execute (Optional[List[str]]): Optional subset of step keys.

    Returns:
        ExecutionPlan: The plan built from the resolved environment config.
    """
    pipeline, pipeline_def = _check_pipeline(pipeline)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )
def test_step_versions_with_resources():
    """The step version hash incorporates solid def, config, and resource versions."""
    run_config = {"resources": {"test_resource": {"config": {"input_str": "apple"}}}}
    speculative_execution_plan = create_execution_plan(
        versioned_modes_pipeline, run_config=run_config
    )

    versions = resolve_step_versions(
        speculative_execution_plan, run_config=run_config, mode="fakemode"
    )

    environment_config = EnvironmentConfig.build(
        versioned_modes_pipeline, mode="fakemode", run_config=run_config
    )
    resource_versions_by_key = resolve_resource_versions(
        environment_config, versioned_modes_pipeline.get_mode_definition("fakemode")
    )
    required_keys = fake_solid_resources_versioned.required_resource_keys
    solid_resources_version = join_and_hash(
        *[resource_versions_by_key[resource_key] for resource_key in required_keys]
    )
    solid_version = join_and_hash(
        fake_solid_resources_versioned.version,
        resolve_config_version(None),
        solid_resources_version,
    )

    expected_step_version = join_and_hash(solid_version)
    assert versions["fake_solid_resources_versioned.compute"] == expected_step_version
Пример #22
0
def test_required_solid_with_required_subfield():
    """A required config subfield makes the whole 'solids' section required."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema={'required_field': String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    solids_field = env_type.fields['solids']

    assert solids_field.is_required is True
    assert solids_field.config_type

    int_config_solid_field = solids_field.config_type.fields['int_config_solid']
    assert int_config_solid_field.is_required is True
    assert int_config_solid_field.config_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def, {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}},
    )
    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    # Missing required config must fail validation.
    assert not process_config(env_type, {'solids': {}}).success
    assert not process_config(env_type, {}).success
Пример #23
0
def test_required_solid_with_required_subfield():
    """Required config subfield: env type requires 'solids'; invalid configs raise."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config={'required_field': String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    solids_field = env_type.fields['solids']

    assert solids_field.is_required is True
    assert solids_field.config_type

    int_config_solid_field = solids_field.config_type.fields['int_config_solid']
    assert int_config_solid_field.is_required is True
    assert int_config_solid_field.config_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def, {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}},
    )
    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    # Missing required config must raise during validation.
    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_validate_config_value(env_type, {'solids': {}})

    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_validate_config_value(env_type, {})
def test_solid_dictionary_type():
    """The generated solid dictionary type is system config; user config is not."""
    pipeline_def = define_test_solids_config_pipeline()

    solid_dict_type = define_solid_dictionary_cls(
        pipeline_def.solids, pipeline_def.dependency_structure
    )

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {
            'solids': {
                'int_config_solid': {'config': 1},
                'string_config_solid': {'config': 'bar'},
            },
        },
    )

    value = env_obj.solids

    assert set(value.keys()) == {'int_config_solid', 'string_config_solid'}
    assert value == {
        'int_config_solid': SolidConfig(1),
        'string_config_solid': SolidConfig('bar'),
    }

    assert solid_dict_type.type_attributes.is_system_config

    for solid_config_field in solid_dict_type.fields.values():
        solid_config_type = solid_config_field.config_type
        assert solid_config_type.type_attributes.is_system_config
        # The nested user-supplied 'config' field must NOT be marked system config.
        user_config_field = solid_config_type.fields['config']
        assert user_config_field.config_type.type_attributes.is_system_config is False
Пример #25
0
def create_context_creation_data(
    pipeline_def, environment_dict, pipeline_run, instance, execution_plan
):
    """Assemble the ContextCreationData bundle for a pipeline run.

    Builds the environment config, resolves the run mode's storage and
    executor definitions, and computes the resource keys that must be
    initialized for the given execution plan.
    """
    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, pipeline_run)

    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    system_storage_def = system_storage_def_from_config(mode_def, environment_config)

    execution_target_handle, _ = ExecutionTargetHandle.get_handle(pipeline_def)

    return ContextCreationData(
        pipeline_def=pipeline_def,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        execution_target_handle=execution_target_handle,
        executor_def=executor_def_from_config(mode_def, environment_config),
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, system_storage_def
        ),
    )
Пример #26
0
    def resolve_memoized_execution_plan(self, execution_plan, run_config, mode):
        """Restrict the plan to steps whose outputs are not already memoized.

        Returns:
            ExecutionPlan: Execution plan configured to only run unmemoized steps.

        Raises:
            DagsterInvariantViolationError: If no step output in the plan has a version.
        """
        pipeline_def = execution_plan.pipeline.get_definition()
        pipeline_name = pipeline_def.name

        step_output_versions = resolve_step_output_versions(
            execution_plan,
            EnvironmentConfig.build(pipeline_def, run_config, mode),
            pipeline_def.get_mode_definition(mode),
        )
        if not any(version is not None for version in step_output_versions.values()):
            raise DagsterInvariantViolationError(
                "While creating a memoized pipeline run, no steps have versions. At least one step "
                "must have a version."
            )

        # Look up previously-stored addresses for every versioned output.
        versioned_outputs = {
            (pipeline_name, handle): version
            for handle, version in step_output_versions.items()
            if version
        }
        step_output_addresses = self.get_addresses_for_step_output_versions(versioned_outputs)

        # A step must re-execute when any of its outputs has no stored address.
        unstored_step_keys = {
            handle.step_key
            for handle in step_output_versions
            if (pipeline_name, handle) not in step_output_addresses
        }
        return execution_plan.build_memoized_plan(list(unstored_step_keys), step_output_addresses)
Пример #27
0
def create_execution_plan(pipeline,
                          run_config=None,
                          mode=None,
                          step_keys_to_execute=None):
    """Build an :class:`ExecutionPlan` for ``pipeline`` under the given config and mode.

    Args:
        pipeline: Pipeline (or reconstructable handle) to plan execution for.
        run_config (Optional[dict]): Environment config dict; defaults to empty.
        mode (Optional[str]): Mode name; defaults to the pipeline's default mode.
        step_keys_to_execute (Optional[List[str]]): Restrict the plan to these steps.

    Returns:
        ExecutionPlan
    """
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    # Normalize/validate the optional arguments before planning.
    run_config = check.opt_dict_param(run_config, 'run_config', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    return ExecutionPlan.build(
        pipeline,
        environment_config,
        mode=mode,
        step_keys_to_execute=step_keys_to_execute,
    )
Пример #28
0
    def get_context(self, solid_config=None, mode_def=None, run_config=None):
        """Get a dagstermill execution context for interactive exploration and development.

        Builds an ephemeral single-solid pipeline around a no-op solid, stubs a
        ``PipelineRun`` for it, and materializes a context inside
        ``scoped_pipeline_context`` so that resources/loggers behave as they would
        during a real run.

        Args:
            solid_config (Optional[Any]): If specified, this value will be made available on the
                context as its ``solid_config`` property.
            mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
                use to construct the context. Specify this if you would like a context constructed
                with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
                with a console logger will be constructed.
            run_config(Optional[dict]): The environment config dict with which to construct
                the context.

        Returns:
            :py:class:`~dagstermill.DagstermillExecutionContext`
        """
        check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
        run_config = check.opt_dict_param(run_config,
                                          "run_config",
                                          key_type=str)

        # If we are running non-interactively, and there is already a context reconstituted, return
        # that context rather than overwriting it.
        if self.context is not None and isinstance(
                self.context, DagstermillRuntimeExecutionContext):
            return self.context

        # No mode supplied: fall back to an ephemeral mode with only a console logger,
        # and enable that logger in the run config.
        # NOTE(review): this assigns into `run_config`; if check.opt_dict_param returns the
        # caller's dict unchanged, the caller's dict is mutated here — confirm intended.
        if not mode_def:
            mode_def = ModeDefinition(
                logger_defs={"dagstermill": colored_console_logger})
            run_config["loggers"] = {"dagstermill": {}}

        # Ephemeral no-op solid used solely to anchor the context; it declares every
        # resource key of the mode so all of the mode's resources are initialized.
        solid_def = SolidDefinition(
            name="this_solid",
            input_defs=[],
            compute_fn=lambda *args, **kwargs: None,
            output_defs=[],
            description=
            "Ephemeral solid constructed by dagstermill.get_context()",
            required_resource_keys=mode_def.resource_key_set,
        )

        # Throwaway single-solid pipeline wrapping the ephemeral solid above.
        pipeline_def = PipelineDefinition(
            [solid_def],
            mode_defs=[mode_def],
            name="ephemeral_dagstermill_pipeline")

        run_id = make_new_run_id()

        # construct stubbed PipelineRun for notebook exploration...
        # The actual pipeline run during pipeline execution will be serialized and reconstituted
        # in the `reconstitute_pipeline_context` call
        pipeline_run = PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=mode_def.name,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
            tags=None,
        )

        # Record interactive (non-pipeline) state on the manager before building the context.
        self.in_pipeline = False
        self.solid_def = solid_def
        self.pipeline = pipeline_def

        environment_config = EnvironmentConfig.build(pipeline_def,
                                                     run_config,
                                                     mode=mode_def.name)

        pipeline = InMemoryPipeline(pipeline_def)
        execution_plan = ExecutionPlan.build(pipeline, environment_config)

        # Build the context against an ephemeral instance; resources are set up through
        # self._setup_resources so this manager can tear them down later.
        with scoped_pipeline_context(
                execution_plan,
                pipeline,
                run_config,
                pipeline_run,
                DagsterInstance.ephemeral(),
                scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:

            self.context = DagstermillExecutionContext(
                pipeline_context=pipeline_context,
                pipeline_def=pipeline_def,
                solid_config=solid_config,
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan,
                    pipeline_def,
                    environment_config,
                    pipeline_context.intermediate_storage_def,
                ),
                solid_name=solid_def.name,
            )

        return self.context
Пример #29
0
    def reconstitute_pipeline_context(
        self,
        output_log_path=None,
        marshal_dir=None,
        run_config=None,
        executable_dict=None,
        pipeline_run_dict=None,
        solid_handle_kwargs=None,
        instance_ref_dict=None,
    ):
        """Reconstitutes a context for dagstermill-managed execution.

        You'll see this function called to reconstruct a pipeline context within the ``injected
        parameters`` cell of a dagstermill output notebook. Users should not call this function
        interactively except when debugging output notebooks.

        Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
        context for interactive exploration and development. This call will be replaced by one to
        :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
        dagstermill.

        Args:
            output_log_path (Optional[str]): Validated but otherwise unused here.
            marshal_dir (Optional[str]): Directory used for marshalling values; stored on self.
            run_config (Optional[dict]): Environment config dict for the run.
            executable_dict (dict): Serialized :class:`ReconstructablePipeline`.
            pipeline_run_dict (dict): Packed :class:`PipelineRun` to unpack.
            solid_handle_kwargs (dict): Kwargs identifying the solid being executed.
            instance_ref_dict (dict): Packed ``InstanceRef`` for resolving the instance.

        Returns:
            DagstermillRuntimeExecutionContext
        """
        # Validate all serialized inputs up front.
        check.opt_str_param(output_log_path, "output_log_path")
        check.opt_str_param(marshal_dir, "marshal_dir")
        run_config = check.opt_dict_param(run_config,
                                          "run_config",
                                          key_type=str)
        check.dict_param(pipeline_run_dict, "pipeline_run_dict")
        check.dict_param(executable_dict, "executable_dict")
        check.dict_param(solid_handle_kwargs, "solid_handle_kwargs")
        check.dict_param(instance_ref_dict, "instance_ref_dict")

        # Rebuild the pipeline from its serialized reconstructable form.
        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        pipeline_def = pipeline.get_definition()

        # Wrap instance-resolution failures so the notebook surfaces a dagstermill error
        # with the original cause chained.
        try:
            instance_ref = unpack_value(instance_ref_dict)
            instance = DagsterInstance.from_ref(instance_ref)
        except Exception as err:  # pylint: disable=broad-except
            raise DagstermillError(
                "Error when attempting to resolve DagsterInstance from serialized InstanceRef"
            ) from err

        pipeline_run = unpack_value(pipeline_run_dict)

        # Resolve the specific solid this notebook step is executing.
        solid_handle = SolidHandle.from_dict(solid_handle_kwargs)
        solid_def = pipeline_def.get_solid(solid_handle).definition

        # Record pipeline-managed state on the manager before building the context.
        self.marshal_dir = marshal_dir
        self.in_pipeline = True
        self.solid_def = solid_def
        self.pipeline = pipeline

        environment_config = EnvironmentConfig.build(pipeline_def,
                                                     run_config,
                                                     mode=pipeline_run.mode)

        # Restrict the plan to the run's recorded step keys, if any.
        execution_plan = ExecutionPlan.build(
            self.pipeline,
            environment_config,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        )

        with scoped_pipeline_context(
                execution_plan,
                pipeline,
                run_config,
                pipeline_run,
                instance,
                scoped_resources_builder_cm=self._setup_resources,
                # Set this flag even though we're not in test for clearer error reporting
                raise_on_error=True,
        ) as pipeline_context:
            # Pull this solid's config (if any) out of the run config by name.
            self.context = DagstermillRuntimeExecutionContext(
                pipeline_context=pipeline_context,
                pipeline_def=pipeline_def,
                solid_config=run_config.get("solids",
                                            {}).get(solid_def.name,
                                                    {}).get("config"),
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan,
                    pipeline_def,
                    environment_config,
                    pipeline_context.intermediate_storage_def,
                ),
                solid_name=solid_def.name,
            )

        return self.context
Пример #30
0
def test_execution_plan_reexecution():
    """Re-execute only the ``add_two`` step of a finished run and verify that
    the intermediates from the parent run are reused unchanged."""
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs(
        {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    )

    # Initial full run: 3 -> add_one -> 4 -> add_two -> 6.
    first_result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )
    assert first_result.success

    store = build_fs_intermediate_storage(
        instance.intermediates_directory, first_result.run_id
    )
    assert store.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert store.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    ## re-execute add_two

    environment_config = EnvironmentConfig.build(
        pipeline_def,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline_def),
        environment_config,
    )
    # Child run parented to the first run so stored intermediates are visible.
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=first_result.run_id,
        root_run_id=first_result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(
            ["add_two"], pipeline_def, environment_config
        ),
        InMemoryPipeline(pipeline_def),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    # Intermediates are unchanged after the partial re-execution.
    store = build_fs_intermediate_storage(
        instance.intermediates_directory, first_result.run_id
    )
    assert store.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert store.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    # Only add_two produced an output event in the subset run.
    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")