def test_adls2_intermediate_storage_with_custom_serializer(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, LowercaseString,
                                                  StepOutputHandle("foo"),
                                                  "foo")

            assert (
                intermediate_storage.object_store.file_system_client
                .get_file_client(
                    os.path.join(intermediate_storage.root, "intermediates",
                                 "foo", "result"))
                .download_file().readall().decode("utf-8") == "FOO")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("foo"))
            assert (intermediate_storage.get_intermediate(
                context, LowercaseString,
                StepOutputHandle("foo")).obj == "foo")
        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("foo"))
Example #2
def test_memoized_plan_root_input_manager_input_config():
    @root_input_manager(version="foo", input_config_schema={"my_str": str})
    def my_input_manager():
        return 5

    @solid(
        input_defs=[InputDefinition("x", root_manager_key="my_input_manager")],
        version="foo")
    def my_solid_takes_input(x):
        return x

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                "io_manager":
                IOManagerDefinition.hardcoded_io_manager(
                    VersionedInMemoryIOManager()),
                "my_input_manager":
                my_input_manager,
            }, ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        my_solid_takes_input()

    input_config = {"my_str": "foo"}
    run_config = {
        "solids": {
            "my_solid_takes_input": {
                "inputs": {
                    "x": input_config
                }
            }
        }
    }
    with instance_for_test() as instance:
        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )
        output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result"))

        assert output_version is not None

        input_config["my_str"] = "bar"

        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )

        new_output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result"))

        # Ensure that after changing input config, the version changes.
        assert new_output_version != output_version
Example #3
def test_resolve_memoized_execution_plan_yes_stored_results():
    manager = VersionedInMemoryIOManager()
    versioned_pipeline = versioned_pipeline_factory(manager)
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step_output_version = speculative_execution_plan.resolve_step_output_versions()[
        step_output_handle
    ]
    manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    with DagsterInstance.ephemeral() as dagster_instance:

        memoized_execution_plan = resolve_memoized_execution_plan(
            speculative_execution_plan, {}, dagster_instance
        )

        assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        expected_handle = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )

        assert (
            memoized_execution_plan.get_step_by_key("versioned_solid_takes_input")
            .step_input_dict["intput"]
            .source.step_output_handle
            == expected_handle
        )
Example #4
def test_adls2_intermediate_storage_with_custom_prefix(storage_account,
                                                       file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix="custom_prefix",
    )
    assert intermediate_storage.root == "/".join(
        ["custom_prefix", "storage", run_id])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(context, RuntimeBool,
                                                  StepOutputHandle("true"),
                                                  True)

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("true"))
            assert intermediate_storage.uri_for_paths(["true"]).startswith(
                "abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix".
                format(account=storage_account, fs=file_system))

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
Example #5
def test_gcs_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
        gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket)

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]),
                StepOutputHandle(obj_name)).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
Example #6
def test_gcs_intermediate_storage_with_custom_prefix(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket,
                                                  gcs_prefix="custom_prefix")
    assert intermediate_storage.root == "/".join(
        ["custom_prefix", "storage", run_id])

    obj_name = "true"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(context, RuntimeBool,
                                                  StepOutputHandle(obj_name),
                                                  True)

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.uri_for_paths([obj_name]).startswith(
                "gs://%s/custom_prefix" % gcs_bucket)

    finally:
        intermediate_storage.rm_intermediate(context,
                                             StepOutputHandle(obj_name))
Example #7
def test_adls2_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]),
                StepOutputHandle(obj_name)).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
Example #8
def test_gcs_intermediate_storage_with_type_storage_plugin(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id,
        gcs_bucket=gcs_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringGCSTypeStoragePlugin)
        ]),
    )

    obj_name = "obj_name"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle(obj_name),
                                                  "hello")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString, StepOutputHandle(obj_name)) == "hello")

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
Example #9
def test_s3_intermediate_storage_with_custom_serializer(mock_s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id,
                                                 s3_bucket=mock_s3_bucket.name)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, LowercaseString,
                                                  StepOutputHandle("foo"),
                                                  "foo")

            assert (intermediate_storage.object_store.s3.get_object(
                Bucket=intermediate_storage.object_store.bucket,
                Key=os.path.join(intermediate_storage.root, "intermediates",
                                 "foo", "result"),
            )["Body"].read().decode("utf-8") == "FOO")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("foo"))
            assert (intermediate_storage.get_intermediate(
                context, LowercaseString,
                StepOutputHandle("foo")).obj == "foo")
        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("foo"))
Example #10
def test_s3_intermediate_storage_with_type_storage_plugin(mock_s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(
        run_id=run_id,
        s3_bucket=mock_s3_bucket.name,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringS3TypeStoragePlugin)
        ]),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:

            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle("obj_name"),
                                                  "hello")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("obj_name"))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString,
                StepOutputHandle("obj_name")) == "hello")

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("obj_name"))
Example #11
def test_custom_read_write_mode(gcs_bucket):
    run_id = make_new_run_id()
    intermediate_storage = GCSIntermediateStorage(run_id=run_id, gcs_bucket=gcs_bucket)
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]

    obj_name = "data_frame"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle(obj_name),
                data_frame,
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert (
                intermediate_storage.get_intermediate(
                    context, resolve_dagster_type(LessSimpleDataFrame), StepOutputHandle(obj_name)
                ).obj
                == data_frame
            )
            assert intermediate_storage.uri_for_paths([obj_name]).startswith("gs://")

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
Example #12
def test_adls2_intermediate_storage_with_type_storage_plugin(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringS3TypeStoragePlugin)
        ]),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle("obj_name"),
                                                  "hello")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("obj_name"))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString,
                StepOutputHandle("obj_name")) == "hello")

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("obj_name"))
Example #13
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:

        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id)

        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("return_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("return_one")).obj == 1)

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("add_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("add_one")).obj == 2)
Example #14
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs(
        {"solids": {
            "add_one": {
                "inputs": {
                    "num": {
                        "value": 3
                    }
                }
            }
        }})
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two")).obj == 6

    ## re-execute add_two

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two"]),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")
Example #15
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential()
                }
            }
        },
        "intermediate_storage": {
            "adls2": {
                "config": {
                    "adls2_file_system": file_system,
                    "adls2_prefix": adls2_prefix
                }
            }
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
            execution_plan,
            InMemoryPipeline(pipe),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        assert intermediate_storage.root == "/".join(
            ["custom_prefix", "storage", result.run_id])
        assert (intermediate_storage.get_intermediate(
            context, Int, StepOutputHandle("return_one")).obj == 1)
        assert (intermediate_storage.get_intermediate(
            context, Int, StepOutputHandle("add_one")).obj == 2)
Example #16
def test_failing_execution_plan():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    with plan.start(retry_mode=RetryMode.DISABLED) as active_execution:

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(
            StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(
            StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        # uh oh failure
        active_execution.mark_failed(step_3.key)
        active_execution.mark_step_produced_output(
            StepOutputHandle(step_3.key, "result"))

        # can't progress to the 4th step
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0

        assert not active_execution.is_complete

        steps = active_execution.get_steps_to_abandon()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"
        active_execution.mark_abandoned(step_4.key)

        assert active_execution.is_complete
Example #17
def test_file_system_intermediate_storage_composite_types():
    _, _, intermediate_storage = define_intermediate_storage()

    assert intermediate_storage.set_intermediate(
        None, List[Bool], StepOutputHandle("return_true_lst"), [True])

    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_true_lst"))

    assert intermediate_storage.get_intermediate(
        None, List[Bool], StepOutputHandle("return_true_lst")).obj == [True]
Example #18
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )

    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None,
                                                 StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2
Example #19
def test_resolve_step_output_versions_no_external_dependencies():
    versioned_pipeline = versioned_pipeline_factory()
    speculative_execution_plan = create_execution_plan(versioned_pipeline,
                                                       run_config={},
                                                       mode="main")
    versions = speculative_execution_plan.resolve_step_output_versions()

    assert (versions[StepOutputHandle(
        "versioned_solid_no_input",
        "result")] == versioned_pipeline_expected_step1_output_version())
    assert (versions[StepOutputHandle(
        "versioned_solid_takes_input",
        "result")] == versioned_pipeline_expected_step2_output_version())
Example #20
def test_active_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_3.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_3.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        assert not active_execution.is_complete

        active_execution.mark_success(step_4.key)

        assert active_execution.is_complete
Example #21
def test_file_system_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
):
    run_id, instance, intermediate_storage = define_intermediate_storage()

    with yield_empty_pipeline_context(run_id=run_id,
                                      instance=instance) as context:

        intermediate_storage.set_intermediate(
            context, resolve_dagster_type(List[LowercaseString]),
            StepOutputHandle("baz"), ["list"])
        assert intermediate_storage.has_intermediate(context,
                                                     StepOutputHandle("baz"))
        assert intermediate_storage.get_intermediate(
            context, resolve_dagster_type(List[Bool]),
            StepOutputHandle("baz")).obj == ["list"]
Example #22
def test_resolve_memoized_execution_plan_partial_versioning():
    manager = VersionedInMemoryIOManager()

    partially_versioned_pipeline = partially_versioned_pipeline_factory(
        manager)
    speculative_execution_plan = create_execution_plan(
        partially_versioned_pipeline)

    resolved_run_config = ResolvedRunConfig.build(partially_versioned_pipeline)

    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")

    step_output_version = resolve_step_output_versions(
        partially_versioned_pipeline, speculative_execution_plan,
        resolved_run_config)[step_output_handle]
    manager.values[(step_output_handle.step_key,
                    step_output_handle.output_name, step_output_version)] = 4

    with DagsterInstance.ephemeral() as instance:
        assert (resolve_memoized_execution_plan(
            speculative_execution_plan,
            partially_versioned_pipeline,
            {},
            instance,
            resolved_run_config,
        ).step_keys_to_execute == ["solid_takes_input"])
Example #23
def test_construct_log_string_for_event():
    step_output_event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=SolidHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepOutputData(
            step_output_handle=StepOutputHandle("solid2", "result")),
        message=
        'Yielded output "result" of type "Any" for step "solid2". (Type check passed).',
        pid=54348,
    )
    message_props = {
        "dagster_event": step_output_event,
        "pipeline_name": "my_pipeline"
    }

    synth_props = {
        "orig_message": step_output_event.message,
        "run_id": "f79a8a93-27f1-41b5-b465-b35d0809b26d",
    }
    assert (
        construct_log_string(message_props=message_props,
                             logging_tags={},
                             synth_props=synth_props) ==
        'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    )
Example #24
def _type_check_and_store_output(
        step_context: SystemStepExecutionContext,
        output: Union[DynamicOutput, Output]) -> Iterator[DagsterEvent]:

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    mapping_key = output.mapping_key if isinstance(output,
                                                   DynamicOutput) else None

    step_output_handle = StepOutputHandle(step_key=step_context.step.key,
                                          output_name=output.output_name,
                                          mapping_key=mapping_key)

    version = (step_context.execution_plan.resolve_step_output_versions().get(
        step_output_handle) if MEMOIZED_RUN_TAG
               in step_context.pipeline.get_definition().tags else None)

    for output_event in _type_check_output(step_context, step_output_handle,
                                           output, version):
        yield output_event

    for evt in _store_output(step_context, step_output_handle, output):
        yield evt

    for evt in _create_type_materializations(step_context, output.output_name,
                                             output.value):
        yield evt
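A minimal sketch of the handle construction above: for a DynamicOutput, the mapping_key distinguishes each mapped copy of the output, so the handles compare unequal (the step and chunk names here are made up):

plain = StepOutputHandle(step_key="my_step", output_name="result")
mapped = StepOutputHandle(step_key="my_step", output_name="result", mapping_key="chunk_0")
assert plain != mapped  # handles compare by (step_key, output_name, mapping_key)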
Example #25
    def yield_result(self, value, output_name="result"):
        """Yield a result directly from notebook code.

        When called interactively or in development, returns its input.

        Args:
            value (Any): The value to yield.
            output_name (Optional[str]): The name of the result to yield (default: ``'result'``).
        """
        if not self.in_pipeline:
            return value

        # deferred import for perf
        import scrapbook

        if not self.solid_def.has_output(output_name):
            raise DagstermillError(
                f"Solid {self.solid_def.name} does not have output named {output_name}."
                f"Expected one of {[str(output_def.name) for output_def in self.solid_def.output_defs]}"
            )

        # pass output value cross process boundary using io manager
        step_context = self.context._step_context  # pylint: disable=protected-access
        # Note: yield_result currently does not support DynamicOutput
        step_output_handle = StepOutputHandle(step_key=step_context.step.key,
                                              output_name=output_name)
        output_context = step_context.get_output_context(step_output_handle)
        io_manager = step_context.get_io_manager(step_output_handle)

        # Note that we assume the io manager is symmetric, i.e. handle_input(handle_output(X)) == X
        io_manager.handle_output(output_context, value)

        # record that the output has been yielded
        scrapbook.glue(output_name, "")
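A minimal usage sketch of yield_result from notebook code, assuming the notebook is executed through dagstermill (the value is a stand-in):

import dagstermill as dm

value = 42  # stand-in for a value computed earlier in the notebook
# Outside a pipeline run this returns the value unchanged; inside a run it
# hands the value to the step's io manager, as implemented above.
dm.yield_result(value, output_name="result")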
Example #26
    def handle_output(self, context, obj):
        res = self.intermediate_storage.set_intermediate(
            context=context.step_context,
            dagster_type=context.dagster_type,
            step_output_handle=StepOutputHandle(
                context.step_key, context.name, context.mapping_key
            ),
            value=obj,
            version=context.version,
        )

        # Stopgap https://github.com/dagster-io/dagster/issues/3368
        if isinstance(res, ObjectStoreOperation):
            context.log.debug(
                (
                    'Stored output "{output_name}" in {object_store_name}object store{serialization_strategy_modifier} '
                    "at {address}"
                ).format(
                    output_name=context.name,
                    object_store_name=res.object_store_name,
                    serialization_strategy_modifier=(
                        " using {serialization_strategy_name}".format(
                            serialization_strategy_name=res.serialization_strategy_name
                        )
                        if res.serialization_strategy_name
                        else ""
                    ),
                    address=res.key,
                )
            )
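The comment in Example #25 assumes the io manager is symmetric (handle_input(handle_output(X)) == X); a sketch of the matching read path on the same adapter might look like the following (illustrative, not the exact library method):

    def load_input(self, context):
        # Rebuild the handle for the upstream output that handle_output wrote,
        # then read the value back through the same intermediate storage.
        upstream = context.upstream_output
        return self.intermediate_storage.get_intermediate(
            context=context.step_context,
            dagster_type=context.dagster_type,
            step_output_handle=StepOutputHandle(
                upstream.step_key, upstream.name, upstream.mapping_key
            ),
        ).obj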
Example #27
def test_construct_log_string_for_event():
    step_output_event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepOutputData(
            step_output_handle=StepOutputHandle("solid2", "result")),
        message=
        'Yielded output "result" of type "Any" for step "solid2". (Type check passed).',
        pid=54348,
    )

    logging_metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d",
        pipeline_name="my_pipeline")
    dagster_message_props = DagsterMessageProps(
        orig_message=step_output_event.message,
        dagster_event=step_output_event,
    )

    assert (
        construct_log_string(logging_metadata=logging_metadata,
                             message_props=dagster_message_props) ==
        'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    )
Example #28
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {
        "storage": {
            "filesystem": {}
        },
        "intermediate_storage": {
            "in_memory": {}
        }
    }

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"]),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
Example #29
def logged_solid(context):
    context.get_system_context().get_output_context(
        StepOutputHandle("logged_solid", "result")
    ).log.debug("test OUTPUT debug logging from logged_solid.")
    context.get_system_context().for_input_manager(
        "logged_solid", {}, {}, None, source_handle=None
    ).log.debug("test INPUT debug logging from logged_solid.")
Example #30
def test_get_output_context_with_resources():
    @solid
    def basic_solid():
        pass

    @pipeline
    def basic_pipeline():
        basic_solid()

    with pytest.raises(
            CheckError,
            match="Expected either resources or step context to be set, but "
            "received both. If step context is provided, resources for IO manager will be "
            "retrieved off of that.",
    ):
        get_output_context(
            execution_plan=create_execution_plan(basic_pipeline),
            pipeline_def=basic_pipeline,
            resolved_run_config=ResolvedRunConfig.build(basic_pipeline),
            step_output_handle=StepOutputHandle("basic_solid", "result"),
            run_id=None,
            log_manager=None,
            step_context=mock.MagicMock(),
            resources=mock.MagicMock(),
        )