def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 4
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle('add_two.compute')).obj
        == 6
    )

    ## re-execute add_two
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_keys_to_execute=['add_two.compute'],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 4
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle('add_two.compute')).obj
        == 6
    )

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match='Execution plan does not contain step',
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_keys_to_execute=['nope.compute'],
            instance=instance,
        )
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline(using_file_system=True)
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    with open(
        os.path.join(instance.storage_directory(), result.run_id, "add_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 4

    with open(
        os.path.join(instance.storage_directory(), result.run_id, "add_two", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 6

    ## re-execute add_two
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_selection=["add_two"],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    assert not os.path.exists(
        os.path.join(
            instance.storage_directory(), pipeline_reexecution_result.run_id, "add_one", "result"
        )
    )
    with open(
        os.path.join(
            instance.storage_directory(), pipeline_reexecution_result.run_id, "add_two", "result"
        ),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Step selection refers to unknown step: nope",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_selection=["nope"],
            instance=instance,
        )
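# Both variants of test_pipeline_step_key_subset_execution above assume a
# `define_addy_pipeline` helper that is not shown here. A minimal sketch
# consistent with the assertions (3 + 1 == 4, then 4 + 2 == 6), assuming
# dagster's legacy solid/pipeline APIs; wiring `using_file_system` to
# `fs_io_manager` is an assumption based on the newer variant's on-disk reads:
from dagster import (
    InputDefinition,
    ModeDefinition,
    OutputDefinition,
    fs_io_manager,
    pipeline,
    solid,
)


def define_addy_pipeline(using_file_system=False):
    @solid(input_defs=[InputDefinition("num", int)], output_defs=[OutputDefinition(int)])
    def add_one(_, num):
        return num + 1

    @solid(input_defs=[InputDefinition("num", int)], output_defs=[OutputDefinition(int)])
    def add_two(_, num):
        return num + 2

    mode_defs = (
        [ModeDefinition(resource_defs={"io_manager": fs_io_manager})]
        if using_file_system
        else None
    )

    @pipeline(mode_defs=mode_defs)
    def addy_pipeline():
        add_two(add_one())

    return addy_pipeline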
def test_reexec_from_parent_dynamic_fails():
    with instance_for_test() as instance:
        parent_result = execute_pipeline(
            dynamic_pipeline, run_config={"storage": {"filesystem": {}}}, instance=instance
        )
        parent_run_id = parent_result.run_id

        # Not currently supported: this would need to know all fan-outs of the
        # previous step; it should just run the previous step instead.
        with pytest.raises(
            DagsterInvariantViolationError,
            match=r'UnresolvedExecutionStep "multiply_inputs\[\?\]" is resolved by "emit" which is not part of the current step selection',
        ):
            reexecute_pipeline(
                pipeline=dynamic_pipeline,
                parent_run_id=parent_run_id,
                run_config={"storage": {"filesystem": {}}},
                step_selection=["multiply_inputs[?]"],
                instance=instance,
            )
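# `dynamic_pipeline` is assumed by this test and by test_reexec_from_parent_1,
# test_reexec_from_parent_2, and test_reexec_from_parent_basic below. A minimal
# sketch consistent with the values those tests assert (emit -> {"0": 0, "1": 1,
# "2": 2}; multiply_inputs[0] == 0; multiply_by_two[1] == 20, i.e. x * 10 * 2),
# assuming dagster's dynamic-output APIs (in some versions these live under
# dagster.experimental); the exact wiring is an assumption:
from dagster import DynamicOutput, DynamicOutputDefinition, pipeline, solid


@solid(output_defs=[DynamicOutputDefinition()])
def emit(_):
    for i in range(3):
        yield DynamicOutput(value=i, mapping_key=str(i))


@solid
def multiply_inputs(_, num):
    return num * 10


@solid
def multiply_by_two(_, num):
    return num * 2


@pipeline
def dynamic_pipeline():
    emit().map(lambda num: multiply_by_two(multiply_inputs(num)))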
def test_reexecution_fs_storage():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    reexecution_result = reexecute_pipeline(
        pipeline_def,
        pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
    )

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid("return_one").output_value() == 1
    assert reexecution_result.result_for_solid("add_one").output_value() == 2

    reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
    assert reexecution_run.parent_run_id == pipeline_result.run_id
    assert reexecution_run.root_run_id == pipeline_result.run_id

    grandchild_result = reexecute_pipeline(
        pipeline_def,
        reexecution_result.run_id,
        run_config=run_config,
        instance=instance,
    )

    assert grandchild_result.success
    assert len(grandchild_result.solid_result_list) == 2
    assert grandchild_result.result_for_solid("return_one").output_value() == 1
    assert grandchild_result.result_for_solid("add_one").output_value() == 2

    grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
    assert grandchild_run.parent_run_id == reexecution_result.run_id
    assert grandchild_run.root_run_id == pipeline_result.run_id
def test_reexecute_subset_of_subset():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        def my_pipeline_def(should_fail):
            @solid
            def one(_):
                return 1

            @solid
            def plus_two(_, i):
                if should_fail:
                    raise Exception()
                return i + 2

            @solid
            def plus_three(_, i):
                return i + 3

            @pipeline(
                mode_defs=[ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})]
            )
            def my_pipeline():
                plus_three(plus_two(one()))

            return my_pipeline

        first_result = execute_pipeline(
            my_pipeline_def(should_fail=True), instance=instance, raise_on_error=False
        )
        assert not first_result.success
        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_two*"],
        )
        assert second_result.success
        assert second_result.result_for_solid("plus_two").output_value() == 3
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_two*"],
        )
        assert third_result.success
        assert third_result.result_for_solid("plus_two").output_value() == 3
def test_multiproc_reexecution_fs_storage_after_fail():
    with instance_for_test() as instance:
        run_config = {"execution": {"multiprocess": {}}}
        pipeline_result = execute_pipeline(
            reconstructable(retry_pipeline),
            run_config={
                "execution": {"multiprocess": {}},
                "solids": {"return_one": {"config": {"fail": True}}},
            },
            instance=instance,
            raise_on_error=False,
        )
        assert not pipeline_result.success

        reexecution_result = reexecute_pipeline(
            reconstructable(retry_pipeline),
            pipeline_result.run_id,
            run_config=run_config,
            instance=instance,
        )

        assert reexecution_result.success
        assert len(reexecution_result.solid_result_list) == 2
        assert reexecution_result.result_for_solid("return_one").output_value() == 1
        assert reexecution_result.result_for_solid("add_one").output_value() == 2

        reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
        assert reexecution_run.parent_run_id == pipeline_result.run_id
        assert reexecution_run.root_run_id == pipeline_result.run_id

        grandchild_result = reexecute_pipeline(
            reconstructable(retry_pipeline),
            reexecution_result.run_id,
            run_config=run_config,
            instance=instance,
        )

        assert grandchild_result.success
        assert len(grandchild_result.solid_result_list) == 2
        assert grandchild_result.result_for_solid("return_one").output_value() == 1
        assert grandchild_result.result_for_solid("add_one").output_value() == 2

        grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
        assert grandchild_run.parent_run_id == reexecution_result.run_id
        assert grandchild_run.root_run_id == pipeline_result.run_id
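# `retry_pipeline` above is assumed rather than shown. A minimal sketch
# consistent with the config and assertions (a configurable failure in
# return_one, then add_one); it must live at module level so that
# reconstructable() can find it, and the file-system io manager wiring is an
# assumption so that outputs survive across runs:
from dagster import Field, ModeDefinition, fs_io_manager, pipeline, solid


@solid(config_schema={"fail": Field(bool, default_value=False)})
def return_one_with_config(context):
    if context.solid_config["fail"]:
        raise Exception("failing on request")
    return 1


@solid
def add_one_solid(_, num):
    return num + 1


@pipeline(mode_defs=[ModeDefinition(resource_defs={"io_manager": fs_io_manager})])
def retry_pipeline():
    # the test refers to these steps as "return_one" and "add_one"; aliases or
    # matching solid names would be used in the original
    add_one_solid(return_one_with_config())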
def test_reexecute_subset_of_subset_with_composite():
    @solid
    def one(_):
        return 1

    @solid
    def plus_two(_, i):
        return i + 2

    @composite_solid
    def one_plus_two():
        return plus_two(one())

    @solid
    def plus_three(_, i):
        return i + 3

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @pipeline(mode_defs=[ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})])
        def my_pipeline():
            plus_three(one_plus_two())

        first_result = execute_pipeline(my_pipeline, instance=instance)
        assert first_result.success
        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_three"],
        )
        assert second_result.success
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_three"],
        )
        assert third_result.success
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    instance = DagsterInstance.ephemeral()
    run_config = {"storage": {"filesystem": {}}}
    pipeline_result_full = execute_pipeline(foo_pipeline, run_config=run_config, instance=instance)
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    result_multi_disjoint = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one.compute", "return_two.compute", "add_nums.compute+"],
    )
    assert result_multi_disjoint.success
    assert result_multi_disjoint.result_for_solid("multiply_two").output_value() == 6

    result_multi_overlap = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one.compute++", "return_two.compute", "add_nums.compute+"],
    )
    assert result_multi_overlap.success
    assert result_multi_overlap.result_for_solid("multiply_two").output_value() == 6

    with pytest.raises(
        DagsterInvalidSubsetError,
        match=re.escape("No qualified steps to execute found for step_selection"),
    ):
        reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            run_config=run_config,
            instance=instance,
            step_selection=["a", "*add_nums.compute"],
        )
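# `foo_pipeline` is assumed by the step-selection re-execution tests in this
# file. A minimal sketch consistent with the asserted values (1 + 2 == 3,
# 3 * 2 == 6, 6 + 1 == 7) and the five-solid result list, assuming dagster's
# legacy APIs; the wiring is inferred from those assertions:
from dagster import lambda_solid, pipeline


@lambda_solid
def return_one():
    return 1


@lambda_solid
def return_two():
    return 2


@lambda_solid
def add_nums(num1, num2):
    return num1 + num2


@lambda_solid
def multiply_two(num):
    return num * 2


@lambda_solid
def add_one(num):
    return num + 1


@pipeline
def foo_pipeline():
    add_one(multiply_two(add_nums(return_one(), return_two())))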
def test_single_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in
    # dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["add_one.compute"],
    )

    assert reexecution_result.success
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one").output_value() == 2
def test_two_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    run_config = {'storage': {'filesystem': {}}}
    pipeline_result = execute_pipeline(two_step_reexec, run_config=run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
    )
    assert reexecution_result.success
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
def test_reexec_from_parent_1():
    with instance_for_test() as instance:
        parent_result = execute_pipeline(
            dynamic_pipeline, run_config={"storage": {"filesystem": {}}}, instance=instance
        )
        parent_run_id = parent_result.run_id

        reexec_result = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            run_config={"storage": {"filesystem": {}}},
            step_selection=["multiply_inputs[0]"],
            instance=instance,
        )
        assert reexec_result.success
        assert reexec_result.result_for_solid("multiply_inputs").output_value() == {
            "0": 0,
        }
def test_retries(environment):
    instance = DagsterInstance.local_temp()
    pipe = reconstructable(define_run_retry_pipeline)
    fails = dict(environment)
    fails['solids'] = {'can_fail': {'config': {'fail': True}}}
    result = execute_pipeline(
        pipe,
        run_config=fails,
        instance=instance,
        raise_on_error=False,
    )
    assert not result.success

    passes = dict(environment)
    passes['solids'] = {'can_fail': {'config': {'fail': False}}}
    second_result = reexecute_pipeline(
        pipe,
        parent_run_id=result.run_id,
        run_config=passes,
        instance=instance,
    )
    assert second_result.success

    downstream_of_failed = second_result.result_for_solid('downstream_of_failed').output_value()
    assert downstream_of_failed == 'okay perfect'

    will_be_skipped = [
        e for e in second_result.event_list if 'will_be_skipped' in str(e.solid_handle)
    ]
    assert str(will_be_skipped[0].event_type_value) == 'STEP_SKIPPED'
    assert str(will_be_skipped[1].event_type_value) == 'STEP_SKIPPED'
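# `define_run_retry_pipeline` is assumed by the test_retries variants in this
# file. A minimal sketch consistent with the assertions: `can_fail` raises or
# returns by config, `downstream_of_failed` passes the value through, and an
# optional output that never fires leaves two chained `will_be_skipped` steps
# skipped in every run. Solid names come from the test; the wiring is an
# assumption:
from dagster import OutputDefinition, pipeline, solid


def define_run_retry_pipeline():
    @solid(config_schema={"fail": bool})
    def can_fail(context):
        if context.solid_config["fail"]:
            raise Exception("configured to fail")
        return "okay perfect"

    @solid(output_defs=[OutputDefinition(name="start_skip", is_required=False)])
    def skip_source(_):
        # yields nothing, so the optional output is always skipped
        yield from []

    @solid
    def will_be_skipped(_, _start):
        pass

    @solid
    def downstream_of_failed(_, value):
        return value

    @pipeline
    def run_retry_pipeline():
        downstream_of_failed(can_fail())
        # calling will_be_skipped twice produces the two skipped steps the
        # test inspects (will_be_skipped and will_be_skipped_2)
        will_be_skipped(will_be_skipped(skip_source()))

    return run_retry_pipeline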
def test_two_step_reexecution():
    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    @pipeline(mode_defs=[default_mode_def_for_test])
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(two_step_reexec, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one_2").output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        instance=instance,
        step_selection=["add_one", "add_one_2"],
    )
    assert reexecution_result.success
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one_2").output_value() == 3
def test_single_step_reexecution():
    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
        mode_defs=[default_mode_def_for_test],
    )
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in
    # dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        instance=instance,
        step_selection=["add_one"],
    )

    assert reexecution_result.success
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one").output_value() == 2
def test_two_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    run_config = {"storage": {"filesystem": {}}}
    pipeline_result = execute_pipeline(two_step_reexec, run_config=run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one_2").output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["add_one.compute", "add_one_2.compute"],
    )
    assert reexecution_result.success
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one_2").output_value() == 3
def test_custom_path_asset_store():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()

        run_config = {
            "resources": {"fs_asset_store": {"config": {"base_dir": tmpdir_path}}},
        }

        result = execute_pipeline(
            custom_path_pipeline, run_config=run_config, mode="test", instance=instance
        )
        assert result.success

        filepath_call_api = os.path.join(tmpdir_path, "call_api_output")
        assert os.path.isfile(filepath_call_api)
        with open(filepath_call_api, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        filepath_parse_df = os.path.join(tmpdir_path, "parse_df_output")
        assert os.path.isfile(filepath_parse_df)
        with open(filepath_parse_df, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5]

        assert reexecute_pipeline(
            custom_path_pipeline,
            result.run_id,
            run_config=run_config,
            mode="test",
            instance=instance,
            step_selection=["parse_df.compute*"],
        ).success
def test_default_object_manager_reexecution():
    with seven.TemporaryDirectory() as tmpdir_path:
        default_asset_store = fs_object_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_asset_store, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of
        # intermediate events
        get_asset_events = list(
            filter(
                lambda evt: evt.is_asset_store_operation
                and AssetStoreOperationType(evt.event_specific_data.op)
                == AssetStoreOperationType.GET_ASSET,
                re_result.event_list,
            )
        )
        assert len(get_asset_events) == 1
        assert get_asset_events[0].event_specific_data.step_key == "solid_a"
def test_retries(environment):
    with instance_for_test() as instance:
        pipe = reconstructable(define_run_retry_pipeline)
        fails = dict(environment)
        fails["solids"] = {"can_fail": {"config": {"fail": True}}}
        result = execute_pipeline(
            pipe,
            run_config=fails,
            instance=instance,
            raise_on_error=False,
        )
        assert not result.success

        passes = dict(environment)
        passes["solids"] = {"can_fail": {"config": {"fail": False}}}
        second_result = reexecute_pipeline(
            pipe,
            parent_run_id=result.run_id,
            run_config=passes,
            instance=instance,
        )
        assert second_result.success

        downstream_of_failed = second_result.result_for_solid(
            "downstream_of_failed"
        ).output_value()
        assert downstream_of_failed == "okay perfect"

        will_be_skipped = [
            e for e in second_result.event_list if "will_be_skipped" in str(e.solid_handle)
        ]
        assert str(will_be_skipped[0].event_type_value) == "STEP_SKIPPED"
        assert str(will_be_skipped[1].event_type_value) == "STEP_SKIPPED"
def test_reexec_dynamic_with_optional_output_job_3():
    with instance_for_test() as instance:
        result = dynamic_with_optional_output_job().execute_in_process(instance=instance)

        # re-execute the step where the source did not yield
        # -> error because the dynamic step won't exist in the execution plan
        with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=r"Step selection refers to unknown step: echo\[0\]",
        ):
            reexecute_pipeline(
                reconstructable(dynamic_with_optional_output_job),
                parent_run_id=result.run_id,
                instance=instance,
                step_selection=["echo[0]"],
            )
def test_reexecute_pipeline_with_step_selection_single_clause():
    instance = DagsterInstance.ephemeral()
    run_config = {"intermediate_storage": {"filesystem": {}}}
    pipeline_result_full = execute_pipeline(foo_pipeline, run_config=run_config, instance=instance)
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    reexecution_result_full = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
    )
    assert reexecution_result_full.success
    assert len(reexecution_result_full.solid_result_list) == 5
    assert reexecution_result_full.result_for_solid("add_one").output_value() == 7

    reexecution_result_up = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["*add_nums"],
    )
    assert reexecution_result_up.success
    assert reexecution_result_up.result_for_solid("add_nums").output_value() == 3

    reexecution_result_down = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["add_nums++"],
    )
    assert reexecution_result_down.success
    assert reexecution_result_down.result_for_solid("add_one").output_value() == 7
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    instance = DagsterInstance.ephemeral()
    run_config = {"intermediate_storage": {"filesystem": {}}}
    pipeline_result_full = execute_pipeline(foo_pipeline, run_config=run_config, instance=instance)
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    result_multi_disjoint = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one", "return_two", "add_nums+"],
    )
    assert result_multi_disjoint.success
    assert result_multi_disjoint.result_for_solid("multiply_two").output_value() == 6

    result_multi_overlap = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one++", "return_two", "add_nums+"],
    )
    assert result_multi_overlap.success
    assert result_multi_overlap.result_for_solid("multiply_two").output_value() == 6

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Can not build subset plan from unknown step: a",
    ):
        reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            run_config=run_config,
            instance=instance,
            step_selection=["a", "*add_nums"],
        )
def reexecution():
    instance = DagsterInstance.ephemeral()

    # Initial execution
    pipeline_result_full = execute_pipeline(unreliable_pipeline, instance=instance)

    if not pipeline_result_full.success:
        # Re-execution: Entire pipeline
        reexecution_result_full = reexecute_pipeline(
            unreliable_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            instance=instance,
        )
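# `unreliable_pipeline` above is assumed; a minimal sketch of a pipeline whose
# solid fails nondeterministically, which is what makes re-executing the whole
# pipeline from the parent run useful. The failure condition and names are
# assumptions:
import random

from dagster import lambda_solid, pipeline


@lambda_solid
def unreliable():
    # flaky by design: fails roughly half the time
    if random.random() < 0.5:
        raise Exception("flaky failure")
    return "ok"


@pipeline
def unreliable_pipeline():
    unreliable()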
def test_reexec_dynamic_with_optional_output_job_1():
    with instance_for_test() as instance:
        result = dynamic_with_optional_output_job().execute_in_process(instance=instance)

        # re-execute all
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
        )
        assert re_result.success
        assert re_result.output_for_solid("adder") == sum([i for i in range(10) if i % 2 == 0])
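# `dynamic_with_optional_output_job`, used by the two tests above, is assumed
# rather than shown. A sketch consistent with their assertions: the source
# yields only some mapping keys per run (odd keys on the root run, even keys on
# re-execution), so echo[0] is missing from the parent run's plan while the
# re-executed adder sums the even values. Assumes dagster's op/job and
# dynamic-output APIs; the run-dependent condition is inferred from the
# assertions:
from dagster import DynamicOut, DynamicOutput, job, op


@op(out=DynamicOut(is_required=False))
def optional_source(context):
    is_reexecution = context.pipeline_run.parent_run_id is not None
    for i in range(10):
        # root run yields odd keys; re-execution yields even keys
        if (i % 2 == 0) == is_reexecution:
            yield DynamicOutput(i, mapping_key=str(i))


@op
def echo(x):
    return x


@op
def adder(nums):
    return sum(nums)


@job
def dynamic_with_optional_output_job():
    adder(optional_source().map(echo).collect())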
def test_reexec_dynamic_with_transitive_optional_output_job_2():
    with instance_for_test() as instance:
        result = dynamic_with_transitive_optional_output_job().execute_in_process(
            instance=instance
        )

        # re-execute the step where the source yielded an output
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_transitive_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
            step_selection=["echo[1]"],
        )
        assert re_result.success
        assert re_result.result_for_solid("echo").output_value() == {"1": 2}
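# `dynamic_with_transitive_optional_output_job` is likewise assumed. A sketch
# consistent with this test and the one below (echo[1] == {"1": 2}; echo[0]
# exists in the plan but its upstream optional output never fires): a dynamic
# source emits keys "0" and "1", a middle op yields its doubled input only for
# odd values, and echo passes values through. The wiring is an assumption:
from dagster import DynamicOut, DynamicOutput, Out, Output, job, op


@op(out=DynamicOut())
def transitive_source():
    for i in range(2):
        yield DynamicOutput(i, mapping_key=str(i))


@op(out=Out(is_required=False))
def maybe_double(x):
    # only odd inputs produce an output, so echo[0]'s input is never written
    if x % 2 == 1:
        yield Output(x * 2)


@op
def transitive_echo(x):
    # named "echo" in the original tests
    return x


@job
def dynamic_with_transitive_optional_output_job():
    transitive_source().map(lambda n: transitive_echo(maybe_double(n)))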
def test_reexec_from_parent_2():
    with instance_for_test() as instance:
        parent_result = execute_pipeline(dynamic_pipeline, instance=instance)
        parent_run_id = parent_result.run_id

        reexec_result = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            step_selection=["multiply_by_two[1]"],
            instance=instance,
        )
        assert reexec_result.success
        assert reexec_result.result_for_solid("multiply_by_two").output_value() == {
            "1": 20,
        }
def test_fs_io_manager_reexecution():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield a loaded-input event instead of intermediate events
        loaded_input_events = list(filter(lambda evt: evt.is_loaded_input, re_result.event_list))
        assert len(loaded_input_events) == 1
        assert loaded_input_events[0].event_specific_data.upstream_step_key == "solid_a"
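# `define_pipeline` is assumed by this test and by
# test_default_object_manager_reexecution above. A minimal sketch consistent
# with the asserted step keys (solid_a feeding solid_b); the first argument is
# taken to be the io manager resource and the second, unused here, mirrors the
# extra dict at the call sites. The "io_manager" resource key matches the
# fs_io_manager variant; the older object-manager variant presumably used its
# own key:
from dagster import ModeDefinition, pipeline, solid


def define_pipeline(io_manager_def, _extra):
    @solid
    def solid_a(_):
        return [1, 2, 3]

    @solid
    def solid_b(_, a):
        return a

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"io_manager": io_manager_def})])
    def asset_pipeline():
        solid_b(solid_a())

    return asset_pipeline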
def test_reexec_from_parent_basic():
    with instance_for_test() as instance:
        parent_result = execute_pipeline(dynamic_pipeline, instance=instance)
        parent_run_id = parent_result.run_id

        reexec_result = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            step_selection=["emit"],
            instance=instance,
        )
        assert reexec_result.success
        assert reexec_result.result_for_solid("emit").output_value() == {
            "0": 0,
            "1": 1,
            "2": 2,
        }
def test_reexec_dynamic_with_transitive_optional_output_job_3():
    with instance_for_test() as instance:
        result = dynamic_with_transitive_optional_output_job().execute_in_process(
            instance=instance
        )

        # re-execute the step where the source did not yield
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_transitive_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
            step_selection=["echo[0]"],
            raise_on_error=False,
        )
        # When all previous runs have skipped yielding the source output, the
        # run fails because the source run_id resolves to None.
        # FIXME: https://github.com/dagster-io/dagster/issues/3511
        # Ideally the step should be skipped, since all of its previous runs
        # skipped it, and the run should finish successfully.
        assert not re_result.success
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    instance = DagsterInstance.ephemeral()
    pipeline_result_full = execute_pipeline(foo_pipeline, instance=instance)
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    result_multi_disjoint = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        instance=instance,
        step_selection=["return_one", "return_two", "add_nums+"],
    )
    assert result_multi_disjoint.success
    assert result_multi_disjoint.result_for_solid("multiply_two").output_value() == 6

    result_multi_overlap = reexecute_pipeline(
        foo_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        instance=instance,
        step_selection=["return_one++", "return_two", "add_nums+"],
    )
    assert result_multi_overlap.success
    assert result_multi_overlap.result_for_solid("multiply_two").output_value() == 6

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Step selection refers to unknown step: a",
    ):
        reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            instance=instance,
            step_selection=["a", "*add_nums"],
        )

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Step selection refers to unknown steps: a, b",
    ):
        reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            instance=instance,
            step_selection=["a+", "*b"],
        )
def test_reexecution(job_fn):
    with tempfile.TemporaryDirectory() as tmpdir:
        run_config = {
            "resources": {
                "initial_launcher": {"config": {"scratch_dir": tmpdir}},
                "final_launcher": {"config": {"scratch_dir": tmpdir}},
                "io_manager": {"config": {"base_dir": tmpdir}},
            }
        }
        with instance_for_test() as instance:
            run1 = execute_pipeline(
                pipeline=reconstructable(job_fn),
                run_config=run_config,
                instance=instance,
            )
            assert run1.success
            assert run1.result_for_solid("combine").output_value() == 3

            run2 = reexecute_pipeline(
                pipeline=reconstructable(job_fn),
                parent_run_id=run1.run_id,
                run_config=run_config,
                instance=instance,
                step_selection=["combine"],
            )
            assert run2.success
            assert run2.result_for_solid("combine").output_value() == 3