示例#1
0
def test_execution_plan_wrong_run_id():
    """Executing a plan whose previous run id was never executed must fail.

    A re-execution config is pointed at a freshly generated run id that no
    pipeline execution in this test has used. ``execute_plan`` is expected to
    raise ``DagsterRunNotFoundError`` with a message naming that id and with
    the id exposed on ``invalid_run_id``.
    """
    addy_pipeline = define_addy_pipeline()

    # Generated but never passed to any execution, so it cannot be found.
    missing_run_id = str(uuid.uuid4())
    env = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})

    reexecution = ReexecutionConfig(
        previous_run_id=missing_run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    config = RunConfig(reexecution_config=reexecution)

    plan = create_execution_plan(addy_pipeline, environment_dict=env, run_config=config)

    with pytest.raises(DagsterRunNotFoundError) as raised:
        execute_plan(plan, environment_dict=env, run_config=config)

    expected_message = 'Run id {} set as previous run id was not found in run storage'.format(
        missing_run_id
    )
    assert str(raised.value) == expected_message

    assert raised.value.invalid_run_id == missing_run_id
示例#2
0
def test_pipeline_step_key_subset_execution_wrong_step_key_in_subset():
    """Re-executing with an unknown key in ``step_keys_to_execute`` must raise.

    The pipeline is first executed successfully, then re-executed against that
    run with ``step_keys_to_execute=['nope']``; the second call is expected to
    raise ``DagsterExecutionStepNotFoundError``.
    """
    addy_pipeline = define_addy_pipeline()
    first_run_id = str(uuid.uuid4())
    env = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})

    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=RunConfig(run_id=first_run_id),
    )
    assert first_result.success

    second_run_id = str(uuid.uuid4())

    # The config objects are built inside the ``raises`` block so that the
    # guarded region covers exactly the same calls as a single nested call.
    with pytest.raises(DagsterExecutionStepNotFoundError):
        reexecution = ReexecutionConfig(
            previous_run_id=first_result.run_id,
            step_output_handles=[StepOutputHandle('add_one.compute')],
        )
        bad_subset_config = RunConfig(
            run_id=second_run_id,
            reexecution_config=reexecution,
            step_keys_to_execute=['nope'],
        )
        execute_pipeline(addy_pipeline, environment_dict=env, run_config=bad_subset_config)
示例#3
0
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing a plan step from a run made with the plain environment must fail.

    Unlike the sibling tests, the environment dict here is not wrapped with
    ``env_with_fs`` (per the test name, this is the in-memory case). After a
    successful first run, executing only ``add_two.compute`` with a
    re-execution config is expected to raise ``DagsterInvariantViolationError``.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    env = {
        'solids': {
            'add_one': {
                'inputs': {'num': {'value': 3}},
            },
        },
    }
    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=RunConfig(run_id=first_run_id),
    )

    assert first_result.success

    # Attempt to re-execute add_two, reusing add_one's output from the first run.
    second_run_id = str(uuid.uuid4())

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    in_memory_run_config = RunConfig(run_id=second_run_id, reexecution_config=reexecution)

    plan = create_execution_plan(
        addy_pipeline,
        environment_dict=env,
        run_config=in_memory_run_config,
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            plan,
            environment_dict=env,
            run_config=in_memory_run_config,
            step_keys_to_execute=['add_two.compute'],
        )
示例#4
0
def test_pipeline_step_key_subset_execution_wrong_output_name_in_step_output_handles():
    """A step output handle naming a non-existent output must be rejected.

    Runs the pipeline once on an ephemeral instance, checks the run id was
    honoured, then re-executes ``add_two.compute`` with a handle for
    ``add_one.compute`` / ``invalid_output``. The re-execution is expected to
    raise ``DagsterStepOutputNotFoundError``.
    """
    addy_pipeline = define_addy_pipeline()
    first_run_id = str(uuid.uuid4())
    instance = DagsterInstance.ephemeral()
    base_env = {
        'solids': {
            'add_one': {
                'inputs': {'num': {'value': 3}},
            },
        },
    }

    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env_with_fs(base_env),
        run_config=RunConfig(run_id=first_run_id),
        instance=instance,
    )
    assert first_result.success
    assert first_result.run_id == first_run_id

    second_run_id = str(uuid.uuid4())

    # Everything the original evaluated as call arguments stays inside the
    # guarded region, in the same order.
    with pytest.raises(DagsterStepOutputNotFoundError):
        second_env = env_with_fs(base_env)
        bad_handle = StepOutputHandle('add_one.compute', 'invalid_output')
        reexecution = ReexecutionConfig(
            previous_run_id=first_result.run_id,
            step_output_handles=[bad_handle],
        )
        second_config = RunConfig(
            run_id=second_run_id,
            reexecution_config=reexecution,
            step_keys_to_execute=['add_two.compute'],
        )
        execute_pipeline(
            addy_pipeline,
            environment_dict=second_env,
            run_config=second_config,
            instance=instance,
        )
示例#5
0
def test_retries():
    """Re-execute a failed run via ``ReexecutionConfig.from_previous_run``.

    The pipeline has one solid that raises on its first invocation only and
    one solid whose input comes from an optional output that is never yielded.
    The first run (with ``raise_on_error=False``) must fail; the re-execution
    built from that run must succeed, produce ``'okay perfect'`` downstream of
    the previously failing solid, and report ``STEP_SKIPPED`` for the solid
    whose input was never produced.
    """
    # Mutable counter shared with the solid body so the failure happens once.
    fail = {'count': 0}

    @solid
    def fail_first_times(_, _start_fail):
        if fail['count'] < 1:
            fail['count'] += 1
            raise Exception('blah')

        return 'okay perfect'

    @solid(output_defs=[
        OutputDefinition(bool, 'start_fail', is_optional=True),
        OutputDefinition(bool, 'start_skip', is_optional=True),
    ])
    def two_outputs(_):
        yield Output(True, 'start_fail')
        # won't yield start_skip

    @solid
    def will_be_skipped(_, _start_skip):
        pass  # doesn't matter

    @solid
    def downstream_of_failed(_, input_str):
        return input_str

    @pipeline
    def pipe():
        start_fail, start_skip = two_outputs()
        downstream_of_failed(fail_first_times(start_fail))
        will_be_skipped(start_skip)

    env = {'storage': {'filesystem': {}}}

    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe,
                              environment_dict=env,
                              instance=instance,
                              raise_on_error=False)

    # Precondition: without a failed first run there is nothing to retry and
    # the assertions below would pass vacuously.
    assert not result.success

    second_result = execute_pipeline(
        pipe,
        environment_dict=env,
        run_config=RunConfig(
            reexecution_config=ReexecutionConfig.from_previous_run(result)),
        instance=instance,
    )

    assert second_result.success

    # Distinct local names: do not shadow the solid definitions above.
    downstream_value = second_result.result_for_solid(
        'downstream_of_failed').output_value()
    assert downstream_value == 'okay perfect'

    skipped_solid_events = [
        e for e in second_result.event_list
        if str(e.solid_handle) == 'will_be_skipped'
    ]
    # Fail with a readable message instead of an IndexError when no event matches.
    assert skipped_solid_events, 'no events recorded for solid will_be_skipped'
    assert str(skipped_solid_events[0].event_type_value) == 'STEP_SKIPPED'
示例#6
0
def test_execution_plan_wrong_invalid_step_key():
    """A re-execution handle that names an unknown step must be rejected.

    After an initial run, a plan is executed with a step output handle for
    ``not_valid.compute``. ``execute_plan`` is expected to raise
    ``DagsterExecutionStepNotFoundError`` with the exact message checked below.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    env = env_with_fs(
        {
            'solids': {
                'add_one': {
                    'inputs': {'num': {'value': 3}},
                },
            },
        }
    )
    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=RunConfig(run_id=first_run_id),
    )

    second_run_id = str(uuid.uuid4())

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('not_valid.compute')],
    )
    config = RunConfig(run_id=second_run_id, reexecution_config=reexecution)

    plan = create_execution_plan(addy_pipeline, environment_dict=env, run_config=config)

    with pytest.raises(DagsterExecutionStepNotFoundError) as raised:
        execute_plan(
            plan,
            environment_dict=env,
            run_config=config,
            step_keys_to_execute=['add_two.compute'],
        )

    expected_message = (
        'Step not_valid.compute was specified as a step from a previous run. '
        'It does not exist.'
    )
    assert str(raised.value) == expected_message
示例#7
0
    def to_reexecution_config(self):
        """Build a ``ReexecutionConfig`` from this object's fields.

        Reads ``self.previousRunId`` and converts each entry of
        ``self.stepOutputHandles`` (via its ``stepKey`` and ``outputName``
        attributes) into a ``StepOutputHandle``.

        Returns:
            ReexecutionConfig: constructed positionally from the previous run
            id and the list of converted handles.
        """
        # Imports are kept at call time, as this method has always done.
        from dagster.core.execution.config import ReexecutionConfig
        from dagster.core.execution.plan.objects import StepOutputHandle

        previous_run_id = self.previousRunId
        handles = [
            StepOutputHandle(handle.stepKey, handle.outputName)
            for handle in self.stepOutputHandles
        ]
        return ReexecutionConfig(previous_run_id, handles)
def test_execution_plan_reexecution():
    """Re-execute only ``add_two.compute`` from a previous run's stored output.

    The first run stores intermediates 4 (``add_one.compute``) and 6
    (``add_two.compute``). A second plan execution, limited to
    ``add_two.compute`` and pointing at the first run, must leave the same two
    intermediates readable under the new run id while emitting a step output
    event only for ``add_two.compute``.
    """
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    addy_pipeline = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    first_run_id = str(uuid.uuid4())
    env = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})

    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=RunConfig(run_id=first_run_id),
        instance=instance,
    )

    assert first_result.success

    first_store = build_fs_intermediate_store(
        instance.intermediates_directory, first_result.run_id
    )
    for step_key, expected in expected_intermediates:
        assert first_store.get_intermediate(None, step_key, Int).obj == expected

    # Re-execute add_two, feeding it add_one's output from the first run.
    second_run_id = str(uuid.uuid4())

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    config = RunConfig(run_id=second_run_id, reexecution_config=reexecution)

    plan = create_execution_plan(addy_pipeline, environment_dict=env, run_config=config)

    events = execute_plan(
        plan,
        environment_dict=env,
        run_config=config,
        step_keys_to_execute=['add_two.compute'],
        instance=instance,
    )

    second_store = build_fs_intermediate_store(
        instance.intermediates_directory, second_run_id
    )
    for step_key, expected in expected_intermediates:
        assert second_store.get_intermediate(None, step_key, Int).obj == expected

    # add_one was not run again, so only add_two reports an output event.
    assert not get_step_output_event(events, 'add_one.compute')
    assert get_step_output_event(events, 'add_two.compute')
示例#9
0
def test_pipeline_step_key_subset_execution():
    """Re-execute a step subset through ``execute_pipeline`` with filesystem storage.

    Both runs use ``RunStorageMode.FILESYSTEM``. The first run stores 4 for
    ``add_one.compute`` and 6 for ``add_two.compute``; the second run executes
    only ``add_two.compute`` against the first run and must expose the same
    intermediates under its own run id, with a step output event only for
    ``add_two.compute``.
    """
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    env = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    first_config = RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=first_run_id)
    first_result = execute_pipeline(addy_pipeline, environment_dict=env, run_config=first_config)

    assert first_result.success

    first_store = FileSystemIntermediateStore(first_result.run_id)
    for step_key, expected in expected_intermediates:
        assert first_store.get_intermediate(None, step_key, Int) == expected

    # Re-execute add_two, reusing add_one's output from the first run.
    second_run_id = str(uuid.uuid4())

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    second_config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        storage_mode=RunStorageMode.FILESYSTEM,
        step_keys_to_execute=['add_two.compute'],
    )
    second_result = execute_pipeline(
        addy_pipeline, environment_dict=env, run_config=second_config
    )

    assert second_result.success

    events = second_result.step_event_list
    assert events

    second_store = FileSystemIntermediateStore(second_run_id)
    for step_key, expected in expected_intermediates:
        assert second_store.get_intermediate(None, step_key, Int) == expected

    # Only the re-executed step reports an output event.
    assert not get_step_output_event(events, 'add_one.compute')
    assert get_step_output_event(events, 'add_two.compute')
示例#10
0
def test_execution_plan_wrong_invalid_output_name():
    """A re-execution handle naming an unknown output must be rejected by ``execute_plan``.

    After an initial filesystem-storage run, a plan is executed with a step
    output handle for ``add_one.compute`` / ``not_an_output``. The call is
    expected to raise ``DagsterStepOutputNotFoundError`` with the exact message
    below and with ``step_key`` / ``output_name`` set on the exception.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    env = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    first_config = RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=first_run_id)
    first_result = execute_pipeline(addy_pipeline, environment_dict=env, run_config=first_config)

    second_run_id = str(uuid.uuid4())

    bad_handle = StepOutputHandle('add_one.compute', 'not_an_output')
    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[bad_handle],
    )
    config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    # The plan is created from the environment alone; the run config is only
    # supplied when the plan is executed.
    plan = create_execution_plan(addy_pipeline, environment_dict=env)

    with pytest.raises(DagsterStepOutputNotFoundError) as raised:
        execute_plan(
            plan,
            environment_dict=env,
            run_config=config,
            step_keys_to_execute=['add_two.compute'],
        )

    expected_message = (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.compute does not have output not_an_output.'
    )
    assert str(raised.value) == expected_message

    assert raised.value.step_key == 'add_one.compute'
    assert raised.value.output_name == 'not_an_output'
示例#11
0
def test_pipeline_step_key_subset_execution():
    """Re-execute a step subset through ``execute_pipeline`` on an ephemeral instance.

    The first run stores intermediates 4 (``add_one.compute``) and 6
    (``add_two.compute``). The second run executes only ``add_two.compute``
    against the first run and must expose the same intermediates under its own
    run id, with a step output event only for ``add_two.compute``.
    """
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    addy_pipeline = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    first_run_id = str(uuid.uuid4())
    env = env_with_fs(
        {
            'solids': {
                'add_one': {
                    'inputs': {'num': {'value': 3}},
                },
            },
        }
    )
    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=RunConfig(run_id=first_run_id),
        instance=instance,
    )

    assert first_result.success

    first_store = FilesystemIntermediateStore.for_instance(instance, first_result.run_id)
    for step_key, expected in expected_intermediates:
        assert first_store.get_intermediate(None, step_key, Int).obj == expected

    # Re-execute add_two, reusing add_one's output from the first run.
    second_run_id = str(uuid.uuid4())

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    second_config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        step_keys_to_execute=['add_two.compute'],
    )
    second_result = execute_pipeline(
        addy_pipeline,
        environment_dict=env,
        run_config=second_config,
        instance=instance,
    )

    assert second_result.success

    events = second_result.step_event_list
    assert events

    second_store = FilesystemIntermediateStore.for_instance(instance, second_run_id)
    for step_key, expected in expected_intermediates:
        assert second_store.get_intermediate(None, step_key, Int).obj == expected

    # Only the re-executed step reports an output event.
    assert not get_step_output_event(events, 'add_one.compute')
    assert get_step_output_event(events, 'add_two.compute')
def test_reexecution():
    """Re-execute a two-solid pipeline, reusing ``return_one``'s stored output.

    ``return_one`` feeds ``add_one``. After a successful first run on an
    ephemeral instance with filesystem storage, the pipeline is executed again
    with a re-execution config listing ``return_one.compute``. The second
    result must succeed, contain two solid results, and report 1 and 2 as the
    outputs of ``return_one`` and ``add_one`` respectively.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    dependencies = {'add_one': {'num': DependencyDefinition('return_one')}}
    two_solid_pipeline = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies=dependencies,
    )
    instance = DagsterInstance.ephemeral()

    first_result = execute_pipeline(
        two_solid_pipeline,
        environment_dict={'storage': {'filesystem': {}}},
        instance=instance,
    )
    assert first_result.success
    assert first_result.result_for_solid('add_one').output_value() == 2

    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('return_one.compute')],
    )
    reexecution_run_config = RunConfig(reexecution_config=reexecution)

    second_result = execute_pipeline(
        two_solid_pipeline,
        environment_dict={'storage': {'filesystem': {}}},
        run_config=reexecution_run_config,
        instance=instance,
    )

    assert second_result.success
    assert len(second_result.solid_result_list) == 2
    assert second_result.result_for_solid('return_one').output_value() == 1
    assert second_result.result_for_solid('add_one').output_value() == 2
示例#13
0
def test_pipeline_step_key_subset_execution_wrong_output_name_in_step_output_handles():
    """A step output handle naming a non-existent output must be rejected.

    Both runs use ``RunStorageMode.FILESYSTEM``. After a successful first run,
    re-executing ``add_two.compute`` with a handle for ``add_one.compute`` /
    ``invalid_output`` is expected to raise ``DagsterStepOutputNotFoundError``.
    """
    addy_pipeline = define_addy_pipeline()
    first_run_id = str(uuid.uuid4())
    env = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    first_config = RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=first_run_id)
    first_result = execute_pipeline(addy_pipeline, environment_dict=env, run_config=first_config)
    assert first_result.success

    second_run_id = str(uuid.uuid4())

    # The config objects are built inside the ``raises`` block so that the
    # guarded region covers exactly the same calls as a single nested call.
    with pytest.raises(DagsterStepOutputNotFoundError):
        bad_handle = StepOutputHandle('add_one.compute', 'invalid_output')
        reexecution = ReexecutionConfig(
            previous_run_id=first_result.run_id,
            step_output_handles=[bad_handle],
        )
        second_config = RunConfig(
            run_id=second_run_id,
            reexecution_config=reexecution,
            storage_mode=RunStorageMode.FILESYSTEM,
            step_keys_to_execute=['add_two.compute'],
        )
        execute_pipeline(addy_pipeline, environment_dict=env, run_config=second_config)