示例#1
0
def test_create_execution_plan_with_bad_inputs():
    """Planning the diamond pipeline with the environment below must raise.

    The ``num`` input of ``add_three`` is given as a bare ``3``; the test
    expects ``PipelineConfigEvaluationError`` from ``create_execution_plan``.
    """
    invalid_environment = {'solids': {'add_three': {'inputs': {'num': 3}}}}

    with pytest.raises(PipelineConfigEvaluationError):
        create_execution_plan(define_diamond_pipeline(), invalid_environment)
示例#2
0
def test_basic_int_execution_plan():
    """One JSON materialization of ``result`` produces a three-step plan.

    The topological order is expected to be the transform, the
    materialization step, then the materialization join.
    """
    environment = {
        'solids': {
            'return_one': {'outputs': [{'result': {'json': {'path': 'dummy.json'}}}]}
        }
    }
    plan = create_execution_plan(single_int_output_pipeline(), environment)

    assert len(plan.steps) == 3

    expected_keys = (
        'return_one.transform',
        'return_one.materialization.output.result.0',
        'return_one.materialization.output.result.join',
    )
    ordered = plan.topological_steps()
    for position, expected_key in enumerate(expected_keys):
        assert ordered[position].key == expected_key
def test_execution_plan_wrong_run_id():
    """Re-execution that names a freshly generated previous run id must raise.

    Checks both the message of ``DagsterRunNotFoundError`` and its
    ``invalid_run_id`` attribute.
    """
    addy_pipeline = define_addy_pipeline()

    # A brand-new UUID that was never used for a run in this test.
    missing_run_id = str(uuid.uuid4())
    env = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    plan = create_execution_plan(addy_pipeline, environment_dict=env)

    with pytest.raises(DagsterRunNotFoundError) as raised:
        reexecution = ReexecutionConfig(
            previous_run_id=missing_run_id,
            step_output_handles=[StepOutputHandle('add_one.transform')],
        )
        execute_plan(
            plan,
            environment_dict=env,
            run_config=RunConfig(
                storage_mode=RunStorageMode.FILESYSTEM,
                reexecution_config=reexecution,
            ),
        )

    error = raised.value
    expected_message = (
        'Run id {} set as previous run id was not found in run storage'.format(missing_run_id)
    )
    assert str(error) == expected_message

    assert error.invalid_run_id == missing_run_id
示例#4
0
def test_running():
    """Run the passing pipeline via MultiprocessingExecutionManager and expect SUCCESS.

    Also checks that the run logged exactly one PIPELINE_PROCESS_START event
    and exactly one PIPELINE_PROCESS_STARTED event.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_passing_pipeline',
        module_name=None,
    )
    container = RepositoryContainer(target_info)
    pipeline = define_passing_pipeline()

    csv_input = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_input}}}}
    selector = ExecutionSelector('pandas_hello_world')
    plan = create_execution_plan(pipeline, env_config)
    run = InMemoryPipelineRun(run_id, selector, env_config, plan)

    manager = MultiprocessingExecutionManager()
    manager.execute_pipeline(container, pipeline, run)
    manager.join()

    assert run.status == PipelineRunStatus.SUCCESS
    events = run.all_logs()
    assert events

    start_events = get_events_of_type(events, EventType.PIPELINE_PROCESS_START)
    assert len(start_events) == 1

    started_events = get_events_of_type(events, EventType.PIPELINE_PROCESS_STARTED)
    assert len(started_events) == 1
def test_create_subplan_middle_step():
    """Subsetting to ``add_one.transform`` with an injected ``num`` yields two steps.

    The subplan is expected to hold a value step for the injected input
    followed by the transform, wired together through ``VALUE_OUTPUT``.
    """
    two_int_pipeline = define_two_int_pipeline()
    environment = create_typed_environment(two_int_pipeline, None)
    full_plan = create_execution_plan(two_int_pipeline)

    with yield_context(two_int_pipeline, environment, ExecutionMetadata()) as context:
        plan_info = ExecutionPlanInfo(
            context=context, pipeline=two_int_pipeline, environment=environment
        )
        builder_state = StepBuilderState(pipeline_name=two_int_pipeline.name)
        subset_info = ExecutionPlanSubsetInfo(
            ['add_one.transform'], {'add_one.transform': {'num': 2}}
        )
        subplan = create_subplan(plan_info, builder_state, full_plan, subset_info)
        assert subplan

        steps = subplan.topological_steps()
        assert len(steps) == 2

        # First step: supplies the injected input value.
        value_step = steps[0]
        assert value_step.key == 'add_one.transform.input.num.value'
        assert not value_step.step_inputs
        assert len(value_step.step_outputs) == 1

        # Second step: the transform, fed by the value step.
        transform_step = steps[1]
        assert transform_step.key == 'add_one.transform'
        assert len(transform_step.step_inputs) == 1
        only_input = transform_step.step_inputs[0]
        assert only_input.prev_output_handle.step.key == 'add_one.transform.input.num.value'
        assert only_input.prev_output_handle.output_name == VALUE_OUTPUT
        assert len(transform_step.step_outputs) == 1

        assert len(subplan.topological_steps()) == 2
        ordered_keys = [step.key for step in subplan.topological_steps()]
        assert ordered_keys == [
            'add_one.transform.input.num.value',
            'add_one.transform',
        ]
示例#6
0
def test_failing():
    """Run the failing pipeline via MultiprocessingExecutionManager and expect FAILURE.

    The run's log list must also be truthy once the manager has been joined.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_failing_pipeline',
        module_name=None,
    )
    container = RepositoryContainer(target_info)
    pipeline = define_failing_pipeline()

    csv_input = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_input}}}}
    selector = ExecutionSelector('pandas_hello_world')
    plan = create_execution_plan(pipeline, env_config)
    run = InMemoryPipelineRun(run_id, selector, env_config, plan)

    manager = MultiprocessingExecutionManager()
    manager.execute_pipeline(container, pipeline, run)
    manager.join()

    assert run.status == PipelineRunStatus.FAILURE
    assert run.all_logs()
示例#7
0
def test_external_execution_input_marshal_code_error():
    """An unusable marshalling key for ``num`` fails the unmarshal-input step.

    With ``throw_on_user_error=True`` the ``IOError`` propagates; with
    ``False`` a single failed result is returned that wraps the ``IOError``.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    def run_with_bad_input(throw_on_user_error):
        # Same invocation for both modes; only the error-handling flag varies.
        return execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError):
        run_with_bad_input(True)

    results = run_with_bad_input(False)

    assert len(results) == 1
    marshal_result = results[0]
    assert marshal_result.success is False
    assert marshal_result.step.kind == StepKind.UNMARSHAL_INPUT
    user_exception = marshal_result.failure_data.dagster_error.user_exception
    assert isinstance(user_exception, IOError)
示例#8
0
def test_external_execution_output_code_error():
    """Marshalling ``result`` to the integer path ``23434`` must raise.

    Verifies the message, ``output_name`` and ``step_key`` carried by
    ``DagsterMarshalOutputError``.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    # Note the path is an int rather than a string.
    bad_outputs = {'add_one.transform': [{'output': 'result', 'path': 23434}]}

    with pytest.raises(DagsterMarshalOutputError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=bad_outputs,
            execution_metadata=ExecutionMetadata(),
        )

    error = raised.value
    expected_message = 'Error during the marshalling of output result in step add_one.transform'
    assert str(error) == expected_message
    assert error.output_name == 'result'
    assert error.step_key == 'add_one.transform'
def test_basic_int_multiple_serializations_execution_plan():
    """Two JSON materializations of ``result`` produce a four-step plan.

    The transform comes first, the two materialize steps occupy positions
    1 and 2 (checked via ``assert_plan_topological_level``), and the join is
    last.
    """
    materializations = [
        {'result': {'json': {'path': 'dummy_one.json'}}},
        {'result': {'json': {'path': 'dummy_two.json'}}},
    ]
    environment = {'solids': {'return_one': {'outputs': materializations}}}
    plan = create_execution_plan(single_int_output_pipeline(), environment)

    assert len(plan.steps) == 4

    ordered = plan.topological_steps()
    assert ordered[0].key == 'return_one.transform'

    materialize_keys = [
        'return_one.outputs.result.materialize.0',
        'return_one.outputs.result.materialize.1',
    ]
    assert_plan_topological_level(ordered, [1, 2], materialize_keys)

    assert ordered[3].key == 'return_one.outputs.result.materialize.join'
示例#10
0
def test_create_subplan_middle_step():
    """A plan subset to ``add_one.transform`` with ``num`` injected has two steps.

    Expected order: the input value step, then the transform that reads
    that step's ``VALUE_OUTPUT``.
    """
    two_int_pipeline = define_two_int_pipeline()
    subset = ExecutionPlanSubsetInfo.with_input_values(
        ['add_one.transform'], {'add_one.transform': {'num': 2}}
    )
    subplan = create_execution_plan(two_int_pipeline, subset_info=subset)
    assert subplan

    steps = subplan.topological_steps()
    assert len(steps) == 2

    # Step 0 provides the injected value and has no inputs of its own.
    value_step = steps[0]
    assert value_step.key == 'add_one.transform.input.num.value'
    assert not value_step.step_inputs
    assert len(value_step.step_outputs) == 1

    # Step 1 is the transform, fed by step 0.
    transform_step = steps[1]
    assert transform_step.key == 'add_one.transform'
    assert len(transform_step.step_inputs) == 1
    only_input = transform_step.step_inputs[0]
    assert only_input.prev_output_handle.step.key == 'add_one.transform.input.num.value'
    assert only_input.prev_output_handle.output_name == VALUE_OUTPUT
    assert len(transform_step.step_outputs) == 1

    assert len(subplan.topological_steps()) == 2
    ordered_keys = [step.key for step in subplan.topological_steps()]
    assert ordered_keys == [
        'add_one.transform.input.num.value',
        'add_one.transform',
    ]
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing ``add_two.transform`` from an IN_MEMORY run must raise.

    A first full run with in-memory storage succeeds; a second plan
    execution that refers back to it is expected to raise
    ``DagsterInvariantViolationError``.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    env = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    first_run_config = RunConfig(storage_mode=RunStorageMode.IN_MEMORY, run_id=first_run_id)
    result = execute_pipeline(addy_pipeline, environment_dict=env, run_config=first_run_config)

    assert result.success

    # Second run: re-execute add_two on top of the add_one output of the first run.
    second_run_id = str(uuid.uuid4())
    reexecution = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.transform')],
    )
    second_run_config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        storage_mode=RunStorageMode.IN_MEMORY,
    )

    plan = create_execution_plan(addy_pipeline, environment_dict=env)

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            plan,
            environment_dict=env,
            run_config=second_run_config,
            step_keys_to_execute=['add_two.transform'],
        )
示例#12
0
def test_execution_crash():
    """Run the crashy pipeline and expect FAILURE with an 'unexpectedly exited' log.

    The last log message of the run must match the expected text for this
    run id exactly.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_crashy_pipeline',
        module_name=None,
    )
    container = RepositoryContainer(target_info)
    pipeline = define_crashy_pipeline()

    csv_input = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_input}}}}
    selector = ExecutionSelector('pandas_hello_world')
    plan = create_execution_plan(pipeline, env_config)
    run = InMemoryPipelineRun(run_id, selector, env_config, plan)

    manager = MultiprocessingExecutionManager()
    manager.execute_pipeline(container, pipeline, run)
    manager.join()

    assert run.status == PipelineRunStatus.FAILURE

    final_entry = run.all_logs()[-1]
    expected_message = (
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'
    ).format(run_id=run_id)
    assert final_entry.message == expected_message
def test_using_s3_for_subplan(s3_bucket):
    """Execute the inty pipeline one step at a time with S3 storage configured.

    After each step the test checks that the step produced an output and
    that an S3 intermediate with the expected value exists (1 for
    ``return_one.transform``, 2 for ``add_one.transform``). Both
    intermediates are removed in ``finally``.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    run_id = str(uuid.uuid4())

    def run_single_step(step_key):
        # Execute exactly one step of the plan under the shared run id.
        return list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=[step_key],
            )
        )

    def check_intermediate(step_key, expected_value):
        # Open a fresh execution context and inspect the stored intermediate.
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, step_key)
            assert get_s3_intermediate(context, s3_bucket, run_id, step_key, Int) == expected_value

    try:
        return_one_step_events = run_single_step('return_one.transform')
        assert get_step_output(return_one_step_events, 'return_one.transform')
        check_intermediate('return_one.transform', 1)

        add_one_step_events = run_single_step('add_one.transform')
        assert get_step_output(add_one_step_events, 'add_one.transform')
        check_intermediate('add_one.transform', 2)
    finally:
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            for step_key in ('return_one.transform', 'add_one.transform'):
                rm_s3_intermediate(context, s3_bucket, run_id, step_key)
def test_basic_int_and_string_execution_plan():
    """Materializing both outputs of ``return_one_and_foo`` gives a five-step plan.

    Checks the topological placement of the materialize and join steps, and
    that the ``string`` materialize/join steps are wired to the expected
    upstream output handles.
    """
    environment = {
        'solids': {
            'return_one_and_foo': {
                'outputs': [
                    {'string': {'json': {'path': 'dummy_string.json'}}},
                    {'number': {'json': {'path': 'dummy_number.json'}}},
                ]
            }
        }
    }
    plan = create_execution_plan(multiple_output_pipeline(), environment)

    assert len(plan.steps) == 5
    ordered = plan.topological_steps()
    assert ordered[0].key == 'return_one_and_foo.transform'

    string_mat_key = 'return_one_and_foo.outputs.string.materialize.0'
    string_join_key = 'return_one_and_foo.outputs.string.materialize.join'

    # Positions 1-2 hold the materialize steps, positions 3-4 the joins.
    assert_plan_topological_level(
        ordered,
        [1, 2],
        [string_mat_key, 'return_one_and_foo.outputs.number.materialize.0'],
    )
    assert_plan_topological_level(
        ordered,
        [3, 4],
        [string_join_key, 'return_one_and_foo.outputs.number.materialize.join'],
    )

    transform_step = plan.get_step_by_key('return_one_and_foo.transform')

    # The string materialization consumes the transform's ``string`` output.
    string_mat_step = plan.get_step_by_key(string_mat_key)
    assert len(string_mat_step.step_inputs) == 1
    expected_mat_source = StepOutputHandle.from_step(step=transform_step, output_name='string')
    assert string_mat_step.step_inputs[0].prev_output_handle == expected_mat_source

    # The join consumes the materialization's thunk output.
    string_join_step = plan.get_step_by_key(string_join_key)
    assert len(string_join_step.step_inputs) == 1
    expected_join_source = StepOutputHandle.from_step(
        step=string_mat_step, output_name=MATERIALIZATION_THUNK_OUTPUT
    )
    assert string_join_step.step_inputs[0].prev_output_handle == expected_join_source
示例#15
0
def test_execution_plan_source_step():
    """Executing a plan subset to ``return_one.transform`` yields one event with value 1."""
    two_int_pipeline = define_two_int_pipeline()
    subset = ExecutionPlanSubsetInfo.only_subset(included_step_keys=['return_one.transform'])
    plan = create_execution_plan(two_int_pipeline, subset_info=subset)

    events = execute_plan(plan)

    assert len(events) == 1
    only_event = events[0]
    assert only_event.success_data.value == 1
示例#16
0
def test_compute_noop_node():
    """A pipeline holding only ``noop`` plans to one step whose first output is 'foo'."""
    plan = create_execution_plan(silencing_pipeline(solids=[noop]))

    assert len(plan.steps) == 1

    step_outputs = execute_step(plan.steps[0], create_test_runtime_execution_context(), {})
    outputs = list(step_outputs)

    assert outputs[0].success_data.value == 'foo'
def test_execute_step_wrong_step_key():
    """Asking ``execute_plan`` for the unknown step 'nope' must raise.

    Verifies the ``step_key`` attribute and the message of
    ``DagsterExecutionStepNotFoundError``.
    """
    plan = create_execution_plan(define_inty_pipeline())

    with pytest.raises(DagsterExecutionStepNotFoundError) as raised:
        execute_plan(plan, step_keys_to_execute=['nope'])

    error = raised.value
    assert error.step_key == 'nope'

    assert str(error) == 'Execution plan does not contain step "nope"'
示例#18
0
def test_topological_sort():
    """The diamond pipeline sorts into three levels with the expected step keys."""
    plan = create_execution_plan(define_diamond_pipeline())

    levels = plan.topological_step_levels()

    assert len(levels) == 3

    expected_levels = (
        ['return_two.transform'],
        ['add_three.transform', 'mult_three.transform'],
        ['adder.transform'],
    )
    for index, expected_keys in enumerate(expected_levels):
        assert [step.key for step in levels[index]] == expected_keys
示例#19
0
def test_create_subplan_source_step():
    """A plan subset to ``return_one.transform`` holds one step with no inputs and one output."""
    two_int_pipeline = define_two_int_pipeline()
    subset = ExecutionPlanSubsetInfo.only_subset(['return_one.transform'])
    subplan = create_execution_plan(two_int_pipeline, subset_info=subset)

    assert subplan
    assert len(subplan.steps) == 1

    only_step = subplan.steps[0]
    assert only_step.key == 'return_one.transform'
    assert not only_step.step_inputs
    assert len(only_step.step_outputs) == 1

    assert len(subplan.topological_steps()) == 1
示例#20
0
def test_external_execution_step_for_output_missing():
    """Marshalling an output for the unknown step 'nope' must raise step-not-found."""
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        unknown_step_outputs = {'nope': [MarshalledOutput('nope', 'nope')]}
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=unknown_step_outputs,
            execution_metadata=ExecutionMetadata(),
        )
示例#21
0
 def create_plan(pipeline):
     """Chain config evaluation for ``pipeline`` into an ``ExecutionPlan`` object.

     Relies on ``graphene_info`` and ``config`` from the enclosing scope.
     Returns whatever ``.chain`` returns on the config-or-error value.
     """

     def _to_execution_plan(evaluate_value_result):
         # NOTE(review): presumably only invoked when config evaluation
         # succeeded -- confirm against the ``chain`` implementation.
         execution_plan_type = graphene_info.schema.type_named('ExecutionPlan')
         dagster_plan = create_execution_plan(
             pipeline.get_dagster_pipeline(),
             evaluate_value_result.value,
             ExecutionMetadata(),
         )
         return execution_plan_type(pipeline, dagster_plan)

     config_or_error = _config_or_error_from_pipeline(graphene_info, pipeline, config)
     return config_or_error.chain(_to_execution_plan)
def test_execution_plan_middle_step():
    """Executing the ``add_one.transform`` subset with ``num=2`` gives two results.

    The second result is expected to carry the value 3.
    """
    two_int_pipeline = define_two_int_pipeline()
    plan = create_execution_plan(two_int_pipeline)
    subset = ExecutionPlanSubsetInfo(['add_one.transform'], {'add_one.transform': {'num': 2}})

    results = execute_plan(two_int_pipeline, plan, subset_info=subset)

    assert len(results) == 2
    assert results[1].success_data.value == 3
示例#23
0
        def _start_execution(validated_config_either):
            """Validate the request, then register the run and hand it to the execution manager.

            Relies on names from the enclosing scope (``pipeline``, ``run_id``,
            ``selector``, ``environment_dict``, ``reexecution_config``,
            ``step_keys_to_execute``, ``pipeline_run_storage``, ``graphene_info``).

            Args:
                validated_config_either: config validation result; its ``value``
                    is passed to ``create_execution_plan``.

            Returns:
                An ``InvalidStepError`` object if a requested step key (or the
                step of a re-execution output handle) is not in the plan, an
                ``InvalidOutputError`` object if a re-execution handle names an
                output its step does not have, otherwise a
                ``StartPipelineExecutionSuccess`` object wrapping the new run.
            """
            execution_plan = create_execution_plan(
                pipeline.get_dagster_pipeline(), validated_config_either.value)

            # Validate everything BEFORE touching run storage: previously the
            # run was created and added first, so a request rejected below
            # still left a never-executed run behind in storage.
            if step_keys_to_execute:
                for step_key in step_keys_to_execute:
                    if not execution_plan.has_step(step_key):
                        return graphene_info.schema.type_named(
                            'InvalidStepError')(invalid_step_key=step_key)

            if reexecution_config and reexecution_config.step_output_handles:
                for step_output_handle in reexecution_config.step_output_handles:
                    if not execution_plan.has_step(
                            step_output_handle.step_key):
                        return graphene_info.schema.type_named(
                            'InvalidStepError')(
                                invalid_step_key=step_output_handle.step_key)

                    step = execution_plan.get_step_by_key(
                        step_output_handle.step_key)

                    if not step.has_step_output(
                            step_output_handle.output_name):
                        return graphene_info.schema.type_named(
                            'InvalidOutputError')(
                                step_key=step_output_handle.step_key,
                                invalid_output_name=step_output_handle.
                                output_name,
                            )

            # The request is valid: only now allocate an id and register the run.
            new_run_id = run_id if run_id else make_new_run_id()
            run = pipeline_run_storage.create_run(
                new_run_id,
                selector,
                environment_dict,
                execution_plan,
                reexecution_config,
                step_keys_to_execute,
            )
            pipeline_run_storage.add_run(run)

            graphene_info.context.execution_manager.execute_pipeline(
                graphene_info.context.repository_container,
                pipeline.get_dagster_pipeline(),
                run,
                raise_on_error=graphene_info.context.raise_on_error,
            )

            return graphene_info.schema.type_named(
                'StartPipelineExecutionSuccess')(
                    run=graphene_info.schema.type_named('PipelineRun')(run))
示例#24
0
def test_external_execution_output_code_error_throw_on_user_error():
    """With ``throw_on_user_error=True`` the 'whoops' exception of the throwing step surfaces."""
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    with pytest.raises(Exception) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['user_throw_exception.transform'],
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    message = str(raised.value)
    assert message == 'whoops'
示例#25
0
def test_external_execution_step_for_input_missing():
    """Marshalling an input for the unknown step 'nope' must raise step-not-found.

    The raised error is expected to report ``step_key == 'nope'``.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    unknown_step_inputs = {'nope': {'nope': 'nope'}}

    with pytest.raises(DagsterExecutionStepNotFoundError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=unknown_step_inputs,
            execution_metadata=ExecutionMetadata(),
        )

    assert raised.value.step_key == 'nope'
def test_using_file_system_for_subplan_missing_input():
    """Running only ``add_one.transform`` under a fresh run id must raise.

    No other step is executed for this run id beforehand, and the test
    expects ``DagsterStepOutputNotFoundError``.
    """
    env = {'storage': {'filesystem': {}}}
    plan = create_execution_plan(define_inty_pipeline(), environment_dict=env)

    fresh_run_id = str(uuid.uuid4())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            plan,
            environment_dict=env,
            run_config=RunConfig(run_id=fresh_run_id),
            step_keys_to_execute=['add_one.transform'],
        )
def test_using_file_system_for_subplan_invalid_step():
    """Requesting the unknown step 'nope' with filesystem storage must raise step-not-found."""
    env = {'storage': {'filesystem': {}}}
    plan = create_execution_plan(define_inty_pipeline(), environment_dict=env)

    fresh_run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            plan,
            environment_dict=env,
            run_config=RunConfig(run_id=fresh_run_id),
            step_keys_to_execute=['nope'],
        )
示例#28
0
def test_external_execution_marshal_output_code_error():
    """Marshalling ``result`` under a non-existent directory fails the marshal step.

    With ``throw_on_user_error=True`` the ``IOError`` propagates; with
    ``False`` three results come back and only the marshal-output step is
    marked unsuccessful.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    # guaranteed that folder does not exist
    hardcoded_uuid = '83fb4ace-5cab-459d-99b6-2ca9808c54a1'
    marshalling_key = '{uuid}/{uuid}'.format(uuid=hardcoded_uuid)
    outputs_to_marshal = {
        'add_one.transform': [
            MarshalledOutput(output_name='result', marshalling_key=marshalling_key)
        ]
    }

    def run_plan(throw_on_user_error):
        # Same invocation for both modes; only the error-handling flag varies.
        return execute_externalized_plan(
            pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError) as raised:
        run_plan(True)

    assert 'No such file or directory' in str(raised.value)

    results = run_plan(False)

    assert len(results) == 3

    by_step_key = {result.step.key: result for result in results}

    assert by_step_key['return_one.transform'].success is True
    assert by_step_key['add_one.transform'].success is True
    assert by_step_key['add_one.transform.marshal-output.result'].success is False
示例#29
0
def test_external_execution_output_missing():
    """Marshalling the unknown output 'nope' of ``add_one.transform`` must raise."""
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    unknown_output = {'add_one.transform': [{'output': 'nope', 'path': 'nope'}]}

    with pytest.raises(DagsterMarshalOutputNotFoundError):
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=unknown_output,
            execution_metadata=ExecutionMetadata(),
        )
def test_create_subplan_source_step():
    """``create_subplan`` limited to ``return_one.transform`` yields one input-free step."""
    two_int_pipeline = define_two_int_pipeline()
    environment = create_typed_environment(two_int_pipeline, None)
    full_plan = create_execution_plan(two_int_pipeline)

    with yield_context(two_int_pipeline, environment) as context:
        plan_info = ExecutionPlanInfo(
            context=context, pipeline=two_int_pipeline, environment=environment
        )
        subset_info = ExecutionPlanSubsetInfo(['return_one.transform'])
        subplan = create_subplan(plan_info, full_plan, subset_info)

        assert subplan
        assert len(subplan.steps) == 1

        only_step = subplan.steps[0]
        assert only_step.key == 'return_one.transform'
        assert not only_step.step_inputs
        assert len(only_step.step_outputs) == 1

        assert len(subplan.topological_steps()) == 1