def test_custom_contexts():
    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one':
            PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
            'custom_two':
            PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            )
        },
    )

    environment_one = config.Environment(
        context=config.Context('custom_one', {'field_one': 'value_two'}))

    execute_pipeline(pipeline, environment=environment_one)

    environment_two = config.Environment(
        context=config.Context('custom_two', {'field_one': 'value_two'}))

    execute_pipeline(pipeline, environment=environment_two)
Example #2
def execute_single_solid(context,
                         solid_def,
                         environment=None,
                         throw_on_error=True):
    check.inst_param(context, 'context', ExecutionContext)
    check.inst_param(solid_def, 'solid_def', SolidDefinition)
    environment = check.opt_inst_param(
        environment,
        'environment',
        config.Environment,
        config.Environment(),
    )
    check.bool_param(throw_on_error, 'throw_on_error')

    single_solid_environment = config.Environment(
        expectations=environment.expectations,
        context=environment.context,
        solids=(
            {solid_def.name: environment.solids[solid_def.name]}
            if solid_def.name in environment.solids
            else None
        ),
    )

    pipeline_result = execute_pipeline(
        PipelineDefinition(
            solids=[solid_def],
            context_definitions=PipelineContextDefinition.passthrough_context_definition(context),
        ),
        environment=single_solid_environment,
    )

    return pipeline_result
def test_invalid_context():
    @lambda_solid
    def never_transform():
        raise Exception('should never execute')

    default_context_pipeline = PipelineDefinition(solids=[never_transform])

    environment_context_not_found = config.Environment(
        context=config.Context('not_found', {}))

    with pytest.raises(DagsterInvariantViolationError,
                       message='Context not_found not found'):
        execute_pipeline(default_context_pipeline,
                         environment=environment_context_not_found,
                         throw_on_error=True)

    environment_field_name_mismatch = config.Environment(
        context=config.Context(config={'unexpected': 'value'}))

    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(default_context_pipeline,
                         environment=environment_field_name_mismatch,
                         throw_on_error=True)

    with_argful_context_pipeline = PipelineDefinition(
        solids=[never_transform],
        context_definitions={
            'default':
            PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'string_field': Field(types.String)}),
                context_fn=lambda info: info.config,
            )
        })

    environment_no_config_error = config.Environment(context=config.Context(
        config={}))

    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(with_argful_context_pipeline,
                         environment=environment_no_config_error,
                         throw_on_error=True)

    environment_type_mismatch_error = config.Environment(
        context=config.Context(config={'string_field': 1}))

    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(with_argful_context_pipeline,
                         environment=environment_type_mismatch_error,
                         throw_on_error=True)
def test_execute_pipeline():
    pipeline = define_success_pipeline()
    environment = config.Environment(solids={
        'load_num_csv':
        config.Solid({'path': script_relative_path('num.csv')})
    }, )

    result = execute_pipeline(pipeline, environment=environment)

    assert result.success

    assert result.result_for_solid('sum_solid').transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }

    assert result.result_for_solid('sum_sq_solid').transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }
def test_hello_world_composed():
    pipeline = create_hello_world_solid_composed_pipeline()

    pipeline_result = execute_pipeline(
        pipeline,
        environment=config.Environment(
            solids={
                'read_hello_world': config.Solid({
                    'path': script_relative_path('num.csv')
                }),
            },
        ),
    )

    assert pipeline_result.success

    result = pipeline_result.result_for_solid('hello_world')

    assert result.success

    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
Example #6
def test_output_sql_sum_sq_solid():
    create_sum_sq_table = define_create_table_solid('create_sum_sq_table')

    pipeline = create_sum_sq_pipeline(
        in_mem_context(), DagsterSqlTableExpression('num_table'), [create_sum_sq_table],
        {create_sum_sq_table.name: {
            'expr': DependencyDefinition('sum_sq_table')
        }}
    )

    environment = config.Environment(
        solids={'create_sum_sq_table': config.Solid({
            'table_name': 'sum_sq_table'
        })},
    )

    pipeline_result = execute_pipeline(pipeline=pipeline, environment=environment)

    assert pipeline_result.success

    result_list = pipeline_result.result_list

    assert len(result_list) == 3
    engine = pipeline_result.context.resources.sa.engine
    result_list = engine.connect().execute('SELECT * FROM sum_sq_table').fetchall()
    assert result_list == [(1, 2, 3, 9), (3, 4, 7, 49)]
Example #7
def execute_pipeline(
    pipeline,
    environment=None,
    throw_on_error=True,
):
    '''
    "Synchronous" version of `execute_pipeline_iterator`.

    Note: throw_on_error=True is very useful in testing contexts when you are not testing for error conditions.

    Parameters:
      pipeline (PipelineDefinition): pipeline to run
      environment (config.Environment): environment configuration for the run
      throw_on_error (bool):
        throw_on_error makes the function throw when an error is encountered rather than returning
        the :py:class:`SolidExecutionResult` in an error-state.


    Returns:
      PipelineExecutionResult
    '''

    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment = check.opt_inst_param(
        environment,
        'environment',
        config.Environment,
        config.Environment(),
    )
    execution_graph = ExecutionGraph.from_pipeline(pipeline)
    return _execute_graph(execution_graph, environment, throw_on_error)
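
The docstring above mentions execute_pipeline_iterator. Below is a minimal usage sketch of the two forms side by side; it assumes a pipeline factory such as define_success_pipeline from the tests above, that the same imports are in scope, and that the iterator yields per-solid results exposing a success flag.

def sketch_sync_vs_iterator():
    # Hypothetical pipeline and empty environment, mirroring the tests in this listing.
    pipeline = define_success_pipeline()
    environment = config.Environment()

    # Synchronous form: blocks until every solid has run. With throw_on_error=False,
    # failures are reported on the returned result instead of being raised.
    pipeline_result = execute_pipeline(pipeline, environment=environment, throw_on_error=False)
    assert pipeline_result.success

    # Iterator form: results are surfaced as each solid finishes
    # (assumed here to carry a `success` attribute).
    for solid_result in execute_pipeline_iterator(pipeline, environment=environment):
        if not solid_result.success:
            break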
Example #8
def test_config_for_no_config():
    def _t_fn(*_args):
        raise Exception('should not reach')

    solid_def = SolidDefinition(
        name='no_config_solid',
        inputs=[],
        outputs=[],
        transform_fn=_t_fn,
    )

    pipeline = PipelineDefinition(solids=[solid_def])

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Solid no_config_solid was provided {'some_config': 1} but does not take config",
    ):
        execute_pipeline(
            pipeline,
            config.Environment(solids={
                'no_config_solid': config.Solid({
                    'some_config': 1,
                }),
            }),
        )
Example #9
def test_basic_solid_with_config():
    did_get = {}

    def _t_fn(info, _inputs):
        did_get['yep'] = info.config

    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=ConfigDefinition.config_dict({
            'some_config': Field(types.String)
        }),
        transform_fn=_t_fn,
    )

    pipeline = PipelineDefinition(solids=[solid])

    execute_pipeline(
        pipeline,
        config.Environment(solids={'solid_with_context': config.Solid({
            'some_config': 'foo'
        })}),
    )

    assert 'yep' in did_get
    assert 'some_config' in did_get['yep']
Example #10
def test_execute_solid_with_input_same_name():
    a_thing_solid = single_output_transform(
        'a_thing',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[define_pass_value_solid('pass_value'), a_thing_solid],
        dependencies={
            'a_thing': {
                'a_thing': DependencyDefinition('pass_value')
            }
        },
    )

    result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={'pass_value': config.Solid({'value': 'foo'})}),
    )

    assert result.result_for_solid('a_thing').transformed_value() == 'foofoo'
Example #11
def test_yield_context():
    events = []

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}
        assert info.context._context_dict['foo'] == 'bar'  # pylint: disable=W0212
        events.append('during')

    def _yield_context(info):
        events.append('before')
        context = ExecutionContext(resources=info.config)
        with context.value('foo', 'bar'):
            yield context
        events.append('after')

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one':
            PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=_yield_context,
            ),
        })

    environment_one = config.Environment(
        context=config.Context('custom_one', {'field_one': 'value_two'}))

    execute_pipeline(pipeline, environment=environment_one)

    assert events == ['before', 'during', 'after']
Example #12
def test_multi_input_partial_execution():
    pipeline = create_multi_input_pipeline()

    first_sum_table = 'first_sum_table'
    first_mult_table = 'first_mult_table'
    first_sum_mult_table = 'first_sum_mult_table'

    environment = config.Environment(solids={
        'sum_table':
        config.Solid({'sum_table': first_sum_table}),
        'mult_table':
        config.Solid({
            'mult_table': first_mult_table,
        }),
        'sum_mult_table':
        config.Solid({
            'sum_table': first_sum_table,
            'mult_table': first_mult_table,
            'sum_mult_table': first_sum_mult_table,
        }),
    }, )

    first_pipeline_result = execute_pipeline(pipeline, environment=environment)

    assert first_pipeline_result.success
    assert len(first_pipeline_result.result_list) == 3
    assert _load_table(first_pipeline_result.context,
                       first_sum_table) == [(1, 2, 3), (3, 4, 7)]
    assert _load_table(first_pipeline_result.context,
                       first_mult_table) == [(1, 2, 2), (3, 4, 12)]
    assert _load_table(first_pipeline_result.context,
                       first_sum_mult_table) == [(1, 3, 2), (3, 7, 12)]

    return
Example #13
def test_single_templated_sql_solid_double_table_with_api():
    sum_table_arg = 'specific_sum_table'
    num_table_arg = 'specific_num_table'

    sql = '''CREATE TABLE {{sum_table}} AS
    SELECT num1, num2, num1 + num2 as sum FROM {{num_table}}'''

    sum_solid = create_templated_sql_transform_solid(
        name='sum_solid',
        sql=sql,
        table_arguments=['sum_table', 'num_table'],
    )

    pipeline = pipeline_test_def(solids=[sum_solid],
                                 context=in_mem_context(num_table_arg))

    environment = config.Environment(
        solids={
            'sum_solid':
            config.Solid({
                'sum_table': sum_table_arg,
                'num_table': num_table_arg,
            })
        })

    result = execute_pipeline(pipeline, environment=environment)
    assert result.success

    assert _load_table(result.context, sum_table_arg) == [(1, 2, 3), (3, 4, 7)]
Example #14
def test_with_from_through_specifying_all_solids():
    pipeline = create_multi_input_pipeline()

    first_sum_table = 'first_sum_table'
    first_mult_table = 'first_mult_table'
    first_sum_mult_table = 'first_sum_mult_table'

    environment = config.Environment(solids={
        'sum_table':
        config.Solid({
            'sum_table': first_sum_table,
        }),
        'mult_table':
        config.Solid({
            'mult_table': first_mult_table,
        }),
        'sum_mult_table':
        config.Solid({
            'sum_table': first_sum_table,
            'mult_table': first_mult_table,
            'sum_mult_table': first_sum_mult_table,
        }),
    }, )

    pipeline_result = execute_pipeline(pipeline, environment=environment)
    assert len(pipeline_result.result_list) == 3
    assert _load_table(pipeline_result.context, first_sum_table) == [(1, 2, 3),
                                                                     (3, 4, 7)]
    assert _load_table(pipeline_result.context,
                       first_mult_table) == [(1, 2, 2), (3, 4, 12)]
    assert _load_table(pipeline_result.context,
                       first_sum_mult_table) == [(1, 3, 2), (3, 7, 12)]
def test_pipeline_execution_graph_diamond():
    pipeline = PipelineDefinition(solids=create_diamond_solids(), dependencies=diamond_deps())
    environment = config.Environment()
    return _do_test(pipeline, lambda: execute_pipeline_iterator(
        pipeline,
        environment=environment,
    ))
Example #16
def test_aliased_configs():
    @solid(
        inputs=[],
        config_def=ConfigDefinition(types.Int),
    )
    def load_constant(info):
        return info.config

    pipeline = PipelineDefinition(
        solids=[load_constant],
        dependencies={
            SolidInstance(load_constant.name, 'load_a'): {},
            SolidInstance(load_constant.name, 'load_b'): {},
        })

    result = execute_pipeline(
        pipeline,
        config.Environment(solids={
            'load_a': config.Solid(2),
            'load_b': config.Solid(3),
        }))

    assert result.success
    assert result.result_for_solid('load_a').transformed_value() == 2
    assert result.result_for_solid('load_b').transformed_value() == 3
Example #17
def test_default_context_with_log_level():
    @solid(inputs=[], outputs=[OutputDefinition()])
    def default_context_transform(info):
        for logger in info.context._logger.loggers:
            assert logger.level == INFO

    pipeline = PipelineDefinition(solids=[default_context_transform])
    execute_pipeline(pipeline,
                     environment=config.Environment(context=config.Context(
                         config={'log_level': 'INFO'})))

    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(pipeline,
                         environment=config.Environment(context=config.Context(
                             config={'log_level': 2})))
Example #18
def test_default_value():
    def _get_config_test_solid(config_key, config_value):
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(info):
            assert info.context.resources == {config_key: config_value}

        return config_test

    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={
            'custom_one':
            PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict({
                    'field_one':
                    Field(
                        dagster_type=types.String,
                        is_optional=True,
                        default_value='heyo',
                    )
                }),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
        })

    execute_pipeline(pipeline,
                     environment=config.Environment(
                         context=config.Context('custom_one', {})))
Example #19
def test_construct_full_environment_default_context_name():
    document = '''
context:
    config:
        context_arg: context_value
'''

    environment = config.construct_environment(yaml.load(document))

    assert environment == config.Environment(context=config.Context(
        'default', {'context_arg': 'context_value'}), )
def test_notebook_dag():
    pipeline_result = execute_pipeline(
        define_test_notebook_dag_pipeline(),
        environment=config.Environment(solids={
            'load_a': config.Solid(1),
            'load_b': config.Solid(2),
        }))
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_two').transformed_value() == 3
    assert pipeline_result.result_for_solid('mult_two').transformed_value() == 6
def test_part_thirteen_step_two():
    pipeline_result = execute_pipeline(
        define_part_thirteen_step_two(),
        config.Environment(solids={
            'load_a': config.Solid(23),
            'load_b': config.Solid(38),
        })
    )

    assert pipeline_result.success
    solid_result = pipeline_result.result_for_solid('a_plus_b')
    assert solid_result.transformed_value() == 23 + 38
Example #22
def test_execute_two_solids_with_same_input_name():
    input_def = InputDefinition(name='a_thing')

    solid_one = single_output_transform(
        'solid_one',
        inputs=[input_def],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    solid_two = single_output_transform(
        'solid_two',
        inputs=[input_def],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    pipeline = dagster.PipelineDefinition(
        solids=[
            define_pass_value_solid('pass_to_one'),
            define_pass_value_solid('pass_to_two'),
            solid_one,
            solid_two,
        ],
        dependencies={
            'solid_one': {
                'a_thing': DependencyDefinition('pass_to_one')
            },
            'solid_two': {
                'a_thing': DependencyDefinition('pass_to_two')
            }
        }
    )

    result = execute_pipeline(
        pipeline,
        environment=config.Environment(
            solids={
                'pass_to_one': config.Solid({
                    'value': 'foo'
                }),
                'pass_to_two': config.Solid({
                    'value': 'bar'
                }),
            }
        )
    )

    assert result.success

    assert result.result_for_solid('solid_one').transformed_value() == 'foofoo'
    assert result.result_for_solid('solid_two').transformed_value() == 'barbar'
Example #23
def test_two_input_solid():
    def transform(_context, inputs):
        num_csv1 = inputs['num_csv1']
        num_csv2 = inputs['num_csv2']
        check.inst_param(num_csv1, 'num_csv1', pd.DataFrame)
        check.inst_param(num_csv2, 'num_csv2', pd.DataFrame)
        num_csv1['sum'] = num_csv1['num1'] + num_csv2['num2']
        return num_csv1

    two_input_solid = _dataframe_solid(
        name='two_input_solid',
        inputs=[
            InputDefinition('num_csv1', dagster_pd.DataFrame),
            InputDefinition('num_csv2', dagster_pd.DataFrame),
        ],
        transform_fn=transform,
    )

    environment = config.Environment(
        solids={
            'load_csv1': config.Solid(
                {'path': script_relative_path('num.csv')}),
            'load_csv2': config.Solid(
                {'path': script_relative_path('num.csv')}),
        })

    pipeline = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_csv1'),
            dagster_pd.load_csv_solid('load_csv2'),
            two_input_solid,
        ],
        dependencies={
            'two_input_solid': {
                'num_csv1': DependencyDefinition('load_csv1'),
                'num_csv2': DependencyDefinition('load_csv2'),
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline, environment)
    assert pipeline_result.success

    df = pipeline_result.result_for_solid('two_input_solid').transformed_value()

    # df = get_solid_transformed_value(create_test_context(), two_input_solid, environment)
    assert isinstance(df, pd.DataFrame)
    assert df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7]
    }
Example #24
def test_pandas_source_test_pipeline():
    pipeline = define_pandas_source_test_pipeline()
    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(solids={
            'pandas_source_test':
            config.Solid(script_relative_path('num.csv')),
        }, ),
    )
    assert pipeline_result.success
    solid_result = pipeline_result.result_for_solid('pandas_source_test')
    expected = pd.read_csv(script_relative_path('num.csv'))
    assert solid_result.transformed_value().equals(expected)
def test_intro_tutorial_part_nine_step_one():
    result = execute_pipeline(
        define_part_nine_step_one(),
        config.Environment(solids={
            'injest_a': config.Solid(2),
            'injest_b': config.Solid(3),
        }, ))

    assert result.success
    assert result.result_for_solid('injest_a').transformed_value() == 2
    assert result.result_for_solid('injest_b').transformed_value() == 3
    assert result.result_for_solid('add_ints').transformed_value() == 5
    assert result.result_for_solid('mult_ints').transformed_value() == 6
Example #26
def do_execute_command(pipeline, env, printer):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    check.opt_str_param(env, 'env')
    check.callable_param(printer, 'printer')

    if env:
        env_config = load_yaml_from_path(env)
        environment = config.construct_environment(env_config)
    else:
        environment = config.Environment()

    pipeline_iter = execute_pipeline_iterator(pipeline, environment)

    process_results_for_console(pipeline_iter)
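
A hedged usage sketch of the command helper above; the pipeline factory and the 'env.yml' path are hypothetical, and the built-in print stands in for the printer callable. The env file would hold a YAML document in the shape accepted by config.construct_environment, like the one shown earlier in this listing.

def sketch_do_execute_command():
    # Hypothetical invocation: 'env.yml' is a stand-in path, print is the printer.
    do_execute_command(define_success_pipeline(), 'env.yml', print)

    # Passing env=None falls back to an empty config.Environment().
    do_execute_command(define_success_pipeline(), None, print)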
def test_hello_world_config():
    with_config_solid = dm.define_dagstermill_solid(
        'with_config',
        nb_test_path('hello_world_with_config'),
        [],
        [OutputDefinition()],
    )

    pipeline = PipelineDefinition(solids=[with_config_solid])
    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(solids={'with_config': config.Solid(script_relative_path('num.csv'))}),
    )

    assert pipeline_result.success
    assert pipeline_result.result_for_solid('with_config').transformed_value() == 100
Example #28
def test_any_config_definition():
    called = {}
    conf_value = 234

    @solid(config_def=ConfigDefinition())
    def hello_world(info):
        assert info.config == conf_value
        called['yup'] = True

    result = execute_single_solid(
        create_test_context(),
        hello_world,
        environment=config.Environment(
            solids={'hello_world': config.Solid(conf_value)}))

    assert called['yup']
Example #29
def test_pandas_multiple_inputs():
    environment = config.Environment(solids={
        'load_one':
        config.Solid({'path': script_relative_path('num.csv')}),
        'load_two':
        config.Solid({'path': script_relative_path('num.csv')}),
    }, )

    def transform_fn(_context, inputs):
        return inputs['num_csv1'] + inputs['num_csv2']

    double_sum = _dataframe_solid(
        name='double_sum',
        inputs=[
            InputDefinition('num_csv1', dagster_pd.DataFrame),
            InputDefinition('num_csv2', dagster_pd.DataFrame),
        ],
        transform_fn=transform_fn,
    )

    pipeline = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_one'),
            dagster_pd.load_csv_solid('load_two'), double_sum
        ],
        dependencies={
            'double_sum': {
                'num_csv1': DependencyDefinition('load_one'),
                'num_csv2': DependencyDefinition('load_two'),
            }
        },
    )

    output_df = execute_pipeline(
        pipeline,
        environment=environment,
    ).result_for_solid('double_sum').transformed_value()

    assert not output_df.empty

    assert output_df.to_dict('list') == {
        'num1': [2, 6],
        'num2': [4, 8],
    }
def test_hello_world_pipeline_no_api():
    def hello_world_transform_fn(_context, inputs):
        num_df = inputs['num_df']
        num_df['sum'] = num_df['num1'] + num_df['num2']
        return num_df

    read_csv_solid = define_read_csv_solid('read_csv_solid')

    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=hello_world_transform_fn,
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[read_csv_solid, hello_world],
        dependencies={
            'hello_world': {
                'num_df': DependencyDefinition('read_csv_solid'),
            },
        }
    )

    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={
                'read_csv_solid': config.Solid({
                    'path': script_relative_path('num.csv'),
                }),
            },
        ),
    )

    assert pipeline_result.success

    result = pipeline_result.result_for_solid('hello_world')

    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }