def test_custom_contexts():
    """Run one pipeline under two separately named custom contexts.

    Both context definitions hand their config straight through as the
    execution context's resources, so the solid must observe the same
    resources whichever context the environment selects.
    """
    expected_resources = {'field_one': 'value_two'}

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == expected_resources

    def _passthrough_context_definition():
        # Built fresh on every call so the two contexts share no objects.
        string_field = Field(dagster_type=types.String)
        return PipelineContextDefinition(
            config_def=ConfigDefinition.config_dict({'field_one': string_field}),
            context_fn=lambda info: ExecutionContext(resources=info.config),
        )

    context_definitions = {
        'custom_one': _passthrough_context_definition(),
        'custom_two': _passthrough_context_definition(),
    }
    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions=context_definitions,
    )

    # Same config, different context name each time.
    for context_name in ('custom_one', 'custom_two'):
        environment = config.Environment(
            context=config.Context(context_name, {'field_one': 'value_two'})
        )
        execute_pipeline(pipeline, environment=environment)
def test_default_arg():
    """An optional int field with a default is filled in when it is omitted."""
    int_field = Field(types.Int, default_value=2, is_optional=True)
    config_def = ConfigDefinition.config_dict({'int_field': int_field})

    evaluated = config_def.config_type.evaluate_value({})

    assert evaluated == {'int_field': 2}
def _single_optional_string_field_config_dict_with_default():
    """Build a config dict with one optional string field that has a default.

    The field is called ``optional_field`` and defaults to ``'some_default'``.
    """
    return ConfigDefinition.config_dict(
        {
            'optional_field': Field(
                types.String,
                is_optional=True,
                default_value='some_default',
            ),
        }
    )
def test_default_value():
    """A context field's default reaches the solid when the config is empty."""

    def _get_config_test_solid(config_key, config_value):
        # The returned solid asserts the context resources are exactly this.
        expected_resources = {config_key: config_value}

        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(info):
            assert info.context.resources == expected_resources

        return config_test

    checking_solid = _get_config_test_solid('field_one', 'heyo')

    defaulted_field = Field(
        dagster_type=types.String,
        is_optional=True,
        default_value='heyo',
    )
    custom_one = PipelineContextDefinition(
        config_def=ConfigDefinition.config_dict({'field_one': defaulted_field}),
        context_fn=lambda info: ExecutionContext(resources=info.config),
    )
    pipeline = PipelineDefinition(
        solids=[checking_solid],
        context_definitions={'custom_one': custom_one},
    )

    # The empty context config forces the default value to be used.
    environment = config.Environment(context=config.Context('custom_one', {}))
    execute_pipeline(pipeline, environment=environment)
def test_yield_context():
    """A generator context_fn brackets solid execution with setup and teardown.

    The recorded events must come out as before -> during -> after, and the
    value pushed with ``context.value`` must be visible inside the solid.
    """
    events = []

    def _yield_context(info):
        events.append('before')
        execution_context = ExecutionContext(resources=info.config)
        with execution_context.value('foo', 'bar'):
            yield execution_context
        events.append('after')

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}
        assert info.context._context_dict['foo'] == 'bar'  # pylint: disable=W0212
        events.append('during')

    custom_one = PipelineContextDefinition(
        config_def=ConfigDefinition.config_dict(
            {'field_one': Field(dagster_type=types.String)}
        ),
        context_fn=_yield_context,
    )
    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={'custom_one': custom_one},
    )

    context_config = config.Context('custom_one', {'field_one': 'value_two'})
    environment_one = config.Environment(context=context_config)
    execute_pipeline(pipeline, environment=environment_one)

    assert events == ['before', 'during', 'after']
def _multiple_required_fields_config_dict():
    """Build the ``MultipleRequiredFields`` config dict.

    It declares two string fields, ``field_one`` and ``field_two``.
    """
    fields = {
        'field_one': Field(types.String),
        'field_two': Field(types.String),
    }
    return ConfigDefinition.config_dict('MultipleRequiredFields', fields)
def test_basic_solid_with_config():
    """Config supplied for a solid in the environment reaches its transform."""
    captured = {}

    def _record_config(info, _inputs):
        # Stash the config so it can be inspected after the run.
        captured['yep'] = info.config

    solid_config = ConfigDefinition.config_dict({'some_config': Field(types.String)})
    solid_def = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=solid_config,
        transform_fn=_record_config,
    )
    pipeline = PipelineDefinition(solids=[solid_def])

    environment = config.Environment(
        solids={'solid_with_context': config.Solid({'some_config': 'foo'})}
    )
    execute_pipeline(pipeline, environment)

    assert 'yep' in captured
    assert 'some_config' in captured['yep']
def test_int_field():
    """An int value for a required int field evaluates to itself."""
    config_def = ConfigDefinition.config_dict({'int_field': Field(types.Int)})

    evaluated = config_def.config_type.evaluate_value({'int_field': 1})

    assert evaluated == {'int_field': 1}
def test_int_field():
    """The named ``SingleRequiredInt`` config dict accepts an int value."""
    fields = {'int_field': Field(types.Int)}
    config_def = ConfigDefinition.config_dict('SingleRequiredInt', fields)

    supplied = {'int_field': 1}
    assert config_def.config_type.evaluate_value(supplied) == {'int_field': 1}
def test_int_fails():
    """An int field rejects non-int values: first a string, then a bool."""
    config_def = ConfigDefinition.config_dict({'int_field': Field(types.Int)})

    for bad_value in ('fjkdj', True):
        with pytest.raises(DagsterEvaluateValueError):
            config_def.config_type.evaluate_value({'int_field': bad_value})
def test_invalid_context():
    """Check that bad context selections and bad context config are rejected.

    Four cases are exercised, each expected to raise from ``execute_pipeline``:
    an unknown context name, an unexpected config field when no context is
    named, a missing ``string_field``, and a ``string_field`` given an int.
    The pipeline's only solid raises if it is ever executed.
    """

    @lambda_solid
    def never_transform():
        raise Exception('should never execute')

    def _assert_rejected(pipeline, environment, error_type, failure_message):
        # ``pytest.raises(..., message=...)`` never compared against the
        # exception text -- it was only the text reported when nothing was
        # raised -- and the keyword was removed in pytest 5. Calling
        # ``pytest.fail`` at the end of the block is the documented equivalent
        # and works on every pytest version.
        # NOTE(review): the strings read like expected exception messages; if
        # matching was the intent, switch to ``match=`` after confirming the
        # real wording of the errors.
        with pytest.raises(error_type):
            execute_pipeline(pipeline, environment=environment, throw_on_error=True)
            pytest.fail(failure_message)

    default_context_pipeline = PipelineDefinition(solids=[never_transform])

    # Case 1: the environment names a context the pipeline does not define.
    environment_context_not_found = config.Environment(
        context=config.Context('not_found', {})
    )
    _assert_rejected(
        default_context_pipeline,
        environment_context_not_found,
        DagsterInvariantViolationError,
        'Context not_found not found',
    )

    # Case 2: no context is named, but the config has an unexpected field.
    environment_field_name_mismatch = config.Environment(
        context=config.Context(config={'unexpected': 'value'})
    )
    _assert_rejected(
        default_context_pipeline,
        environment_field_name_mismatch,
        DagsterTypeError,
        'Argument mismatch in context default',
    )

    with_argful_context_pipeline = PipelineDefinition(
        solids=[never_transform],
        context_definitions={
            'default': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'string_field': Field(types.String)}
                ),
                context_fn=lambda info: info.config,
            )
        },
    )

    # Case 3: ``string_field`` is declared but the config is empty.
    environment_no_config_error = config.Environment(
        context=config.Context(config={})
    )
    _assert_rejected(
        with_argful_context_pipeline,
        environment_no_config_error,
        DagsterTypeError,
        'Argument mismatch in context default',
    )

    # Case 4: ``string_field`` is given an int instead of a string.
    environment_type_mismatch_error = config.Environment(
        context=config.Context(config={'string_field': 1})
    )
    _assert_rejected(
        with_argful_context_pipeline,
        environment_type_mismatch_error,
        DagsterTypeError,
        'Argument mismatch in context default',
    )
def _mixed_required_optional_string_config_dict_with_default():
    """Build the ``MixedRequired`` config dict of three string fields.

    ``optional_arg`` is optional with default ``'some_default'``,
    ``required_arg`` is explicitly non-optional, and
    ``optional_arg_no_default`` is optional without a default.
    """
    optional_with_default = Field(
        types.String,
        is_optional=True,
        default_value='some_default',
    )
    required = Field(types.String, is_optional=False)
    optional_without_default = Field(types.String, is_optional=True)

    return ConfigDefinition.config_dict(
        'MixedRequired',
        {
            'optional_arg': optional_with_default,
            'required_arg': required,
            'optional_arg_no_default': optional_without_default,
        },
    )
def define_to_csv_solid(name):
    """Define a solid that writes its ``df`` input to a CSV file.

    The destination is read from the solid's ``path`` config field and the
    write is done with ``index=False``. The solid declares no outputs.
    """

    def _write_csv(info, inputs):
        frame = inputs['df']
        destination = info.config['path']
        frame.to_csv(destination, index=False)

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('df')],
        outputs=[],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_write_csv,
    )
def define_read_csv_solid(name):
    """Define a solid that reads a CSV file and yields it as its one output.

    The file location is read from the solid's ``path`` config field and is
    loaded with ``pd.read_csv``. The solid takes no inputs.
    """

    def _read_csv(info, _inputs):
        frame = pd.read_csv(info.config['path'])
        yield Result(frame)

    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[OutputDefinition()],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_read_csv,
    )
def to_parquet_solid(name):
    """Define a solid that writes its ``df`` input to a parquet file.

    The input is declared with the ``DataFrame`` type and the destination is
    read from the solid's ``path`` config field. The solid has no outputs.
    """

    def _write_parquet(info, inputs):
        frame = inputs['df']
        destination = info.config['path']
        frame.to_parquet(destination)

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('df', DataFrame)],
        outputs=[],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_write_parquet,
    )
def load_csv_solid(name):
    """Define a solid that loads a CSV file into a ``DataFrame`` output.

    ``name`` is checked to be a string. The file location is read from the
    solid's ``path`` config field; the solid takes no inputs.
    """
    check.str_param(name, 'name')

    def _load_csv(info, _inputs):
        loaded = pd.read_csv(info.config['path'])
        yield Result(loaded)

    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[OutputDefinition(DataFrame)],
        transform_fn=_load_csv,
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
    )
def define_pass_value_solid(name, description=None):
    """Define a solid that yields its ``value`` config field as a string output.

    ``name`` must be a string and ``description`` a string or ``None``; both
    are passed on to the solid definition. The solid takes no inputs.
    """
    check.str_param(name, 'name')
    check.opt_str_param(description, 'description')

    def _emit_configured_value(info, _inputs):
        configured_value = info.config['value']
        yield Result(configured_value)

    return SolidDefinition(
        name=name,
        description=description,
        inputs=[],
        outputs=[OutputDefinition(types.String)],
        config_def=ConfigDefinition.config_dict({'value': Field(types.String)}),
        transform_fn=_emit_configured_value,
    )
def define_create_table_solid(name):
    """Define a solid that materializes a SQL expression as a new table.

    The solid takes one input, ``expr`` (checked to be a
    ``DagsterSqlExpression``), and one string config field, ``table_name``.
    It runs ``CREATE TABLE <table_name> AS <expr.query_text>`` on a connection
    obtained from ``info.context.resources.sa.engine`` and declares no outputs.

    Args:
        name: Name given to the returned solid definition.

    Returns:
        The ``SolidDefinition`` for the create-table solid.
    """

    def _materialization_fn(info, inputs):
        sql_expr = inputs['expr']
        check.inst(sql_expr, DagsterSqlExpression)

        output_table_name = check.str_elem(info.config, 'table_name')
        # NOTE(review): the table name is interpolated into the statement
        # as-is; confirm that solid config is trusted input.
        total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
            output_table_name=output_table_name, query_text=sql_expr.query_text
        )

        # Close the connection explicitly instead of leaving it to garbage
        # collection, so it is released even when the statement fails.
        connection = info.context.resources.sa.engine.connect()
        try:
            connection.execute(total_sql)
        finally:
            connection.close()

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('expr')],
        outputs=[],
        transform_fn=_materialization_fn,
        config_def=ConfigDefinition.config_dict({
            'table_name': Field(types.String),
        }),
    )
def test_config_arg_mismatch():
    """Giving an int to a string config field raises ``DagsterTypeError``."""

    def _unreachable_transform(*_args):
        raise Exception('should not reach')

    solid_config = ConfigDefinition.config_dict(
        'SomeConfig',
        {'some_config': Field(types.String)},
    )
    solid_def = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=solid_config,
        transform_fn=_unreachable_transform,
    )
    pipeline = PipelineDefinition(solids=[solid_def])

    with pytest.raises(DagsterTypeError):
        # The environment is built inside the block, as before, so an error
        # raised while constructing it is also covered.
        mismatched_environment = config.Environment(
            solids={'solid_with_context': config.Solid({'some_config': 1})}
        )
        execute_pipeline(pipeline, mismatched_environment)
def create_templated_sql_transform_solid(name, sql, table_arguments, dependant_solids=None):
    """Define a solid that runs a templated SQL statement.

    Each entry of ``table_arguments`` becomes a string config field of the
    same name, and each solid in ``dependant_solids`` becomes an input named
    after that solid. The transform comes from
    ``_create_templated_sql_transform_with_output(sql)`` and the solid has a
    single output.

    Args:
        name: Solid name (string).
        sql: SQL template text (string).
        table_arguments: List of strings used as config field names.
        dependant_solids: Optional list of ``SolidDefinition`` objects.
    """
    check.str_param(name, 'name')
    check.str_param(sql, 'sql')
    check.list_param(table_arguments, 'table_arguments', of_type=str)
    dependant_solids = check.opt_list_param(
        dependant_solids, 'dependant_solids', of_type=SolidDefinition
    )

    table_fields = {table: Field(types.String) for table in table_arguments}
    upstream_inputs = [InputDefinition(upstream.name) for upstream in dependant_solids]

    return SolidDefinition(
        name=name,
        inputs=upstream_inputs,
        config_def=ConfigDefinition.config_dict(table_fields),
        transform_fn=_create_templated_sql_transform_with_output(sql),
        outputs=[OutputDefinition()],
    )
def _single_optional_string_config_dict():
    """Build a config dict with one optional string field, ``optional_field``."""
    optional_field = Field(types.String, is_optional=True)
    return ConfigDefinition.config_dict({'optional_field': optional_field})
def _single_required_string_config_dict():
    """Build a config dict with one string field, ``string_field``."""
    string_field = Field(types.String)
    return ConfigDefinition.config_dict({'string_field': string_field})