def _traverse_airflow_dag(task, seen_tasks, pipeline_dependencies, solid_defs):
    """Visit ``task``, record its solid, then recurse into unseen downstream tasks.

    ``task`` is appended to ``seen_tasks`` and the solid built for it is
    appended to ``solid_defs``. If the task has upstream tasks, an entry keyed
    by the solid's name is written into ``pipeline_dependencies`` that fans the
    ``airflow_task_complete`` output of every upstream solid into this solid's
    ``airflow_task_ready`` input.

    Args:
        task: The ``BaseOperator`` being visited.
        seen_tasks: List of ``BaseOperator`` visited so far (mutated in place).
        pipeline_dependencies: Mapping indexed by solid name (mutated in place).
        solid_defs: List of ``SolidDefinition`` built so far (mutated in place).
    """
    check.inst_param(task, 'task', BaseOperator)
    check.list_param(seen_tasks, 'seen_tasks', BaseOperator)
    check.list_param(solid_defs, 'solid_defs', SolidDefinition)

    def _task_id(airflow_task):
        return airflow_task.task_id

    seen_tasks.append(task)
    solid_for_task = make_dagster_solid_from_airflow_task(task)
    solid_defs.append(solid_for_task)

    # Sorted by task_id to enforce predictable iteration order
    parents = sorted(task.upstream_list, key=_task_id)
    if parents:
        parent_outputs = []
        for parent in parents:
            parent_outputs.append(
                DependencyDefinition(
                    solid='airflow_' + parent.task_id, output='airflow_task_complete'
                )
            )
        pipeline_dependencies[solid_for_task.name] = {
            'airflow_task_ready': MultiDependencyDefinition(parent_outputs)
        }

    # Sorted by task_id to enforce predictable iteration order
    for child in sorted(task.downstream_list, key=_task_id):
        if child in seen_tasks:
            continue
        _traverse_airflow_dag(child, seen_tasks, pipeline_dependencies, solid_defs)
def test_interleaved_values():
    """Fan an int, a ``None`` and a str into a single ``List(Any)`` input."""

    @lambda_solid
    def emit_num():
        return 1

    @lambda_solid
    def emit_none():
        pass

    @lambda_solid
    def emit_str():
        return 'one'

    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        # Compared as sets, so the order the values arrive in is not checked.
        assert set(stuff) == {1, None, 'one'}
        return stuff

    upstream_names = ('emit_num', 'emit_none', 'emit_str')
    pipeline_def = PipelineDefinition(
        name='input_test',
        solids=[emit_num, emit_none, emit_str, collect],
        dependencies={
            'collect': {
                'stuff': MultiDependencyDefinition(
                    [DependencyDefinition(upstream) for upstream in upstream_names]
                )
            }
        },
    )

    outcome = execute_pipeline(pipeline_def)
    assert outcome.success
def define_many_events_pipeline():
    """Build and return the ``many_events`` pipeline definition.

    The solids are the four named solids plus whatever
    ``create_raw_file_solids()`` returns. ``many_table_materializations`` waits
    on one dependency per entry in ``raw_files``; the remaining solids are
    chained through their ``start`` inputs.
    """
    all_solids = [
        many_table_materializations,
        many_materializations_and_passing_expectations,
        check_users_and_groups_one_fails_one_succeeds,
        check_admins_both_succeed,
    ] + create_raw_file_solids()

    wiring = {
        'many_table_materializations': {
            'start': MultiDependencyDefinition(
                [DependencyDefinition(raw_file) for raw_file in raw_files]
            )
        },
        'many_materializations_and_passing_expectations': {
            'start': DependencyDefinition('many_table_materializations')
        },
    }
    # Both check solids start after the same upstream solid.
    for checker in (
        'check_users_and_groups_one_fails_one_succeeds',
        'check_admins_both_succeed',
    ):
        wiring[checker] = {
            'start': DependencyDefinition('many_materializations_and_passing_expectations')
        }

    # NOTE(review): both dicts carry a 'many_table_materializations' key; which
    # entry survives depends on merge_dicts -- confirm against that helper.
    dependencies = merge_dicts({'many_table_materializations': {}}, wiring)

    return PipelineDefinition(
        name='many_events',
        solids=all_solids,
        dependencies=dependencies,
        mode_definitions=[ModeDefinition()],
    )
def test_fanin_deps():
    """Fan three ``Nothing`` outputs into one input alongside two value inputs.

    Checks that ``adder`` runs once, and only after all three aliased
    ``emit_nothing`` instances have run.
    """
    invocations = defaultdict(int)

    @lambda_solid
    def emit_two():
        return 2

    @lambda_solid(output=OutputDefinition(Nothing))
    def emit_nothing():
        invocations['emit_nothing'] += 1

    @solid(
        inputs=[
            InputDefinition('ready', Nothing),
            InputDefinition('num_1', Int),
            InputDefinition('num_2', Int),
        ]
    )
    def adder(_context, num_1, num_2):
        # All three upstream Nothing-producers must have run before this body.
        assert invocations['emit_nothing'] == 3
        invocations['adder'] += 1
        return num_1 + num_2

    # Aliased instances come first, in the same key order as a literal dict.
    wiring = {}
    for alias in ('emit_1', 'emit_2'):
        wiring[SolidInstance('emit_two', alias)] = {}
    for alias in ('_one', '_two', '_three'):
        wiring[SolidInstance('emit_nothing', alias)] = {}
    wiring['adder'] = {
        'ready': MultiDependencyDefinition(
            [DependencyDefinition(alias) for alias in ('_one', '_two', '_three')]
        ),
        'num_1': DependencyDefinition('emit_1'),
        'num_2': DependencyDefinition('emit_2'),
    }

    fanin_pipeline = PipelineDefinition(
        name='input_test',
        solids=[emit_two, emit_nothing, adder],
        dependencies=wiring,
    )

    outcome = execute_pipeline(fanin_pipeline)
    assert outcome.success
    assert invocations['adder'] == 1
    assert invocations['emit_nothing'] == 3
def test_fanin_deps():
    """Fan three ``Nothing`` outputs into one input alongside two value inputs.

    Checks that ``adder`` runs once, and only after all three aliased
    ``emit_nothing`` invocations have run.
    """
    invocations = defaultdict(int)

    @lambda_solid
    def emit_two():
        return 2

    @lambda_solid(output_def=OutputDefinition(Nothing))
    def emit_nothing():
        invocations["emit_nothing"] += 1

    @solid(
        input_defs=[
            InputDefinition("ready", Nothing),
            InputDefinition("num_1", Int),
            InputDefinition("num_2", Int),
        ]
    )
    def adder(_context, num_1, num_2):
        # All three upstream Nothing-producers must have run before this body.
        assert invocations["emit_nothing"] == 3
        invocations["adder"] += 1
        return num_1 + num_2

    # Aliased invocations come first, in the same key order as a literal dict.
    wiring = {}
    for alias in ("emit_1", "emit_2"):
        wiring[SolidInvocation("emit_two", alias)] = {}
    for alias in ("_one", "_two", "_three"):
        wiring[SolidInvocation("emit_nothing", alias)] = {}
    wiring["adder"] = {
        "ready": MultiDependencyDefinition(
            [DependencyDefinition(alias) for alias in ("_one", "_two", "_three")]
        ),
        "num_1": DependencyDefinition("emit_1"),
        "num_2": DependencyDefinition("emit_2"),
    }

    fanin_pipeline = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_two, emit_nothing, adder],
        dependencies=wiring,
    )

    outcome = execute_pipeline(fanin_pipeline)
    assert outcome.success
    assert invocations["adder"] == 1
    assert invocations["emit_nothing"] == 3
def test_pipeline_subset_with_multi_dependency():
    """Execute a pipeline with a fanned-in ``Nothing`` input, whole and as subsets.

    ``noop`` waits on both ``return_one`` and ``return_two`` through a
    ``MultiDependencyDefinition``. The pipeline is run in full, as a subset
    containing only ``noop``, and as a subset naming all three solids.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(inputs=[InputDefinition('dep', Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        dependencies={
            'noop': {
                'dep': MultiDependencyDefinition(
                    [DependencyDefinition('return_one'), DependencyDefinition('return_two')]
                )
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('noop').result_value() == 3

    subset_result = execute_pipeline(pipeline_def.build_sub_pipeline(['noop']))
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid('noop').result_value() == 3

    events = execute_pipeline_iterator(pipeline_def.build_sub_pipeline(['noop']))
    for step_event in step_output_event_filter(events):
        assert step_event.is_step_success

    subset_result = execute_pipeline(
        pipeline_def.build_sub_pipeline(['return_one', 'return_two', 'noop'])
    )
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid('noop').result_value() == 3
def test_interleaved_values():
    """Run a pipeline that fans ``emit_num``, ``emit_none`` and ``emit_str`` into ``collect``."""
    upstream_names = ("emit_num", "emit_none", "emit_str")

    pipeline_def = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_num, emit_none, emit_str, collect],
        dependencies={
            "collect": {
                "stuff": MultiDependencyDefinition(
                    [DependencyDefinition(upstream) for upstream in upstream_names]
                )
            }
        },
    )

    outcome = execute_pipeline(pipeline_def)
    assert outcome.success
def test_interleaved_values():
    """Run a pipeline that fans ``emit_num``, ``emit_none`` and ``emit_str`` into ``collect``."""
    upstream_names = ('emit_num', 'emit_none', 'emit_str')

    pipeline_def = PipelineDefinition(
        name='input_test',
        solid_defs=[emit_num, emit_none, emit_str, collect],
        dependencies={
            'collect': {
                'stuff': MultiDependencyDefinition(
                    [DependencyDefinition(upstream) for upstream in upstream_names]
                )
            }
        },
    )

    outcome = execute_pipeline(pipeline_def)
    assert outcome.success
def test_pipeline_subset_with_multi_dependency():
    """Execute a pipeline with a fanned-in ``Nothing`` input, whole and as subsets.

    ``noop`` waits on both ``return_one`` and ``return_two`` through a
    ``MultiDependencyDefinition``. The pipeline is run in full, as a subset
    containing only ``noop``, and as a subset naming all three solids.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(input_defs=[InputDefinition("dep", Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        name="test",
        dependencies={
            "noop": {
                "dep": MultiDependencyDefinition(
                    [DependencyDefinition("return_one"), DependencyDefinition("return_two")]
                )
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("noop").output_value() == 3

    subset_result = execute_pipeline(pipeline_def.get_pipeline_subset_def({"noop"}))
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid("noop").output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.get_pipeline_subset_def({"return_one", "return_two", "noop"})
    )
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid("noop").output_value() == 3
def test_pipeline_subset_with_multi_dependency():
    """Execute a pipeline with a fanned-in ``Nothing`` input, whole and as subsets.

    ``noop`` waits on both ``return_one`` and ``return_two`` through a
    ``MultiDependencyDefinition``. The pipeline is run in full, as a subset
    containing only ``noop``, and as a subset naming all three solids.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(input_defs=[InputDefinition('dep', Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        dependencies={
            'noop': {
                'dep': MultiDependencyDefinition(
                    [DependencyDefinition('return_one'), DependencyDefinition('return_two')]
                )
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('noop').output_value() == 3

    subset_result = execute_pipeline(pipeline_def.subset_for_execution(['noop']))
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid('noop').output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.subset_for_execution(['return_one', 'return_two', 'noop'])
    )
    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Check the subset run itself; the full-pipeline result was verified above.
    assert subset_result.result_for_solid('noop').output_value() == 3
def _traverse_airflow_dag(
    task, seen_tasks, pipeline_dependencies, solid_defs, use_airflow_template_context, unique_id
):
    """Visit ``task``, record its solid, then recurse into unseen downstream tasks.

    ``task`` is appended to ``seen_tasks`` and the solid built for it is
    appended to ``solid_defs``. If the task has upstream tasks, an entry keyed
    by the solid's name is written into ``pipeline_dependencies`` that fans the
    ``airflow_task_complete`` output of every upstream solid into this solid's
    ``airflow_task_ready`` input.

    Args:
        task: The ``BaseOperator`` being visited.
        seen_tasks: List of ``BaseOperator`` visited so far (mutated in place).
        pipeline_dependencies: Mapping indexed by solid name (mutated in place).
        solid_defs: List of ``SolidDefinition`` built so far (mutated in place).
        use_airflow_template_context: Bool forwarded to
            ``make_dagster_solid_from_airflow_task``.
        unique_id: Optional int forwarded to the solid builder and to
            ``normalized_name`` when naming upstream solids.
    """
    check.inst_param(task, "task", BaseOperator)
    check.list_param(seen_tasks, "seen_tasks", BaseOperator)
    check.list_param(solid_defs, "solid_defs", SolidDefinition)
    check.bool_param(use_airflow_template_context, "use_airflow_template_context")
    unique_id = check.opt_int_param(unique_id, "unique_id")

    def _task_id(airflow_task):
        return airflow_task.task_id

    seen_tasks.append(task)
    solid_for_task = make_dagster_solid_from_airflow_task(
        task, use_airflow_template_context, unique_id
    )
    solid_defs.append(solid_for_task)

    # Sorted by task_id to enforce predictable iteration order
    parents = sorted(task.upstream_list, key=_task_id)
    if parents:
        parent_outputs = []
        for parent in parents:
            parent_outputs.append(
                DependencyDefinition(
                    solid=normalized_name(parent.task_id, unique_id),
                    output="airflow_task_complete",
                )
            )
        pipeline_dependencies[solid_for_task.name] = {
            "airflow_task_ready": MultiDependencyDefinition(parent_outputs)
        }

    # Sorted by task_id to enforce predictable iteration order
    for child in sorted(task.downstream_list, key=_task_id):
        if child in seen_tasks:
            continue
        _traverse_airflow_dag(
            child,
            seen_tasks,
            pipeline_dependencies,
            solid_defs,
            use_airflow_template_context,
            unique_id,
        )
def test_nothing_deps():
    """Constructing a pipeline that fans a ``Nothing`` output into ``stuff`` must fail."""
    expected_message = (
        r'Input "stuff" expects a value of type \[Any\] and output '
        '"result" returns type Nothing'
    )
    upstream_names = ("emit_num", "emit_nothing", "emit_str")

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        PipelineDefinition(
            name="input_test",
            solid_defs=[emit_num, emit_nothing, emit_str, collect],
            dependencies={
                "collect": {
                    "stuff": MultiDependencyDefinition(
                        [DependencyDefinition(upstream) for upstream in upstream_names]
                    )
                }
            },
        )
def test_simple_values():
    """Fan three int outputs into one ``List[Int]`` input and sum them."""

    @lambda_solid
    def emit_1():
        return 1

    @lambda_solid
    def emit_2():
        return 2

    @lambda_solid
    def emit_3():
        return 3

    @solid(input_defs=[InputDefinition("numbers", List[Int])])
    def sum_num(_context, numbers):
        # cant guarantee order
        assert set(numbers) == {1, 2, 3}
        return sum(numbers)

    upstream_names = ("emit_1", "emit_2", "emit_3")
    pipeline_def = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_1, emit_2, emit_3, sum_num],
        dependencies={
            "sum_num": {
                "numbers": MultiDependencyDefinition(
                    [DependencyDefinition(upstream) for upstream in upstream_names]
                )
            }
        },
    )

    outcome = execute_pipeline(pipeline_def)
    assert outcome.success
    assert outcome.result_for_solid("sum_num").output_value() == 6
def test_nothing_deps():
    """Constructing a pipeline that fans a ``Nothing`` output into ``stuff`` must fail."""

    @lambda_solid(output=OutputDefinition(Int))
    def emit_num():
        return 1

    @lambda_solid(output=OutputDefinition(Nothing))
    def emit_nothing():
        pass

    @lambda_solid(output=OutputDefinition(String))
    def emit_str():
        return 'one'

    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        return stuff

    expected_message = (
        r'Input "stuff" expects a value of type \[Any\] and output '
        '"result" returns type Nothing'
    )
    upstream_names = ('emit_num', 'emit_nothing', 'emit_str')

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        PipelineDefinition(
            name='input_test',
            solids=[emit_num, emit_nothing, emit_str, collect],
            dependencies={
                'collect': {
                    'stuff': MultiDependencyDefinition(
                        [DependencyDefinition(upstream) for upstream in upstream_names]
                    )
                }
            },
        )
def test_simple_values():
    """Fan three int outputs into one ``List(Int)`` input and sum them."""

    @lambda_solid
    def emit_1():
        return 1

    @lambda_solid
    def emit_2():
        return 2

    @lambda_solid
    def emit_3():
        return 3

    @solid(inputs=[InputDefinition('numbers', List(Int))])
    def sum_num(_context, numbers):
        # cant guarantee order
        assert set(numbers) == {1, 2, 3}
        return sum(numbers)

    upstream_names = ('emit_1', 'emit_2', 'emit_3')
    pipeline_def = PipelineDefinition(
        name='input_test',
        solids=[emit_1, emit_2, emit_3, sum_num],
        dependencies={
            'sum_num': {
                'numbers': MultiDependencyDefinition(
                    [DependencyDefinition(upstream) for upstream in upstream_names]
                )
            }
        },
    )

    outcome = execute_pipeline(pipeline_def)
    assert outcome.success
    assert outcome.result_for_solid('sum_num').transformed_value() == 6
def test_fan_in_manual():
    """Hand-build the fan-in composite and check the placeholder validation errors.

    One valid ``CompositeSolidDefinition`` is constructed, followed by three
    invalid ones that differ only in the dependency given for ``collect.stuff``;
    each invalid one must raise ``DagsterInvalidDefinitionError`` with the
    expected message.
    """

    # manually building up this guy
    @composite_solid
    def _target_composite_dsl(str_in, none_in):
        num = emit_num()
        return collect([num, str_in, none_in])

    def _manual_composite(stuff_dependency):
        # Every case shares everything except what feeds collect's "stuff" input.
        return CompositeSolidDefinition(
            name="manual_composite",
            solid_defs=[emit_num, collect],
            input_mappings=[
                InputDefinition("str_in").mapping_to("collect", "stuff", 1),
                InputDefinition("none_in").mapping_to("collect", "stuff", 2),
            ],
            output_mappings=[OutputDefinition().mapping_from("collect")],
            dependencies={"collect": {"stuff": stuff_dependency}},
        )

    # base case works
    _valid_composite = _manual_composite(
        MultiDependencyDefinition(
            [
                DependencyDefinition("emit_num"),
                MappedInputPlaceholder,
                MappedInputPlaceholder,
            ]
        )
    )

    # Mapping targets index 2, but the fan-in list has only two entries.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="index 2 in the MultiDependencyDefinition is not a MappedInputPlaceholder",
    ):
        _manual_composite(
            MultiDependencyDefinition(
                [
                    DependencyDefinition("emit_num"),
                    MappedInputPlaceholder,
                ]
            )
        )

    # Mapped input is wired to a plain DependencyDefinition instead of a fan-in.
    with pytest.raises(
        DagsterInvalidDefinitionError, match="is not a MultiDependencyDefinition"
    ):
        _manual_composite(DependencyDefinition("emit_num"))

    # A placeholder at index 3 exists that no input mapping points at.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="Unsatisfied MappedInputPlaceholder at index 3",
    ):
        _manual_composite(
            MultiDependencyDefinition(
                [
                    DependencyDefinition("emit_num"),
                    MappedInputPlaceholder,
                    MappedInputPlaceholder,
                    MappedInputPlaceholder,
                ]
            )
        )