def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    run_id = str(uuid.uuid4())

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    store = FileSystemIntermediateStore(run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int) == 2
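The helper `define_inty_pipeline` is not included in these snippets. A minimal sketch, assuming the `@lambda_solid`/`@pipeline` decorators used elsewhere in this section and an `input_defs` keyword (both vary across the dagster versions these tests span), consistent with the assertions above (`return_one.compute` yields 1, `add_one.compute` yields 2):

from dagster import Int, InputDefinition, lambda_solid, pipeline


@lambda_solid
def return_one():
    # Read back by the test as the 'return_one.compute' intermediate.
    return 1


@lambda_solid(input_defs=[InputDefinition('num', Int)])
def add_one(num):
    # 1 + 1 == 2, matching the 'add_one.compute' assertion.
    return num + 1


@pipeline
def inty_pipeline():
    add_one(return_one())


def define_inty_pipeline():
    return inty_pipeline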
class TestExecuteDagContainerizedS3Storage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan']['__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                )
            )[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
class TestAirflowPython_1WarehouseExecution(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(define_airline_demo_warehouse_pipeline)
    pipeline_name = 'airline_demo_warehouse_pipeline'
    environment_yaml = [
        script_relative_path(
            os.path.join(
                '..', '..', 'dagster_examples', 'airline_demo', 'environments', 'local_base.yaml'
            )
        ),
        script_relative_path(
            os.path.join(
                '..', '..', 'dagster_examples', 'airline_demo', 'environments', 'local_airflow.yaml'
            )
        ),
        script_relative_path(
            os.path.join(
                '..',
                '..',
                'dagster_examples',
                'airline_demo',
                'environments',
                'local_warehouse.yaml',
            )
        ),
    ]
    mode = 'local'

    def test_airflow_run_warehouse_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass
def test_error_pipeline_multiprocess():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline).build_pipeline_definition(),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
    )
    assert not result.success
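`define_error_pipeline` is likewise assumed rather than shown. A minimal sketch, using the decorator style from the event-sink test later in this section, in which a single solid raises so the multiprocess run fails:

from dagster import lambda_solid, pipeline


@lambda_solid
def throw_error():
    # Any uncaught exception is enough for the test to observe `result.success == False`.
    raise Exception('intentional failure')


@pipeline
def error_pipeline():
    throw_error()


def define_error_pipeline():
    return error_pipeline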
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
def test_event_sink_serialization():
    event_records = []

    class TestEventSink(EventSink):
        def on_dagster_event(self, dagster_event):
            event_records.append(dagster_event)

        def on_log_message(self, log_message):
            event_records.append(log_message)

    @lambda_solid
    def no():
        raise Exception('no')

    @pipeline
    def fails():
        no()

    sink = TestEventSink()

    # basic success
    execute_pipeline(define_simple(), run_config=RunConfig(event_sink=sink))

    # basic failure
    execute_pipeline(
        fails,
        run_config=RunConfig(event_sink=sink),
        environment_dict={'execution': {'in_process': {'config': {'raise_on_error': False}}}},
    )

    # multiproc
    execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_simple).build_pipeline_definition(),
        run_config=RunConfig(event_sink=sink),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
    )

    # kitchen sink
    execute_pipeline(many_events, run_config=RunConfig(event_sink=sink))

    for dagster_event in event_records:
        payload = dagster_event.to_json()
        clone = EventRecord.from_json(payload)
        assert clone == dagster_event
def test_execute_on_dask():
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
        env_config={'storage': {'filesystem': {}}},
        dask_config=DaskConfig(timeout=30),
    )
    assert result.result_for_solid('simple').result_value() == 1
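`define_dask_test_pipeline` is assumed rather than shown; a minimal sketch consistent with the assertion above is a single solid named `simple` that returns 1:

from dagster import lambda_solid, pipeline


@lambda_solid
def simple():
    # The dask tests only check that this value round-trips through filesystem storage.
    return 1


@pipeline
def dask_test_pipeline():
    simple()


def define_dask_test_pipeline():
    return dask_test_pipeline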
def test_mem_storage_error_pipeline_multiprocess():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
def test_error_pipeline_multiprocess():
    pipeline = define_error_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline)
            )
        ),
        environment_dict={'storage': {'filesystem': {}}},
    )
    assert not result.success
def test_error_pipeline_multiprocess():
    pipeline = define_error_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline)
            ),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
    assert not result.success
class TestAirflowPython_0IngestExecution:
    handle = ExecutionTargetHandle.for_pipeline_fn(define_airline_demo_ingest_pipeline)
    pipeline_name = 'airline_demo_ingest_pipeline'
    config_yaml = [
        script_relative_path(os.path.join('..', 'environments', 'local_base.yaml')),
        script_relative_path(os.path.join('..', 'environments', 'local_airflow.yaml')),
        script_relative_path(os.path.join('..', 'environments', 'local_fast_ingest.yaml')),
    ]
    mode = 'local'

    def test_airflow_run_ingest_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass
def test_execute_on_dask():
    '''This test is flaky on py27, I believe because of
    https://github.com/dask/distributed/issues/2446. For now, we just retry a couple times...
    '''
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
        env_config={'storage': {'filesystem': {}}},
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
        dask_config=DaskConfig(timeout=30),
    )
    assert result.result_for_solid('simple').transformed_value() == 1
class TestAirflowizedEventPipeline(object):
    config_yaml = [
        script_relative_path('../../dagster_examples/airline_demo/environments/default.yaml')
    ]
    handle = ExecutionTargetHandle.for_pipeline_fn(define_event_ingest_pipeline)
    pipeline_name = 'event_ingest_pipeline'

    # pylint: disable=redefined-outer-name
    def test_airflowized_event_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass
def test_invalid_instance():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.ephemeral(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
    assert (
        result.event_list[0].pipeline_init_failure_data.error.cls_name
        == 'DagsterUnmetExecutorRequirementsError'
    )
    assert 'non-ephemeral instance' in result.event_list[0].pipeline_init_failure_data.error.message
def test_using_file_system_for_subplan_multiprocessing():
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(
        ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline).build_pipeline_definition(),
        environment_dict=environment_dict,
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    run_id = str(uuid.uuid4())
    instance.create_empty_run(run_id, execution_plan.pipeline_def.name)

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
def test_mem_storage_error_pipeline_multiprocess():
    with pytest.raises(DagsterInvariantViolationError) as exc_info:
        execute_pipeline(
            define_diamond_pipeline(),
            run_config=RunConfig(
                executor_config=MultiprocessExecutorConfig(
                    ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline)
                )
            ),
        )

    assert (
        'While invoking pipeline diamond_execution. You have attempted to use the '
        'multiprocessing executor while using system storage in_memory which does not '
        'persist intermediates. This means there would be no way to move data between '
        'different processes. Please configure your pipeline in the storage config '
        'section to use persistent system storage such as the filesystem.'
    ) in str(exc_info.value)
def test_invalid_instance():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.ephemeral(),
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
def test_using_file_system_for_subplan_multiprocessing():
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    pipeline_def = ExecutionTargetHandle.for_pipeline_fn(
        define_inty_pipeline
    ).build_pipeline_definition()

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=pipeline_def, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
class TestExecuteDagPythonS3Storage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        expected_results = {
            'multiply_the_word': '"barbar"',
            'count_letters': '{"b": 2, "a": 2, "r": 2}',
        }
        for result in dagster_airflow_python_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan']['__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                )
            )[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
            # This ugly beast is to deal with cross-python-version differences in `valueRepr` --
            # in py2 we'll get 'u"barbar"', in py3 we'll get '"barbar"', etc.
            assert json.loads(  # pylint: disable=anomalous-backslash-in-string
                re.sub(
                    r'\{u\'',
                    '{\'',
                    re.sub(' u\'', ' \'', re.sub('^u\'', '\'', result['valueRepr'])),
                ).replace('\'', '"')
            ) == json.loads(
                expected_results[result['step']['solidHandleID']].replace('\'', '"')
            )
class TestExecuteDagContainerizedFilesystemStorage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_filesystem.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        expected_results = {
            'multiply_the_word': '"barbar"',
            'count_letters': '{"b": 2, "a": 2, "r": 2}',
        }
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan']['__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                )
            )[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
            assert json.loads(
                re.sub(
                    '{u\'',
                    '{\'',
                    re.sub(' u\'', ' \'', re.sub('^u\'', '\'', result['valueRepr'])),
                ).replace('\'', '"')
            ) == json.loads(
                expected_results[result['step']['solidHandleID']].replace('\'', '"')
            )
def test_diamond_multi_execution():
    pipeline = define_diamond_pipeline()
    result = execute_pipeline(
        pipeline,
        environment_dict={'storage': {'filesystem': {}}},
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline)
            )
        ),
    )
    assert result.success

    assert result.result_for_solid('adder').result_value() == 11

    pids_by_solid = {}
    for solid in pipeline.solids:
        pids_by_solid[solid.name] = transform_event(result, solid.name).logging_tags['pid']

    # guarantee that all solids ran in their own process
    assert len(set(pids_by_solid.values())) == len(pipeline.solids)
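`define_diamond_pipeline` is also assumed rather than shown. A sketch of a diamond-shaped graph whose values are chosen only to satisfy the `adder == 11` assertion above (decorator and input-definition keywords depend on the dagster version):

from dagster import InputDefinition, lambda_solid, pipeline


@lambda_solid
def return_two():
    return 2


@lambda_solid(input_defs=[InputDefinition('num')])
def add_three(num):
    return num + 3


@lambda_solid(input_defs=[InputDefinition('num')])
def mult_three(num):
    return num * 3


@lambda_solid(input_defs=[InputDefinition('left'), InputDefinition('right')])
def adder(left, right):
    # 5 + 6 == 11, matching test_diamond_multi_execution.
    return left + right


@pipeline
def diamond_pipeline():
    two = return_two()
    adder(left=add_three(two), right=mult_three(two))


def define_diamond_pipeline():
    return diamond_pipeline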
def test_diamond_multi_execution():
    pipeline = define_diamond_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline)
            ),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
    assert result.success

    # FIXME: be able to get this value
    # https://github.com/dagster-io/dagster/issues/953
    # assert result.result_for_solid('adder').transformed_value() == 11

    pids_by_solid = {}
    for solid in pipeline.solids:
        pids_by_solid[solid.name] = transform_event(result, solid.name).logging_tags['pid']

    # guarantee that all solids ran in their own process
    assert len(set(pids_by_solid.values())) == len(pipeline.solids)
@solid(resources={'R2'})
def two(_):
    return 1


@solid(resources={'R1', 'R2', 'R3'})
def one_and_two_and_three(_):
    return 1


def define_resource_pipeline():
    return PipelineDefinition(
        name='resources for days',
        solids=[all_resources, one, two, one_and_two_and_three],
        mode_definitions=[ModeDefinition(resources=lots_of_resources)],
    )


if __name__ == '__main__':
    pipeline = define_resource_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_resource_pipeline)
            ),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_MUTATION, variables, instance=instance
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = instance.all_logs(run_config.run_id)
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value)
                    not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
                ):
                    continue
                if log.dagster_event.step_key:
                    key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                else:
                    key = log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            pipeline_run_storage = PipelineRunStorage()

            res = execute_query(
                handle, query, variables, pipeline_run_storage=pipeline_run_storage
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=pipeline, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }

            res = execute_query(handle, EXECUTE_PLAN_MUTATION, variables, instance=instance)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
            SolidInstance('hammer', alias='hammer_2'): {
                'chase_duration': DependencyDefinition('giver', 'out_2')
            },
            SolidInstance('hammer', alias='hammer_3'): {
                'chase_duration': DependencyDefinition('giver', 'out_3')
            },
            SolidInstance('hammer', alias='hammer_4'): {
                'chase_duration': DependencyDefinition('giver', 'out_4')
            },
            SolidInstance('total'): {
                'in_1': DependencyDefinition('hammer_1', 'total'),
                'in_2': DependencyDefinition('hammer_2', 'total'),
                'in_3': DependencyDefinition('hammer_3', 'total'),
                'in_4': DependencyDefinition('hammer_4', 'total'),
            },
        },
        mode_definitions=[ModeDefinition()],
    )


if __name__ == '__main__':
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_hammer_pipeline),
        env_config={'storage': {'filesystem': {}}},
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
    )
    print('Total Hammer Time: ', result.result_for_solid('total').transformed_value())
def test_exc_target_handle():
    res = ExecutionTargetHandle.for_pipeline_fn(define_pipeline)
    assert res.data.python_file == __file__
    assert res.data.fn_name == 'define_pipeline'