def test_can_handle_all_step_events():
    '''This test is designed to ensure we catch the case when new step events are added, as they
    must be handled by the event parsing, but this does not check that the event parsing works
    correctly.
    '''
    handled = set(HANDLED_EVENTS.values())
    # The distinction between "step events" and "pipeline events" needs to be reexamined
    assert handled == STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
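# A minimal, self-contained sketch (not part of the test above) of the coverage invariant it
# asserts. It assumes HANDLED_EVENTS maps GraphQL event __typename strings to DagsterEventType
# members and STEP_EVENTS is a set of DagsterEventType members; the toy enum and mapping below
# are hypothetical stand-ins, not the real dagster definitions.
from enum import Enum


class _ToyEventType(Enum):
    STEP_START = 'STEP_START'
    STEP_SUCCESS = 'STEP_SUCCESS'
    ENGINE_EVENT = 'ENGINE_EVENT'


_TOY_HANDLED_EVENTS = {
    'ExecutionStepStartEvent': _ToyEventType.STEP_START,
    'ExecutionStepSuccessEvent': _ToyEventType.STEP_SUCCESS,
    'EngineEvent': _ToyEventType.ENGINE_EVENT,
}
_TOY_STEP_EVENTS = {_ToyEventType.STEP_START, _ToyEventType.STEP_SUCCESS}

# Adding a new step event type without a corresponding handler entry breaks this set equality.
assert set(_TOY_HANDLED_EVENTS.values()) == _TOY_STEP_EVENTS.union({_ToyEventType.ENGINE_EVENT})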
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_MUTATION, variables, instance=instance
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']
                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]
                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

            # build up a dict, incrementing all the event records we've produced in the run
            # storage
            logs = instance.all_logs(run_config.run_id)
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value)
                    not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
                ):
                    continue
                if log.dagster_event.step_key:
                    key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                else:
                    key = log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
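# A minimal, self-contained sketch (not part of the tests) of the double-entry bookkeeping used
# above and in the later variants of this test: each event record read back from run storage
# increments a counter keyed by step key and event type, each event seen in the GraphQL response
# decrements it, and a mismatch in the total number of records leaves a nonzero sum. The event
# keys below are hypothetical examples.
from collections import defaultdict

_counts = defaultdict(int)

_storage_events = ['solid_a.STEP_START', 'solid_a.STEP_SUCCESS', 'ENGINE_EVENT']
_graphql_events = ['solid_a.STEP_START', 'solid_a.STEP_SUCCESS', 'ENGINE_EVENT']

for _key in _storage_events:
    _counts[_key] += 1  # produced in run storage
for _key in _graphql_events:
    _counts[_key] -= 1  # observed in the GraphQL response

# With matching records from both sources, everything cancels out.
assert sum(_counts.values()) == 0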
def test_all_step_events():  # pylint: disable=too-many-locals
    pipeline = reconstructable(define_test_events_pipeline)
    pipeline_def = pipeline.get_definition()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline_def.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }

            res = execute_query(
                pipeline.get_reconstructable_repository(),
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure
    # retries on executePlan -- this needs to change, and we should separate the ExecutionParams
    # that get sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY,
        DagsterEventType.STEP_RESTARTED,
    }
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()
    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__, working_directory=None),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName": get_ephemeral_repository_name(pipeline_def.name),
                        "repositoryName": get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName": pipeline_def.name,
                    },
                    "runConfigData": {"storage": {"filesystem": {}}},
                    "mode": mode,
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "stepKeys": [step.key],
                },
            }

            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure
    # retries on executePlan -- this needs to change, and we should separate the ExecutionParams
    # that get sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY,
        DagsterEventType.STEP_RESTARTED,
    }