def test_execute_execute_plan_mutation_raw():
    pipeline_name = 'sleepy_pipeline'
    pipeline = sleepy_recon_pipeline()
    instance = DagsterInstance.local_temp()
    workspace = load_sleepy_workspace(instance)

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=pipeline.get_definition())
    variables = {
        'executionParams': {
            'runConfigData': {},
            'mode': 'default',
            'selector': {
                'repositoryLocationName': get_ephemeral_repository_name(pipeline_name),
                'repositoryName': get_ephemeral_repository_name(pipeline_name),
                'pipelineName': pipeline_name,
            },
            'executionMetadata': {'runId': pipeline_run.run_id},
        }
    }
    result = execute_execute_plan_mutation_raw(
        workspace, variables, instance_ref=instance.get_ref()
    )
    seen_events = set()
    for event in result:
        seen_events.add((event.dagster_event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS

def test_execute_execute_plan_mutation_out_of_process_fails():
    pipeline_name = "sleepy_pipeline"
    instance = DagsterInstance.local_temp()
    pipeline = sleepy_recon_pipeline()
    workspace = load_sleepy_workspace(instance)

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=pipeline.get_definition())
    variables = {
        "executionParams": {
            "runConfigData": {},
            "mode": "default",
            "selector": {
                "repositoryLocationName": get_ephemeral_repository_name(pipeline_name),
                "repositoryName": get_ephemeral_repository_name(pipeline_name),
                "pipelineName": pipeline_name,
            },
            "executionMetadata": {"runId": pipeline_run.run_id},
        }
    }
    with pytest.raises(
        DagsterGraphQLClientError,
        match=re.escape(
            "execute_plan is not supported for out-of-process repository locations"
        ),
    ):
        execute_execute_plan_mutation(workspace, variables, instance_ref=instance.get_ref())

def test_execute_execute_plan_mutation():
    pipeline_name = "sleepy_pipeline"
    instance = DagsterInstance.local_temp()
    pipeline = sleepy_recon_pipeline()
    workspace = load_sleepy_workspace(instance)

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=pipeline.get_definition())
    variables = {
        "executionParams": {
            "runConfigData": {},
            "mode": "default",
            "selector": {
                "repositoryLocationName": get_ephemeral_repository_name(pipeline_name),
                "repositoryName": get_ephemeral_repository_name(pipeline_name),
                "pipelineName": pipeline_name,
            },
            "executionMetadata": {"runId": pipeline_run.run_id},
        }
    }
    result = execute_execute_plan_mutation(workspace, variables, instance_ref=instance.get_ref())
    seen_events = set()
    for event in result:
        seen_events.add((event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS

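# Hedged sketch: EXPECTED_EVENTS is defined elsewhere in the real test module,
# so its exact membership is not shown here. Judging from the assertions above,
# it is a set of (event_type_value, step_key) tuples covering every event the
# sleepy pipeline emits. The step key "sleepy_solid" and the specific event
# types below are assumptions for illustration only, not taken from the source.
EXPECTED_EVENTS = {
    ("ENGINE_EVENT", None),
    ("STEP_START", "sleepy_solid"),
    ("STEP_OUTPUT", "sleepy_solid"),
    ("STEP_SUCCESS", "sleepy_solid"),
}
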
def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_dagster_telemetry_enabled(caplog):
    with instance_for_test(overrides={"telemetry": {"enabled": True}}):
        runner = CliRunner()
        with pushd(path_to_file("")):
            pipeline_attribute = "foo_pipeline"
            pipeline_name = "foo"
            result = runner.invoke(
                pipeline_execute_command,
                ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
            )

            for record in caplog.records:
                message = json.loads(record.getMessage())
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                    assert message.get("num_pipelines_in_repo") == str(1)
                    assert message.get("repo_hash") == hash_name(
                        get_ephemeral_repository_name(pipeline_name)
                    )
                assert set(message.keys()) == EXPECTED_KEYS

            assert len(caplog.records) == 5
            assert result.exit_code == 0

def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_attribute = 'foo_pipeline'
                pipeline_name = 'foo'
                result = runner.invoke(
                    pipeline_execute_command,
                    ['-f', path_to_file('test_cli_commands.py'), '-a', pipeline_attribute],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name(pipeline_name)
                        assert message.get('num_pipelines_in_repo') == str(1)
                        assert message.get('repo_hash') == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def log_repo_stats(instance, source, pipeline=None, repo=None):
    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(source, "source")
    check.opt_inst_param(pipeline, "pipeline", IPipeline)
    check.opt_inst_param(repo, "repo", ReconstructableRepository)

    if _get_instance_telemetry_enabled(instance):
        instance_id = _get_or_set_instance_id()

        if isinstance(pipeline, ReconstructablePipeline):
            # Pipeline loaded from a reconstructable repository: report stats
            # for the repository it belongs to
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repository = pipeline.get_reconstructable_repository().get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
            num_schedules_in_repo = len(repository.schedule_defs)
            num_sensors_in_repo = len(repository.sensor_defs)
        elif isinstance(repo, ReconstructableRepository):
            # Repository passed directly: no single pipeline to hash
            pipeline_name_hash = ""
            repository = repo.get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
            num_schedules_in_repo = len(repository.schedule_defs)
            num_sensors_in_repo = len(repository.sensor_defs)
        else:
            # Ephemeral pipeline: synthesize a single-pipeline repository
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repo_hash = hash_name(get_ephemeral_repository_name(pipeline.get_definition().name))
            num_pipelines_in_repo = 1
            num_schedules_in_repo = 0
            num_sensors_in_repo = 0

        write_telemetry_log_line(
            TelemetryEntry(
                action=UPDATE_REPO_STATS,
                client_time=str(datetime.datetime.now()),
                event_id=str(uuid.uuid4()),
                instance_id=instance_id,
                metadata={
                    "source": source,
                    "pipeline_name_hash": pipeline_name_hash,
                    "num_pipelines_in_repo": str(num_pipelines_in_repo),
                    "num_schedules_in_repo": str(num_schedules_in_repo),
                    "num_sensors_in_repo": str(num_sensors_in_repo),
                    "repo_hash": repo_hash,
                },
            )._asdict()
        )

def log_repo_stats(instance, source, pipeline=None, repo=None):
    check.inst_param(instance, 'instance', DagsterInstance)
    check.str_param(source, 'source')
    check.opt_inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.opt_inst_param(repo, 'repo', ReconstructableRepository)

    if _get_instance_telemetry_enabled(instance):
        instance_id = _get_or_set_instance_id()

        if isinstance(pipeline, ReconstructablePipeline):
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repository = pipeline.get_reconstructable_repository().get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
        elif isinstance(repo, ReconstructableRepository):
            pipeline_name_hash = ''
            repository = repo.get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
        else:
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repo_hash = hash_name(get_ephemeral_repository_name(pipeline.get_definition().name))
            num_pipelines_in_repo = 1

        write_telemetry_log_line(
            TelemetryEntry(
                action=UPDATE_REPO_STATS,
                client_time=str(datetime.datetime.now()),
                event_id=str(uuid.uuid4()),
                instance_id=instance_id,
                pipeline_name_hash=pipeline_name_hash,
                num_pipelines_in_repo=str(num_pipelines_in_repo),
                repo_hash=repo_hash,
                metadata={'source': source},
            )._asdict()
        )

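# Hedged sketch of the hash_name helper used by log_repo_stats and the tests
# above; the real implementation lives in dagster's telemetry module. The
# intent, inferred from the call sites, is that telemetry only ever records a
# one-way hash of a pipeline or repository name, never the raw name. The exact
# hash function below is an assumption.
import hashlib


def hash_name(name):
    # One-way hash of the UTF-8 encoded name; only the digest is logged
    return hashlib.sha256(name.encode("utf-8")).hexdigest()
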
def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({"telemetry": {"enabled": True}}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

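# Hedged sketch: EXPECTED_KEYS is defined elsewhere in the real test module.
# Judging from the TelemetryEntry constructed in log_repo_stats above, it is
# presumably the set of top-level field names serialized into each telemetry
# log line; fields beyond those visible above (e.g. "elapsed_time",
# "python_version", "version") are assumptions for illustration only.
EXPECTED_KEYS = {
    "action",
    "client_time",
    "elapsed_time",
    "event_id",
    "instance_id",
    "pipeline_name_hash",
    "num_pipelines_in_repo",
    "repo_hash",
    "python_version",
    "metadata",
    "version",
}
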
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()
    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {
                        'repositoryLocationName': get_ephemeral_repository_name(pipeline_def.name),
                        'repositoryName': get_ephemeral_repository_name(pipeline_def.name),
                        'pipelineName': pipeline_def.name,
                    },
                    'runConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                },
            }

            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # Walk the GraphQL response, decrementing the count for each event
            # record we see; the counts are incremented again below from the
            # events recorded in run storage, so the two sources must balance
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # Build up the same dict, incrementing the count for each event record
    # produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are the retry events not handled? Because right now there is no way to
    # configure retries on executePlan -- this needs to change, and we should
    # separate the ExecutionParams that get sent to executePlan from those that
    # get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY,
        DagsterEventType.STEP_RESTARTED,
    }

def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()
    pipeline_def = define_test_events_pipeline()
    workspace = create_in_process_ephemeral_workspace(
        pointer=CodePointer.from_python_file(
            __file__, define_test_events_pipeline.__name__, working_directory=None
        )
    )
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName": IN_PROCESS_NAME,
                        "repositoryName": get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName": pipeline_def.name,
                    },
                    "runConfigData": {"storage": {"filesystem": {}}},
                    "mode": mode,
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "stepKeys": [step.key],
                },
            }

            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # Walk the GraphQL response, decrementing the count for each event
            # record we see; the counts are incremented again below from the
            # events recorded in run storage, so the two sources must balance
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res["errors"])

    # Build up the same dict, incrementing the count for each event record
    # produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are the retry events not handled? Because right now there is no way to
    # configure retries on executePlan -- this needs to change, and we should
    # separate the ExecutionParams that get sent to executePlan from those that
    # get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY,
        DagsterEventType.STEP_RESTARTED,
    }

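# Hedged sketch of the shape of the EXECUTE_PLAN_MUTATION document referenced
# above; the real GraphQL document is defined in dagster-graphql and selects
# more fields. This abbreviated form is an assumption, shown only to make the
# res["data"]["executePlan"]["stepEvents"] access above legible.
EXECUTE_PLAN_MUTATION_SKETCH = """
mutation ExecutePlan($executionParams: ExecutionParams!) {
  executePlan(executionParams: $executionParams) {
    __typename
    ... on ExecutePlanSuccess {
      hasFailures
      stepEvents {
        __typename
        stepKey
      }
    }
  }
}
"""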