def test_dagster_telemetry_enabled(caplog):
    with instance_for_test(overrides={"telemetry": {"enabled": True}}):
        runner = CliRunner()
        with pushd(path_to_file("")):
            pipeline_attribute = "foo_pipeline"
            pipeline_name = "foo"
            result = runner.invoke(
                pipeline_execute_command,
                [
                    "-f",
                    path_to_file("test_cli_commands.py"),
                    "-a",
                    pipeline_attribute,
                ],
            )

            for record in caplog.records:
                message = json.loads(record.getMessage())
                if message.get("action") == UPDATE_REPO_STATS:
                    metadata = message.get("metadata")
                    assert metadata.get("pipeline_name_hash") == hash_name(pipeline_name)
                    assert metadata.get("num_pipelines_in_repo") == str(1)
                    assert metadata.get("repo_hash") == hash_name(
                        get_ephemeral_repository_name(pipeline_name)
                    )
                assert set(message.keys()) == EXPECTED_KEYS

            assert len(caplog.records) == 5
            assert result.exit_code == 0

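# Note: EXPECTED_KEYS is imported by these tests rather than defined in this section.
# As a hedged illustration only, the sketch below assumes it matches the explicit key
# set asserted in test_dagit_logs further down; the name is suffixed with _SKETCH so
# it cannot shadow the real constant.
_EXPECTED_KEYS_SKETCH = set(
    [
        "action",
        "client_time",
        "elapsed_time",
        "event_id",
        "instance_id",
        "pipeline_name_hash",
        "num_pipelines_in_repo",
        "repo_hash",
        "python_version",
        "metadata",
        "version",
    ]
)
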
def test_repo_stats(caplog):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test(temp_dir=temp_dir, overrides={"telemetry": {"enabled": True}}):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(__file__, "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        metadata = message.get("metadata")
                        assert metadata.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert metadata.get("num_pipelines_in_repo") == str(4)
                        assert metadata.get("repo_hash") == hash_name("dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({'telemetry': {'enabled': True}}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_name = 'foo_pipeline'
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        '-f',
                        path_to_file('test_cli_commands.py'),
                        '-a',
                        pipeline_name,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name('foo')
                        assert message.get('num_pipelines_in_repo') == str(1)
                        assert message.get('repo_hash') == hash_name(EPHEMERAL_NAME)
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_dagster_telemetry_unset(caplog):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"),
                        "-a",
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    run_config = run_request.run_config
    schedule_tags = run_request.tags

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        external_schedule.mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    # Telemetry only ever receives hashed names, never the raw schedule/repo/pipeline names.
    log_action(
        instance,
        SCHEDULED_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SCHEDULE_NAME_HASH": hash_name(external_schedule.name),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )

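# Note: get_telemetry_daemon_session_id is imported lazily (inside the daemon functions
# in this section) so dagster.daemon is only loaded when a run is actually created.
# A minimal sketch of the behavior the call sites imply -- one stable session id per
# daemon process -- offered as an assumption, not the verbatim upstream implementation.
import uuid

# Generated once at import time, so every run created by this process shares the id.
_TELEMETRY_DAEMON_SESSION_ID_SKETCH = str(uuid.uuid4())


def _get_telemetry_daemon_session_id_sketch():
    return _TELEMETRY_DAEMON_SESSION_ID_SKETCH
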
def test_dagit_logs(
    server_mock,
    caplog,
):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test(temp_dir=temp_dir, overrides={"telemetry": {"enabled": True}}):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            workspace_path = file_relative_path(__file__, "telemetry_repository.yaml")
            result = runner.invoke(
                ui,
                ["-w", workspace_path],
            )
            assert result.exit_code == 0, str(result.exception)

            expected_repo_stats = {
                hash_name("test_repository"): 1,
                hash_name("dagster_test_repository"): 4,
            }
            actions = set()
            records = []
            for record in caplog.records:
                try:
                    message = json.loads(record.getMessage())
                except seven.JSONDecodeError:
                    continue
                records.append(record)

                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    repo_hash = message.get("repo_hash")
                    assert repo_hash in expected_repo_stats
                    expected_num_pipelines_in_repo = expected_repo_stats.get(repo_hash)
                    assert message.get("num_pipelines_in_repo") == str(
                        expected_num_pipelines_in_repo
                    )
                assert set(message.keys()) == set(
                    [
                        "action",
                        "client_time",
                        "elapsed_time",
                        "event_id",
                        "instance_id",
                        "pipeline_name_hash",
                        "num_pipelines_in_repo",
                        "repo_hash",
                        "python_version",
                        "metadata",
                        "version",
                    ]
                )

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(records) == 3
            assert server_mock.call_args_list == [mock.call()]

def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    # Layer tags: pipeline tags first, then run-request tags, then sensor bookkeeping tags.
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    # Telemetry only ever receives hashed names, never the raw sensor/repo/pipeline names.
    log_action(
        instance,
        SENSOR_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SENSOR_NAME_HASH": hash_name(external_sensor.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
            "repo_hash": hash_name(repo_location.name),
        },
    )

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )

def test_hash_name():
    pipelines = ['pipeline_1', 'pipeline_2', 'pipeline_3']
    hashes = [hash_name(p) for p in pipelines]
    for h in hashes:
        assert len(h) == 64

    assert SequenceMatcher(None, hashes[0], hashes[1]).ratio() < 0.4
    assert SequenceMatcher(None, hashes[0], hashes[2]).ratio() < 0.4
    assert SequenceMatcher(None, hashes[1], hashes[2]).ratio() < 0.4

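# Note: hash_name comes from dagster's telemetry module. The length-64 assertion above
# matches a SHA-256 hex digest, so a plausible sketch looks like the following; this is
# an assumption consistent with the test, not necessarily the exact upstream code.
import hashlib


def _hash_name_sketch(name):
    # SHA-256 hex digests are always 64 characters, which is what test_hash_name checks.
    return hashlib.sha256(name.encode("utf-8")).hexdigest()
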
def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(__file__, "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(4)
                        assert message.get("repo_hash") == hash_name("dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_name = 'multi_mode_with_resources'
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        '-w',
                        file_relative_path(__file__, '../workspace.yaml'),
                        '-p',
                        pipeline_name,
                        '--preset',
                        'add',
                        '--tags',
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name(pipeline_name)
                        assert message.get('num_pipelines_in_repo') == str(4)
                        assert message.get('repo_hash') == hash_name('dagster_test_repository')
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_dagit_logs(
    server_mock,
    caplog,
):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_tutorial_file('')):
                result = runner.invoke(
                    ui,
                    [
                        '-w',
                        file_relative_path(__file__, 'telemetry_repository.yaml'),
                    ],
                )
                assert result.exit_code == 0, str(result.exception)

                actions = set()
                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    actions.add(message.get('action'))
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == ''
                        assert message.get('num_pipelines_in_repo') == str(4)
                        assert message.get('repo_hash') == hash_name('dagster_test_repository')
                    assert set(message.keys()) == set(
                        [
                            'action',
                            'client_time',
                            'elapsed_time',
                            'event_id',
                            'instance_id',
                            'pipeline_name_hash',
                            'num_pipelines_in_repo',
                            'repo_hash',
                            'python_version',
                            'metadata',
                            'version',
                        ]
                    )

                assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
                assert len(caplog.records) == 2
                assert server_mock.call_args_list == [mock.call()]

def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({"telemetry": {"enabled": True}}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"),
                        "-a",
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0

def test_dagit_logs(
    server_mock,
    caplog,
):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            result = runner.invoke(
                ui,
                [
                    "-w",
                    file_relative_path(__file__, "telemetry_repository.yaml"),
                ],
            )
            assert result.exit_code == 0, str(result.exception)

            actions = set()
            for record in caplog.records:
                message = json.loads(record.getMessage())
                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    assert message.get("num_pipelines_in_repo") == str(4)
                    assert message.get("repo_hash") == hash_name("dagster_test_repository")
                assert set(message.keys()) == set(
                    [
                        "action",
                        "client_time",
                        "elapsed_time",
                        "event_id",
                        "instance_id",
                        "pipeline_name_hash",
                        "num_pipelines_in_repo",
                        "repo_hash",
                        "python_version",
                        "metadata",
                        "version",
                    ]
                )

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(caplog.records) == 2
            assert server_mock.call_args_list == [mock.call()]

def create_backfill_run(
    instance, repo_location, external_pipeline, external_partition_set, backfill_job, partition_data
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set", ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data", ExternalPartitionExecutionParamData)

    # Report backfill-run creation to telemetry using hashed identifiers only.
    log_action(
        instance,
        BACKFILL_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    solids_to_execute = None
    solid_selection = None
    # Three cases: a fresh partition run, re-execution from failure, or re-execution
    # of an explicit step selection.
    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None
        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection
    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set, partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None
        return instance.create_reexecuted_run_from_failure(
            last_run,
            repo_location,
            external_pipeline,
            tags=tags,
            run_config=partition_data.run_config,
            mode=external_partition_set.mode,
        )
    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set, partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(
                tags, {PARENT_RUN_ID_TAG: parent_run_id, ROOT_RUN_ID_TAG: root_run_id}
            )
        step_keys_to_execute = backfill_job.reexecution_steps
        # Only carry forward intermediate state if the parent run actually succeeded.
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None
        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance=instance,
    )

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=solids_to_execute,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
        solid_selection=solid_selection,
    )

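# Note: _fetch_last_run is called by create_backfill_run above but is not defined in this
# section. A hedged sketch of what the call sites imply -- fetch the most recent run for
# the given partition via its partition-set tags. The import paths and PipelineRunsFilter
# name are assumptions about the dagster version this code targets, not verbatim source.
from dagster.core.storage.pipeline_run import PipelineRunsFilter
from dagster.core.storage.tags import PARTITION_NAME_TAG, PARTITION_SET_TAG


def _fetch_last_run_sketch(instance, external_partition_set, partition_name):
    # Runs come back most recent first; limit=1 keeps only the latest matching run.
    runs = instance.get_runs(
        PipelineRunsFilter(
            pipeline_name=external_partition_set.pipeline_name,
            tags={
                PARTITION_SET_TAG: external_partition_set.name,
                PARTITION_NAME_TAG: partition_name,
            },
        ),
        limit=1,
    )
    return runs[0] if runs else None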