def test_job_post_process_config():
    """Build a job whose op config points at an env var, then run it once the var is set."""

    @op(config_schema={"foo": Field(StringSource)})
    def the_op(context):
        return context.op_config["foo"]

    @graph
    def basic():
        the_op()

    env_backed_config = {"ops": {"the_op": {"config": {"foo": {"env": "SOME_ENV_VAR"}}}}}

    # Ensure that the env var not existing will not throw an error, since
    # resolution happens in post-processing.
    with environ({"SOME_ENV_VAR": None}):
        the_job = basic.to_job(config=env_backed_config)

    with environ({"SOME_ENV_VAR": "blah"}):
        run_result = the_job.execute_in_process()
        assert run_result.success
def test_int_source():
    """Exercise IntSource with literal values and with ``{'env': ...}`` lookups."""

    def from_env():
        # One process_config call with a freshly built config dict per invocation.
        return process_config(IntSource, {'env': 'DAGSTER_TEST_ENV_VAR'})

    unset_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        'which is not set. In order for this execution to succeed it must be set in '
        'this environment.'
    )
    not_an_int_message = (
        'Value "four" stored in env variable "DAGSTER_TEST_ENV_VAR" cannot '
        'be coerced into an int.'
    )

    # Literal inputs: an int passes; a string or a non-string env name does not.
    assert process_config(IntSource, 1).success
    assert not process_config(IntSource, 'foo').success
    assert not process_config(IntSource, {'env': 1}).success

    # Precondition for the "unset" assertions below.
    assert 'DAGSTER_TEST_ENV_VAR' not in os.environ
    assert not from_env().success
    assert unset_message in from_env().errors[0].message

    with environ({'DAGSTER_TEST_ENV_VAR': '4'}):
        assert from_env().success
        assert from_env().value == 4

    with environ({'DAGSTER_TEST_ENV_VAR': 'four'}):
        assert not from_env().success
        assert not_an_int_message in from_env().errors[0].message
def test_int_source():
    """Check IntSource handling of plain values and of env-var references."""

    def resolve_env_reference():
        # Fresh config dict and a single process_config call each time.
        return process_config(IntSource, {"env": "DAGSTER_TEST_ENV_VAR"})

    missing_var_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        "which is not set. In order for this execution to succeed it must be set in "
        "this environment."
    )
    coercion_message = (
        'Value "four" stored in env variable "DAGSTER_TEST_ENV_VAR" cannot '
        "be coerced into an int."
    )

    # Plain values.
    assert process_config(IntSource, 1).success
    assert not process_config(IntSource, "foo").success
    assert not process_config(IntSource, {"env": 1}).success

    # Variable absent from the environment.
    assert "DAGSTER_TEST_ENV_VAR" not in os.environ
    assert not resolve_env_reference().success
    assert missing_var_message in resolve_env_reference().errors[0].message

    # Variable holds a parseable integer.
    with environ({"DAGSTER_TEST_ENV_VAR": "4"}):
        assert resolve_env_reference().success
        assert resolve_env_reference().value == 4

    # Variable holds text that is not an integer.
    with environ({"DAGSTER_TEST_ENV_VAR": "four"}):
        assert not resolve_env_reference().success
        assert coercion_message in resolve_env_reference().errors[0].message
def test_int_source():
    """Check IntSource results and error messages for literal and env-backed config."""

    def env_result():
        # Fresh config dict and a single process_config call each time.
        return process_config(IntSource, {'env': 'DAGSTER_TEST_ENV_VAR'})

    # Literal inputs.
    assert process_config(IntSource, 1).success
    assert not process_config(IntSource, 'foo').success
    assert not process_config(IntSource, {'env': 1}).success

    # Variable absent from the environment.
    assert 'DAGSTER_TEST_ENV_VAR' not in os.environ
    assert not env_result().success
    first_error = env_result().errors[0].message
    assert 'Environment variable "DAGSTER_TEST_ENV_VAR" is not set' in first_error

    with environ({'DAGSTER_TEST_ENV_VAR': '4'}):
        assert env_result().success
        assert env_result().value == 4

    with environ({'DAGSTER_TEST_ENV_VAR': 'four'}):
        assert not env_result().success
        first_error = env_result().errors[0].message
        assert "invalid literal for int() with base 10: 'four'" in first_error
def test_bool_source():
    """Check BoolSource handling of literal booleans and env-var references.

    Resolved values are compared with ``is`` rather than ``==`` so that the
    test also verifies the result is a real ``bool`` and not merely a value
    that compares equal to one (such as ``0`` or ``1``).
    """

    def from_env():
        # Fresh config dict and a single process_config call each time.
        return process_config(BoolSource, {"env": "DAGSTER_TEST_ENV_VAR"})

    unset_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        "which is not set. In order for this execution to succeed it must be set in "
        "this environment."
    )

    # Literal inputs: only actual booleans pass.
    assert process_config(BoolSource, True).success
    assert process_config(BoolSource, False).success
    assert not process_config(BoolSource, "False").success
    assert not process_config(BoolSource, "foo").success
    assert not process_config(BoolSource, 1).success
    assert not process_config(BoolSource, {"env": 1}).success

    # Variable absent from the environment.
    assert "DAGSTER_TEST_ENV_VAR" not in os.environ
    assert not from_env().success
    assert unset_message in from_env().errors[0].message

    # An empty string resolves to False.
    with environ({"DAGSTER_TEST_ENV_VAR": ""}):
        assert from_env().success
        assert from_env().value is False

    # A non-empty string resolves to True.
    with environ({"DAGSTER_TEST_ENV_VAR": "True"}):
        assert from_env().success
        assert from_env().value is True
def test_dagster_home_not_abspath(dirname):
    """Expect ``_dagster_home`` to raise when DAGSTER_HOME is set to ``dirname``."""
    expected_message = '$DAGSTER_HOME "{}" must be an absolute path.'.format(dirname)
    with environ({"DAGSTER_HOME": dirname}), pytest.raises(
        DagsterInvariantViolationError,
        match=re.escape(expected_message),
    ):
        _dagster_home()
def test_dagster_telemetry_disabled(caplog):
    """Run a pipeline with telemetry disabled in dagster.yaml and expect no log records."""
    with seven.TemporaryDirectory() as temp_dir, environ({"DAGSTER_HOME": temp_dir}):
        instance_config = {"telemetry": {"enabled": False}}
        with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
            yaml.dump(instance_config, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={"DAGSTER_HOME": temp_dir})

        with pushd(path_to_file("")):
            pipeline_name = "foo_pipeline"
            cli_args = ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_name]
            result = runner.invoke(pipeline_execute_command, cli_args)

        # No telemetry event log file and no captured log records are expected.
        event_log_path = os.path.join(get_dir_from_dagster_home("logs"), "event.log")
        assert not os.path.exists(event_log_path)
        assert len(caplog.records) == 0
        assert result.exit_code == 0
def test_dagster_telemetry_unset(caplog):
    """Run a pipeline with an empty dagster.yaml and inspect the captured log records."""
    with seven.TemporaryDirectory() as temp_dir, environ({"DAGSTER_HOME": temp_dir}):
        with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
            yaml.dump({}, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={"DAGSTER_HOME": temp_dir})

        with pushd(path_to_file("")):
            pipeline_attribute = "foo_pipeline"
            pipeline_name = "foo"
            cli_args = ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute]
            result = runner.invoke(pipeline_execute_command, cli_args)

        for record in caplog.records:
            payload = json.loads(record.getMessage())
            # Repo-stats records carry extra fields that are checked here.
            if payload.get("action") == UPDATE_REPO_STATS:
                assert payload.get("pipeline_name_hash") == hash_name(pipeline_name)
                assert payload.get("num_pipelines_in_repo") == str(1)
                assert payload.get("repo_hash") == hash_name(
                    get_ephemeral_repository_name(pipeline_name)
                )
            assert set(payload.keys()) == EXPECTED_KEYS

        assert len(caplog.records) == 5
        assert result.exit_code == 0
def test_dagster_home_raises(dirname):
    """Expect ``_dagster_home`` to raise when DAGSTER_HOME is set to ``dirname``.

    Args:
        dirname: Value assigned to DAGSTER_HOME for the duration of the test.
    """
    # Imported locally so this block does not depend on a module-level import.
    import re

    expected_message = "DAGSTER_HOME must be absolute path: {}".format(dirname)
    with environ({"DAGSTER_HOME": dirname}):
        with pytest.raises(
            DagsterInvariantViolationError,
            # ``match`` is used as a regular expression, so the path must be
            # escaped; otherwise characters such as "." or "\" in ``dirname``
            # are interpreted as regex syntax.
            match=re.escape(expected_message),
        ):
            _dagster_home()
def test_dagster_telemetry_upload(env):
    """Run a pipeline with telemetry enabled and expect one upload request.

    Args:
        env: Mapping of environment variables applied for the duration of the test.
    """
    logger = logging.getLogger("dagster_telemetry_logger")
    # Iterate over a copy: removeHandler mutates logger.handlers, and removing
    # from the list being iterated would skip every other handler.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    with environ(env):
        with instance_for_test(enable_telemetry=True):
            runner = CliRunner()
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                runner.invoke(
                    pipeline_execute_command,
                    ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
                )

            # Stop event that reports "not set" until wait() has been called once.
            mock_stop_event = mock.MagicMock()
            mock_stop_event.is_set.return_value = False

            def side_effect(*_args, **_kwargs):
                # A mock forwards the call's arguments to its side_effect, so
                # accept whatever wait() is invoked with (e.g. a timeout).
                mock_stop_event.is_set.return_value = True

            mock_stop_event.wait.side_effect = side_effect

            upload_logs(mock_stop_event)
            assert responses.assert_call_count(DAGSTER_TELEMETRY_URL, 1)
def test_lazy_load_via_env_var():
    """Start ``dagster api grpc`` with lazy loading enabled via the environment.

    The server is pointed at ``grpc_repo_with_error.py``; the test expects it
    to come up and to report the load error through ``list_repositories``.
    """
    with environ({"DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE": "1"}):
        port = find_free_port()
        python_file = file_relative_path(__file__, "grpc_repo_with_error.py")

        subprocess_args = [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            python_file,
        ]
        process = subprocess.Popen(subprocess_args, stdout=subprocess.PIPE)

        try:
            wait_for_grpc_server(
                process, DagsterGrpcClient(port=port, host="localhost"), subprocess_args
            )
            list_repositories_response = deserialize_json_to_dagster_namedtuple(
                DagsterGrpcClient(port=port).list_repositories()
            )
            assert isinstance(list_repositories_response, SerializableErrorInfo)
            assert "No module named" in list_repositories_response.message
        finally:
            process.terminate()
            # Reap the child so it is not left behind as a zombie process.
            process.wait()
def test_dagster_telemetry_upload(env):
    """Run a pipeline with telemetry enabled and expect one POST to the telemetry URL.

    Args:
        env: Mapping of environment variables applied for the duration of the test.
    """
    logger = logging.getLogger("dagster_telemetry_logger")
    # Iterate over a copy: removeHandler mutates logger.handlers, and removing
    # from the list being iterated would skip every other handler.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    responses.add(responses.POST, DAGSTER_TELEMETRY_URL)

    with instance_for_test(overrides={"telemetry": {"enabled": True}}):
        with environ(env):
            runner = CliRunner()
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                runner.invoke(
                    pipeline_execute_command,
                    ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
                )

            # Stop event that reports "not set" until wait() has been called once.
            mock_stop_event = mock.MagicMock()
            mock_stop_event.is_set.return_value = False

            def side_effect(_):
                mock_stop_event.is_set.return_value = True

            mock_stop_event.wait.side_effect = side_effect

            # Needed to avoid file contention issues on windows with the telemetry log file
            cleanup_telemetry_logger()

            upload_logs(mock_stop_event, raise_errors=True)
            assert responses.assert_call_count(DAGSTER_TELEMETRY_URL, 1)
def test_event_log_subscription_chunked():
    """Start and end an event-log subscription and check that its objects are collected.

    The object counts rely on garbage collection, so no extra references to
    the subscription objects are kept beyond the ones explicitly dropped below.
    """
    server = DagsterSubscriptionServer(schema=create_schema())

    with instance_for_test() as instance, environ({"DAGIT_EVENT_LOAD_CHUNK_SIZE": "2"}):
        run = execute_pipeline(example_pipeline, instance=instance)
        assert run.success
        assert run.run_id

        with create_subscription_context(instance) as context:
            variables = {"runId": run.run_id}
            start_subscription(server, context, EVENT_LOG_SUBSCRIPTION, variables)
            gc.collect()

            # Exactly one observer and one subscription exist while subscribed.
            assert len(objgraph.by_type("SubscriptionObserver")) == 1
            live_subscriptions = objgraph.by_type("PipelineRunObservableSubscribe")
            assert len(live_subscriptions) == 1
            subscription = live_subscriptions[0]

            end_subscription(server, context)
            subscription.stopped.wait(30)

            # Drop the local references before collecting so they do not keep
            # the subscription objects alive.
            live_subscriptions = None
            subscription = None
            gc.collect()

            assert len(objgraph.by_type("SubscriptionObserver")) == 0
            assert len(objgraph.by_type("PipelineRunObservableSubscribe")) == 0
def cli_api_schedule_origin(schedule_name):
    """Yield the origin of ``schedule_name`` from ``the_repo`` in this file.

    DAGSTER_HOME is set to a temporary directory for as long as the generator
    is suspended at the ``yield``.
    """
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        repo = ReconstructableRepository.for_file(__file__, 'the_repo')
        reconstructable_schedule = repo.get_reconstructable_schedule(schedule_name)
        yield reconstructable_schedule.get_origin()
def central_timezone():
    """Generator that sets TZ to US/Central around its single ``yield``.

    ``time.tzset()`` is called once after TZ is set and once more in the
    ``finally`` clause, which runs after the ``environ`` block has exited.
    """
    tz_override = {"TZ": "US/Central"}
    try:
        with environ(tz_override):
            time.tzset()
            yield
    finally:
        # Re-read the timezone once the override block is done
        # (presumably TZ has been restored by then; confirm against environ).
        time.tzset()
def test_string_source():
    """Check StringSource handling of literal strings and env-var references."""

    def from_env():
        # Fresh config dict and a single process_config call each time.
        return process_config(StringSource, {'env': 'DAGSTER_TEST_ENV_VAR'})

    unset_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        'which is not set. In order for this execution to succeed it must be set in '
        'this environment.'
    )

    # Literal inputs: a string passes; an int or a non-string env name does not.
    assert process_config(StringSource, 'foo').success
    assert not process_config(StringSource, 1).success
    assert not process_config(StringSource, {'env': 1}).success

    # Variable absent from the environment.
    assert 'DAGSTER_TEST_ENV_VAR' not in os.environ
    assert not from_env().success
    assert unset_message in from_env().errors[0].message

    with environ({'DAGSTER_TEST_ENV_VAR': 'baz'}):
        assert from_env().success
        assert from_env().value == 'baz'
def check_cli_execute_file_pipeline(path, pipeline_fn_name, env_file=None):
    """Run ``dagster pipeline execute`` in a subprocess with a temporary DAGSTER_HOME.

    Args:
        path: Passed to the CLI via ``-f``.
        pipeline_fn_name: Passed to the CLI via ``-a``.
        env_file: Optional value passed via ``-c``; omitted when falsy.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. The error is printed before being re-raised.
    """
    from dagster.core.test_utils import environ

    with TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        cli_cmd = [
            sys.executable,
            '-m',
            'dagster',
            'pipeline',
            'execute',
            '-f',
            path,
            '-a',
            pipeline_fn_name,
        ]
        if env_file:
            cli_cmd.extend(['-c', env_file])

        try:
            subprocess.check_output(cli_cmd)
        except subprocess.CalledProcessError as cpe:
            print(cpe)  # pylint: disable=print-call
            raise cpe
def test_lazy_load_via_env_var():
    """Start ``dagster api grpc`` with lazy loading enabled via the environment.

    The server is pointed at ``grpc_repo_with_error.py``; the test expects it
    to come up and to report the load error through ``list_repositories``.
    """
    with environ({"DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE": "1"}):
        port = find_free_port()
        python_file = file_relative_path(__file__, "grpc_repo_with_error.py")
        ipc_output_file = _get_ipc_output_file()

        process = subprocess.Popen(
            [
                "dagster",
                "api",
                "grpc",
                "--port",
                str(port),
                "--python-file",
                python_file,
                "--ipc-output-file",
                ipc_output_file,
            ],
            stdout=subprocess.PIPE,
        )

        try:
            wait_for_grpc_server(process, ipc_output_file)
            list_repositories_response = DagsterGrpcClient(port=port).list_repositories()
            assert isinstance(list_repositories_response, SerializableErrorInfo)
            assert "No module named" in list_repositories_response.message
        finally:
            process.terminate()
            # Reap the child so it is not left behind as a zombie process.
            process.wait()
def test_dagster_telemetry_unset(caplog):
    """Run the hello_cereal pipeline with an empty dagster.yaml and inspect log records."""
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
            yaml.dump({}, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={'DAGSTER_HOME': temp_dir})

        with pushd(path_to_tutorial_file('')):
            cli_args = [
                '-f',
                path_to_tutorial_file('hello_cereal.py'),
                '-n',
                'hello_cereal_pipeline',
            ]
            result = runner.invoke(pipeline_execute_command, cli_args)

        # Every captured record must be JSON with exactly the expected keys.
        for record in caplog.records:
            payload = json.loads(record.getMessage())
            assert set(payload.keys()) == EXPECTED_KEYS

        assert len(caplog.records) == 4
        assert result.exit_code == 0
def test_load_via_env_var():
    """Start ``dagster api grpc`` with host and port supplied through the environment.

    No ``--port`` flag is passed on the command line; the test then pings the
    server on the port that was placed in ``DAGSTER_CLI_API_GRPC_PORT``.
    """
    port = find_free_port()
    python_file = file_relative_path(__file__, "grpc_repo.py")

    subprocess_args = [
        "dagster",
        "api",
        "grpc",
        "--python-file",
        python_file,
    ]

    with environ(
        {"DAGSTER_CLI_API_GRPC_HOST": "localhost", "DAGSTER_CLI_API_GRPC_PORT": str(port)}
    ):
        process = subprocess.Popen(subprocess_args, stdout=subprocess.PIPE)

        try:
            wait_for_grpc_server(
                process, DagsterGrpcClient(port=port, host="localhost"), subprocess_args
            )
            assert DagsterGrpcClient(port=port).ping("foobar") == "foobar"
        finally:
            process.terminate()
            # Reap the child so it is not left behind as a zombie process.
            process.wait()
def test_bad_should_execute():
    """Launch ``bad_should_execute_schedule`` and expect a failed result and a failed tick."""
    expected_error = (
        'Error occurred during the execution of should_execute for schedule bad_should_execute_schedule'
    )

    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        instance = DagsterInstance.get()
        recon_repo = ReconstructableRepository.for_file(__file__, 'the_repo')
        bad_should_execute = recon_repo.get_reconstructable_schedule(
            'bad_should_execute_schedule'
        )

        # The launch itself reports the failure and produces no run.
        result = sync_launch_scheduled_execution(bad_should_execute.get_origin())
        assert isinstance(result, ScheduledExecutionFailed)
        assert expected_error in result.errors[0].to_string()
        assert not result.run_id

        # The first stored tick records the same failure.
        ticks = instance.get_schedule_ticks(bad_should_execute.get_origin_id())
        first_tick = ticks[0]
        assert first_tick.status == ScheduleTickStatus.FAILURE
        assert expected_error in first_tick.error.message
def test_config_unique_value(self):
    """Create a test instance with a QueuedRunCoordinator config that uses env vars
    and a per-unique-value tag concurrency limit; the test only requires that
    instance construction succeeds."""
    tag_limits = [
        {
            "key": "foo",
            "value": {"applyLimitPerUniqueValue": True},
            "limit": 3,
        },
        {"key": "backfill", "limit": 2},
    ]
    run_coordinator = {
        "module": "dagster.core.run_coordinator",
        "class": "QueuedRunCoordinator",
        "config": {
            "max_concurrent_runs": {
                "env": "MAX_RUNS",
            },
            "tag_concurrency_limits": tag_limits,
            "dequeue_interval_seconds": {
                "env": "DEQUEUE_INTERVAL",
            },
        },
    }

    with environ({"MAX_RUNS": "10", "DEQUEUE_INTERVAL": "7"}):
        with instance_for_test(overrides={"run_coordinator": run_coordinator}) as _:
            pass
def test_dagster_home_not_set():
    """Expect ``_dagster_home`` to raise when DAGSTER_HOME is an empty string."""
    expected_pattern = r"The environment variable \$DAGSTER_HOME is not set\."
    with environ({"DAGSTER_HOME": ""}), pytest.raises(
        DagsterInvariantViolationError,
        match=expected_pattern,
    ):
        _dagster_home()
def test_dagster_home_not_set():
    """Expect ``DagsterInstance.get`` to raise when DAGSTER_HOME is an empty string."""
    expected_pattern = r"The environment variable \$DAGSTER_HOME is not set\."
    with environ({"DAGSTER_HOME": ""}), pytest.raises(
        DagsterHomeNotSetError,
        match=expected_pattern,
    ):
        DagsterInstance.get()
def test_dagster_telemetry_disabled(caplog):
    """Run hello_cereal with telemetry disabled in dagster.yaml and expect no log records."""
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        instance_config = {'telemetry': {'enabled': False}}
        with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
            yaml.dump(instance_config, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={'DAGSTER_HOME': temp_dir})

        with pushd(path_to_tutorial_file('')):
            pipeline_name = 'hello_cereal_pipeline'
            cli_args = ['-f', path_to_tutorial_file('hello_cereal.py'), '-n', pipeline_name]
            result = runner.invoke(pipeline_execute_command, cli_args)

        # No telemetry event log file and no captured log records are expected.
        event_log_path = os.path.join(get_dir_from_dagster_home('logs'), 'event.log')
        assert not os.path.exists(event_log_path)
        assert len(caplog.records) == 0
        assert result.exit_code == 0
def test_schedules():
    """Launch two schedules from the legacy repository.yaml and expect both to succeed."""
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        # Instance config selecting the filesystem test scheduler, rooted in temp_dir.
        instance_config = {
            'scheduler': {
                'module': 'dagster.utils.test',
                'class': 'FilesystemTestScheduler',
                'config': {'base_dir': temp_dir},
            }
        }
        with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
            yaml.dump(instance_config, fd, default_flow_style=False)

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        recon_repo = ReconstructableRepository.from_legacy_repository_yaml(repository_yaml)

        for schedule_name in ('many_events_every_min', 'pandas_hello_world_hourly'):
            schedule = recon_repo.get_reconstructable_schedule(schedule_name)
            result = sync_launch_scheduled_execution(schedule.get_origin())
            assert isinstance(result, ScheduledExecutionSuccess)
def test_dagster_telemetry_enabled(caplog):
    """Run hello_cereal with telemetry enabled in dagster.yaml and inspect log records."""
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        instance_config = {'telemetry': {'enabled': True}}
        with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
            yaml.dump(instance_config, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={'DAGSTER_HOME': temp_dir})

        with pushd(path_to_tutorial_file('')):
            pipeline_name = 'hello_cereal_pipeline'
            cli_args = ['-f', path_to_tutorial_file('hello_cereal.py'), '-n', pipeline_name]
            result = runner.invoke(pipeline_execute_command, cli_args)

        for record in caplog.records:
            payload = json.loads(record.getMessage())
            # Repo-stats records carry extra fields that are checked here.
            if payload.get('action') == UPDATE_REPO_STATS:
                assert payload.get('pipeline_name_hash') == hash_name('hello_cereal_pipeline')
                assert payload.get('num_pipelines_in_repo') == str(1)
                assert payload.get('repo_hash') == hash_name(EPHEMERAL_NAME)
            assert set(payload.keys()) == EXPECTED_KEYS

        assert len(caplog.records) == 5
        assert result.exit_code == 0
def test_load_from_grpc_server_env():
    """Validate ``grpc_server`` workspace entries whose fields come from env vars.

    Two documents are validated: one that gives host and port, and one that
    gives host and socket. Each result is asserted immediately after it is
    produced so that neither check is lost when the variable is reused.
    """
    with environ(
        {
            "TEST_EXECUTABLE_PATH": "executable/path/bin/python",
            "FOO_PORT": "1234",
            "FOO_SOCKET": "barsocket",
            "FOO_HOST": "barhost",
        }
    ):
        # Host + port variant.
        valid_yaml = """
        load_from:
          - grpc_server:
              host:
                env: FOO_HOST
              port:
                env: FOO_PORT
              location_name: 'my_grpc_server'
        """
        validation_result = _validate_yaml_contents(valid_yaml)
        assert validation_result.success

        # Host + socket variant.
        valid_socket_yaml = """
        load_from:
          - grpc_server:
              host:
                env: FOO_HOST
              socket:
                env: FOO_SOCKET
              location_name: 'my_grpc_server'
        """
        validation_result = _validate_yaml_contents(valid_socket_yaml)
        assert validation_result.success
def test_repo_stats(caplog):
    """Run a pipeline from repository.yaml and check the repo-stats telemetry record."""
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
            yaml.dump({}, fd, default_flow_style=False)

        DagsterInstance.local_temp(temp_dir)
        runner = CliRunner(env={'DAGSTER_HOME': temp_dir})

        with pushd(path_to_tutorial_file('')):
            pipeline_name = 'multi_mode_with_resources'
            cli_args = [
                '-y',
                file_relative_path(__file__, '../repository.yaml'),
                '-p',
                'add',
                '--tags',
                '{ "foo": "bar" }',
                pipeline_name,
            ]
            result = runner.invoke(pipeline_execute_command, cli_args)

        for record in caplog.records:
            payload = json.loads(record.getMessage())
            # Repo-stats records carry extra fields that are checked here.
            if payload.get('action') == UPDATE_REPO_STATS:
                assert payload.get('pipeline_name_hash') == hash_name(pipeline_name)
                assert payload.get('num_pipelines_in_repo') == str(4)
                assert payload.get('repo_hash') == hash_name('dagster_test_repository')
            assert set(payload.keys()) == EXPECTED_KEYS

        assert len(caplog.records) == 5
        assert result.exit_code == 0
def test_invalid_instance_run(get_workspace):
    """Launch a run while the workspace sees a different run-storage directory.

    The instance is created with RUN_STORAGE_ENV pointing at one directory;
    the workspace is then opened with the variable pointing at another. The
    launch is expected to raise and the run to end up in FAILURE status.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        correct_run_storage_dir = os.path.join(temp_dir, "history", "")
        wrong_run_storage_dir = os.path.join(temp_dir, "wrong", "")

        run_storage_override = {
            "run_storage": {
                "module": "dagster.core.storage.runs",
                "class": "SqliteRunStorage",
                "config": {"base_dir": {"env": "RUN_STORAGE_ENV"}},
            }
        }

        with environ({"RUN_STORAGE_ENV": correct_run_storage_dir}), instance_for_test(
            temp_dir=temp_dir,
            overrides=run_storage_override,
        ) as instance:
            # Server won't be able to load the run from run storage
            with environ({"RUN_STORAGE_ENV": wrong_run_storage_dir}), get_workspace(
                instance
            ) as workspace:
                location = workspace.get_repository_location("test")
                repository = location.get_repository("nope")
                external_pipeline = repository.get_full_external_pipeline("noop_pipeline")

                pipeline_run = instance.create_run_for_pipeline(
                    pipeline_def=noop_pipeline,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )

                expected_message = (
                    "gRPC server could not load run {run_id} in order to execute it".format(
                        run_id=pipeline_run.run_id
                    )
                )
                with pytest.raises(DagsterLaunchFailedError, match=re.escape(expected_message)):
                    instance.launch_run(run_id=pipeline_run.run_id, workspace=workspace)

                failed_run = instance.get_run_by_id(pipeline_run.run_id)
                assert failed_run.status == PipelineRunStatus.FAILURE