def test_server_down():
    """Check that termination is refused while the run's gRPC tag names another port.

    Launches ``sleepy_pipeline`` through the instance's run launcher, overwrites
    the run's ``GRPC_INFO_TAG`` with a port from ``find_free_port()``, asserts
    that ``can_terminate`` is false, then restores the tag and terminates.
    """
    with grpc_instance() as instance:
        legacy_yaml_path = file_relative_path(__file__, "repo.yaml")
        repo_origin = ReconstructableRepository.from_legacy_repository_yaml(
            legacy_yaml_path
        ).get_origin()

        server_process = GrpcServerProcess(
            loadable_target_origin=repo_origin.loadable_target_origin,
            max_workers=4,
            force_port=True,
        )

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("sleepy_pipeline")

            run = instance.create_run_for_pipeline(pipeline_def=sleepy_pipeline, run_config=None)
            run_id = run.run_id

            run_launcher = instance.run_launcher
            run_launcher.launch_run(instance, run, external_pipeline)
            poll_for_step_start(instance, run_id)

            assert run_launcher.can_terminate(run_id)

            # Remember the real tag so it can be put back afterwards.
            saved_grpc_info = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = seven.json.dumps(
                merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            )
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: bogus_grpc_info})

            assert not run_launcher.can_terminate(run_id)

            instance.add_run_tags(run_id, {GRPC_INFO_TAG: saved_grpc_info})

            assert run_launcher.terminate(run_id)

        server_process.wait()
def test_server_down():
    """Check that termination is refused while the run's gRPC tag names another port.

    Launches ``sleepy_pipeline`` via ``instance.launch_run``, overwrites the
    run's ``GRPC_INFO_TAG`` with a port from ``find_free_port()``, asserts that
    ``can_terminate`` is false, then restores the tag and terminates.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("sleepy_pipeline")

            run = instance.create_run_for_pipeline(pipeline_def=sleepy_pipeline, run_config=None)
            run_id = run.run_id

            instance.launch_run(run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            run_launcher = instance.run_launcher
            assert run_launcher.can_terminate(run_id)

            # Remember the real tag so it can be put back afterwards.
            saved_grpc_info = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = seven.json.dumps(
                merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            )
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: bogus_grpc_info})

            assert not run_launcher.can_terminate(run_id)

            instance.add_run_tags(run_id, {GRPC_INFO_TAG: saved_grpc_info})

            assert run_launcher.terminate(run_id)

        server_process.wait()
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the "nope" repository.

    A ``GrpcServerProcess`` is started for ``test_default_run_launcher.py`` and
    reached through an ephemeral client; ``server_process.wait()`` always runs
    once the generator is finished or closed.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            yield external_repo.get_full_external_pipeline(pipeline_name)
    finally:
        server_process.wait()
def create_in_process_ephemeral_workspace(pointer):
    """Return a ``Workspace`` holding one in-process location built from ``pointer``.

    Args:
        pointer: A ``CodePointer``; validated with ``check.inst_param``.
    """
    # Imported inside the function, as in the original.
    from dagster.cli.workspace.workspace import Workspace

    check.inst_param(pointer, "pointer", CodePointer)
    in_process_handle = RepositoryLocationHandle.create_in_process_location(pointer)
    return Workspace(repository_location_handles=[in_process_handle])
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run succeeds after the ephemeral client has exited.

    After leaving the client context the run must still be unfinished and the
    server process still alive; the run must then reach SUCCESS and the server
    process must exit within five seconds of that.
    """
    with seven.TemporaryDirectory() as temp_dir:
        launcher_overrides = {
            "run_launcher": {
                "module": "dagster.core.launcher.grpc_run_launcher",
                "class": "GrpcRunLauncher",
            }
        }
        instance = DagsterInstance.local_temp(temp_dir, overrides=launcher_overrides)

        run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
        run_id = run.run_id

        target_origin = LoadableTargetOrigin(
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            run_launcher = instance.run_launcher
            run_launcher.launch_run(
                instance=instance, run=run, external_pipeline=external_pipeline
            )

        # Server process now receives shutdown event, run has not finished yet
        run = instance.get_run_by_id(run_id)
        assert not run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        run = poll_for_finished_run(instance, run_id)
        assert run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
def get_bar_repo_grpc_repository_location_handle():
    """Return a process-bound gRPC location handle for ``bar_repo`` in ``api_tests_repo.py``."""
    bar_repo_origin = LoadableTargetOrigin(
        attribute='bar_repo', python_file=file_relative_path(__file__, 'api_tests_repo.py'),
    )
    return RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=bar_repo_origin, location_name='bar_repo',
    )
def get_bar_repo_repository_location_handle():
    """Return an out-of-process location handle exposing ``bar_repo`` from ``api_tests_repo.py``."""
    bar_repo_pointer = FileCodePointer(
        file_relative_path(__file__, 'api_tests_repo.py'), 'bar_repo'
    )
    return RepositoryLocationHandle.create_out_of_process_location(
        location_name='bar_repo_location',
        repository_code_pointer_dict={'bar_repo': bar_repo_pointer},
    )
def get_bar_repo_repository_location_handle():
    """Return a python-env location handle for ``bar_repo`` in ``api_tests_repo.py``.

    The loadable target uses the currently running interpreter
    (``sys.executable``).
    """
    bar_repo_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "api_tests_repo.py"),
        attribute="bar_repo",
    )
    return RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=bar_repo_origin, location_name="bar_repo_location",
    )
def get_bar_repo_grpc_repository_location_handle():
    """Yield a process-bound gRPC location handle for ``bar_repo``.

    ``cleanup()`` is always called on the handle once the generator is
    finished or closed.
    """
    bar_repo_origin = LoadableTargetOrigin(
        attribute="bar_repo", python_file=file_relative_path(__file__, "api_tests_repo.py"),
    )
    grpc_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=bar_repo_origin, location_name="bar_repo",
    )
    try:
        yield grpc_handle
    finally:
        grpc_handle.cleanup()
def get_giant_repo_grpc_repository_location_handle():
    """Yield a process-bound gRPC location handle for ``giant_repo``.

    ``cleanup()`` is always called on the handle once the generator is
    finished or closed.
    """
    giant_repo_origin = LoadableTargetOrigin(
        attribute="giant_repo",
        module_name="dagster_tests.api_tests.test_api_snapshot_repository",
    )
    grpc_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=giant_repo_origin, location_name="giant_repo_location",
    )
    try:
        yield grpc_handle
    finally:
        grpc_handle.cleanup()
def get_giant_repo_grpc_repository_location_handle():
    """Yield a location handle for ``giant_repo`` built from a managed gRPC origin.

    The handle is used as a context manager, so it is exited when the
    generator is finished or closed.
    """
    giant_repo_target = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="giant_repo",
        module_name="dagster_tests.api_tests.test_api_snapshot_repository",
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=giant_repo_target, location_name="giant_repo_location",
    )
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as grpc_handle:
        yield grpc_handle
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the "nope" repository.

    The location handle is created from a managed gRPC python-env origin and
    used as a context manager around the yield.
    """
    nope_target = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=nope_target, location_name="nope",
    )
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as location_handle:
        external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
        yield external_repo.get_full_external_pipeline(pipeline_name)
def _external_pipeline_from_def(pipeline_def, solid_selection=None):
    """Build an external pipeline for ``pipeline_def`` backed by an in-process location.

    Args:
        pipeline_def: The pipeline definition passed to ``reconstructable``.
        solid_selection: Optional solid selection forwarded unchanged to
            ``external_pipeline_from_recon_pipeline``.

    Returns:
        Whatever ``external_pipeline_from_recon_pipeline`` returns for the
        reconstructed pipeline and the repository handle built here.
    """
    recon_pipeline = reconstructable(pipeline_def)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    location_handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)
    repository_handle = RepositoryHandle(
        repository_name=repo_def.name, repository_location_handle=location_handle,
    )

    # Reuse the pipeline reconstructed above rather than calling
    # reconstructable() on the same definition a second time.
    return external_pipeline_from_recon_pipeline(
        recon_pipeline, solid_selection=solid_selection, repository_handle=repository_handle,
    )
def get_external_pipeline_from_python_location(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from a python-env location.

    The location targets the "nope" attribute of ``test_default_run_launcher.py``
    using the currently running interpreter (``sys.executable``).
    """
    nope_target = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=nope_target, location_name="nope",
    )
    external_repo = PythonEnvRepositoryLocation(location_handle).get_repository("nope")
    yield external_repo.get_full_external_pipeline(pipeline_name)
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the 'nope' repository.

    The repository is loaded through a process-bound gRPC server location
    created for ``test_cli_api_run_launcher.py``.

    Args:
        pipeline_name: Name passed to ``get_full_external_pipeline``.

    Yields:
        The external pipeline returned by the 'nope' repository.
    """
    repository_location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=LoadableTargetOrigin(
            attribute='nope',
            python_file=file_relative_path(__file__, 'test_cli_api_run_launcher.py'),
        ),
        location_name='nope',
    )
    repository_location = GrpcServerRepositoryLocation(repository_location_handle)
    try:
        yield repository_location.get_repository('nope').get_full_external_pipeline(pipeline_name)
    finally:
        # Release the process-bound handle even if the consumer raises or
        # closes the generator early; without this it was never cleaned up.
        repository_location_handle.cleanup()
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the "nope" repository.

    The repository is loaded through a process-bound gRPC server location;
    ``cleanup()`` is always called on the handle once the generator is
    finished or closed.
    """
    nope_target = LoadableTargetOrigin(
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=nope_target, location_name="nope",
    )
    grpc_location = GrpcServerRepositoryLocation(location_handle)
    try:
        external_repo = grpc_location.get_repository("nope")
        yield external_repo.get_full_external_pipeline(pipeline_name)
    finally:
        location_handle.cleanup()
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the 'nope' repository.

    The loadable target is derived from the legacy ``repo.yaml`` next to this
    file, and the repository is reached through an ephemeral gRPC client on
    'localhost'.
    """
    legacy_yaml_path = file_relative_path(__file__, 'repo.yaml')
    legacy_repo = ReconstructableRepository.from_legacy_repository_yaml(legacy_yaml_path)
    target_origin = LoadableTargetOrigin.from_python_origin(legacy_repo.get_origin())

    # The server process is deliberately left as a temporary expression, as in
    # the original, so no extra reference to it is held here.
    with GrpcServerProcess(
        loadable_target_origin=target_origin
    ).create_ephemeral_client() as server:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test', port=server.port, socket=server.socket, host='localhost',
        )
        external_repo = GrpcServerRepositoryLocation(location_handle).get_repository('nope')
        yield external_repo.get_full_external_pipeline(pipeline_name)
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run succeeds after the ephemeral client has exited.

    After leaving the client context the run must still be unfinished and the
    server process still alive; the run must then reach SUCCESS and the server
    process must exit within five seconds of that.
    """
    with instance_for_test() as instance:
        run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
        run_id = run.run_id

        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(run_id=run.run_id, external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        run = instance.get_run_by_id(run_id)
        assert not run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        run = poll_for_finished_run(instance, run_id)
        assert run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
def get_external_pipeline_from_run(self, pipeline_run):
    """Return the external pipeline for ``pipeline_run``.

    Location handles are stored in ``self._location_handles`` keyed by the id
    of the run's repository location origin, so a handle is created only the
    first time a given origin id is seen.
    """
    pipeline_origin = pipeline_run.external_pipeline_origin
    location_origin = pipeline_origin.external_repository_origin.repository_location_origin
    cache_key = location_origin.get_id()

    if cache_key not in self._location_handles:
        self._location_handles[cache_key] = (
            RepositoryLocationHandle.create_from_repository_location_origin(location_origin)
        )

    return external_pipeline_from_location_handle(
        self._location_handles[cache_key],
        pipeline_origin,
        pipeline_run.solid_selection,
    )
def test_terminate_after_shutdown():
    """Check launch and terminate behaviour after ``cleanup_server()`` is called.

    With ``sleepy_pipeline`` already running, ``cleanup_server()`` is called on
    the handle's client; launching ``math_diamond`` must then raise
    ``DagsterLaunchFailedError``, while the first run can still be terminated.
    """
    with instance_for_test() as instance:
        nope_target = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
            loadable_target_origin=nope_target, location_name="nope",
        )

        with RepositoryLocationHandle.create_from_repository_location_origin(
            location_origin
        ) as location_handle:
            grpc_location = GrpcServerRepositoryLocation(location_handle)

            external_pipeline = grpc_location.get_repository("nope").get_full_external_pipeline(
                "sleepy_pipeline"
            )
            run = instance.create_run_for_pipeline(pipeline_def=sleepy_pipeline, run_config=None)
            run_id = run.run_id

            instance.launch_run(run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            # Tell the server to shut down once executions finish
            location_handle.client.cleanup_server()

            # Trying to start another run fails
            doomed_external_pipeline = grpc_location.get_repository(
                "nope"
            ).get_full_external_pipeline("math_diamond")
            doomed_run = instance.create_run_for_pipeline(
                pipeline_def=math_diamond, run_config=None
            )

            with pytest.raises(DagsterLaunchFailedError):
                instance.launch_run(doomed_run.run_id, doomed_external_pipeline)

            run_launcher = instance.run_launcher

            # Can terminate the run even after the shutdown event has been received
            assert run_launcher.can_terminate(run_id)
            assert run_launcher.terminate(run_id)
def test_giant_external_repository():
    """Check that the python-env location for ``giant_repo`` yields exactly that repository."""
    giant_repo_target = LoadableTargetOrigin(
        executable_path=sys.executable,
        module_name="dagster_tests.api_tests.test_api_snapshot_repository",
        attribute="giant_repo",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=giant_repo_target, location_name="giant_repo_location",
    )

    external_repos = sync_get_external_repositories(location_handle)
    assert len(external_repos) == 1

    only_repo = external_repos[0]
    assert isinstance(only_repo, ExternalRepository)
    assert only_repo.name == "giant_repo"
def test_run_always_finishes(temp_instance):  # pylint: disable=redefined-outer-name
    """Check that a launched run succeeds after the ephemeral client has exited.

    After leaving the client context the run must still be unfinished and the
    server process still alive; the run must then reach SUCCESS and the server
    process must exit within five seconds of that.
    """
    instance = temp_instance

    run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
    run_id = run.run_id

    nope_repo = ReconstructableRepository.for_file(__file__, 'nope')
    target_origin = LoadableTargetOrigin.from_python_origin(nope_repo.get_origin())
    server_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)

    with server_process.create_ephemeral_client() as api_client:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test',
            port=api_client.port,
            socket=api_client.socket,
            host=api_client.host,
        )
        external_repo = GrpcServerRepositoryLocation(location_handle).get_repository('nope')
        external_pipeline = external_repo.get_full_external_pipeline('slow_pipeline')

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        run_launcher = instance.run_launcher
        run_launcher.launch_run(instance=instance, run=run, external_pipeline=external_pipeline)

    # Server process now receives shutdown event, run has not finished yet
    run = instance.get_run_by_id(run_id)
    assert not run.is_finished
    assert server_process.server_process.poll() is None

    # Server should wait until run finishes, then shutdown
    run = poll_for_run(instance, run_id)
    assert run.status == PipelineRunStatus.SUCCESS

    start_time = time.time()
    while server_process.server_process.poll() is None:
        time.sleep(0.05)
        # Verify server process cleans up eventually
        assert time.time() - start_time < 5
def workspace_from_load_target(load_target, instance):
    """Build a ``Workspace`` for the given load target.

    Args:
        load_target: A ``WorkspaceLoadTarget``; its concrete type selects how
            the workspace is built.
        instance: A ``DagsterInstance``; its "opt_in" settings choose between
            ``UserProcessApi.GRPC`` and ``UserProcessApi.CLI``.

    Returns:
        The workspace for the target. An unrecognised target type is passed
        to ``check.not_implemented``.
    """
    check.inst_param(load_target, "load_target", WorkspaceLoadTarget)
    check.inst_param(instance, "instance", DagsterInstance)

    opt_in_settings = instance.get_settings("opt_in")
    if opt_in_settings and opt_in_settings["local_servers"]:
        python_user_process_api = UserProcessApi.GRPC
    else:
        python_user_process_api = UserProcessApi.CLI

    if isinstance(load_target, WorkspaceFileTarget):
        return load_workspace_from_yaml_paths(load_target.paths, python_user_process_api)

    if isinstance(load_target, PythonFileTarget):
        file_handle = location_handle_from_python_file(
            python_file=load_target.python_file,
            attribute=load_target.attribute,
            working_directory=load_target.working_directory,
            user_process_api=python_user_process_api,
        )
        return Workspace([file_handle])

    if isinstance(load_target, ModuleTarget):
        module_handle = location_handle_from_module_name(
            load_target.module_name,
            load_target.attribute,
            user_process_api=python_user_process_api,
        )
        return Workspace([module_handle])

    if isinstance(load_target, GrpcServerTarget):
        grpc_handle = RepositoryLocationHandle.create_grpc_server_location(
            port=load_target.port, socket=load_target.socket, host=load_target.host,
        )
        return Workspace([grpc_handle])

    if isinstance(load_target, EmptyWorkspaceTarget):
        return Workspace([])

    check.not_implemented("Unsupported: {}".format(load_target))
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the "nope" repository.

    The loadable target comes from the legacy ``repo.yaml`` next to this file.
    ``server_process.wait()`` always runs once the generator is finished or
    closed.
    """
    legacy_yaml_path = file_relative_path(__file__, "repo.yaml")
    legacy_repo = ReconstructableRepository.from_legacy_repository_yaml(legacy_yaml_path)
    target_origin = legacy_repo.get_origin().loadable_target_origin

    server_process = GrpcServerProcess(loadable_target_origin=target_origin)
    try:
        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            external_repo = GrpcServerRepositoryLocation(location_handle).get_repository("nope")
            yield external_repo.get_full_external_pipeline(pipeline_name)
    finally:
        server_process.wait()