def test_origin_ids_stable():
    """Pin the schedule origin ids that are persisted to ScheduleStorage.

    These hashes must never drift: stored schedule state is keyed on them,
    so a change here would silently orphan existing schedule records.
    """
    loadable = LoadableTargetOrigin(
        executable_path="/fake/executable",
        python_file="/fake/file/path",
        attribute="fake_attribute",
    )
    python_repo_origin = ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(loadable),
        "fake_repo",
    )
    python_origin = ExternalJobOrigin(python_repo_origin, "fake_schedule")
    assert python_origin.get_id() == "eb01cc697463ba614a67567fdeaafcccc60f0fc4"

    grpc_repo_origin = ExternalRepositoryOrigin(
        GrpcServerRepositoryLocationOrigin(host="fakehost", port=52618),
        "repo_name",
    )
    grpc_origin = ExternalJobOrigin(grpc_repo_origin, "fake_schedule")
    assert grpc_origin.get_id() == "0961ecddbddfc71104adf036ebe8cd97a94dc77b"
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield an ExternalPipeline for *pipeline_name* served from an ephemeral gRPC server."""
    target = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    process = GrpcServerProcess(loadable_target_origin=target)
    try:
        with process.create_ephemeral_client() as client:
            origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=client.port,
                socket=client.socket,
                host=client.host,
            )
            location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(origin)
            )
            yield location.get_repository("nope").get_full_external_pipeline(pipeline_name)
    finally:
        # Always let the server process shut down, even if the consumer raises.
        process.wait()
def _location_origin_from_grpc_server_config(grpc_server_config, yaml_path):
    """Build a GrpcServerRepositoryLocationOrigin from a workspace yaml entry.

    Requires exactly one of ``port``/``socket``; ``host`` defaults to
    ``localhost`` when absent or falsy.
    """
    check.dict_param(grpc_server_config, "grpc_server_config")
    check.str_param(yaml_path, "yaml_path")

    port = grpc_server_config.get("port")
    socket = grpc_server_config.get("socket")
    host = grpc_server_config.get("host") or "localhost"
    location_name = grpc_server_config.get("location_name")
    use_ssl = grpc_server_config.get("ssl")

    # Exactly one of socket/port must be supplied (xor).
    check.invariant(
        (socket or port) and not (socket and port),
        "must supply either a socket or a port",
    )

    return GrpcServerRepositoryLocationOrigin(
        port=port,
        socket=socket,
        host=host,
        location_name=location_name,
        use_ssl=use_ssl,
    )
def test_grpc_server_down():
    """A schedule launch against an unreachable gRPC server records a failed tick."""
    with _default_instance() as instance:
        # Point the origin at a free port with nothing listening on it.
        down_repo_origin = ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(
                host="localhost",
                port=find_free_port(),
                socket=None,
            ),
            repository_name="down_repo",
        )
        down_schedule_origin = down_repo_origin.get_job_origin("down_schedule")

        instance = DagsterInstance.get()
        result = sync_launch_scheduled_execution(down_schedule_origin, "US/Eastern")

        assert isinstance(result, ScheduledExecutionFailed)
        assert "failed to connect to all addresses" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(down_schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "failed to connect to all addresses" in ticks[0].error.message
def location_origins_from_load_target(load_target):
    """Translate a workspace load target into a list of repository location origins."""
    if isinstance(load_target, WorkspaceFileTarget):
        return location_origins_from_yaml_paths(load_target.paths)
    if isinstance(load_target, PythonFileTarget):
        origin = location_origin_from_python_file(
            python_file=load_target.python_file,
            attribute=load_target.attribute,
            working_directory=load_target.working_directory,
        )
        return [origin]
    if isinstance(load_target, ModuleTarget):
        return [
            location_origin_from_module_name(load_target.module_name, load_target.attribute)
        ]
    if isinstance(load_target, PackageTarget):
        return [
            location_origin_from_package_name(load_target.package_name, load_target.attribute)
        ]
    if isinstance(load_target, GrpcServerTarget):
        return [
            GrpcServerRepositoryLocationOrigin(
                port=load_target.port,
                socket=load_target.socket,
                host=load_target.host,
            )
        ]
    if isinstance(load_target, EmptyWorkspaceTarget):
        return []
    # check.not_implemented raises, so there is no fall-through return.
    check.not_implemented("Unsupported: {}".format(load_target))
def test_server_down():
    """Termination is refused when the run's gRPC info points at a dead server."""
    with instance_for_test() as instance:
        target = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        process = GrpcServerProcess(
            loadable_target_origin=target, max_workers=4, force_port=True
        )
        with process.create_ephemeral_client() as client:
            location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(
                    GrpcServerRepositoryLocationOrigin(
                        location_name="test",
                        port=client.port,
                        socket=client.socket,
                        host=client.host,
                    )
                )
            )
            external_pipeline = location.get_repository("nope").get_full_external_pipeline(
                "sleepy_pipeline"
            )

            run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            instance.launch_run(run.run_id, external_pipeline)
            poll_for_step_start(instance, run.run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run.run_id)

            # Stash the real gRPC connection info, then point the run tag at a
            # port with nothing listening on it.
            original_tag = instance.get_run_by_id(run.run_id).tags[GRPC_INFO_TAG]
            bogus_info = merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            instance.add_run_tags(
                run.run_id, {GRPC_INFO_TAG: seven.json.dumps(bogus_info)}
            )
            assert not launcher.can_terminate(run.run_id)

            # Restore the real connection info; termination works again.
            instance.add_run_tags(run.run_id, {GRPC_INFO_TAG: original_tag})
            assert launcher.terminate(run.run_id)

        process.wait()
def create_origins(self):
    """Return the single gRPC-server location origin described by this target."""
    origin = GrpcServerRepositoryLocationOrigin(
        port=self.port,
        socket=self.socket,
        host=self.host,
        location_name=self.location_name,
    )
    return [origin]
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """A launched run survives server shutdown: the server drains it before exiting."""
    with instance_for_test() as instance:
        run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
        run_id = run.run_id

        target = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        process = GrpcServerProcess(loadable_target_origin=target, max_workers=4)
        with process.create_ephemeral_client() as client:
            location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(
                    GrpcServerRepositoryLocationOrigin(
                        location_name="test",
                        port=client.port,
                        socket=client.socket,
                        host=client.host,
                    )
                )
            )
            external_pipeline = location.get_repository("nope").get_full_external_pipeline(
                "slow_pipeline"
            )

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED
            instance.launch_run(run_id=run.run_id, external_pipeline=external_pipeline)

        # Server process now receives the shutdown event; run has not finished yet.
        run = instance.get_run_by_id(run_id)
        assert not run.is_finished
        assert process.server_process.poll() is None

        # Server should wait until the run finishes, then shut down.
        run = poll_for_finished_run(instance, run_id)
        assert run.status == PipelineRunStatus.SUCCESS

        started_waiting = time.time()
        while process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify the server process cleans up eventually.
            assert time.time() - started_waiting < 5

        process.wait()
def grpc_schedule_origin(schedule_name):
    """Yield an ExternalJobOrigin for *schedule_name* served from an ephemeral gRPC server.

    Fix: the original skipped ``server_process.wait()`` when the consumer
    raised (or closed the generator) while it was suspended at ``yield`` —
    GeneratorExit propagated past the cleanup call and the server process was
    leaked. Wrapping the body in try/finally guarantees shutdown, matching
    the pattern the other gRPC-server fixtures in this file already use.
    """
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin)
    try:
        with server_process.create_ephemeral_client() as api_client:
            repo_origin = ExternalRepositoryOrigin(
                GrpcServerRepositoryLocationOrigin(
                    host=api_client.host,
                    port=api_client.port,
                    socket=api_client.socket,
                ),
                repository_name="the_repo",
            )
            yield repo_origin.get_job_origin(schedule_name)
    finally:
        # Runs on normal completion, on consumer exceptions, and on
        # generator close — the server process is never leaked.
        server_process.wait()
def get_external_origin(self):
    """
    Hack! Inject origin that the k8s images will use.

    The k8s helm chart workspace uses a gRPC server repo location origin. As a
    result the normal origin won't work, we need to inject this one.
    """
    location_origin = GrpcServerRepositoryLocationOrigin(
        host="user-code-deployment-1",
        port=3030,
        location_name="user-code-deployment-1",
    )
    repo_origin = ExternalRepositoryOrigin(
        repository_location_origin=location_origin,
        repository_name="demo_execution_repo",
    )
    return ExternalInstigatorOrigin(
        external_repository_origin=repo_origin,
        instigator_name=self.name,
    )
def _mgr_fn(recon_repo):
    """Yield a Workspace backed by an ephemeral gRPC server serving *recon_repo*."""
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    target = recon_repo.get_python_origin().loadable_target_origin
    process = GrpcServerProcess(loadable_target_origin=target)
    try:
        with process.create_ephemeral_client() as client:
            origin = GrpcServerRepositoryLocationOrigin(
                port=client.port,
                socket=client.socket,
                host=client.host,
                location_name="test",
            )
            with Workspace([origin]) as workspace:
                yield workspace
    finally:
        # Guarantee server shutdown even if the consumer raises.
        process.wait()
def test_cancel_run():
    """Cancelling mid-stream soft-terminates: partial materializations plus a step failure."""
    with instance_for_test() as instance:
        target = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, working_directory=None
        )
        process = GrpcServerProcess(target, max_workers=10)
        with process.create_ephemeral_client() as client:
            streaming_results = []

            run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            execute_run_args = ExecuteExternalPipelineArgs(
                pipeline_origin=ExternalPipelineOrigin(
                    ExternalRepositoryOrigin(
                        repository_location_origin=GrpcServerRepositoryLocationOrigin(
                            host="localhost",
                            socket=client.socket,
                            port=client.port,
                        ),
                        repository_name="test_repository",
                    ),
                    pipeline_name="streaming_pipeline",
                ),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            )

            # Stream events on a daemon thread so the main thread is free to cancel.
            events_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, client, execute_run_args],
            )
            events_thread.daemon = True
            events_thread.start()

            poll_for_step_start(instance, run.run_id)

            res = client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=run.run_id)
            )
            assert res.success is True

            poll_for_finished_run(instance, run.run_id)

            logs = instance.all_logs(run.run_id)
            # Soft termination: fewer than all 20 materializations landed, and
            # the interrupted step recorded a failure event.
            materializations = [
                ev
                for ev in logs
                if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
            ]
            assert len(materializations) < 20
            failures = [
                ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"
            ]
            assert failures

        process.wait()