def daemon_controller_from_instance(instance, wait_for_processes_on_exit=False):
    check.inst_param(instance, "instance", DagsterInstance)
    grpc_server_registry = None
    try:
        with ExitStack() as stack:
            grpc_server_registry = stack.enter_context(ProcessGrpcServerRegistry())
            daemons = [
                stack.enter_context(daemon)
                for daemon in create_daemons_from_instance(instance)
            ]

            # Create this in each daemon to generate a workspace per-daemon
            @contextmanager
            def gen_workspace(_instance):
                with DynamicWorkspace(grpc_server_registry) as workspace:
                    yield workspace

            with DagsterDaemonController(instance, daemons, gen_workspace) as controller:
                yield controller
    finally:
        if wait_for_processes_on_exit and grpc_server_registry:
            grpc_server_registry.wait_for_processes()  # pylint: disable=no-member

def daemon_controller_from_instance(
    instance,
    heartbeat_interval_seconds=DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
    heartbeat_tolerance_seconds=DEFAULT_DAEMON_HEARTBEAT_TOLERANCE_SECONDS,
    wait_for_processes_on_exit=False,
    gen_daemons=create_daemons_from_instance,
    error_interval_seconds=DEFAULT_DAEMON_ERROR_INTERVAL_SECONDS,
):
    check.inst_param(instance, "instance", DagsterInstance)
    grpc_server_registry = None
    try:
        with ExitStack() as stack:
            grpc_server_registry = stack.enter_context(ProcessGrpcServerRegistry())
            daemons = [stack.enter_context(daemon) for daemon in gen_daemons(instance)]

            # Create this in each daemon to generate a workspace per-daemon
            @contextmanager
            def gen_workspace(_instance):
                with DynamicWorkspace(grpc_server_registry) as workspace:
                    yield workspace

            with DagsterDaemonController(
                instance,
                daemons,
                gen_workspace,
                heartbeat_interval_seconds=heartbeat_interval_seconds,
                heartbeat_tolerance_seconds=heartbeat_tolerance_seconds,
                error_interval_seconds=error_interval_seconds,
            ) as controller:
                yield controller
    finally:
        if wait_for_processes_on_exit and grpc_server_registry:
            grpc_server_registry.wait_for_processes()  # pylint: disable=no-member

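# Usage sketch (illustrative, not from the source): daemon_controller_from_instance
# is a generator that yields a running controller, so in its home module it is
# presumably decorated with @contextmanager; wrapping it explicitly here keeps the
# sketch self-contained. The helper name below is hypothetical.
from contextlib import contextmanager

_daemon_controller = contextmanager(daemon_controller_from_instance)

def _example_run_daemons(instance):
    with _daemon_controller(instance, wait_for_processes_on_exit=True) as controller:
        # The daemons produced by gen_daemons run while this context is held open.
        pass
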
def test_process_server_registry():
    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="repo",
            python_file=file_relative_path(__file__, "test_grpc_server_registry.py"),
        ),
    )

    with ProcessGrpcServerRegistry(
        reload_interval=5, heartbeat_ttl=10, startup_timeout=5
    ) as registry:
        endpoint_one = registry.get_grpc_endpoint(origin)
        endpoint_two = registry.get_grpc_endpoint(origin)

        assert endpoint_two == endpoint_one

        assert _can_connect(origin, endpoint_one)
        assert _can_connect(origin, endpoint_two)

        start_time = time.time()
        while True:
            # Registry should return a new server endpoint after 5 seconds
            endpoint_three = registry.get_grpc_endpoint(origin)
            if endpoint_three.server_id != endpoint_one.server_id:
                break

            if time.time() - start_time > 15:
                raise Exception("Server ID never changed")

            time.sleep(1)

        assert _can_connect(origin, endpoint_three)

        start_time = time.time()
        while True:
            # Server at endpoint_one should eventually die due to heartbeat failure
            if not _can_connect(origin, endpoint_one):
                break

            if time.time() - start_time > 30:
                raise Exception("Old Server never died after process manager released it")

            time.sleep(1)

        # Make one more fresh process, then leave the context so that it will be cleaned up
        while True:
            endpoint_four = registry.get_grpc_endpoint(origin)
            if endpoint_four.server_id != endpoint_three.server_id:
                assert _can_connect(origin, endpoint_four)
                break

    registry.wait_for_processes()
    assert not _can_connect(origin, endpoint_three)
    assert not _can_connect(origin, endpoint_four)

def __init__(self, workspace_load_target, grpc_server_registry=None):
    self._stack = ExitStack()

    # Guards changes to _location_dict, _location_error_dict, and _location_origin_dict
    self._lock = threading.Lock()

    # Only ever set up by main thread
    self._watch_thread_shutdown_events = {}
    self._watch_threads = {}

    self._state_subscribers: List[LocationStateSubscriber] = []

    from .cli_target import WorkspaceLoadTarget

    self._workspace_load_target = check.opt_inst_param(
        workspace_load_target, "workspace_load_target", WorkspaceLoadTarget
    )

    if grpc_server_registry:
        self._grpc_server_registry = check.inst_param(
            grpc_server_registry, "grpc_server_registry", GrpcServerRegistry
        )
    else:
        self._grpc_server_registry = self._stack.enter_context(
            ProcessGrpcServerRegistry(reload_interval=0, heartbeat_ttl=30)
        )

    self._location_dict = {}
    self._location_error_dict = {}

    with self._lock:
        self._load_workspace()

def test_error_repo_in_registry():
    error_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="error_repo",
            python_file=file_relative_path(__file__, "error_repo.py"),
        ),
    )

    with ProcessGrpcServerRegistry(reload_interval=5, heartbeat_ttl=10) as registry:
        # Repository with a loading error does not raise an exception
        endpoint = registry.get_grpc_endpoint(error_origin)

        # But using that endpoint to load a location results in an error
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            with GrpcServerRepositoryLocation(
                origin=error_origin,
                server_id=endpoint.server_id,
                port=endpoint.port,
                socket=endpoint.socket,
                host=endpoint.host,
                watch_server=False,
            ):
                pass

        # That error is idempotent
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            with GrpcServerRepositoryLocation(
                origin=error_origin,
                server_id=endpoint.server_id,
                port=endpoint.port,
                socket=endpoint.socket,
                host=endpoint.host,
                watch_server=False,
            ):
                pass

def _create_tick(instance):
    with ProcessGrpcServerRegistry() as grpc_server_registry:
        with DynamicWorkspace(grpc_server_registry) as workspace:
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon"), workspace
                )
            )

def test_registry_multithreading():
    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="repo",
            python_file=file_relative_path(__file__, "test_grpc_server_registry.py"),
        ),
    )

    with ProcessGrpcServerRegistry(reload_interval=300, heartbeat_ttl=600) as registry:
        endpoint = registry.get_grpc_endpoint(origin)

        threads = []
        success_events = []
        for _index in range(5):
            event = threading.Event()
            thread = threading.Thread(
                target=_registry_thread, args=(origin, registry, endpoint, event)
            )
            threads.append(thread)
            success_events.append(event)
            thread.start()

        for thread in threads:
            thread.join()

        for event in success_events:
            assert event.is_set()

        assert _can_connect(origin, endpoint)

    registry.wait_for_processes()
    assert not _can_connect(origin, endpoint)

def _create_sensor_tick(instance):
    with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as grpc_server_registry:
        with RepositoryLocationHandleManager(grpc_server_registry) as handle_manager:
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon"), handle_manager
                )
            )

def _create_sensor_tick(instance):
    with ProcessGrpcServerRegistry() as grpc_server_registry:
        with RepositoryLocationManager(grpc_server_registry) as location_manager:
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon"), location_manager
                )
            )

def get_repository_location_from_kwargs(kwargs):
    origin = get_repository_location_origin_from_kwargs(kwargs)
    with ProcessGrpcServerRegistry(reload_interval=0, heartbeat_ttl=30) as grpc_server_registry:
        with RepositoryLocationManager(grpc_server_registry) as location_manager:
            with location_manager.get_location(origin) as location:
                yield location

def __init__(
    self,
    instance: DagsterInstance,
    workspace_load_target: Optional[WorkspaceLoadTarget],
    version: str = "",
    read_only: bool = False,
    grpc_server_registry=None,
):
    self._stack = ExitStack()

    check.opt_str_param(version, "version")
    check.bool_param(read_only, "read_only")

    # lazy import for perf
    from rx.subjects import Subject

    self._instance = check.inst_param(instance, "instance", DagsterInstance)
    self._workspace_load_target = check.opt_inst_param(
        workspace_load_target, "workspace_load_target", WorkspaceLoadTarget
    )

    self._location_state_events = Subject()
    self._location_state_subscriber = LocationStateSubscriber(
        self._location_state_events_handler
    )

    self._read_only = read_only
    self._version = version

    # Guards changes to _location_dict, _location_error_dict, and _location_origin_dict
    self._lock = threading.Lock()

    # Only ever set up by main thread
    self._watch_thread_shutdown_events: Dict[str, threading.Event] = {}
    self._watch_threads: Dict[str, threading.Thread] = {}

    self._state_subscribers: List[LocationStateSubscriber] = []
    self.add_state_subscriber(self._location_state_subscriber)

    if grpc_server_registry:
        self._grpc_server_registry: GrpcServerRegistry = check.inst_param(
            grpc_server_registry, "grpc_server_registry", GrpcServerRegistry
        )
    else:
        self._grpc_server_registry = self._stack.enter_context(
            ProcessGrpcServerRegistry(
                reload_interval=0,
                heartbeat_ttl=DAGIT_GRPC_SERVER_HEARTBEAT_TTL,
                startup_timeout=instance.code_server_process_startup_timeout,
            )
        )

    self._location_entry_dict: Dict[str, WorkspaceLocationEntry] = OrderedDict()

    with self._lock:
        self._load_workspace()

def get_repository_location_from_kwargs(kwargs):
    origin = get_repository_location_origin_from_kwargs(kwargs)
    with ProcessGrpcServerRegistry(reload_interval=0, heartbeat_ttl=30) as grpc_server_registry:
        from dagster.cli.workspace.dynamic_workspace import DynamicWorkspace

        with DynamicWorkspace(grpc_server_registry) as workspace:
            with workspace.get_location(origin) as location:
                yield location

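# Usage sketch (illustrative, not from the source): get_repository_location_from_kwargs
# yields a single location, so it is presumably consumed through
# contextlib.contextmanager. The kwargs dict contents and the `.name` attribute
# access below are assumptions for the example.
from contextlib import contextmanager

_location_from_kwargs = contextmanager(get_repository_location_from_kwargs)

def _example_location_name(kwargs):
    with _location_from_kwargs(kwargs) as location:
        return location.name  # assumes the location object exposes a `name`
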
def __init__(self, workspace_load_target):
    from .cli_target import WorkspaceLoadTarget

    self._stack = ExitStack()
    self._workspace_load_target = check.opt_inst_param(
        workspace_load_target, "workspace_load_target", WorkspaceLoadTarget
    )
    self._grpc_server_registry = self._stack.enter_context(
        ProcessGrpcServerRegistry(reload_interval=0, heartbeat_ttl=30)
    )
    self._load_workspace()

def daemon_controller_from_instance(instance, wait_for_processes_on_exit=False):
    check.inst_param(instance, "instance", DagsterInstance)
    with ExitStack() as stack:
        grpc_server_registry = stack.enter_context(
            ProcessGrpcServerRegistry(wait_for_processes_on_exit=wait_for_processes_on_exit)
        )
        daemons = [
            stack.enter_context(daemon)
            for daemon in create_daemons_from_instance(instance)
        ]
        with DagsterDaemonController(instance, daemons, grpc_server_registry) as controller:
            yield controller

def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as grpc_server_registry:
            try:
                with pendulum.test(execution_datetime):
                    list(
                        launch_scheduled_runs(
                            instance,
                            grpc_server_registry,
                            logger(),
                            pendulum.now("UTC"),
                            debug_crash_flags=debug_crash_flags,
                        )
                    )
            finally:
                cleanup_test_instance(instance)

def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), ProcessGrpcServerRegistry() as grpc_server_registry:
                with DynamicWorkspace(grpc_server_registry) as workspace:
                    list(
                        execute_sensor_iteration(
                            instance,
                            get_default_daemon_logger("SensorDaemon"),
                            workspace,
                            debug_crash_flags=debug_crash_flags,
                        )
                    )
        finally:
            cleanup_test_instance(instance)

def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with ProcessGrpcServerRegistry() as grpc_server_registry:
                with DynamicWorkspace(grpc_server_registry) as workspace:
                    with pendulum.test(execution_datetime):
                        list(
                            launch_scheduled_runs(
                                instance,
                                workspace,
                                logger(),
                                pendulum.now("UTC"),
                                debug_crash_flags=debug_crash_flags,
                            )
                        )
        finally:
            cleanup_test_instance(instance)

def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), ProcessGrpcServerRegistry(
                wait_for_processes_on_exit=True
            ) as grpc_server_registry:
                with RepositoryLocationHandleManager(grpc_server_registry) as handle_manager:
                    list(
                        execute_sensor_iteration(
                            instance,
                            get_default_daemon_logger("SensorDaemon"),
                            handle_manager,
                            debug_crash_flags=debug_crash_flags,
                        )
                    )
        finally:
            cleanup_test_instance(instance)

def test_error_repo_in_registry():
    error_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="error_repo",
            python_file=file_relative_path(__file__, "error_repo.py"),
        ),
    )

    with ProcessGrpcServerRegistry(reload_interval=5, heartbeat_ttl=10) as registry:
        # Repository with a loading error raises an exception with the reason why
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            registry.get_grpc_endpoint(error_origin)

        # The exception is idempotent
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            registry.get_grpc_endpoint(error_origin)

def daemon_controller_from_instance(instance, wait_for_processes_on_exit=False):
    check.inst_param(instance, "instance", DagsterInstance)
    grpc_server_registry = None
    try:
        with ExitStack() as stack:
            grpc_server_registry = stack.enter_context(ProcessGrpcServerRegistry())
            daemons = [
                stack.enter_context(daemon)
                for daemon in create_daemons_from_instance(instance)
            ]
            with DagsterDaemonController(instance, daemons, grpc_server_registry) as controller:
                yield controller
    finally:
        if wait_for_processes_on_exit and grpc_server_registry:
            grpc_server_registry.wait_for_processes()  # pylint: disable=no-member

def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), ProcessGrpcServerRegistry(
                wait_for_processes_on_exit=True
            ) as grpc_server_registry:
                list(
                    execute_backfill_iteration(
                        instance,
                        grpc_server_registry,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    )
                )
        finally:
            cleanup_test_instance(instance)

def instance_for_context(external_repo_context, overrides=None):
    with instance_for_test(overrides) as instance:
        with ProcessGrpcServerRegistry() as grpc_server_registry:
            with external_repo_context() as external_repo:
                yield (instance, grpc_server_registry, external_repo)

def workspace_fixture():
    with ProcessGrpcServerRegistry() as registry, DynamicWorkspace(registry) as workspace:
        yield workspace

def create_daemon_grpc_server_registry(instance):
    return ProcessGrpcServerRegistry(
        reload_interval=DAEMON_GRPC_SERVER_RELOAD_INTERVAL,
        heartbeat_ttl=DAEMON_GRPC_SERVER_HEARTBEAT_TTL,
        startup_timeout=instance.code_server_process_startup_timeout,
    )

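# Usage sketch (illustrative, not from the source): the registry returned by the
# factory above is itself a context manager; a caller might enter it and resolve a
# gRPC endpoint for an origin, as the tests in this collection do. `origin` is
# assumed to be a ManagedGrpcPythonEnvRepositoryLocationOrigin like those
# constructed above; the function name is hypothetical.
def _example_endpoint_for_origin(instance, origin):
    with create_daemon_grpc_server_registry(instance) as registry:
        return registry.get_grpc_endpoint(origin).server_id
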
def instance_with_sensors(external_repo_context, overrides=None):
    with instance_for_test(overrides) as instance:
        with ProcessGrpcServerRegistry() as grpc_server_registry:
            with DynamicWorkspace(grpc_server_registry) as workspace:
                with external_repo_context() as external_repo:
                    yield (instance, workspace, external_repo)

def create_daemon_grpc_server_registry():
    return ProcessGrpcServerRegistry(
        reload_interval=DAEMON_GRPC_SERVER_RELOAD_INTERVAL,
        heartbeat_ttl=DAEMON_GRPC_SERVER_HEARTBEAT_TTL,
        startup_timeout=DAEMON_GRPC_SERVER_STARTUP_TIMEOUT,
    )

def grpc_server_registry(instance):  # pylint: disable=unused-argument
    with ProcessGrpcServerRegistry() as registry:
        yield registry

def test_process_server_registry():
    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="repo",
            python_file=file_relative_path(__file__, "test_grpc_server_registry.py"),
        ),
    )

    with ProcessGrpcServerRegistry(
        wait_for_processes_on_exit=True, cleanup_interval=5, heartbeat_interval=10
    ) as registry:
        with RepositoryLocationHandleManager(registry) as handle_manager:
            endpoint_one = registry.get_grpc_endpoint(origin)
            handle_one = handle_manager.get_handle(origin)

            endpoint_two = registry.get_grpc_endpoint(origin)
            handle_two = handle_manager.get_handle(origin)

            assert endpoint_two == endpoint_one
            assert handle_two == handle_one

            assert _can_connect(origin, endpoint_one)
            assert _can_connect(origin, endpoint_two)

            start_time = time.time()
            while True:
                # Registry should return a new server endpoint after 5 seconds
                endpoint_three = registry.get_grpc_endpoint(origin)
                if endpoint_three.server_id != endpoint_one.server_id:
                    # Handle manager now produces a new handle as well
                    handle_three = handle_manager.get_handle(origin)
                    assert handle_three != handle_one
                    break

                if time.time() - start_time > 15:
                    raise Exception("Server ID never changed")

                time.sleep(1)

            assert _can_connect(origin, endpoint_three)

        # Leave handle_manager context, all heartbeats stop
        start_time = time.time()
        while True:
            # Server at endpoint_one should eventually die due to heartbeat failure
            if not _can_connect(origin, endpoint_one):
                break

            if time.time() - start_time > 30:
                raise Exception("Old Server never died after process manager released it")

            time.sleep(1)

        # Make one more fresh process, then leave the context so that it will be cleaned up
        while True:
            endpoint_four = registry.get_grpc_endpoint(origin)
            if endpoint_four.server_id != endpoint_three.server_id:
                assert _can_connect(origin, endpoint_four)
                break

    # Once we leave the ProcessGrpcServerRegistry context, all processes should be cleaned up
    # (if wait_for_processes_on_exit was set)
    assert not _can_connect(origin, endpoint_three)
    assert not _can_connect(origin, endpoint_four)

def instance_with_sensors(external_repo_context, overrides=None):
    with instance_for_test(overrides) as instance:
        with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as grpc_server_registry:
            with external_repo_context() as external_repo:
                yield (instance, grpc_server_registry, external_repo)

def grpc_server_registry(instance):  # pylint: disable=unused-argument
    with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as registry:
        yield registry