class ProcManager(object):
    """
    Manager for all ION processes within a capability container.

    Responsibilities visible in this class:
    - Spawning processes of the different declared types (service, streamproc,
      agent, standalone, immediate, simple), each with its own messaging setup.
    - Registering processes locally and (capability permitting) in the resource
      registry / directory, and cleaning those registrations up on terminate.
    - Supervising spawned greenlets and reacting to their failure.
    - Publishing process life-cycle events and invoking state-change callbacks.
    """

    def __init__(self, container):
        # The owning capability container; several of its capabilities
        # (RESOURCE_REGISTRY, DIRECTORY, OBJECT_STORE, GOVERNANCE_CONTROLLER)
        # are checked before use throughout this class.
        self.container = container

        # Define the callables that can be added to Container public API, and add.
        # This makes container.spawn_process / container.terminate_process work.
        self.container_api = [self.spawn_process, self.terminate_process]
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        # Pool of reusable numeric ids used to build local process ids
        self.proc_id_pool = IDPool()

        # Registry of running processes, keyed by process id
        self.procs = {}
        # BAD: This is not correct if procs have the same name (later spawn
        # overwrites the earlier entry; see the warning in _register_process)
        self.procs_by_name = {}

        # mapping of greenlets we spawn to process_instances for error handling
        self._spawned_proc_to_process = {}

        # Effective execution engine config (after merging in child process overrides)
        self.ee_cfg = self._get_execution_engine_config()

        # Process dispatcher (if configured/enabled and not a child container process)
        self.pd_cfg = CFG.get_safe("service.process_dispatcher") or {}
        self.pd_enabled = self.pd_cfg.get("enabled", False) is True and not self.ee_cfg["container"]["is_child"]
        self.pd_core = None

        self.event_pub = EventPublisher()
        # Only publish ProcessLifecycleEvents when explicitly configured on
        self.publish_events = CFG.get_safe("container.process.publish_events") is True

        # Passive manager for simple threads/greenlets, to keep them registered (these are not OS threads)
        # Note that each ION process has its own thread manager, so this is for container level threads
        self.thread_manager = ThreadManager(heartbeat_secs=None, failure_notify_callback=None)

        # Active supervisor for ION processes; failures are routed to
        # _spawned_proc_failed below
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.get_safe("container.timeout.heartbeat"),
                                                failure_notify_callback=self._spawned_proc_failed)

        # list of callbacks for process state changes
        self._proc_state_change_callbacks = []

    def start(self):
        """ Start the process supervisor, optional process dispatcher, and
        register this container in the resource registry (if capable). """
        log.debug("ProcManager starting ...")

        if self.pd_enabled:
            self._start_process_dispatcher()

        self.proc_sup.start()

        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            # Register container as resource object
            cc_obj = self._get_capability_container_object()
            # NOTE: self.cc_id only exists when this branch runs; stop() and
            # _register_process guard their use of it with the same capability check
            self.cc_id, _ = self.container.resource_registry.create(cc_obj)

            # Create an association to an Org object if not the root ION org and only if found
            if CFG.get_safe("container.org_name") != CFG.get_safe("system.root_org"):
                org, _ = self.container.resource_registry.find_resources(
                    restype=RT.Org, name=CFG.get_safe("container.org_name"), id_only=True)
                if org:
                    # TODO - replace with proper association
                    self.container.resource_registry.create_association(org[0], PRED.hasResource, self.cc_id)

        log.debug("ProcManager started, OK.")

    def stop(self):
        """ Terminate all running processes (newest first), shut down the
        supervisor, and remove this container's registry registration. """
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up in reverse order
        procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)
        for proc in procs_list:
            try:
                self.terminate_process(proc.id)
            except Exception as ex:
                # Best-effort shutdown: keep terminating the remaining procs
                log.warn("Failed to terminate process (%s): %s", proc.id, ex)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.get_safe("container.timeout.shutdown"))

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

        # Remove Resource registration
        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            try:
                self.container.resource_registry.delete(self.cc_id, del_associations=True)
            except NotFound:
                # already gone, this is ok
                pass

        if self.pd_enabled:
            self._stop_process_dispatcher()

        log.debug("ProcManager stopped, OK.")

    def _get_execution_engine_config(self):
        """ Return the effective execution engine config: a deep copy of
        container.execution_engine with any child-process overrides merged in,
        and a "container" sub-dict describing this (possibly child) process.
        Raises ContainerConfigError when the EE type is not "scioncc". """
        ee_base_cfg = CFG.get_safe("container.execution_engine") or {}
        if ee_base_cfg.get("type", None) != "scioncc":
            raise ContainerConfigError("Execution engine config invalid: %s", ee_base_cfg)

        ee_cfg = deepcopy(ee_base_cfg)

        # If we are a child process, merge in child config override.
        # Child container OS processes are recognized by their
        # multiprocessing name prefix "Container-child-".
        proc_name = multiprocessing.current_process().name
        ee_cfg["container"] = dict(child_proc_name=proc_name, is_child=False)
        child_cfgs = ee_base_cfg.get("child_configs", None) or {}
        if proc_name.startswith("Container-child-"):
            ee_cfg["container"]["is_child"] = True
            if proc_name in child_cfgs:
                # Exact name match takes precedence over patterns
                log.info("Applying execution engine config override for child: %s", proc_name)
                dict_merge(ee_cfg, child_cfgs[proc_name], inplace=True)
            else:
                # First child config whose name_pattern matches wins
                for cfg_name, ch_cfg in child_cfgs.iteritems():
                    pattern = ch_cfg.get("name_pattern", None)
                    if pattern and re.match(pattern, proc_name):
                        log.info("Applying execution engine config override %s for child: %s", cfg_name, proc_name)
                        dict_merge(ee_cfg, ch_cfg, inplace=True)
                        break

        # Child override details are not needed downstream
        ee_cfg.pop("child_configs", None)
        return ee_cfg

    def _get_capability_container_object(self):
        """ Build the CapabilityContainer resource object describing this
        container: OS process info, host info, environment, Python runtime
        details, and the effective execution engine config. """
        container_info = dict(proc_name=multiprocessing.current_process().name,
                              process_id=os.getpid(),
                              parent_process_id=os.getppid(),
                              hostname=socket.gethostname(),
                              host=socket.gethostbyname(socket.gethostname()),
                              platform=sys.platform,
                              argv=sys.argv,
                              python_version=sys.version,
                              cwd=os.getcwd(),
                              start_time=self.container.start_time,
                              )
        # Other possibilities: username, os package versions, IP address
        host_info = {k: v for (k, v) in zip(("os_sysname", "os_nodename", "os_release", "os_version", "os_machine"),
                                            os.uname())}
        container_info.update(host_info)
        # Environment values are stringified for safe serialization
        container_info["env"] = {k: str(v) for (k, v) in os.environ.iteritems()}
        container_info["python_path"] = sys.path
        cc_obj = CapabilityContainer(name=self.container.id, version=self.container.version,
                                     cc_agent=self.container.name,
                                     container_info=container_info,
                                     execution_engine_config=self.ee_cfg)
        return cc_obj

    # -----------------------------------------------------------------

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.

        @param name        Process (instance) name; also used for local registration.
        @param module      Dotted module path containing the process class.
        @param cls         Class name of the process (a BaseService subclass).
        @param config      Optional config overrides (dict or DotDict), merged
                           over the system CFG; may carry process.config_ref.
        @param process_id  Optional pre-assigned process id; generated when absent.
        @retval            The process id (str), or None on IonProcessError.
        @raises BadRequest for an invalid process_id or unknown process type.
        """
        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # PROCESS ID. Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s",
                  name, module, cls, config, process_id)

        # CONFIG
        process_cfg = self._create_process_config(config)

        try:
            service_cls = named_any("%s.%s" % (module, cls))
        except AttributeError as ae:
            # Try to nail down the error: importing the module directly gives a
            # clearer traceback when the module itself fails to import
            import importlib
            importlib.import_module(module)
            raise

        # PROCESS TYPE. Determines basic process context (messaging, service interface).
        # Precedence: explicit config > class attribute > default (service)
        process_type = get_safe(process_cfg, "process.type") or getattr(service_cls, "process_type", PROCTYPE_SERVICE)

        process_start_mode = get_safe(config, "process.start_mode")

        process_instance = None

        # alert we have a spawning process, but we don't have the instance yet, so give the class instead (more accurate than name)
        # Note: this uses a str as first argument instead of a process instance
        self._call_proc_state_changed("%s.%s" % (module, cls), ProcessStateEnum.PENDING)

        try:
            # Additional attributes to set with the process instance
            proc_attr = {"_proc_type": process_type,
                         "_proc_spawn_cfg": config
                         }

            # SPAWN. Determined by type
            if process_type == PROCTYPE_SERVICE:
                process_instance = self._spawn_service_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STREAMPROC:
                process_instance = self._spawn_stream_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_AGENT:
                process_instance = self._spawn_agent_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STANDALONE:
                process_instance = self._spawn_standalone_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_IMMEDIATE:
                process_instance = self._spawn_immediate_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_SIMPLE:
                process_instance = self._spawn_simple_process(process_id, name, module, cls, process_cfg, proc_attr)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            # REGISTER.
            self._register_process(process_instance, name)

            process_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == PROCTYPE_IMMEDIATE:
                # Immediate processes are one-shot: terminate right after start
                log.debug('Terminating immediate process: %s', process_instance.id)
                self.terminate_process(process_instance.id)

                # Terminate process also triggers TERMINATING/TERMINATED
                self._call_proc_state_changed(process_instance, ProcessStateEnum.EXITED)
            else:
                # Update local policies for the new process
                if self.container.has_capability(self.container.CCAP.GOVERNANCE_CONTROLLER):
                    self.container.governance_controller.update_process_policies(
                        process_instance, safe_mode=True, force_update=False)

            return process_instance.id

        except IonProcessError:
            # NOTE(review): IonProcessError is swallowed (returns None) while any
            # other exception below re-raises — presumably intentional; confirm
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)
            return None

        except Exception:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)

            # trigger failed notification - catches problems in init/start
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)

            raise

    def _create_process_config(self, config):
        """ Prepare the config for the new process. Clone system config and apply process overrides.
        Support including config by reference of a resource attribute or object from object store.

        config_ref format: "<ref_type>:<ref_id>/<attribute_or_path>", with
        ref_type "resources" (resource registry) or "objects" (object store).
        """
        process_cfg = deepcopy(CFG)
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)

            if config.get_safe("process.config_ref"):
                # Use a reference
                config_ref = config.get_safe("process.config_ref")
                log.info("Enhancing new process spawn config from ref=%s" % config_ref)
                matches = re.match(r'^([A-Za-z]+):([A-Za-z0-9_\.]+)/(.*)$', config_ref)
                if matches:
                    ref_type, ref_id, ref_ext = matches.groups()

                    if ref_type == "resources":
                        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                            try:
                                obj = self.container.resource_registry.read(ref_id)
                                if obj and hasattr(obj, ref_ext):
                                    ref_config = getattr(obj, ref_ext)
                                    if isinstance(ref_config, dict):
                                        dict_merge(process_cfg, ref_config, inplace=True)
                                    else:
                                        raise BadRequest("config_ref %s exists but not dict" % config_ref)
                                else:
                                    raise BadRequest("config_ref %s - attribute not found" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing RESOURCE_REGISTRY capability to resolve process config ref %s" % config_ref)

                    elif ref_type == "objects":
                        if self.container.has_capability(self.container.CCAP.OBJECT_STORE):
                            try:
                                obj = self.container.object_store.read_doc(ref_id)
                                # With no attribute path, the whole document is the config
                                ref_config = obj
                                if ref_ext:
                                    ref_config = get_safe(obj, ref_ext, None)
                                    if ref_config is None:
                                        raise BadRequest("config_ref %s - attribute not found" % config_ref)

                                if isinstance(ref_config, dict):
                                    dict_merge(process_cfg, ref_config, inplace=True)
                                else:
                                    raise BadRequest("config_ref %s exists but not dict" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing OBJECT_STORE capability to resolve process config ref %s" % config_ref)

                    else:
                        raise BadRequest("Unknown reference type in: %s" % config_ref)

            # Merge precedence (lowest to highest): CFG < config_ref < config < spawn_args
            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)
        return process_cfg

    def list_local_processes(self, process_type=''):
        """
        Returns a list of the running ION processes in the container or filtered by the process_type
        """
        if not process_type:
            return self.procs.values()

        return [p for p in self.procs.itervalues() if p.process_type == process_type]

    def get_a_local_process(self, proc_name=''):
        """
        Returns a running ION process in the container for the specified process name
        (or, for agents, the matching resource_type). Returns None if not found.
        """
        for p in self.procs.itervalues():
            if p.name == proc_name:
                return p

            # Agents also match by their resource_type
            if p.process_type == PROCTYPE_AGENT and p.resource_type == proc_name:
                return p

        return None

    def get_local_service_processes(self, service_name=''):
        """
        Returns a list of running ION processes in the container for the specified service name
        """
        proc_list = [p for p in self.procs.itervalues()
                     if p.process_type == PROCTYPE_SERVICE and p.name == service_name]
        return proc_list

    def is_local_service_process(self, service_name):
        """ Return True if a service process with the given name runs in this container. """
        local_services = self.list_local_processes(PROCTYPE_SERVICE)
        for p in local_services:
            if p.name == service_name:
                return True

        return False

    def is_local_agent_process(self, resource_type):
        """ Return True if an agent process for the given resource type runs in this container. """
        local_agents = self.list_local_processes(PROCTYPE_AGENT)
        for p in local_agents:
            if p.resource_type == resource_type:
                return True
        return False

    def _spawned_proc_failed(self, gproc):
        """ Supervisor callback when a spawned greenlet fails: terminate the
        rest of the owning ION process, notify FAILED, and optionally stop the
        container if it is configured to exit once empty. """
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

        prc = self._spawned_proc_to_process.get(gproc, None)

        # stop the rest of the process
        if prc is not None:
            try:
                # do_notifications=False: FAILED is published below instead of
                # TERMINATING/TERMINATED
                self.terminate_process(prc.id, False)
            except Exception as e:
                log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
            finally:
                self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
        else:
            log.warn("No ION process found for failed proc manager child: %s", gproc)

        # Stop the container if this was the last process
        if not self.procs and CFG.get_safe("container.process.exit_once_empty", False):
            self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (
                gproc, gproc.exception))

    def add_proc_state_changed_callback(self, cb):
        """
        Adds a callback to be called when a process' state changes.

        The callback should take three parameters: The process, the state, and the container.
        """
        self._proc_state_change_callbacks.append(cb)

    def remove_proc_state_changed_callback(self, cb):
        """
        Removes a callback from the process state change callback list.

        If the callback is not registered, this method does nothing.
        """
        if cb in self._proc_state_change_callbacks:
            self._proc_state_change_callbacks.remove(cb)

    def _call_proc_state_changed(self, svc, state):
        """
        Internal method to call all registered process state change callbacks.

        Note: svc may be a process instance or, for PENDING, a "module.cls" string
        (see spawn_process).
        """
        #log.debug("Proc State Changed (%s): %s", ProcessStateEnum._str_map.get(state, state), svc)
        for cb in self._proc_state_change_callbacks:
            cb(svc, state, self.container)

        # Trigger event
        if self.publish_events:
            self._publish_process_event(svc, state)

    def _create_listening_endpoint(self, **kwargs):
        """
        Creates a listening endpoint for spawning processes.

        This method exists to be able to override the type created via configuration.
        In most cases it will create a ProcessRPCServer.
        """
        eptypestr = CFG.get_safe('container.messaging.endpoint.proc_listening_type', None)
        if eptypestr is not None:
            module, cls = eptypestr.rsplit('.', 1)
            mod = __import__(module, fromlist=[cls])
            eptype = getattr(mod, cls)
            ep = eptype(**kwargs)
        else:
            ep = ProcessRPCServer(**kwargs)
        return ep

    def _add_process_publishers(self, process_instance, config):
        """ Attach declared stream publishers (config process.publish_streams)
        to the process instance; returns the list of attribute names added. """
        # Add publishers if declared...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)
        return pub_names

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    # - has service listen binding/queue and RPC interface

    def _spawn_service_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a service worker.

        Attach to service queue with service definition, attach to service pid
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        listen_name = get_safe(config, "process.listen_name") or process_instance.name
        listen_name_xo = self.container.create_service_xn(listen_name)
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=listen_name_xo,
                                                process=process_instance)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc1],
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((str(listen_name),
                                                                                     process_instance.id)))

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    # - has stream listen binding/queue
    # - has publishers if declared

    def _spawn_stream_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a data stream process.

        Attach to subscription queue with process function.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        listeners = []

        # Stream listener
        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name,
                                                              callback=process_instance.call_process)
        listeners.append(process_instance.stream_subscriber)

        pub_names = self._add_process_publishers(process_instance, config)

        # Private PID listener
        # pid_listener_xo = self.container.create_process_xn(process_instance.id)
        # rsvc = self._create_listening_endpoint(node=self.container.node,
        #                                        from_name=pid_listener_xo,
        #                                        process=process_instance)
        # listeners.append(rsvc)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    # - has resource ID (or if non-existent PID) listen binding/queue
    # - has RPC interface

    def _spawn_agent_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as agent process.

        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        if not isinstance(process_instance, ResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")
        listeners = []

        # Set the resource ID if we get it through the config
        resource_id = get_safe(process_instance.CFG, "agent.resource_id")
        if resource_id:
            process_instance.resource_id = resource_id

            # Resource ID listener
            resource_id_xo = self.container.create_process_xn(resource_id)
            alistener = self._create_listening_endpoint(node=self.container.node,
                                                        from_name=resource_id_xo,
                                                        process=process_instance)
            listeners.append(alistener)
        else:
            # Private PID listener
            pid_listener_xo = self.container.create_process_xn(process_instance.id)
            rsvc = self._create_listening_endpoint(node=self.container.node,
                                                   from_name=pid_listener_xo,
                                                   process=process_instance)
            listeners.append(rsvc)

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        # Now call the on_init of the agent.
        self._process_init(process_instance)

        if not process_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        if not process_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" % process_instance.id)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    # - has PID binding/queue with RPC interface
    # - has publishers if declared

    def _spawn_standalone_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as standalone process.

        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        # Private PID listener
        pid_listener_xo = self.container.create_process_xn(process_instance.id, auto_delete=True)
        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=pid_listener_xo,
                                               process=process_instance)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    # - has publishers if declared

    def _spawn_simple_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as simple process.

        No attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_simple_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    # - will not be registered
    # - will be terminated right after start

    def _spawn_immediate_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as immediate one off process.

        No messaging attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        self._process_init(process_instance)
        self._process_start(process_instance)
        return process_instance

    # -----------------------------------------------------------------

    def _create_app_instance(self, process_id, name, module, cls, config, proc_attr):
        """
        Creates an instance of a BaseService, representing the app logic of a ION process.
        This is independent of the process type service, agent, standalone, etc.
        """
        # APP INSTANCE.
        app_instance = for_name(module, cls)
        if not isinstance(app_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % app_instance)

        # Set BaseService instance common attributes
        app_instance.errcause = ""
        app_instance.id = process_id
        app_instance.container = self.container
        app_instance.CFG = config
        app_instance._proc_name = name
        app_instance._proc_start_time = time.time()
        for att, att_val in proc_attr.iteritems():
            setattr(app_instance, att, att_val)

        # Unless the process has been started as part of another Org, default to the container Org or the ION Org
        if 'org_governance_name' in config:
            app_instance.org_governance_name = config['org_governance_name']
        else:
            app_instance.org_governance_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))

        # Add process state management, if applicable
        self._add_process_state(app_instance)

        # Check dependencies (RPC clients)
        self._check_process_dependencies(app_instance)

        return app_instance

    def _add_process_state(self, process_instance):
        """ Add stateful process operations, if applicable.

        Only applies to processes that already declare a _flush_state attribute
        (the stateful interface marker); replaces it and _load_state with
        implementations backed by the container's state repository, guarded by
        a per-process RLock. """
        # Only applies if the process implements stateful interface
        if hasattr(process_instance, "_flush_state"):
            def _flush_state():
                # Persist current _proc_state; keep the returned state object
                # (with its state cleared) for the next revision write
                with process_instance._state_lock:
                    state_obj = process_instance.container.state_repository.put_state(process_instance.id,
                                                                                      process_instance._proc_state,
                                                                                      state_obj=process_instance._proc_state_obj)
                    # Make sure memory footprint is low for larger states
                    state_obj.state = None
                    process_instance._proc_state_obj = state_obj
                    process_instance._proc_state_changed = False

            def _load_state():
                # Replace _proc_state contents with the persisted state, if any.
                # Missing persisted state is normal (debug); other errors warn.
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                try:
                    with process_instance._state_lock:
                        new_state, state_obj = process_instance.container.state_repository.get_state(process_instance.id)
                        process_instance._proc_state.clear()
                        process_instance._proc_state.update(new_state)
                        process_instance._proc_state_obj = state_obj
                        process_instance._proc_state_changed = False
                except NotFound as nf:
                    log.debug("No persisted state available for process %s", process_instance.id)
                except Exception as ex:
                    log.warn("Process %s load state failed: %s", process_instance.id, str(ex))

            process_instance._flush_state = _flush_state
            process_instance._load_state = _load_state
            process_instance._state_lock = RLock()
            process_instance._proc_state = {}
            process_instance._proc_state_obj = None
            process_instance._proc_state_changed = False

            # PROCESS RESTART: Need to check whether this process had persisted state.
            # Note: This could happen anytime during a system run, not just on RESTART boot
            log.debug("Loading persisted state for process %s", process_instance.id)
            process_instance._load_state()

    def _check_process_dependencies(self, app_instance):
        """ Wire up the process's declared RPC client dependencies with the
        process and messaging node references they need. """
        app_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", app_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in app_instance.dependencies:
            client = getattr(app_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = app_instance
            client.node = self.container.node

            # Ensure that dep actually exists and is running?

    def _process_init(self, process_instance):
        """ Initialize the process, primarily by calling on_init() """
        process_instance.errcause = "initializing service"
        process_instance.init()

    def _process_start(self, process_instance):
        """ Start the process, primarily by calling on_start() """
        # Should this be after spawn_process?
        # Should we check for timeout?
        process_instance.errcause = "starting service"
        process_instance.start()

    def _process_quit(self, process_instance):
        """ Common method to handle process stopping. """
        process_instance.errcause = "quitting process"

        # Give the process notice to quit doing stuff.
        process_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        # @TODO: move this into process' on_quit()
        if getattr(process_instance, '_process', None) is not None and process_instance._process:
            process_instance._process.notify_stop()
            process_instance._process.stop()

    def _set_publisher_endpoints(self, process_instance, publisher_streams=None):
        """ Creates and attaches named stream publishers.

        @param publisher_streams  Mapping of attribute name -> stream id.
        @retval                   List of attribute names that were set.
        """
        publisher_streams = publisher_streams or {}
        names = []

        for name, stream_id in publisher_streams.iteritems():
            # NOTE(review): original marker comment said "problem is here" —
            # likely refers to publisher setup/teardown lifecycle; confirm
            pub = StreamPublisher(process=process_instance, stream_id=stream_id)

            setattr(process_instance, name, pub)
            names.append(name)

        return names

    def _register_process(self, process_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            # Known limitation: the existing entry gets overwritten below
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = process_instance
        self.procs[process_instance.id] = process_instance

        # Add Process to resource registry
        process_instance.errcause = "registering"

        if process_instance._proc_type != PROCTYPE_IMMEDIATE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                proc_obj = Process(name=process_instance.id, label=name, process_type=process_instance._proc_type,
                                   service_name=getattr(process_instance, "name", None) or "",
                                   process_state=ProcessStateEnum.RUNNING)
                proc_id, _ = self.container.resource_registry.create(proc_obj)
                process_instance._proc_res_id = proc_id

                # Associate process with container resource
                self.container.resource_registry.create_association(self.cc_id, PRED.hasProcess, proc_id)
        else:
            process_instance._proc_res_id = None

        # Process type specific registration
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Registration of SERVICE process: in resource registry
                service_list, _ = self.container.resource_registry.find_resources(
                    restype=RT.Service, name=process_instance.name, id_only=True)
                if service_list:
                    process_instance._proc_svc_id = service_list[0]
                    if len(service_list) > 1:
                        log.warn("More than 1 Service resource found with name %s: %s",
                                 process_instance.name, service_list)
                else:
                    # We are starting the first process of a service instance
                    # TODO: This should be created by the HA Service agent in the future
                    svc_obj = Service(name=process_instance.name,
                                      exchange_name=process_instance._proc_listen_name,
                                      state=ServiceStateEnum.READY)
                    process_instance._proc_svc_id, _ = self.container.resource_registry.create(svc_obj)

                    # Create association to service definition resource
                    svcdef_list, _ = self.container.resource_registry.find_resources(
                        restype=RT.ServiceDefinition, name=process_instance.name, id_only=True)
                    if svcdef_list:
                        if len(svcdef_list) > 1:
                            log.warn("More than 1 ServiceDefinition resource found with name %s: %s",
                                     process_instance.name, svcdef_list)
                        self.container.resource_registry.create_association(
                            process_instance._proc_svc_id, PRED.hasServiceDefinition, svcdef_list[0])
                    else:
                        log.error("Cannot find ServiceDefinition resource for %s", process_instance.name)

                self.container.resource_registry.create_association(
                    process_instance._proc_svc_id, PRED.hasProcess, proc_id)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                # Registration of AGENT process: in Directory
                caps = process_instance.get_capabilities()
                self.container.directory.register("/Agents", process_instance.id,
                                                  **dict(name=process_instance._proc_name,
                                                         container=process_instance.container.id,
                                                         resource_id=process_instance.resource_id,
                                                         agent_id=process_instance.agent_id,
                                                         def_id=process_instance.agent_def_id,
                                                         capabilities=caps))

        self._call_proc_state_changed(process_instance, ProcessStateEnum.RUNNING)

    def terminate_process(self, process_id, do_notifications=True):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.

        @param process_id        The id of the process to terminate. Should exist in the container's
                                 list of processes or this will raise.
        @param do_notifications  If True, emits process state changes for TERMINATING and TERMINATED.
                                 If False, suppresses any state changes. Used near EXITED and FAILED.
        """
        process_instance = self.procs.get(process_id, None)
        if not process_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                process_id, self.container.id))

        log.info("ProcManager.terminate_process: %s -> pid=%s", process_instance._proc_name, process_id)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATING)

        self._process_quit(process_instance)

        self._unregister_process(process_id, process_instance)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATED)

    def _unregister_process(self, process_id, process_instance):
        """ Undo everything _register_process did: remove registry resource,
        type-specific registrations, and the local process dict entries. """
        # Remove process registration in resource registry
        if process_instance._proc_res_id:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                try:
                    self.container.resource_registry.delete(process_instance._proc_res_id, del_associations=True)
                except NotFound:
                    # OK if already gone
                    pass
                except Exception as ex:
                    # Best-effort: log and continue unregistering
                    log.exception(ex)
                    pass

        # Cleanup for specific process types
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Check if this is the last process for this service and do auto delete service resources here
                svcproc_list, _ = self.container.resource_registry.find_objects(
                    process_instance._proc_svc_id, PRED.hasProcess, RT.Process, id_only=True)
                if not svcproc_list:
                    try:
                        self.container.resource_registry.delete(process_instance._proc_svc_id, del_associations=True)
                    except NotFound:
                        # OK if already gone
                        pass
                    except Exception as ex:
                        log.exception(ex)
                        pass

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                self.container.directory.unregister_safe("/Agents", process_instance.id)

        # Remove internal registration in container
        del self.procs[process_id]
        if process_instance._proc_name in self.procs_by_name:
            del self.procs_by_name[process_instance._proc_name]
        else:
            log.warn("Process name %s not in local registry", process_instance.name)

    def _publish_process_event(self, proc_inst, state):
        """ Publish a ProcessLifecycleEvent for the given process instance and
        state. proc_inst may be a str (a PENDING process without an instance),
        in which case no event is published. """
        sub_type = ProcessStateEnum._str_map.get(state, state)
        if isinstance(proc_inst, basestring):
            # self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
            #                              origin=proc_inst, origin_type=RT.Process, sub_type=sub_type,
            #                              state=state,
            #                              container_id=self.container.id,
            #                              process_type="", process_name=proc_inst,
            #                              process_resource_id="", service_name="")
            # This is a PENDING process without process_id
            pass
        else:
            try:
                self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
                                             origin=getattr(proc_inst, "id", "PD"), origin_type=RT.Process,
                                             sub_type=sub_type,
                                             state=state,
                                             container_id=self.container.id,
                                             process_type=getattr(proc_inst, "_proc_type", ""),
                                             process_name=getattr(proc_inst, "_proc_name", ""),
                                             process_resource_id=getattr(proc_inst, "_proc_res_id", ""),
                                             service_name=getattr(proc_inst, "name", ""))
            except Exception:
                # Event publication must not break process management
                log.exception("Could not publish process event")

    # -----------------------------------------------------------------

    def _start_process_dispatcher(self):
        """ Instantiate and start the process dispatcher core.
        Imported lazily to avoid a hard dependency when PD is disabled. """
        from ion.core.process.pd_core import ProcessDispatcher
        self.pd_core = ProcessDispatcher(container=self.container, config=self.pd_cfg)
        self.pd_core.start()

    def _stop_process_dispatcher(self):
        """ Stop the process dispatcher core, if one was started. """
        if self.pd_core:
            self.pd_core.stop()
    # NOTE(review): duplicate definition -- an identical _start_process_dispatcher
    # appears earlier in this class; this later `def` rebinds the name to the same
    # behavior. One of the two copies should be removed.
    def _start_process_dispatcher(self):
        # Deferred import -- presumably avoids an import cycle at module load; confirm
        from ion.core.process.pd_core import ProcessDispatcher
        self.pd_core = ProcessDispatcher(container=self.container, config=self.pd_cfg)
        self.pd_core.start()