Exemplo n.º 1
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to process_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(
            heartbeat_secs=CFG.cc.timeout.heartbeat,
            failure_notify_callback=self._spawned_proc_failed)

        # list of callbacks for process state changes
        self._proc_state_change_callbacks = []

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()

        # Register container as resource object
        cc_obj = CapabilityContainer(name=self.container.id,
                                     cc_agent=self.container.name)
        self.cc_id, _ = self.container.resource_registry.create(cc_obj)

        #Create an association to an Org object if not the rot ION org and only if found
        if CFG.container.org_name and CFG.container.org_name != CFG.system.root_org:
            org, _ = self.container.resource_registry.find_resources(
                restype=RT.Org, name=CFG.container.org_name, id_only=True)
            if org:
                self.container.resource_registry.create_association(
                    org[0], PRED.hasResource,
                    self.cc_id)  # TODO - replace with proper association

        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
        stats1 = CouchDB_DataStore._stats.get_stats()

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
        #        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
        #        join(gls)
        procs_list = sorted(self.procs.values(),
                            key=lambda proc: proc._proc_start_time,
                            reverse=True)

        for proc in procs_list:
            try:
                self.terminate_process(proc.id)
            except Exception as ex:
                log.warn("Failed to terminate process (%s): %s", proc.id, ex)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s",
                     self.procs_by_name)

        # Remove Resource registration
        try:
            self.container.resource_registry.delete(self.cc_id,
                                                    del_associations=True)
        except NotFound:
            # already gone, this is ok
            pass
        # TODO: Check associations to processes

        stats2 = CouchDB_DataStore._stats.get_stats()

        stats3 = CouchDB_DataStore._stats.diff_stats(stats2, stats1)
        log.debug("Datastore stats difference during stop(): %s", stats3)

        log.debug("ProcManager stopped, OK.")

    def spawn_process(self,
                      name=None,
                      module=None,
                      cls=None,
                      config=None,
                      process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """

        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" %
                             process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id,
                                              self.proc_id_pool.get_id())
        log.debug(
            "ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s",
            name, module, cls, config, process_id)

        process_cfg = deepcopy(CFG)
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg,
                           self.container.spawn_args,
                           inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of: service, stream_process, agent, simple, immediate

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(process_cfg, "process.type") or getattr(
            service_cls, "process_type", "service")

        process_start_mode = get_safe(config, "process.start_mode")

        process_instance = None

        # alert we have a spawning process, but we don't have the instance yet, so give the class instead (more accurate than name)
        self._call_proc_state_changed("%s.%s" % (module, cls),
                                      ProcessStateEnum.PENDING)

        try:
            # spawn service by type
            if process_type == "service":
                process_instance = self._spawn_service_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "stream_process":
                process_instance = self._spawn_stream_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "agent":
                process_instance = self._spawn_agent_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "standalone":
                process_instance = self._spawn_standalone_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "immediate":
                process_instance = self._spawn_immediate_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "simple":
                process_instance = self._spawn_simple_process(
                    process_id, name, module, cls, process_cfg)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            process_instance._proc_type = process_type
            self._register_process(process_instance, name)

            process_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module,
                     cls, process_id)

            if process_type == 'immediate':
                log.info('Terminating immediate process: %s',
                         process_instance.id)
                self.terminate_process(process_instance.id)

                # terminate process also triggers TERMINATING/TERMINATED
                self._call_proc_state_changed(process_instance,
                                              ProcessStateEnum.EXITED)
            else:
                #Shouldn't be any policies for immediate processes
                self.update_container_policies(process_instance)

            return process_instance.id

        except IonProcessError:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)
            return None

        except Exception:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)

            # trigger failed notification - catches problems in init/start
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.FAILED)

            raise

    #This must be called after registering the process
    def update_container_policies(self, process_instance):

        if not self.container.governance_controller:
            return

        if process_instance._proc_type == "service":

            # look to load any existing policies for this service
            self.container.governance_controller.safe_update_service_access_policy(
                process_instance._proc_listen_name)

        if process_instance._proc_type == "agent":

            # look to load any existing policies for this agent service
            if process_instance.resource_type is None:
                self.container.governance_controller.safe_update_service_access_policy(
                    process_instance.name)
            else:
                self.container.governance_controller.safe_update_service_access_policy(
                    process_instance.resource_type)

            if process_instance.resource_id:
                # look to load any existing policies for this resource
                self.container.governance_controller.safe_update_resource_access_policy(
                    process_instance.resource_id)

    def list_local_processes(self, process_type=''):
        """
        Returns a list of the running ION processes in the container or filtered by the process_type
        """
        if not process_type:
            return self.procs.values()

        return [
            p for p in self.procs.itervalues()
            if p.process_type == process_type
        ]

    def get_a_local_process(self, proc_name=''):
        """
        Returns a running ION processes in the container for the specified name
        """
        for p in self.procs.itervalues():

            if p.name == proc_name:
                return p

            if p.process_type == 'agent' and p.resource_type == proc_name:
                return p

        return None

    def is_local_service_process(self, service_name):
        local_services = self.list_local_processes('service')
        for p in local_services:
            if p.name == service_name:
                return True

        return False

    def is_local_agent_process(self, resource_type):
        local_agents = self.list_local_processes('agent')
        for p in local_agents:
            if p.resource_type == resource_type:
                return True
        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc,
                  gproc.exception)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

        #        # look it up in mapping
        #        if not gproc in self._spawned_proc_to_process:
        #            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
        #            return
        #
        prc = self._spawned_proc_to_process.get(gproc, None)
        #
        #        # make sure prc is in our list
        #        if not prc in self.procs.values():
        #            log.warn("prc %s not found in procs list", prc)
        #            return

        # stop the rest of the process
        if prc is not None:
            try:
                self.terminate_process(prc.id, False)
            except Exception as e:
                log.warn(
                    "Problem while stopping rest of failed process %s: %s",
                    prc, e)
            finally:
                self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
        else:
            log.warn("No ION process found for failed proc manager child: %s",
                     gproc)

        #self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))

    def _cleanup_method(self, queue_name, ep=None):
        """
        Common method to be passed to each spawned ION process to clean up their process-queue.

        @TODO Leaks implementation detail, should be using XOs
        """
        if ep._chan is not None and not ep._chan._queue_auto_delete:
            # only need to delete if AMQP didn't handle it for us already!
            # @TODO this will not work with XOs (future)
            try:
                ch = self.container.node.channel(RecvChannel)
                ch._recv_name = NameTrio(
                    get_sys_name(), "%s.%s" % (get_sys_name(), queue_name))
                ch._destroy_queue()
            except TransportError as ex:
                log.warn("Cleanup method triggered an error, ignoring: %s", ex)

    def add_proc_state_changed_callback(self, cb):
        """
        Adds a callback to be called when a process' state changes.

        The callback should take three parameters: The process, the state, and the container.
        """
        self._proc_state_change_callbacks.append(cb)

    def remove_proc_state_changed_callback(self, cb):
        """
        Removes a callback from the process state change callback list.

        If the callback is not registered, this method does nothing.
        """
        if cb in self._proc_state_change_callbacks:
            self._proc_state_change_callbacks.remove(cb)

    def _call_proc_state_changed(self, svc, state):
        """
        Internal method to call all registered process state change callbacks.
        """
        log.debug("Proc State Changed (%s): %s",
                  ProcessStateEnum._str_map.get(state, state), svc)
        for cb in self._proc_state_change_callbacks:
            cb(svc, state, self.container)

    def _create_listening_endpoint(self, **kwargs):
        """
        Creates a listening endpoint for spawning processes.

        This method exists to be able to override the type created via configuration.
        In most cases it will create a ConversationRPCServer.
        """
        eptypestr = CFG.get_safe(
            'container.messaging.endpoint.proc_listening_type', None)
        if eptypestr is not None:
            module, cls = eptypestr.rsplit('.', 1)
            mod = __import__(module, fromlist=[cls])
            eptype = getattr(mod, cls)
            ep = eptype(**kwargs)
        else:
            conv_enabled = CFG.get_safe(
                'container.messaging.endpoint.rpc_conversation_enabled', False)
            if conv_enabled:
                ep = ConversationRPCServer(**kwargs)
            else:
                ep = ProcessRPCServer(**kwargs)
        return ep

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)

        listen_name = get_safe(config,
                               "process.listen_name") or process_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=listen_name,
                                                service=process_instance,
                                                process=process_instance)
        # Named local RPC endpoint
        rsvc2 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=process_instance.id,
                                                service=process_instance,
                                                process=process_instance)

        # cleanup method to delete process queue
        cleanup = lambda _: self._cleanup_method(process_instance.id, rsvc2)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_service_process for %s" % ",".join(
                (listen_name, process_instance.id)))

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(
            process=process_instance,
            exchange_name=listen_name,
            callback=process_instance.call_process)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance,
                                                  publish_streams)

        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               service=process_instance,
                                               process=process_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            self._cleanup_method(process_instance.id, rsvc)
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(
            name=process_instance.id,
            service=process_instance,
            listeners=[rsvc, process_instance.stream_subscriber],
            proc_name=process_instance._proc_name,
            cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)
        if not isinstance(process_instance, ResourceAgent) and not isinstance(
                process_instance, SimpleResourceAgent):
            raise ContainerConfigError(
                "Agent process must extend ResourceAgent")
        listeners = []

        # Set the resource ID if we get it through the config
        resource_id = get_safe(process_instance.CFG, "agent.resource_id")
        if resource_id:
            process_instance.resource_id = resource_id

            alistener = self._create_listening_endpoint(
                node=self.container.node,
                from_name=resource_id,
                service=process_instance,
                process=process_instance)

            listeners.append(alistener)

        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               service=process_instance,
                                               process=process_instance)

        listeners.append(rsvc)

        # cleanup method to delete process/agent queue (@TODO: leaks a bit here - should use XOs)
        def agent_cleanup(x):
            self._cleanup_method(process_instance.id, rsvc)
            if resource_id:
                pass
                #self._cleanup_method(resource_id, alistener)
                # disabled, it's probably not architecturally correct to delete this queue

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=agent_cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_agent_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        # Now call the on_init of the agent.
        self._process_init(process_instance)

        if not process_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.FAILED)
            raise

        if not process_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" %
                     process_instance.id)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)
        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               service=process_instance,
                                               process=process_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance,
                                                  publish_streams)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            self._cleanup_method(process_instance.id, rsvc)
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_standalone_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)
        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance,
                                                  publish_streams)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_simple_process for %s" % process_instance.id)

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)
        self._process_init(process_instance)
        self._process_start(process_instance)
        return process_instance

    def _create_process_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        process_instance = for_name(module, cls)
        if not isinstance(process_instance, BaseService):
            raise ContainerConfigError(
                "Instantiated service not a BaseService %r" % process_instance)

        # Prepare service instance
        process_instance.errcause = ""
        process_instance.id = process_id
        process_instance.container = self.container
        process_instance.CFG = config
        process_instance._proc_name = name
        process_instance._proc_start_time = time.time()

        #Unless the process has been started as part of another Org, default to the container Org or the ION Org
        if config.has_key('org_name'):
            process_instance.org_name = config['org_name']
        else:
            process_instance.org_name = CFG.get_safe(
                'container.org_name', CFG.get_safe('system.root_org', 'ION'))

        # Add stateful process operations
        if hasattr(process_instance, "_flush_state"):

            def _flush_state():
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                    process_instance._proc_state_changed = False
                    return
                process_instance.container.state_repository.put_state(
                    process_instance.id, process_instance._proc_state)
                process_instance._proc_state_changed = False

            def _load_state():
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                try:
                    new_state = process_instance.container.state_repository.get_state(
                        process_instance.id)
                    process_instance._proc_state.clear()
                    process_instance._proc_state.update(new_state)
                    process_instance._proc_state_changed = False
                except Exception as ex:
                    log.warn("Process %s load state failed: %s",
                             process_instance.id, str(ex))

            process_instance._flush_state = _flush_state
            process_instance._load_state = _load_state

        process_start_mode = get_safe(config, "process.start_mode")
        if process_start_mode == "RESTART":
            if hasattr(process_instance, "_load_state"):
                process_instance._load_state()

        # start service dependencies (RPC clients)
        self._start_process_dependencies(process_instance)

        return process_instance

    def _start_process_dependencies(self, process_instance):
        process_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s",
                  process_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in process_instance.dependencies:
            client = getattr(process_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = process_instance
            client.node = self.container.node

            # ensure that dep actually exists and is running
            # MM: commented out - during startup (init actually), we don't need to check for service dependencies
            # MM: TODO: split on_init from on_start; start consumer in on_start; check for full queues on restart
#            if process_instance.name != 'bootstrap' or (process_instance.name == 'bootstrap' and process_instance.CFG.level == dependency):
#                svc_de = self.container.resource_registry.find_resources(restype="Service", name=dependency, id_only=True)
#                if not svc_de:
#                    raise ContainerConfigError("Dependency for service %s not running: %s" % (process_instance.name, dependency))

    def _process_init(self, process_instance):
        # Init process
        process_instance.errcause = "initializing service"
        process_instance.init()

    def _process_start(self, process_instance):
        # Start process
        # THIS SHOULD BE CALLED LATER THAN SPAWN
        # TODO: Check for timeout
        process_instance.errcause = "starting service"
        process_instance.start()

    def _process_quit(self, process_instance):
        """
        Common method to handle process stopping.
        """
        process_instance.errcause = "quitting process"

        # Give the process notice to quit doing stuff.
        process_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        # @TODO: move this into process' on_quit()
        if getattr(process_instance, '_process',
                   None) is not None and process_instance._process:
            process_instance._process.notify_stop()
            process_instance._process.stop()

    def _set_publisher_endpoints(self,
                                 process_instance,
                                 publisher_streams=None):

        publisher_streams = publisher_streams or {}
        names = []

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = StreamPublisher(process=process_instance,
                                  stream_id=stream_id)

            setattr(process_instance, name, pub)
            names.append(name)

        return names

    def _register_process(self, process_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = process_instance
        self.procs[process_instance.id] = process_instance

        # Add Process to resource registry
        # Note: In general the Process resource should be created by the CEI PD, but not all processes are CEI
        # processes. How to deal with this?
        process_instance.errcause = "registering"

        if process_instance._proc_type != "immediate":
            proc_obj = Process(name=process_instance.id,
                               label=name,
                               proctype=process_instance._proc_type)
            proc_id, _ = self.container.resource_registry.create(proc_obj)
            process_instance._proc_res_id = proc_id

            # Associate process with container resource
            self.container.resource_registry.create_association(
                self.cc_id, "hasProcess", proc_id)
        else:
            process_instance._proc_res_id = None

        # Process type specific registration
        # TODO: Factor out into type specific handler functions
        if process_instance._proc_type == "service":
            # Registration of SERVICE process: in resource registry
            service_list, _ = self.container.resource_registry.find_resources(
                restype="Service", name=process_instance.name)
            if service_list:
                process_instance._proc_svc_id = service_list[0]._id
            else:
                # We are starting the first process of a service instance
                # TODO: This should be created by the HA Service agent in the future
                svc_obj = Service(
                    name=process_instance.name,
                    exchange_name=process_instance._proc_listen_name,
                    state=ServiceStateEnum.READY)
                process_instance._proc_svc_id, _ = self.container.resource_registry.create(
                    svc_obj)

                # Create association to service definition resource
                svcdef_list, _ = self.container.resource_registry.find_resources(
                    restype="ServiceDefinition", name=process_instance.name)
                if svcdef_list:
                    self.container.resource_registry.create_association(
                        process_instance._proc_svc_id, "hasServiceDefinition",
                        svcdef_list[0]._id)
                else:
                    log.error("Cannot find ServiceDefinition resource for %s",
                              process_instance.name)

            self.container.resource_registry.create_association(
                process_instance._proc_svc_id, "hasProcess", proc_id)

        elif process_instance._proc_type == "agent":
            # Registration of AGENT process: in Directory
            caps = process_instance.get_capabilities()
            self.container.directory.register(
                "/Agents", process_instance.id,
                **dict(name=process_instance._proc_name,
                       container=process_instance.container.id,
                       resource_id=process_instance.resource_id,
                       agent_id=process_instance.agent_id,
                       def_id=process_instance.agent_def_id,
                       capabilities=caps))

        self._call_proc_state_changed(process_instance,
                                      ProcessStateEnum.RUNNING)

    def terminate_process(self, process_id, do_notifications=True):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.

        @param  process_id          The id of the process to terminate. Should exist in the container's
                                    list of processes or this will raise.
        @param  do_notifications    If True, emits process state changes for TERMINATING and TERMINATED.
                                    If False, supresses any state changes. Used near EXITED and FAILED.
        """
        process_instance = self.procs.get(process_id, None)
        if not process_instance:
            raise BadRequest(
                "Cannot terminate. Process id='%s' unknown on container id='%s'"
                % (process_id, self.container.id))

        log.info("ProcManager.terminate_process: %s -> pid=%s",
                 process_instance._proc_name, process_id)

        if do_notifications:
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.TERMINATING)

        self._process_quit(process_instance)

        self._unregister_process(process_id, process_instance)

        if do_notifications:
            self._call_proc_state_changed(process_instance,
                                          ProcessStateEnum.TERMINATED)

    def _unregister_process(self, process_id, process_instance):
        # Remove process registration in resource registry
        if process_instance._proc_res_id:
            try:
                self.container.resource_registry.delete(
                    process_instance._proc_res_id, del_associations=True)
            except NotFound:  #, HTTPException):
                # if it's already gone, it's already gone!
                pass

            except Exception, ex:
                log.exception(ex)
                pass

        # Cleanup for specific process types
        if process_instance._proc_type == "service":
            # Check if this is the last process for this service and do auto delete service resources here
            svcproc_list = []
            try:
                svcproc_list, _ = self.container.resource_registry.find_objects(
                    process_instance._proc_svc_id,
                    "hasProcess",
                    "Process",
                    id_only=True)
            except ResourceNotFound:
                # if it's already gone, it's already gone!
                pass

            if not svcproc_list:
                try:
                    self.container.resource_registry.delete(
                        process_instance._proc_svc_id, del_associations=True)
                except NotFound:
                    # if it's already gone, it's already gone!
                    pass

                except Exception, ex:
                    log.exception(ex)
                    pass
Exemplo n.º 2
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API, and add
        self.container_api = [self.spawn_process, self.terminate_process]
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Registry of running processes
        self.procs = {}
        self.procs_by_name = {}   # BAD: This is not correct if procs have the same name

        # mapping of greenlets we spawn to process_instances for error handling
        self._spawned_proc_to_process = {}

        # Effective execution engine config (after merging in child process overrides)
        self.ee_cfg = self._get_execution_engine_config()

        # Process dispatcher (if configured/enabled and not a child container process)
        self.pd_cfg = CFG.get_safe("service.process_dispatcher") or {}
        self.pd_enabled = self.pd_cfg.get("enabled", False) is True and not self.ee_cfg["container"]["is_child"]
        self.pd_core = None

        self.event_pub = EventPublisher()
        self.publish_events = CFG.get_safe("container.process.publish_events") is True

        # Passive manager for simple threads/greenlets, to keep them registered (these are not OS threads)
        # Note that each ION process has its own thread manager, so this is for container level threads
        self.thread_manager = ThreadManager(heartbeat_secs=None, failure_notify_callback=None)

        # Active supervisor for ION processes
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.get_safe("container.timeout.heartbeat"),
                                                failure_notify_callback=self._spawned_proc_failed)

        # list of callbacks for process state changes
        self._proc_state_change_callbacks = []

    def start(self):
        log.debug("ProcManager starting ...")

        if self.pd_enabled:
            self._start_process_dispatcher()

        self.proc_sup.start()

        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            # Register container as resource object
            cc_obj = self._get_capability_container_object()
            self.cc_id, _ = self.container.resource_registry.create(cc_obj)

            # Create an association to an Org object if not the rot ION org and only if found
            if CFG.get_safe("container.org_name") != CFG.get_safe("system.root_org"):
                org, _ = self.container.resource_registry.find_resources(
                        restype=RT.Org, name=CFG.get_safe("container.org_name"), id_only=True)
                if org:
                    self.container.resource_registry.create_association(org[0], PRED.hasResource, self.cc_id)  # TODO - replace with proper association

        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up in reverse order
        procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)
        for proc in procs_list:
            try:
                self.terminate_process(proc.id)
            except Exception as ex:
                log.warn("Failed to terminate process (%s): %s", proc.id, ex)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.get_safe("container.timeout.shutdown"))

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

        # Remove Resource registration
        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            try:
                self.container.resource_registry.delete(self.cc_id, del_associations=True)
            except NotFound:
                # already gone, this is ok
                pass

        if self.pd_enabled:
            self._stop_process_dispatcher()

        log.debug("ProcManager stopped, OK.")

    def _get_execution_engine_config(self):
        ee_base_cfg = CFG.get_safe("container.execution_engine") or {}
        if ee_base_cfg.get("type", None) != "scioncc":
            raise ContainerConfigError("Execution engine config invalid: %s", ee_base_cfg)

        ee_cfg = deepcopy(ee_base_cfg)

        # If we are a child process, merge in child config override
        proc_name = multiprocessing.current_process().name
        ee_cfg["container"] = dict(child_proc_name=proc_name, is_child=False)
        child_cfgs = ee_base_cfg.get("child_configs", None) or {}
        if proc_name.startswith("Container-child-"):
            ee_cfg["container"]["is_child"] = True
            if proc_name in child_cfgs:
                log.info("Applying execution engine config override for child: %s", proc_name)
                dict_merge(ee_cfg, child_cfgs[proc_name], inplace=True)
            else:
                for cfg_name, ch_cfg in child_cfgs.iteritems():
                    pattern = ch_cfg.get("name_pattern", None)
                    if pattern and re.match(pattern, proc_name):
                        log.info("Applying execution engine config override %s for child: %s", cfg_name, proc_name)
                        dict_merge(ee_cfg, ch_cfg, inplace=True)
                        break

        ee_cfg.pop("child_configs", None)
        return ee_cfg

    def _get_capability_container_object(self):
        container_info = dict(proc_name=multiprocessing.current_process().name,
                              process_id=os.getpid(),
                              parent_process_id=os.getppid(),
                              hostname=socket.gethostname(),
                              host=socket.gethostbyname(socket.gethostname()),
                              platform=sys.platform,
                              argv=sys.argv,
                              python_version=sys.version,
                              cwd=os.getcwd(),
                              start_time=self.container.start_time,
                              )
        # Other possibilities: username, os package versions, IP address
        host_info = {k: v for (k, v) in zip(("os_sysname", "os_nodename", "os_release", "os_version", "os_machine"), os.uname())}
        container_info.update(host_info)
        container_info["env"] = {k: str(v) for (k,v) in os.environ.iteritems()}
        container_info["python_path"] = sys.path
        cc_obj = CapabilityContainer(name=self.container.id, version=self.container.version,
                                     cc_agent=self.container.name,
                                     container_info=container_info,
                                     execution_engine_config=self.ee_cfg)
        return cc_obj

    # -----------------------------------------------------------------

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """
        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # PROCESS ID. Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s", name, module, cls, config, process_id)

        # CONFIG
        process_cfg = self._create_process_config(config)

        try:
            service_cls = named_any("%s.%s" % (module, cls))
        except AttributeError as ae:
            # Try to nail down the error
            import importlib
            importlib.import_module(module)
            raise

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        process_type = get_safe(process_cfg, "process.type") or getattr(service_cls, "process_type", PROCTYPE_SERVICE)

        process_start_mode = get_safe(config, "process.start_mode")

        process_instance = None

        # alert we have a spawning process, but we don't have the instance yet, so give the class instead (more accurate than name)
        # Note: this uses a str as first argument instead of a process instance
        self._call_proc_state_changed("%s.%s" % (module, cls), ProcessStateEnum.PENDING)

        try:
            # Additional attributes to set with the process instance
            proc_attr = {"_proc_type": process_type,
                         "_proc_spawn_cfg": config
                         }

            # SPAWN.  Determined by type
            if process_type == PROCTYPE_SERVICE:
                process_instance = self._spawn_service_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STREAMPROC:
                process_instance = self._spawn_stream_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_AGENT:
                process_instance = self._spawn_agent_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STANDALONE:
                process_instance = self._spawn_standalone_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_IMMEDIATE:
                process_instance = self._spawn_immediate_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_SIMPLE:
                process_instance = self._spawn_simple_process(process_id, name, module, cls, process_cfg, proc_attr)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            # REGISTER.
            self._register_process(process_instance, name)

            process_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == PROCTYPE_IMMEDIATE:
                log.debug('Terminating immediate process: %s', process_instance.id)
                self.terminate_process(process_instance.id)

                # Terminate process also triggers TERMINATING/TERMINATED
                self._call_proc_state_changed(process_instance, ProcessStateEnum.EXITED)

            else:
                # Update local policies for the new process
                if self.container.has_capability(self.container.CCAP.GOVERNANCE_CONTROLLER):
                    self.container.governance_controller.update_process_policies(
                                process_instance, safe_mode=True, force_update=False)

            return process_instance.id

        except IonProcessError:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)
            return None

        except Exception:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)

            # trigger failed notification - catches problems in init/start
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)

            raise

    def _create_process_config(self, config):
        """ Prepare the config for the new process. Clone system config and apply process overrides.
        Support including config by reference of a resource attribute or object from object store.
        """
        process_cfg = deepcopy(CFG)
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            if config.get_safe("process.config_ref"):
                # Use a reference
                config_ref = config.get_safe("process.config_ref")
                log.info("Enhancing new process spawn config from ref=%s" % config_ref)
                matches = re.match(r'^([A-Za-z]+):([A-Za-z0-9_\.]+)/(.*)$', config_ref)
                if matches:
                    ref_type, ref_id, ref_ext = matches.groups()
                    if ref_type == "resources":
                        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                            try:
                                obj = self.container.resource_registry.read(ref_id)
                                if obj and hasattr(obj, ref_ext):
                                    ref_config = getattr(obj, ref_ext)
                                    if isinstance(ref_config, dict):
                                        dict_merge(process_cfg, ref_config, inplace=True)
                                    else:
                                        raise BadRequest("config_ref %s exists but not dict" % config_ref)
                                else:
                                    raise BadRequest("config_ref %s - attribute not found" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing RESOURCE_REGISTRY capability to resolve process config ref %s" % config_ref)
                    elif ref_type == "objects":
                        if self.container.has_capability(self.container.CCAP.OBJECT_STORE):
                            try:
                                obj = self.container.object_store.read_doc(ref_id)
                                ref_config = obj
                                if ref_ext:
                                    ref_config = get_safe(obj, ref_ext, None)
                                    if ref_config is None:
                                        raise BadRequest("config_ref %s - attribute not found" % config_ref)

                                if isinstance(ref_config, dict):
                                    dict_merge(process_cfg, ref_config, inplace=True)
                                else:
                                    raise BadRequest("config_ref %s exists but not dict" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing OBJECT_STORE capability to resolve process config ref %s" % config_ref)
                    else:
                        raise BadRequest("Unknown reference type in: %s" % config_ref)

            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)
        return process_cfg

    def list_local_processes(self, process_type=''):
        """ Returns a list of the running ION processes in the container or filtered by the process_type
        """
        if not process_type:
            return self.procs.values()

        return [p for p in self.procs.itervalues() if p.process_type == process_type]

    def get_a_local_process(self, proc_name=''):
        """ Returns a running ION process in the container for the specified process name
        """
        for p in self.procs.itervalues():
            if p.name == proc_name:
                return p

            if p.process_type == PROCTYPE_AGENT and p.resource_type == proc_name:
                return p

        return None

    def get_local_service_processes(self, service_name=''):
        """ Returns a list of running ION processes in the container for the specified service name
        """
        proc_list = [p for p in self.procs.itervalues() if p.process_type == PROCTYPE_SERVICE and p.name == service_name]
        return proc_list

    def is_local_service_process(self, service_name):
        local_services = self.list_local_processes(PROCTYPE_SERVICE)
        for p in local_services:
            if p.name == service_name:
                return True

        return False

    def is_local_agent_process(self, resource_type):
        local_agents = self.list_local_processes(PROCTYPE_AGENT)
        for p in local_agents:
            if p.resource_type == resource_type:
                return True
        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

        prc = self._spawned_proc_to_process.get(gproc, None)

        # stop the rest of the process
        if prc is not None:
            try:
                self.terminate_process(prc.id, False)
            except Exception as e:
                log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
            finally:
                self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
        else:
            log.warn("No ION process found for failed proc manager child: %s", gproc)

        # Stop the container if this was the last process
        if not self.procs and CFG.get_safe("container.process.exit_once_empty", False):
            self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (gproc, gproc.exception))

    def add_proc_state_changed_callback(self, cb):
        """
        Adds a callback to be called when a process' state changes.

        The callback should take three parameters: The process, the state, and the container.
        """
        self._proc_state_change_callbacks.append(cb)

    def remove_proc_state_changed_callback(self, cb):
        """
        Removes a callback from the process state change callback list.

        If the callback is not registered, this method does nothing.
        """
        if cb in self._proc_state_change_callbacks:
            self._proc_state_change_callbacks.remove(cb)

    def _call_proc_state_changed(self, svc, state):
        """
        Internal method to call all registered process state change callbacks.
        """
        #log.debug("Proc State Changed (%s): %s", ProcessStateEnum._str_map.get(state, state), svc)
        for cb in self._proc_state_change_callbacks:
            cb(svc, state, self.container)

        # Trigger event
        if self.publish_events:
            self._publish_process_event(svc, state)

    def _create_listening_endpoint(self, **kwargs):
        """
        Creates a listening endpoint for spawning processes.

        This method exists to be able to override the type created via configuration.
        In most cases it will create a ProcessRPCServer.
        """
        eptypestr = CFG.get_safe('container.messaging.endpoint.proc_listening_type', None)
        if eptypestr is not None:
            module, cls     = eptypestr.rsplit('.', 1)
            mod             = __import__(module, fromlist=[cls])
            eptype          = getattr(mod, cls)
            ep              = eptype(**kwargs)
        else:
            ep = ProcessRPCServer(**kwargs)
        return ep

    def _add_process_publishers(self, process_instance, config):
        # Add publishers if declared...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)
        return pub_names

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    # - has service listen binding/queue and RPC interface
    def _spawn_service_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        listen_name = get_safe(config, "process.listen_name") or process_instance.name
        listen_name_xo = self.container.create_service_xn(listen_name)

        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=listen_name_xo,
                                                process=process_instance)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc1],
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((str(listen_name), process_instance.id)))

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    # - has stream listen binding/queue
    # - has publishers if declared
    def _spawn_stream_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        listeners = []

        # Stream listener
        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name,
                                                              callback=process_instance.call_process)
        listeners.append(process_instance.stream_subscriber)

        pub_names = self._add_process_publishers(process_instance, config)

        # Private PID listener
        # pid_listener_xo = self.container.create_process_xn(process_instance.id)
        # rsvc = self._create_listening_endpoint(node=self.container.node,
        #                                        from_name=pid_listener_xo,
        #                                        process=process_instance)
        # listeners.append(rsvc)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    # - has resource ID (or if non-existent PID) listen binding/queue
    # - has RPC interface
    def _spawn_agent_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        if not isinstance(process_instance, ResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")
        listeners = []

        # Set the resource ID if we get it through the config
        resource_id = get_safe(process_instance.CFG, "agent.resource_id")
        if resource_id:
            process_instance.resource_id = resource_id

            # Resource ID listener
            resource_id_xo = self.container.create_process_xn(resource_id)

            alistener = self._create_listening_endpoint(node=self.container.node,
                                                        from_name=resource_id_xo,
                                                        process=process_instance)

            listeners.append(alistener)

        else:
            # Private PID listener
            pid_listener_xo = self.container.create_process_xn(process_instance.id)
            rsvc = self._create_listening_endpoint(node=self.container.node,
                                                   from_name=pid_listener_xo,
                                                   process=process_instance)

            listeners.append(rsvc)

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        # Now call the on_init of the agent.
        self._process_init(process_instance)

        if not process_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        if not process_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" % process_instance.id)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    # - has PID binding/queue with RPC interface
    # - has publishers if declared
    def _spawn_standalone_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        # Private PID listener
        pid_listener_xo = self.container.create_process_xn(process_instance.id, auto_delete=True)
        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=pid_listener_xo,
                                               process=process_instance)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    # - has publishers if declared
    def _spawn_simple_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_simple_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    # - will not be registered
    # - will be terminated right after start
    def _spawn_immediate_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as immediate one off process.
        No messaging attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        self._process_init(process_instance)
        self._process_start(process_instance)
        return process_instance

    # -----------------------------------------------------------------

    def _create_app_instance(self, process_id, name, module, cls, config, proc_attr):
        """
        Creates an instance of a BaseService, representing the app logic of a ION process.
        This is independent of the process type service, agent, standalone, etc.
        """
        # APP INSTANCE.
        app_instance = for_name(module, cls)
        if not isinstance(app_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % app_instance)

        # Set BaseService instance common attributes
        app_instance.errcause = ""
        app_instance.id = process_id
        app_instance.container = self.container
        app_instance.CFG = config
        app_instance._proc_name = name
        app_instance._proc_start_time = time.time()
        for att, att_val in proc_attr.iteritems():
            setattr(app_instance, att, att_val)

        # Unless the process has been started as part of another Org, default to the container Org or the ION Org
        if 'org_governance_name' in config:
            app_instance.org_governance_name = config['org_governance_name']
        else:
            app_instance.org_governance_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))

        # Add process state management, if applicable
        self._add_process_state(app_instance)

        # Check dependencies (RPC clients)
        self._check_process_dependencies(app_instance)

        return app_instance

    def _add_process_state(self, process_instance):
        """ Add stateful process operations, if applicable
        """
        # Only applies if the process implements stateful interface
        if hasattr(process_instance, "_flush_state"):
            def _flush_state():
                with process_instance._state_lock:
                    state_obj = process_instance.container.state_repository.put_state(process_instance.id, process_instance._proc_state,
                                                                                      state_obj=process_instance._proc_state_obj)
                    state_obj.state = None   # Make sure memory footprint is low for larger states
                    process_instance._proc_state_obj = state_obj
                    process_instance._proc_state_changed = False

            def _load_state():
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                try:
                    with process_instance._state_lock:
                        new_state, state_obj = process_instance.container.state_repository.get_state(process_instance.id)
                        process_instance._proc_state.clear()
                        process_instance._proc_state.update(new_state)
                        process_instance._proc_state_obj = state_obj
                        process_instance._proc_state_changed = False
                except NotFound as nf:
                    log.debug("No persisted state available for process %s", process_instance.id)
                except Exception as ex:
                    log.warn("Process %s load state failed: %s", process_instance.id, str(ex))
            process_instance._flush_state = _flush_state
            process_instance._load_state = _load_state
            process_instance._state_lock = RLock()
            process_instance._proc_state = {}
            process_instance._proc_state_obj = None
            process_instance._proc_state_changed = False

            # PROCESS RESTART: Need to check whether this process had persisted state.
            # Note: This could happen anytime during a system run, not just on RESTART boot
            log.debug("Loading persisted state for process %s", process_instance.id)
            process_instance._load_state()

    def _check_process_dependencies(self, app_instance):
        app_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", app_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in app_instance.dependencies:
            client = getattr(app_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = app_instance
            client.node = self.container.node

            # Ensure that dep actually exists and is running?

    def _process_init(self, process_instance):
        """ Initialize the process, primarily by calling on_init()
        """
        process_instance.errcause = "initializing service"
        process_instance.init()

    def _process_start(self, process_instance):
        """ Start the process, primarily by calling on_start()
        """
        # Should this be after spawn_process?
        # Should we check for timeout?
        process_instance.errcause = "starting service"
        process_instance.start()

    def _process_quit(self, process_instance):
        """ Common method to handle process stopping.
        """
        process_instance.errcause = "quitting process"

        # Give the process notice to quit doing stuff.
        process_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        # @TODO: move this into process' on_quit()
        if getattr(process_instance, '_process', None) is not None and process_instance._process:
            process_instance._process.notify_stop()
            process_instance._process.stop()

    def _set_publisher_endpoints(self, process_instance, publisher_streams=None):
        """ Creates and attaches named stream publishers
        """
        publisher_streams = publisher_streams or {}
        names = []

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = StreamPublisher(process=process_instance, stream_id=stream_id)

            setattr(process_instance, name, pub)
            names.append(name)

        return names

    def _register_process(self, process_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = process_instance
        self.procs[process_instance.id] = process_instance

        # Add Process to resource registry
        process_instance.errcause = "registering"

        if process_instance._proc_type != PROCTYPE_IMMEDIATE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                proc_obj = Process(name=process_instance.id, label=name,
                                   process_type=process_instance._proc_type,
                                   service_name=getattr(process_instance, "name", None) or "",
                                   process_state=ProcessStateEnum.RUNNING)
                proc_id, _ = self.container.resource_registry.create(proc_obj)
                process_instance._proc_res_id = proc_id

                # Associate process with container resource
                self.container.resource_registry.create_association(self.cc_id, PRED.hasProcess, proc_id)
        else:
            process_instance._proc_res_id = None

        # Process type specific registration
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Registration of SERVICE process: in resource registry
                service_list, _ = self.container.resource_registry.find_resources(
                        restype=RT.Service, name=process_instance.name, id_only=True)
                if service_list:
                    process_instance._proc_svc_id = service_list[0]
                    if len(service_list) > 1:
                        log.warn("More than 1 Service resource found with name %s: %s", process_instance.name, service_list)
                else:
                    # We are starting the first process of a service instance
                    # TODO: This should be created by the HA Service agent in the future
                    svc_obj = Service(name=process_instance.name, exchange_name=process_instance._proc_listen_name,
                                      state=ServiceStateEnum.READY)
                    process_instance._proc_svc_id, _ = self.container.resource_registry.create(svc_obj)

                    # Create association to service definition resource
                    svcdef_list, _ = self.container.resource_registry.find_resources(
                            restype=RT.ServiceDefinition, name=process_instance.name, id_only=True)
                    if svcdef_list:
                        if len(svcdef_list) > 1:
                            log.warn("More than 1 ServiceDefinition resource found with name %s: %s", process_instance.name, svcdef_list)
                        self.container.resource_registry.create_association(
                                process_instance._proc_svc_id, PRED.hasServiceDefinition, svcdef_list[0])
                    else:
                        log.error("Cannot find ServiceDefinition resource for %s", process_instance.name)

                self.container.resource_registry.create_association(
                        process_instance._proc_svc_id, PRED.hasProcess, proc_id)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                # Registration of AGENT process: in Directory
                caps = process_instance.get_capabilities()
                self.container.directory.register("/Agents", process_instance.id,
                        **dict(name=process_instance._proc_name,
                               container=process_instance.container.id,
                               resource_id=process_instance.resource_id,
                               agent_id=process_instance.agent_id,
                               def_id=process_instance.agent_def_id,
                               capabilities=caps))

        self._call_proc_state_changed(process_instance, ProcessStateEnum.RUNNING)

    def terminate_process(self, process_id, do_notifications=True):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.

        @param  process_id          The id of the process to terminate. Should exist in the container's
                                    list of processes or this will raise.
        @param  do_notifications    If True, emits process state changes for TERMINATING and TERMINATED.
                                    If False, supresses any state changes. Used near EXITED and FAILED.
        """
        process_instance = self.procs.get(process_id, None)
        if not process_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                                        process_id, self.container.id))

        log.info("ProcManager.terminate_process: %s -> pid=%s", process_instance._proc_name, process_id)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATING)

        self._process_quit(process_instance)

        self._unregister_process(process_id, process_instance)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATED)

    def _unregister_process(self, process_id, process_instance):
        # Remove process registration in resource registry
        if process_instance._proc_res_id:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                try:
                    self.container.resource_registry.delete(process_instance._proc_res_id, del_associations=True)
                except NotFound:
                    # OK if already gone
                    pass
                except Exception as ex:
                    log.exception(ex)
                    pass

        # Cleanup for specific process types
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Check if this is the last process for this service and do auto delete service resources here
                svcproc_list, _ = self.container.resource_registry.find_objects(
                        process_instance._proc_svc_id, PRED.hasProcess, RT.Process, id_only=True)
                if not svcproc_list:
                    try:
                        self.container.resource_registry.delete(process_instance._proc_svc_id, del_associations=True)
                    except NotFound:
                        # OK if already gone
                        pass
                    except Exception as ex:
                        log.exception(ex)
                        pass

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                self.container.directory.unregister_safe("/Agents", process_instance.id)

        # Remove internal registration in container
        del self.procs[process_id]
        if process_instance._proc_name in self.procs_by_name:
            del self.procs_by_name[process_instance._proc_name]
        else:
            log.warn("Process name %s not in local registry", process_instance.name)


    def _publish_process_event(self, proc_inst, state):
        sub_type = ProcessStateEnum._str_map.get(state, state)
        if isinstance(proc_inst, basestring):
            # self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
            #         origin=proc_inst, origin_type=RT.Process, sub_type=sub_type,
            #         state=state,
            #         container_id=self.container.id,
            #         process_type="", process_name=proc_inst,
            #         process_resource_id="", service_name="")
            # This is a PENDING process without process_id
            pass
        else:
            try:
                self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
                        origin=getattr(proc_inst, "id", "PD"), origin_type=RT.Process, sub_type=sub_type,
                        state=state,
                        container_id=self.container.id,
                        process_type=getattr(proc_inst, "_proc_type", ""),
                        process_name=getattr(proc_inst, "_proc_name", ""),
                        process_resource_id=getattr(proc_inst, "_proc_res_id", ""),
                        service_name=getattr(proc_inst, "name", ""))
            except Exception:
                log.exception("Could not publish process event")
    # -----------------------------------------------------------------

    def _start_process_dispatcher(self):
        from ion.core.process.pd_core import ProcessDispatcher
        self.pd_core = ProcessDispatcher(container=self.container, config=self.pd_cfg)
        self.pd_core.start()

    def _stop_process_dispatcher(self):
        if self.pd_core:
            self.pd_core.stop()
Exemplo n.º 3
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to process_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.cc.timeout.heartbeat, failure_notify_callback=self._spawned_proc_failed)

        # list of callbacks for process state changes
        self._proc_state_change_callbacks = []

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()

        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            # Register container as resource object
            cc_obj = CapabilityContainer(name=self.container.id, cc_agent=self.container.name)
            self.cc_id, _ = self.container.resource_registry.create(cc_obj)

            #Create an association to an Org object if not the rot ION org and only if found
            if CFG.container.org_name and CFG.container.org_name != CFG.system.root_org:
                org, _ = self.container.resource_registry.find_resources(restype=RT.Org, name=CFG.container.org_name, id_only=True)
                if org:
                    self.container.resource_registry.create_association(org[0], PRED.hasResource, self.cc_id)  # TODO - replace with proper association

        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
#        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
#        join(gls)
        procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)

        for proc in procs_list:
            try:
                self.terminate_process(proc.id)
            except Exception as ex:
                log.warn("Failed to terminate process (%s): %s", proc.id, ex)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

        # Remove Resource registration
        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            try:
                self.container.resource_registry.delete(self.cc_id, del_associations=True)
            except NotFound:
                # already gone, this is ok
                pass
            # TODO: Check associations to processes

        log.debug("ProcManager stopped, OK.")

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """
        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s", name, module, cls, config, process_id)

        process_cfg = deepcopy(CFG)
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            if config.get_safe("process.config_ref"):
                # Use a reference
                config_ref = config.get_safe("process.config_ref")
                log.info("Enhancing new process spawn config from ref=%s" % config_ref)
                matches = re.match(r'^([A-Za-z]+):([A-Za-z0-9]+)/(.+)$', config_ref)
                if matches:
                    ref_type, ref_id, ref_ext = matches.groups()
                    if ref_type == "resources":
                        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                            try:
                                obj = self.container.resource_registry.read(ref_id)
                                if obj and hasattr(obj, ref_ext):
                                    ref_config = getattr(obj, ref_ext)
                                    if isinstance(ref_config, dict):
                                        dict_merge(process_cfg, ref_config, inplace=True)
                                    else:
                                        raise BadRequest("config_ref %s exists but not dict" % config_ref)
                                else:
                                    raise BadRequest("config_ref %s - attribute not found" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing RESOURCE_REGISTRY capability to resolve process config ref %s" % config_ref)
                    else:
                        raise BadRequest("Unknown reference type in: %s" % config_ref)

            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of the constants defined at the top of this file

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(process_cfg, "process.type") or getattr(service_cls, "process_type", "service")

        process_start_mode = get_safe(config, "process.start_mode")

        process_instance = None

        # alert we have a spawning process, but we don't have the instance yet, so give the class instead (more accurate than name)
        self._call_proc_state_changed("%s.%s" % (module, cls), ProcessStateEnum.PENDING)

        try:
            # spawn service by type
            if process_type == SERVICE_PROCESS_TYPE:
                process_instance = self._spawn_service_process(process_id, name, module, cls, process_cfg)

            elif process_type == STREAM_PROCESS_TYPE:
                process_instance = self._spawn_stream_process(process_id, name, module, cls, process_cfg)

            elif process_type == AGENT_PROCESS_TYPE:
                process_instance = self._spawn_agent_process(process_id, name, module, cls, process_cfg)

            elif process_type == STANDALONE_PROCESS_TYPE:
                process_instance = self._spawn_standalone_process(process_id, name, module, cls, process_cfg)

            elif process_type == IMMEDIATE_PROCESS_TYPE:
                process_instance = self._spawn_immediate_process(process_id, name, module, cls, process_cfg)

            elif process_type == SIMPLE_PROCESS_TYPE:
                process_instance = self._spawn_simple_process(process_id, name, module, cls, process_cfg)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            process_instance._proc_type = process_type
            self._register_process(process_instance, name)

            process_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == IMMEDIATE_PROCESS_TYPE:
                log.info('Terminating immediate process: %s', process_instance.id)
                self.terminate_process(process_instance.id)

                # terminate process also triggers TERMINATING/TERMINATED
                self._call_proc_state_changed(process_instance, ProcessStateEnum.EXITED)

            else:
                #Update local policies for the new process
                if self.container.has_capability(self.container.CCAP.GOVERNANCE_CONTROLLER):
                    self.container.governance_controller.update_container_policies(process_instance, safe_mode=True)

            return process_instance.id

        except IonProcessError:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)
            return None

        except Exception:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)

            # trigger failed notification - catches problems in init/start
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)

            raise

    def list_local_processes(self, process_type=''):
        """
        Returns a list of the running ION processes in the container or filtered by the process_type
        """
        if not process_type:
            return self.procs.values()

        return [p for p in self.procs.itervalues() if p.process_type == process_type]

    def get_a_local_process(self, proc_name=''):
        """
        Returns a running ION processes in the container for the specified name
        """
        for p in self.procs.itervalues():

            if p.name == proc_name:
                return p

            if p.process_type == AGENT_PROCESS_TYPE and p.resource_type == proc_name:
                return p

        return None

    def is_local_service_process(self, service_name):
        local_services = self.list_local_processes(SERVICE_PROCESS_TYPE)
        for p in local_services:
            if p.name == service_name:
                return True

        return False

    def is_local_agent_process(self, resource_type):
        local_agents = self.list_local_processes(AGENT_PROCESS_TYPE)
        for p in local_agents:
            if p.resource_type == resource_type:
                return True
        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

#        # look it up in mapping
#        if not gproc in self._spawned_proc_to_process:
#            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
#            return
#
        prc = self._spawned_proc_to_process.get(gproc, None)
#
#        # make sure prc is in our list
#        if not prc in self.procs.values():
#            log.warn("prc %s not found in procs list", prc)
#            return

        # stop the rest of the process
        if prc is not None:
            try:
                self.terminate_process(prc.id, False)
            except Exception as e:
                log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
            finally:
                self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
        else:
            log.warn("No ION process found for failed proc manager child: %s", gproc)

        #self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))

        # Stop the container if this was the last process
        if not self.procs and CFG.get_safe("container.processes.exit_once_empty", False):
            self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (gproc, gproc.exception))

    def _cleanup_method(self, queue_name, ep=None):
        """
        Common method to be passed to each spawned ION process to clean up their process-queue.

        @TODO Leaks implementation detail, should be using XOs
        """
        if  ep._chan is not None and not ep._chan._queue_auto_delete:
            # only need to delete if AMQP didn't handle it for us already!
            # @TODO this will not work with XOs (future)
            try:
                ch = self.container.node.channel(RecvChannel)
                ch._recv_name = NameTrio(get_sys_name(), "%s.%s" % (get_sys_name(), queue_name))
                ch._destroy_queue()
            except TransportError as ex:
                log.warn("Cleanup method triggered an error, ignoring: %s", ex)

    def add_proc_state_changed_callback(self, cb):
        """
        Adds a callback to be called when a process' state changes.

        The callback should take three parameters: The process, the state, and the container.
        """
        self._proc_state_change_callbacks.append(cb)

    def remove_proc_state_changed_callback(self, cb):
        """
        Removes a callback from the process state change callback list.

        If the callback is not registered, this method does nothing.
        """
        if cb in self._proc_state_change_callbacks:
            self._proc_state_change_callbacks.remove(cb)

    def _call_proc_state_changed(self, svc, state):
        """
        Internal method to call all registered process state change callbacks.
        """
        log.debug("Proc State Changed (%s): %s", ProcessStateEnum._str_map.get(state, state), svc)
        for cb in self._proc_state_change_callbacks:
            cb(svc, state, self.container)

    def _create_listening_endpoint(self, **kwargs):
        """
        Creates a listening endpoint for spawning processes.

        This method exists to be able to override the type created via configuration.
        In most cases it will create a ConversationRPCServer.
        """
        eptypestr = CFG.get_safe('container.messaging.endpoint.proc_listening_type', None)
        if eptypestr is not None:
            module, cls     = eptypestr.rsplit('.', 1)
            mod             = __import__(module, fromlist=[cls])
            eptype          = getattr(mod, cls)
            ep              = eptype(**kwargs)
        else:
            conv_enabled = CFG.get_safe('container.messaging.endpoint.rpc_conversation_enabled', False)
            if conv_enabled:
                ep = ConversationRPCServer(**kwargs)
            else:
                ep = ProcessRPCServer(**kwargs)
        return ep

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or process_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=listen_name,
                                                process=process_instance)
        # Named local RPC endpoint
        rsvc2 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=process_instance.id,
                                                process=process_instance)

        # cleanup method to delete process queue
        cleanup = lambda _: self._cleanup_method(process_instance.id, rsvc2)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((listen_name, process_instance.id)))

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name, callback=process_instance.call_process)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               process=process_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            self._cleanup_method(process_instance.id, rsvc)
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc, process_instance.stream_subscriber],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)
        if not isinstance(process_instance, ResourceAgent) and not isinstance(process_instance, SimpleResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")
        listeners = []

        # Set the resource ID if we get it through the config
        resource_id = get_safe(process_instance.CFG, "agent.resource_id")
        if resource_id:
            process_instance.resource_id = resource_id

            alistener = self._create_listening_endpoint(node=self.container.node,
                                                        from_name=resource_id,
                                                        process=process_instance)

            listeners.append(alistener)

        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               process=process_instance)

        listeners.append(rsvc)

        # cleanup method to delete process/agent queue (@TODO: leaks a bit here - should use XOs)
        def agent_cleanup(x):
            self._cleanup_method(process_instance.id, rsvc)
            if resource_id:
                self._cleanup_method(resource_id, alistener)

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=agent_cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        # Now call the on_init of the agent.
        self._process_init(process_instance)

        if not process_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        if not process_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" % process_instance.id)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)
        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               process=process_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            self._cleanup_method(process_instance.id, rsvc)
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)
        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_simple_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)
        self._process_init(process_instance)
        self._process_start(process_instance)
        return process_instance

    def _create_process_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        process_instance = for_name(module, cls)
        if not isinstance(process_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % process_instance)

        # Prepare service instance
        process_instance.errcause = ""
        process_instance.id = process_id
        process_instance.container = self.container
        process_instance.CFG = config
        process_instance._proc_name = name
        process_instance._proc_start_time = time.time()

        #Unless the process has been started as part of another Org, default to the container Org or the ION Org
        if config.has_key('org_governance_name'):
            process_instance.org_governance_name = config['org_governance_name']
        else:
            process_instance.org_governance_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))


        # Add stateful process operations
        if hasattr(process_instance, "_flush_state"):
            def _flush_state():
                with process_instance._state_lock:
                    state_obj = process_instance.container.state_repository.put_state(process_instance.id, process_instance._proc_state,
                                                                                      state_obj=process_instance._proc_state_obj)
                    state_obj.state = None   # Make sure memory footprint is low for larger states
                    process_instance._proc_state_obj = state_obj
                    process_instance._proc_state_changed = False

            def _load_state():
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                try:
                    with process_instance._state_lock:
                        new_state, state_obj = process_instance.container.state_repository.get_state(process_instance.id)
                        process_instance._proc_state.clear()
                        process_instance._proc_state.update(new_state)
                        process_instance._proc_state_obj = state_obj
                        process_instance._proc_state_changed = False
                except NotFound as nf:
                    log.debug("No persisted state available for process %s", process_instance.id)
                except Exception as ex:
                    log.warn("Process %s load state failed: %s", process_instance.id, str(ex))
            process_instance._flush_state = _flush_state
            process_instance._load_state = _load_state
            process_instance._state_lock = RLock()
            process_instance._proc_state = {}
            process_instance._proc_state_obj = None
            process_instance._proc_state_changed = False

            # PROCESS RESTART: Need to check whether this process had persisted state.
            # Note: This could happen anytime during a system run, not just on RESTART boot
            log.debug("Loading persisted state for process %s", process_id)
            process_instance._load_state()

        # start service dependencies (RPC clients)
        self._start_process_dependencies(process_instance)

        return process_instance

    def _start_process_dependencies(self, process_instance):
        process_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", process_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in process_instance.dependencies:
            client = getattr(process_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process  = process_instance
            client.node     = self.container.node

            # ensure that dep actually exists and is running
            # MM: commented out - during startup (init actually), we don't need to check for service dependencies
            # MM: TODO: split on_init from on_start; start consumer in on_start; check for full queues on restart
#            if process_instance.name != 'bootstrap' or (process_instance.name == 'bootstrap' and process_instance.CFG.level == dependency):
#                svc_de = self.container.resource_registry.find_resources(restype="Service", name=dependency, id_only=True)
#                if not svc_de:
#                    raise ContainerConfigError("Dependency for service %s not running: %s" % (process_instance.name, dependency))

    def _process_init(self, process_instance):
        # Init process
        process_instance.errcause = "initializing service"
        process_instance.init()

    def _process_start(self, process_instance):
        # Start process
        # THIS SHOULD BE CALLED LATER THAN SPAWN
        # TODO: Check for timeout
        process_instance.errcause = "starting service"
        process_instance.start()

    def _process_quit(self, process_instance):
        """
        Common method to handle process stopping.
        """
        process_instance.errcause = "quitting process"

        # Give the process notice to quit doing stuff.
        process_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        # @TODO: move this into process' on_quit()
        if getattr(process_instance, '_process', None) is not None and process_instance._process:
            process_instance._process.notify_stop()
            process_instance._process.stop()

    def _set_publisher_endpoints(self, process_instance, publisher_streams=None):

        publisher_streams = publisher_streams or {}
        names = []

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = StreamPublisher(process=process_instance, stream_id=stream_id)

            setattr(process_instance, name, pub)
            names.append(name)

        return names

    def _register_process(self, process_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = process_instance
        self.procs[process_instance.id] = process_instance

        # Add Process to resource registry
        # Note: In general the Process resource should be created by the CEI PD, but not all processes are CEI
        # processes. How to deal with this?
        process_instance.errcause = "registering"

        if process_instance._proc_type != IMMEDIATE_PROCESS_TYPE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                proc_obj = Process(name=process_instance.id, label=name, proctype=process_instance._proc_type)
                proc_id, _ = self.container.resource_registry.create(proc_obj)
                process_instance._proc_res_id = proc_id

                # Associate process with container resource
                self.container.resource_registry.create_association(self.cc_id, "hasProcess", proc_id)
        else:
            process_instance._proc_res_id = None

        # Process type specific registration
        # TODO: Factor out into type specific handler functions
        if process_instance._proc_type == SERVICE_PROCESS_TYPE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Registration of SERVICE process: in resource registry
                service_list, _ = self.container.resource_registry.find_resources(restype="Service", name=process_instance.name, id_only=True)
                if service_list:
                    process_instance._proc_svc_id = service_list[0]
                    if len(service_list) > 1:
                        log.warn("More than 1 Service resource found with name %s: %s", process_instance.name, service_list)
                else:
                    # We are starting the first process of a service instance
                    # TODO: This should be created by the HA Service agent in the future
                    svc_obj = Service(name=process_instance.name, exchange_name=process_instance._proc_listen_name, state=ServiceStateEnum.READY)
                    process_instance._proc_svc_id, _ = self.container.resource_registry.create(svc_obj)

                    # Create association to service definition resource
                    svcdef_list, _ = self.container.resource_registry.find_resources(restype="ServiceDefinition",
                        name=process_instance.name, id_only=True)
                    if svcdef_list:
                        if len(svcdef_list) > 1:
                            log.warn("More than 1 ServiceDefinition resource found with name %s: %s", process_instance.name, svcdef_list)
                        self.container.resource_registry.create_association(process_instance._proc_svc_id,
                            "hasServiceDefinition", svcdef_list[0])
                    else:
                        log.error("Cannot find ServiceDefinition resource for %s", process_instance.name)

                self.container.resource_registry.create_association(process_instance._proc_svc_id, "hasProcess", proc_id)

        elif process_instance._proc_type == AGENT_PROCESS_TYPE:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                # Registration of AGENT process: in Directory
                caps = process_instance.get_capabilities()
                self.container.directory.register("/Agents", process_instance.id,
                    **dict(name=process_instance._proc_name,
                        container=process_instance.container.id,
                        resource_id=process_instance.resource_id,
                        agent_id=process_instance.agent_id,
                        def_id=process_instance.agent_def_id,
                        capabilities=caps))

        self._call_proc_state_changed(process_instance, ProcessStateEnum.RUNNING)

    def terminate_process(self, process_id, do_notifications=True):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.

        @param  process_id          The id of the process to terminate. Should exist in the container's
                                    list of processes or this will raise.
        @param  do_notifications    If True, emits process state changes for TERMINATING and TERMINATED.
                                    If False, supresses any state changes. Used near EXITED and FAILED.
        """
        process_instance = self.procs.get(process_id, None)
        if not process_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                                        process_id, self.container.id))

        log.info("ProcManager.terminate_process: %s -> pid=%s", process_instance._proc_name, process_id)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATING)

        self._process_quit(process_instance)

        self._unregister_process(process_id, process_instance)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATED)

    def _unregister_process(self, process_id, process_instance):
        # Remove process registration in resource registry
        if process_instance._proc_res_id:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                try:
                    self.container.resource_registry.delete(process_instance._proc_res_id, del_associations=True)
                except NotFound: #, HTTPException):
                    # if it's already gone, it's already gone!
                    pass
                except Exception, ex:
                    log.exception(ex)
                    pass

        # Cleanup for specific process types
        if process_instance._proc_type == SERVICE_PROCESS_TYPE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Check if this is the last process for this service and do auto delete service resources here
                svcproc_list = []
                try:
                    svcproc_list, _ = self.container.resource_registry.find_objects(process_instance._proc_svc_id,
                        "hasProcess", "Process", id_only=True)
                except ResourceNotFound:
                    # if it's already gone, it's already gone!
                    pass

                if not svcproc_list:
                    try:
                        self.container.resource_registry.delete(process_instance._proc_svc_id, del_associations=True)
                    except NotFound:
                        # if it's already gone, it's already gone!
                        pass

                    except Exception, ex:
                        log.exception(ex)
                        pass
Exemplo n.º 4
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to service_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.cc.timeout.heartbeat, failure_notify_callback=self._spawned_proc_failed)

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()
        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
#        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
#        join(gls)
        map(self.terminate_process, self.procs.keys())

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)
        log.debug("ProcManager stopped, OK.")

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """

        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id =  process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s) as pid=%s", name, module, cls, process_id)

        if not config:
            # Use system CFG. It has the command line args in it
            config = DictModifier(CFG)
        else:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            config = DictModifier(CFG, config)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(config, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s config=%s", process_id, config)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of: service, stream_process, agent, simple, immediate

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(config, "process.type") or getattr(service_cls, "process_type", "service")

        service_instance = None
        try:
            # spawn service by type
            if process_type == "service":
                service_instance = self._spawn_service_process(process_id, name, module, cls, config)

            elif process_type == "stream_process":
                service_instance = self._spawn_stream_process(process_id, name, module, cls, config)

            elif process_type == "agent":
                service_instance = self._spawn_agent_process(process_id, name, module, cls, config)

            elif process_type == "standalone":
                service_instance = self._spawn_standalone_process(process_id, name, module, cls, config)

            elif process_type == "immediate":
                service_instance = self._spawn_immediate_process(process_id, name, module, cls, config)

            elif process_type == "simple":
                service_instance = self._spawn_simple_process(process_id, name, module, cls, config)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            service_instance._proc_type = process_type
            self._register_process(service_instance, name)

            service_instance.errcause = "OK"
            log.info("AppManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == 'immediate':
                log.info('Terminating immediate process: %s', service_instance.id)
                self.terminate_process(service_instance.id)

            return service_instance.id

        except Exception:
            errcause = service_instance.errcause if service_instance else "instantiating service"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)
            raise

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s", gproc)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

#        # look it up in mapping
#        if not gproc in self._spawned_proc_to_process:
#            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
#            return
#
        svc = self._spawned_proc_to_process.get(gproc, "Unknown")
#
#        # make sure svc is in our list
#        if not svc in self.procs.values():
#            log.warn("svc %s not found in procs list", svc)
#            return

        self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        listen_name = get_safe(config, "process.listen_name") or service_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)

        # Service RPC endpoint
        rsvc1 = ProcessRPCServer(node=self.container.node,
            from_name=listen_name,
            service=service_instance,
            process=service_instance)
        # Named local RPC endpoint
        rsvc2 = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)
        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((listen_name, service_instance.id)))

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Directory registration
        self.container.directory.register_safe("/Services", listen_name, interface=service_instance.name)
        self.container.directory.register_safe("/Services/%s" % listen_name, service_instance.id)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)

        # Start the service
        self._service_start(service_instance)

        listen_name = get_safe(config, "process.listen_name") or name

        service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(process=service_instance, node=self.container.node)
        sub = service_instance.stream_subscriber_registrar.create_subscriber(exchange_name=listen_name)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc, sub],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % service_instance._proc_name)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        if not isinstance(service_instance, ResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")

        # Set the resource ID if we get it through the config
        resource_id = get_safe(service_instance.CFG, "agent.resource_id")
        if resource_id:
            service_instance.resource_id = resource_id

        # Now call the on_init of the agent.
        self._service_init(service_instance)

        if not service_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._service_start(service_instance)

        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Directory registration
        caps = service_instance.get_capabilities()
        self.container.directory.register("/Agents", service_instance.id,
            **dict(name=service_instance._proc_name,
                container=service_instance.container.id,
                resource_id=service_instance.resource_id,
                agent_id=service_instance.agent_id,
                def_id=service_instance.agent_def_id,
                capabilities=caps))
        if not service_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)
        return service_instance

    def _create_service_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        service_instance = for_name(module, cls)
        if not isinstance(service_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % service_instance)

        # Prepare service instance
        service_instance.errcause = ""
        service_instance.id = process_id
        service_instance.container = self.container
        service_instance.CFG = config
        service_instance._proc_name = name

        # start service dependencies (RPC clients)
        self._start_service_dependencies(service_instance)
        
        return service_instance

    def _start_service_dependencies(self, service_instance):
        service_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", service_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in service_instance.dependencies:
            client = getattr(service_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process  = service_instance
            client.node     = self.container.node

            # ensure that dep actually exists and is running
            if service_instance.name != 'bootstrap' or (service_instance.name == 'bootstrap' and service_instance.CFG.level == dependency):
                svc_de = self.container.directory.lookup("/Services/%s" % dependency)
                if svc_de is None:
                    raise ContainerConfigError("Dependency for service %s not running: %s" % (service_instance.name, dependency))

    def _service_init(self, service_instance):
        # Init process
        service_instance.errcause = "initializing service"
        service_instance.init()

    def _service_start(self, service_instance):
        # Start process
        # TODO: Check for timeout
        service_instance.errcause = "starting service"
        service_instance.start()

    def _set_publisher_endpoints(self, service_instance, publisher_streams=None):
        service_instance.stream_publisher_registrar = StreamPublisherRegistrar(process=service_instance, node=self.container.node)

        publisher_streams = publisher_streams or {}

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = service_instance.stream_publisher_registrar.create_publisher(stream_id)

            setattr(service_instance, name, pub)

    def _register_process(self, service_instance, name):
        # Add to local process dict
        self.procs_by_name[name] = service_instance
        self.procs[service_instance.id] = service_instance

        # Add to directory
        service_instance.errcause = "registering"
        self.container.directory.register_safe("/Containers/%s/Processes" % self.container.id,
                                               service_instance.id, name=name)

        self.container.event_pub.publish_event(event_type="ProcessLifecycleEvent",
                                               origin=service_instance.id, origin_type="ContainerProcess",
                                               sub_type="SPAWN",
                                               container_id=self.container.id,
                                               process_type=service_instance._proc_type, process_name=service_instance._proc_name,
                                               state=ProcessStateEnum.SPAWN)

    def terminate_process(self, process_id):
        service_instance = self.procs.get(process_id, None)
        if not service_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                                        process_id, self.container.id))

        service_instance.quit()

        # terminate IonProcessThread (may not have one, i.e. simple process)
        if service_instance._process:
            service_instance._process.notify_stop()
            service_instance._process.stop()

        del self.procs[process_id]

        self.container.directory.unregister_safe("/Containers/%s/Processes" % self.container.id,
                service_instance.id)

        # Cleanup for specific process types
        if service_instance._proc_type == "service":
            listen_name = get_safe(service_instance.CFG, "process.listen_name", service_instance.name)
            self.container.directory.unregister_safe("/Services/%s" % listen_name, service_instance.id)
            remaining_workers = self.container.directory.find_entries("/Services/%s" % listen_name)
            if remaining_workers and len(remaining_workers) == 2:
                self.container.directory.unregister_safe("/Services", listen_name)

        elif service_instance._proc_type == "agent":
            self.container.directory.unregister_safe("/Agents", service_instance.id)

        self.container.event_pub.publish_event(event_type="ProcessLifecycleEvent",
                                            origin=service_instance.id, origin_type="ContainerProcess",
                                            sub_type="TERMINATE",
                                            container_id=self.container.id,
                                            process_type=service_instance._proc_type, process_name=service_instance._proc_name,
                                            state=ProcessStateEnum.TERMINATE)
Exemplo n.º 5
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to service_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.cc.timeout.heartbeat, failure_notify_callback=self._spawned_proc_failed)

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()

        # Register container as resource object
        cc_obj = CapabilityContainer(name=self.container.id, cc_agent=self.container.name)
        self.cc_id, _ = self.container.resource_registry.create(cc_obj)

        #Create an association to an Org object if not the rot ION org and only if found
        if CFG.container.org_name and CFG.container.org_name != CFG.system.root_org:
            org,_ = self.container.resource_registry.find_resources(restype=RT.Org,name=CFG.container.org_name, id_only=True )
            if org:
                self.container.resource_registry.create_association(org[0],PRED.hasResource, self.cc_id)  #TODO - replace with proper association



        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
        stats1 = CouchDB_DataStore._stats.get_stats()

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
#        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
#        join(gls)
        procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)

        for proc in procs_list:
            self.terminate_process(proc.id)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

        # Remove Resource registration
        self.container.resource_registry.delete(self.cc_id, del_associations=True)
        # TODO: Check associations to processes

        stats2 = CouchDB_DataStore._stats.get_stats()

        stats3 = CouchDB_DataStore._stats.diff_stats(stats2, stats1)
        log.debug("Datastore stats difference during stop(): %s", stats3)

        log.debug("ProcManager stopped, OK.")

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """

        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id =  process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s", name, module, cls, config, process_id)

        process_cfg = CFG.copy()
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of: service, stream_process, agent, simple, immediate

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(process_cfg, "process.type") or getattr(service_cls, "process_type", "service")

        service_instance = None
        try:
            # spawn service by type
            if process_type == "service":
                service_instance = self._spawn_service_process(process_id, name, module, cls, process_cfg)

            elif process_type == "stream_process":
                service_instance = self._spawn_stream_process(process_id, name, module, cls, process_cfg)

            elif process_type == "agent":
                service_instance = self._spawn_agent_process(process_id, name, module, cls, process_cfg)

            elif process_type == "standalone":
                service_instance = self._spawn_standalone_process(process_id, name, module, cls, process_cfg)

            elif process_type == "immediate":
                service_instance = self._spawn_immediate_process(process_id, name, module, cls, process_cfg)

            elif process_type == "simple":
                service_instance = self._spawn_simple_process(process_id, name, module, cls, process_cfg)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            service_instance._proc_type = process_type
            self._register_process(service_instance, name)

            service_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == 'immediate':
                log.info('Terminating immediate process: %s', service_instance.id)
                self.terminate_process(service_instance.id)

            return service_instance.id

        except Exception:
            errcause = service_instance.errcause if service_instance else "instantiating service"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)
            raise

    def list_local_processes(self, process_type=''):
        '''
        Returns a list of the running ION processes in the container or filtered by the process_type
        '''
        ret = list()
        for p in self.procs.values():
            if process_type and p.process_type != process_type:
                continue

            ret.append(p)

        return ret

    def list_local_process_names(self, process_type=''):
        '''
        Returns a list of the running ION processes in the container or filtered by the process_type
        '''
        ret = list()
        for p in self.procs.values():
            if process_type and p.process_type != process_type:
                continue

            ret.append(p.name)

        return ret
    def is_local_service_process(self, service_name):
        local_services = self.list_local_process_names('service')
        if service_name in local_services:
            return True

        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

#        # look it up in mapping
#        if not gproc in self._spawned_proc_to_process:
#            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
#            return
#
        svc = self._spawned_proc_to_process.get(gproc, "Unknown")
#
#        # make sure svc is in our list
#        if not svc in self.procs.values():
#            log.warn("svc %s not found in procs list", svc)
#            return

        self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))


    def _cleanup_method(self, queue_name, ep=None):
        """
        Common method to be passed to each spawned ION process to clean up their process-queue.

        @TODO Leaks implementation detail, should be using XOs
        """
        if not ep._chan._queue_auto_delete:
            # only need to delete if AMQP didn't handle it for us already!
            # @TODO this will not work with XOs (future)
            ch = self.container.node.channel(RecvChannel)
            ch._recv_name = NameTrio(get_sys_name(), "%s.%s" % (get_sys_name(), queue_name))
            ch._destroy_queue()


    #TODO - check with Michael if this is acceptable or if there is a better way.
    def _is_policy_management_service_available(self):
        """
        Method to verify if the Policy Management Service is running in the system.
        """
        policy_services, _ = self.container.resource_registry.find_resources(restype=RT.Service,name='policy_management')
        if policy_services:
            return True
        return False

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or service_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        service_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = ProcessRPCServer(node=self.container.node,
            from_name=listen_name,
            service=service_instance,
            process=service_instance)
        # Named local RPC endpoint
        rsvc2 = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        # cleanup method to delete process queue
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc2)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((listen_name, service_instance.id)))

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        proc.start_listeners()

        # look to load any existing policies for this service
        if self._is_policy_management_service_available() and self.container.governance_controller:
            self.container.governance_controller.update_service_access_policy(service_instance._proc_listen_name)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        service_instance._proc_listen_name = listen_name

        service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(process=service_instance, container=self.container)
        sub = service_instance.stream_subscriber_registrar.create_subscriber(exchange_name=listen_name)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc, sub],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % service_instance._proc_name)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        proc.start_listeners()

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        if not isinstance(service_instance, ResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")

        # Set the resource ID if we get it through the config
        resource_id = get_safe(service_instance.CFG, "agent.resource_id")
        if resource_id:
            service_instance.resource_id = resource_id

        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Now call the on_init of the agent.
        self._service_init(service_instance)

        if not service_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._service_start(service_instance)

        proc.start_listeners()

        if service_instance.resource_id:
            # look to load any existing policies for this resource
            if self._is_policy_management_service_available() and self.container.governance_controller:
                self.container.governance_controller.update_resource_access_policy(service_instance.resource_id)
        else:
            log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)


        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        rsvc = ProcessRPCServer(node=self.container.node,
            from_name=service_instance.id,
            service=service_instance,
            process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        proc.start_listeners()

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        service_instance = self._create_service_instance(process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)
        return service_instance


    def _create_service_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        service_instance = for_name(module, cls)
        if not isinstance(service_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % service_instance)

        # Prepare service instance
        service_instance.errcause = ""
        service_instance.id = process_id
        service_instance.container = self.container
        service_instance.CFG = config
        service_instance._proc_name = name
        service_instance._proc_start_time = time.time()

        # start service dependencies (RPC clients)
        self._start_service_dependencies(service_instance)
        
        return service_instance

    def _start_service_dependencies(self, service_instance):
        service_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", service_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in service_instance.dependencies:
            client = getattr(service_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process  = service_instance
            client.node     = self.container.node

            # ensure that dep actually exists and is running
            # MM: commented out - during startup (init actually), we don't need to check for service dependencies
            # MM: TODO: split on_init from on_start; start consumer in on_start; check for full queues on restart
#            if service_instance.name != 'bootstrap' or (service_instance.name == 'bootstrap' and service_instance.CFG.level == dependency):
#                svc_de = self.container.resource_registry.find_resources(restype="Service", name=dependency, id_only=True)
#                if not svc_de:
#                    raise ContainerConfigError("Dependency for service %s not running: %s" % (service_instance.name, dependency))

    def _service_init(self, service_instance):
        # Init process
        service_instance.errcause = "initializing service"
        service_instance.init()

    def _service_start(self, service_instance):
        # Start process
        # TODO: Check for timeout
        service_instance.errcause = "starting service"
        service_instance.start()

    def _set_publisher_endpoints(self, service_instance, publisher_streams=None):
        service_instance.stream_publisher_registrar = StreamPublisherRegistrar(process=service_instance, container=self.container)

        publisher_streams = publisher_streams or {}

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = service_instance.stream_publisher_registrar.create_publisher(stream_id)

            setattr(service_instance, name, pub)

    def _register_process(self, service_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = service_instance
        self.procs[service_instance.id] = service_instance

        # Add Process to resource registry
        # Note: In general the Process resource should be created by the CEI PD, but not all processes are CEI
        # processes. How to deal with this?
        service_instance.errcause = "registering"

        if service_instance._proc_type != "immediate":
            proc_obj = Process(name=service_instance.id, label=name, proctype=service_instance._proc_type)
            proc_id, _ = self.container.resource_registry.create(proc_obj)
            service_instance._proc_res_id = proc_id

            # Associate process with container resource
            self.container.resource_registry.create_association(self.cc_id, "hasProcess", proc_id)
        else:
            service_instance._proc_res_id = None

        # Process type specific registration
        # TODO: Factor out into type specific handler functions
        if service_instance._proc_type == "service":
            # Registration of SERVICE process: in resource registry
            service_list, _ = self.container.resource_registry.find_resources(restype="Service", name=service_instance.name)
            if service_list:
                service_instance._proc_svc_id = service_list[0]._id
            else:
                # We are starting the first process of a service instance
                # TODO: This should be created by the HA Service agent in the future
                svc_obj = Service(name=service_instance.name, exchange_name=service_instance._proc_listen_name)
                service_instance._proc_svc_id, _ = self.container.resource_registry.create(svc_obj)

                # Create association to service definition resource
                svcdef_list, _ = self.container.resource_registry.find_resources(restype="ServiceDefinition",
                    name=service_instance.name)
                if svcdef_list:
                    self.container.resource_registry.create_association(service_instance._proc_svc_id,
                        "hasServiceDefinition", svcdef_list[0]._id)
                else:
                    log.error("Cannot find ServiceDefinition resource for %s", service_instance.name)

            self.container.resource_registry.create_association(service_instance._proc_svc_id, "hasProcess", proc_id)

        elif service_instance._proc_type == "agent":
            # Registration of AGENT process: in Directory
            caps = service_instance.get_capabilities()
            self.container.directory.register("/Agents", service_instance.id,
                **dict(name=service_instance._proc_name,
                    container=service_instance.container.id,
                    resource_id=service_instance.resource_id,
                    agent_id=service_instance.agent_id,
                    def_id=service_instance.agent_def_id,
                    capabilities=caps))

        # Trigger a real-time event. At this time, everything persistent has to be completed and consistent.
        self.container.event_pub.publish_event(event_type="ProcessLifecycleEvent",
            origin=service_instance.id,
            origin_type="ContainerProcess",
            sub_type="SPAWN",
            container_id=self.container.id,
            process_type=service_instance._proc_type,
            process_name=service_instance._proc_name,
            state=ProcessStateEnum.SPAWN)



    def terminate_process(self, process_id):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.
        """
        log.debug("terminate_process: %s", process_id)
        service_instance = self.procs.get(process_id, None)
        if not service_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                                        process_id, self.container.id))

        # Give the process notice to quit doing stuff.
        service_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        if service_instance._process:
            service_instance._process.notify_stop()
            service_instance._process.stop()

        self._unregister_process(process_id, service_instance)

        # Send out real-time notice that process was terminated. At this point, everything persistent
        # has to be consistent.
        self.container.event_pub.publish_event(event_type="ProcessLifecycleEvent",
            origin=service_instance.id, origin_type="ContainerProcess",
            sub_type="TERMINATE",
            container_id=self.container.id,
            process_type=service_instance._proc_type, process_name=service_instance._proc_name,
            state=ProcessStateEnum.TERMINATE)

    def _unregister_process(self, process_id, service_instance):
        # Remove process registration in resource registry
        if service_instance._proc_res_id:
            self.container.resource_registry.delete(service_instance._proc_res_id, del_associations=True)

        # Cleanup for specific process types
        if service_instance._proc_type == "service":
            # Check if this is the last process for this service and do auto delete service resources here
            svcproc_list, _ = self.container.resource_registry.find_objects(service_instance._proc_svc_id,
                "hasProcess", "Process", id_only=True)
            if not svcproc_list:
                self.container.resource_registry.delete(service_instance._proc_svc_id, del_associations=True)

        elif service_instance._proc_type == "agent":
            self.container.directory.unregister_safe("/Agents", service_instance.id)


        # Remove internal registration in container
        del self.procs[process_id]
        if service_instance._proc_name in self.procs_by_name:
            del self.procs_by_name[service_instance._proc_name]
        else:
            log.warn("Process name %s not in local registry", service_instance.name)
Exemplo n.º 6
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to service_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(
            heartbeat_secs=CFG.cc.timeout.heartbeat,
            failure_notify_callback=self._spawned_proc_failed)

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()
        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
        #        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
        #        join(gls)
        map(self.terminate_process, self.procs.keys())

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)
        log.debug("ProcManager stopped, OK.")

    def spawn_process(self,
                      name=None,
                      module=None,
                      cls=None,
                      config=None,
                      process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """

        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" %
                             process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id,
                                              self.proc_id_pool.get_id())
        log.debug(
            "ProcManager.spawn_process(name=%s, module.cls=%s.%s) as pid=%s",
            name, module, cls, process_id)

        if not config:
            # Use system CFG. It has the command line args in it
            config = DictModifier(CFG)
        else:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            config = DictModifier(CFG, config)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(config, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s config=%s", process_id, config)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of: service, stream_process, agent, simple, immediate

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(config, "process.type") or getattr(
            service_cls, "process_type", "service")

        service_instance = None
        try:
            # spawn service by type
            if process_type == "service":
                service_instance = self._spawn_service_process(
                    process_id, name, module, cls, config)

            elif process_type == "stream_process":
                service_instance = self._spawn_stream_process(
                    process_id, name, module, cls, config)

            elif process_type == "agent":
                service_instance = self._spawn_agent_process(
                    process_id, name, module, cls, config)

            elif process_type == "standalone":
                service_instance = self._spawn_standalone_process(
                    process_id, name, module, cls, config)

            elif process_type == "immediate":
                service_instance = self._spawn_immediate_process(
                    process_id, name, module, cls, config)

            elif process_type == "simple":
                service_instance = self._spawn_simple_process(
                    process_id, name, module, cls, config)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            service_instance._proc_type = process_type
            self._register_process(service_instance, name)

            service_instance.errcause = "OK"
            log.info("AppManager.spawn_process: %s.%s -> pid=%s OK", module,
                     cls, process_id)

            if process_type == 'immediate':
                log.info('Terminating immediate process: %s',
                         service_instance.id)
                self.terminate_process(service_instance.id)

            return service_instance.id

        except Exception:
            errcause = service_instance.errcause if service_instance else "instantiating service"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)
            raise

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s", gproc)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

        #        # look it up in mapping
        #        if not gproc in self._spawned_proc_to_process:
        #            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
        #            return
        #
        svc = self._spawned_proc_to_process.get(gproc, "Unknown")
        #
        #        # make sure svc is in our list
        #        if not svc in self.procs.values():
        #            log.warn("svc %s not found in procs list", svc)
        #            return

        self.container.fail_fast("Container process (%s) failed: %s" %
                                 (svc, gproc.exception))

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        listen_name = get_safe(config,
                               "process.listen_name") or service_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)

        # Service RPC endpoint
        rsvc1 = ProcessRPCServer(node=self.container.node,
                                 from_name=listen_name,
                                 service=service_instance,
                                 process=service_instance)
        # Named local RPC endpoint
        rsvc2 = ProcessRPCServer(node=self.container.node,
                                 from_name=service_instance.id,
                                 service=service_instance,
                                 process=service_instance)
        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(
            proc, "_spawn_service_process for %s" % ",".join(
                (listen_name, service_instance.id)))

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Directory registration
        self.container.directory.register_safe("/Services",
                                               listen_name,
                                               interface=service_instance.name)
        self.container.directory.register_safe("/Services/%s" % listen_name,
                                               service_instance.id)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        # Start the service
        self._service_start(service_instance)

        listen_name = get_safe(config, "process.listen_name") or name

        service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(
            process=service_instance, node=self.container.node)
        sub = service_instance.stream_subscriber_registrar.create_subscriber(
            exchange_name=listen_name)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc, sub],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(
            proc, "_spawn_stream_process for %s" % service_instance._proc_name)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        if not isinstance(service_instance, ResourceAgent):
            raise ContainerConfigError(
                "Agent process must extend ResourceAgent")

        # Set the resource ID if we get it through the config
        resource_id = get_safe(service_instance.CFG, "agent.resource_id")
        if resource_id:
            service_instance.resource_id = resource_id

        # Now call the on_init of the agent.
        self._service_init(service_instance)

        if not service_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._service_start(service_instance)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(
            proc, "_spawn_agent_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Directory registration
        caps = service_instance.get_capabilities()
        self.container.directory.register(
            "/Agents", service_instance.id,
            **dict(name=service_instance._proc_name,
                   container=service_instance.container.id,
                   resource_id=service_instance.resource_id,
                   agent_id=service_instance.agent_id,
                   def_id=service_instance.agent_def_id,
                   capabilities=caps))
        if not service_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" %
                     service_instance.id)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(
            proc, "_spawn_standalone_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)
        return service_instance

    def _create_service_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        service_instance = for_name(module, cls)
        if not isinstance(service_instance, BaseService):
            raise ContainerConfigError(
                "Instantiated service not a BaseService %r" % service_instance)

        # Prepare service instance
        service_instance.errcause = ""
        service_instance.id = process_id
        service_instance.container = self.container
        service_instance.CFG = config
        service_instance._proc_name = name

        # start service dependencies (RPC clients)
        self._start_service_dependencies(service_instance)

        return service_instance

    def _start_service_dependencies(self, service_instance):
        service_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s",
                  service_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in service_instance.dependencies:
            client = getattr(service_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = service_instance
            client.node = self.container.node

            # ensure that dep actually exists and is running
            if service_instance.name != 'bootstrap' or (
                    service_instance.name == 'bootstrap'
                    and service_instance.CFG.level == dependency):
                svc_de = self.container.directory.lookup("/Services/%s" %
                                                         dependency)
                if svc_de is None:
                    raise ContainerConfigError(
                        "Dependency for service %s not running: %s" %
                        (service_instance.name, dependency))

    def _service_init(self, service_instance):
        # Init process
        service_instance.errcause = "initializing service"
        service_instance.init()

    def _service_start(self, service_instance):
        # Start process
        # TODO: Check for timeout
        service_instance.errcause = "starting service"
        service_instance.start()

    def _set_publisher_endpoints(self,
                                 service_instance,
                                 publisher_streams=None):
        service_instance.stream_publisher_registrar = StreamPublisherRegistrar(
            process=service_instance, node=self.container.node)

        publisher_streams = publisher_streams or {}

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = service_instance.stream_publisher_registrar.create_publisher(
                stream_id)

            setattr(service_instance, name, pub)

    def _register_process(self, service_instance, name):
        # Add to local process dict
        self.procs_by_name[name] = service_instance
        self.procs[service_instance.id] = service_instance

        # Add to directory
        service_instance.errcause = "registering"
        self.container.directory.register_safe("/Containers/%s/Processes" %
                                               self.container.id,
                                               service_instance.id,
                                               name=name)

        self.container.event_pub.publish_event(
            event_type="ProcessLifecycleEvent",
            origin=service_instance.id,
            origin_type="ContainerProcess",
            sub_type="SPAWN",
            container_id=self.container.id,
            process_type=service_instance._proc_type,
            process_name=service_instance._proc_name,
            state=ProcessStateEnum.SPAWN)

    def terminate_process(self, process_id):
        service_instance = self.procs.get(process_id, None)
        if not service_instance:
            raise BadRequest(
                "Cannot terminate. Process id='%s' unknown on container id='%s'"
                % (process_id, self.container.id))

        service_instance.quit()

        # terminate IonProcessThread (may not have one, i.e. simple process)
        if service_instance._process:
            service_instance._process.notify_stop()
            service_instance._process.stop()

        del self.procs[process_id]

        self.container.directory.unregister_safe(
            "/Containers/%s/Processes" % self.container.id,
            service_instance.id)

        # Cleanup for specific process types
        if service_instance._proc_type == "service":
            listen_name = get_safe(service_instance.CFG, "process.listen_name",
                                   service_instance.name)
            self.container.directory.unregister_safe(
                "/Services/%s" % listen_name, service_instance.id)
            remaining_workers = self.container.directory.find_entries(
                "/Services/%s" % listen_name)
            if remaining_workers and len(remaining_workers) == 2:
                self.container.directory.unregister_safe(
                    "/Services", listen_name)

        elif service_instance._proc_type == "agent":
            self.container.directory.unregister_safe("/Agents",
                                                     service_instance.id)

        self.container.event_pub.publish_event(
            event_type="ProcessLifecycleEvent",
            origin=service_instance.id,
            origin_type="ContainerProcess",
            sub_type="TERMINATE",
            container_id=self.container.id,
            process_type=service_instance._proc_type,
            process_name=service_instance._proc_name,
            state=ProcessStateEnum.TERMINATE)
Exemplo n.º 7
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API
        self.container_api = [self.spawn_process, self.terminate_process]

        # Add the public callables to Container
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Temporary registry of running processes
        self.procs_by_name = {}
        self.procs = {}

        # mapping of greenlets we spawn to service_instances for error handling
        self._spawned_proc_to_process = {}

        # The pyon worker process supervisor
        self.proc_sup = IonProcessThreadManager(
            heartbeat_secs=CFG.cc.timeout.heartbeat,
            failure_notify_callback=self._spawned_proc_failed)

    def start(self):
        log.debug("ProcManager starting ...")
        self.proc_sup.start()

        # Register container as resource object
        cc_obj = CapabilityContainer(name=self.container.id,
                                     cc_agent=self.container.name)
        self.cc_id, _ = self.container.resource_registry.create(cc_obj)

        #Create an association to an Org object if not the rot ION org and only if found
        if CFG.container.org_name and CFG.container.org_name != CFG.system.root_org:
            org, _ = self.container.resource_registry.find_resources(
                restype=RT.Org, name=CFG.container.org_name, id_only=True)
            if org:
                self.container.resource_registry.create_association(
                    org[0], PRED.hasResource,
                    self.cc_id)  #TODO - replace with proper association

        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
        stats1 = CouchDB_DataStore._stats.get_stats()

        # Call quit on procs to give them ability to clean up
        # @TODO terminate_process is not gl-safe
        #        gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
        #        join(gls)
        procs_list = sorted(self.procs.values(),
                            key=lambda proc: proc._proc_start_time,
                            reverse=True)

        for proc in procs_list:
            self.terminate_process(proc.id)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.cc.timeout.shutdown)

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s",
                     self.procs_by_name)

        # Remove Resource registration
        self.container.resource_registry.delete(self.cc_id,
                                                del_associations=True)
        # TODO: Check associations to processes

        stats2 = CouchDB_DataStore._stats.get_stats()

        stats3 = CouchDB_DataStore._stats.diff_stats(stats2, stats1)
        log.debug("Datastore stats difference during stop(): %s", stats3)

        log.debug("ProcManager stopped, OK.")

    def spawn_process(self,
                      name=None,
                      module=None,
                      cls=None,
                      config=None,
                      process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """

        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" %
                             process_id)

        # Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id,
                                              self.proc_id_pool.get_id())
        log.debug(
            "ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s",
            name, module, cls, config, process_id)

        process_cfg = CFG.copy()
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg,
                           self.container.spawn_args,
                           inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        # One of: service, stream_process, agent, simple, immediate

        service_cls = named_any("%s.%s" % (module, cls))
        process_type = get_safe(process_cfg, "process.type") or getattr(
            service_cls, "process_type", "service")

        service_instance = None
        try:
            # spawn service by type
            if process_type == "service":
                service_instance = self._spawn_service_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "stream_process":
                service_instance = self._spawn_stream_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "agent":
                service_instance = self._spawn_agent_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "standalone":
                service_instance = self._spawn_standalone_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "immediate":
                service_instance = self._spawn_immediate_process(
                    process_id, name, module, cls, process_cfg)

            elif process_type == "simple":
                service_instance = self._spawn_simple_process(
                    process_id, name, module, cls, process_cfg)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            service_instance._proc_type = process_type
            self._register_process(service_instance, name)

            service_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module,
                     cls, process_id)

            if process_type == 'immediate':
                log.info('Terminating immediate process: %s',
                         service_instance.id)
                self.terminate_process(service_instance.id)

            return service_instance.id

        except Exception:
            errcause = service_instance.errcause if service_instance else "instantiating service"
            log.exception("Error spawning %s %s process (process_id: %s): %s",
                          name, process_type, process_id, errcause)
            raise

    def list_local_processes(self, process_type=''):
        '''
        Returns a list of the running ION processes in the container or filtered by the process_type
        '''
        ret = list()
        for p in self.procs.values():
            if process_type and p.process_type != process_type:
                continue

            ret.append(p)

        return ret

    def list_local_process_names(self, process_type=''):
        '''
        Returns a list of the running ION processes in the container or filtered by the process_type
        '''
        ret = list()
        for p in self.procs.values():
            if process_type and p.process_type != process_type:
                continue

            ret.append(p.name)

        return ret

    def is_local_service_process(self, service_name):
        local_services = self.list_local_process_names('service')
        if service_name in local_services:
            return True

        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc,
                  gproc.exception)

        # for now - don't worry about the mapping, if we get a failure, just kill the container.
        # leave the mapping in place for potential expansion later.

        #        # look it up in mapping
        #        if not gproc in self._spawned_proc_to_process:
        #            log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
        #            return
        #
        svc = self._spawned_proc_to_process.get(gproc, "Unknown")
        #
        #        # make sure svc is in our list
        #        if not svc in self.procs.values():
        #            log.warn("svc %s not found in procs list", svc)
        #            return

        self.container.fail_fast("Container process (%s) failed: %s" %
                                 (svc, gproc.exception))

    def _cleanup_method(self, queue_name, ep=None):
        """
        Common method to be passed to each spawned ION process to clean up their process-queue.

        @TODO Leaks implementation detail, should be using XOs
        """
        if not ep._chan._queue_auto_delete:
            # only need to delete if AMQP didn't handle it for us already!
            # @TODO this will not work with XOs (future)
            ch = self.container.node.channel(RecvChannel)
            ch._recv_name = NameTrio(get_sys_name(),
                                     "%s.%s" % (get_sys_name(), queue_name))
            ch._destroy_queue()

    #TODO - check with Michael if this is acceptable or if there is a better way.
    def _is_policy_management_service_available(self):
        """
        Method to verify if the Policy Management Service is running in the system.
        """
        policy_services, _ = self.container.resource_registry.find_resources(
            restype=RT.Service, name='policy_management')
        if policy_services:
            return True
        return False

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)

        listen_name = get_safe(config,
                               "process.listen_name") or service_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        service_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = ProcessRPCServer(node=self.container.node,
                                 from_name=listen_name,
                                 service=service_instance,
                                 process=service_instance)
        # Named local RPC endpoint
        rsvc2 = ProcessRPCServer(node=self.container.node,
                                 from_name=service_instance.id,
                                 service=service_instance,
                                 process=service_instance)

        # cleanup method to delete process queue
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc2)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc1, rsvc2],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_service_process for %s" % ",".join(
                (listen_name, service_instance.id)))

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        proc.start_listeners()

        # look to load any existing policies for this service
        if self._is_policy_management_service_available(
        ) and self.container.governance_controller:
            self.container.governance_controller.update_service_access_policy(
                service_instance._proc_listen_name)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        service_instance._proc_listen_name = listen_name

        service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(
            process=service_instance, container=self.container)
        sub = service_instance.stream_subscriber_registrar.create_subscriber(
            exchange_name=listen_name)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc, sub],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_stream_process for %s" % service_instance._proc_name)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        proc.start_listeners()

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        if not isinstance(service_instance, ResourceAgent):
            raise ContainerConfigError(
                "Agent process must extend ResourceAgent")

        # Set the resource ID if we get it through the config
        resource_id = get_safe(service_instance.CFG, "agent.resource_id")
        if resource_id:
            service_instance.resource_id = resource_id

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_agent_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Now call the on_init of the agent.
        self._service_init(service_instance)

        if not service_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._service_start(service_instance)

        proc.start_listeners()

        if service_instance.resource_id:
            # look to load any existing policies for this resource
            if self._is_policy_management_service_available(
            ) and self.container.governance_controller:
                self.container.governance_controller.update_resource_access_policy(
                    service_instance.resource_id)
        else:
            log.warn("Agent process id=%s does not define resource_id!!" %
                     service_instance.id)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_standalone_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        proc.start_listeners()

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    def _spawn_immediate_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as immediate one off process.
        No attachments.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)
        self._service_start(service_instance)
        return service_instance

    def _create_service_instance(self, process_id, name, module, cls, config):
        """
        Creates an instance of a "service", be it a Service, Agent, Stream, etc.

        @rtype BaseService
        @return An instance of a "service"
        """
        # SERVICE INSTANCE.
        service_instance = for_name(module, cls)
        if not isinstance(service_instance, BaseService):
            raise ContainerConfigError(
                "Instantiated service not a BaseService %r" % service_instance)

        # Prepare service instance
        service_instance.errcause = ""
        service_instance.id = process_id
        service_instance.container = self.container
        service_instance.CFG = config
        service_instance._proc_name = name
        service_instance._proc_start_time = time.time()

        # start service dependencies (RPC clients)
        self._start_service_dependencies(service_instance)

        return service_instance

    def _start_service_dependencies(self, service_instance):
        service_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s",
                  service_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in service_instance.dependencies:
            client = getattr(service_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = service_instance
            client.node = self.container.node

            # ensure that dep actually exists and is running
            # MM: commented out - during startup (init actually), we don't need to check for service dependencies
            # MM: TODO: split on_init from on_start; start consumer in on_start; check for full queues on restart
#            if service_instance.name != 'bootstrap' or (service_instance.name == 'bootstrap' and service_instance.CFG.level == dependency):
#                svc_de = self.container.resource_registry.find_resources(restype="Service", name=dependency, id_only=True)
#                if not svc_de:
#                    raise ContainerConfigError("Dependency for service %s not running: %s" % (service_instance.name, dependency))

    def _service_init(self, service_instance):
        # Init process
        service_instance.errcause = "initializing service"
        service_instance.init()

    def _service_start(self, service_instance):
        # Start process
        # TODO: Check for timeout
        service_instance.errcause = "starting service"
        service_instance.start()

    def _set_publisher_endpoints(self,
                                 service_instance,
                                 publisher_streams=None):
        service_instance.stream_publisher_registrar = StreamPublisherRegistrar(
            process=service_instance, container=self.container)

        publisher_streams = publisher_streams or {}

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = service_instance.stream_publisher_registrar.create_publisher(
                stream_id)

            setattr(service_instance, name, pub)

    def _register_process(self, service_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = service_instance
        self.procs[service_instance.id] = service_instance

        # Add Process to resource registry
        # Note: In general the Process resource should be created by the CEI PD, but not all processes are CEI
        # processes. How to deal with this?
        service_instance.errcause = "registering"

        if service_instance._proc_type != "immediate":
            proc_obj = Process(name=service_instance.id,
                               label=name,
                               proctype=service_instance._proc_type)
            proc_id, _ = self.container.resource_registry.create(proc_obj)
            service_instance._proc_res_id = proc_id

            # Associate process with container resource
            self.container.resource_registry.create_association(
                self.cc_id, "hasProcess", proc_id)
        else:
            service_instance._proc_res_id = None

        # Process type specific registration
        # TODO: Factor out into type specific handler functions
        if service_instance._proc_type == "service":
            # Registration of SERVICE process: in resource registry
            service_list, _ = self.container.resource_registry.find_resources(
                restype="Service", name=service_instance.name)
            if service_list:
                service_instance._proc_svc_id = service_list[0]._id
            else:
                # We are starting the first process of a service instance
                # TODO: This should be created by the HA Service agent in the future
                svc_obj = Service(
                    name=service_instance.name,
                    exchange_name=service_instance._proc_listen_name)
                service_instance._proc_svc_id, _ = self.container.resource_registry.create(
                    svc_obj)

                # Create association to service definition resource
                svcdef_list, _ = self.container.resource_registry.find_resources(
                    restype="ServiceDefinition", name=service_instance.name)
                if svcdef_list:
                    self.container.resource_registry.create_association(
                        service_instance._proc_svc_id, "hasServiceDefinition",
                        svcdef_list[0]._id)
                else:
                    log.error("Cannot find ServiceDefinition resource for %s",
                              service_instance.name)

            self.container.resource_registry.create_association(
                service_instance._proc_svc_id, "hasProcess", proc_id)

        elif service_instance._proc_type == "agent":
            # Registration of AGENT process: in Directory
            caps = service_instance.get_capabilities()
            self.container.directory.register(
                "/Agents", service_instance.id,
                **dict(name=service_instance._proc_name,
                       container=service_instance.container.id,
                       resource_id=service_instance.resource_id,
                       agent_id=service_instance.agent_id,
                       def_id=service_instance.agent_def_id,
                       capabilities=caps))

        # Trigger a real-time event. At this time, everything persistent has to be completed and consistent.
        self.container.event_pub.publish_event(
            event_type="ProcessLifecycleEvent",
            origin=service_instance.id,
            origin_type="ContainerProcess",
            sub_type="SPAWN",
            container_id=self.container.id,
            process_type=service_instance._proc_type,
            process_name=service_instance._proc_name,
            state=ProcessStateEnum.SPAWN)

    def terminate_process(self, process_id):
        """
        Terminates a process and all its resources. Termination is graceful with timeout.
        """
        log.debug("terminate_process: %s", process_id)
        service_instance = self.procs.get(process_id, None)
        if not service_instance:
            raise BadRequest(
                "Cannot terminate. Process id='%s' unknown on container id='%s'"
                % (process_id, self.container.id))

        # Give the process notice to quit doing stuff.
        service_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        if service_instance._process:
            service_instance._process.notify_stop()
            service_instance._process.stop()

        self._unregister_process(process_id, service_instance)

        # Send out real-time notice that process was terminated. At this point, everything persistent
        # has to be consistent.
        self.container.event_pub.publish_event(
            event_type="ProcessLifecycleEvent",
            origin=service_instance.id,
            origin_type="ContainerProcess",
            sub_type="TERMINATE",
            container_id=self.container.id,
            process_type=service_instance._proc_type,
            process_name=service_instance._proc_name,
            state=ProcessStateEnum.TERMINATE)

    def _unregister_process(self, process_id, service_instance):
        # Remove process registration in resource registry
        if service_instance._proc_res_id:
            self.container.resource_registry.delete(
                service_instance._proc_res_id, del_associations=True)

        # Cleanup for specific process types
        if service_instance._proc_type == "service":
            # Check if this is the last process for this service and do auto delete service resources here
            svcproc_list, _ = self.container.resource_registry.find_objects(
                service_instance._proc_svc_id,
                "hasProcess",
                "Process",
                id_only=True)
            if not svcproc_list:
                self.container.resource_registry.delete(
                    service_instance._proc_svc_id, del_associations=True)

        elif service_instance._proc_type == "agent":
            self.container.directory.unregister_safe("/Agents",
                                                     service_instance.id)

        # Remove internal registration in container
        del self.procs[process_id]
        if service_instance._proc_name in self.procs_by_name:
            del self.procs_by_name[service_instance._proc_name]
        else:
            log.warn("Process name %s not in local registry",
                     service_instance.name)