def _create_process_instance(self, process_id, name, module, cls, config):
    """
    Creates an instance of a "service", be it a Service, Agent, Stream, etc.
    @rtype BaseService
    @return An instance of a "service"
    """
    # SERVICE INSTANCE.
    process_instance = for_name(module, cls)
    if not isinstance(process_instance, BaseService):
        raise ContainerConfigError("Instantiated service not a BaseService %r" % process_instance)

    # Prepare service instance
    process_instance.errcause = ""
    process_instance.id = process_id
    process_instance.container = self.container
    process_instance.CFG = config
    process_instance._proc_name = name
    process_instance._proc_start_time = time.time()

    # Unless the process has been started as part of another Org, default to the container Org or the ION Org
    if "org_name" in config:
        process_instance.org_name = config["org_name"]
    else:
        process_instance.org_name = CFG.get_safe("container.org_name", CFG.get_safe("system.root_org", "ION"))

    # Add stateful process operations
    if hasattr(process_instance, "_flush_state"):
        def _flush_state():
            if not hasattr(process_instance, "_proc_state"):
                process_instance._proc_state = {}
                process_instance._proc_state_changed = False
                return
            process_instance.container.state_repository.put_state(process_instance.id, process_instance._proc_state)
            process_instance._proc_state_changed = False

        def _load_state():
            if not hasattr(process_instance, "_proc_state"):
                process_instance._proc_state = {}
            try:
                new_state = process_instance.container.state_repository.get_state(process_instance.id)
                process_instance._proc_state.clear()
                process_instance._proc_state.update(new_state)
                process_instance._proc_state_changed = False
            except Exception as ex:
                log.warn("Process %s load state failed: %s", process_instance.id, str(ex))

        process_instance._flush_state = _flush_state
        process_instance._load_state = _load_state

    process_start_mode = get_safe(config, "process.start_mode")
    if process_start_mode == "RESTART":
        if hasattr(process_instance, "_load_state"):
            process_instance._load_state()

    # start service dependencies (RPC clients)
    self._start_process_dependencies(process_instance)

    return process_instance
def _make_management_call(self, url, method="get", data=None):
    """
    Makes a call to the Rabbit HTTP management API using the passed in HTTP method.
    """
    log.debug("Calling rabbit API management (%s): %s", method, url)

    meth = getattr(requests, method)

    try:
        mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
        mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)
        username = get_safe(mgmt_cfg, "username") or "guest"
        password = get_safe(mgmt_cfg, "password") or "guest"

        with gevent.timeout.Timeout(10):
            r = meth(url, auth=(username, password), data=data)
        r.raise_for_status()

        if r.content != "":
            content = json.loads(r.content)
        else:
            content = None

    except gevent.timeout.Timeout as ex:
        raise Timeout(str(ex))
    except requests.exceptions.Timeout as ex:
        raise Timeout(str(ex))
    except (requests.exceptions.ConnectionError, socket.error) as ex:
        raise ServiceUnavailable(str(ex))
    except requests.exceptions.RequestException as ex:
        # the generic base exception all requests' exceptions inherit from; raise our
        # general server error too.
        raise ServerError(str(ex))

    return content
def _create_app_instance(self, process_id, name, module, cls, config, proc_attr):
    """
    Creates an instance of a BaseService, representing the app logic of an ION process.
    This is independent of the process type: service, agent, standalone, etc.
    """
    # APP INSTANCE.
    app_instance = for_name(module, cls)
    if not isinstance(app_instance, BaseService):
        raise ContainerConfigError("Instantiated service not a BaseService %r" % app_instance)

    # Set BaseService instance common attributes
    app_instance.errcause = ""
    app_instance.id = process_id
    app_instance.container = self.container
    app_instance.CFG = config
    app_instance._proc_name = name
    app_instance._proc_start_time = time.time()
    for att, att_val in proc_attr.iteritems():
        setattr(app_instance, att, att_val)

    # Unless the process has been started as part of another Org, default to the container Org or the ION Org
    if 'org_governance_name' in config:
        app_instance.org_governance_name = config['org_governance_name']
    else:
        app_instance.org_governance_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))

    # Add process state management, if applicable
    self._add_process_state(app_instance)

    # Check dependencies (RPC clients)
    self._check_process_dependencies(app_instance)

    return app_instance
def __init__(self):
    self.cont_cert = None
    self.cont_key = None
    self.root_cert = None
    self.white_list = []

    # Look for certificates and keys in "the usual places"
    certstore_path = self.certstore = CFG.get_safe('authentication.certstore', CERTSTORE_PATH)
    log.debug("certstore_path: %s" % str(certstore_path))

    keystore_path = self.keystore = CFG.get_safe('authentication.keystore', KEYSTORE_PATH)
    log.debug("keystore_path: %s" % str(keystore_path))

    if certstore_path and keystore_path:
        if certstore_path == 'directory':
            log.debug("Container.instance.directory: %s" % str(Container.instance.directory))
            Container.instance.directory.load_authentication()
        else:
            cont_cert_path = os.path.join(certstore_path, CONTAINER_CERT_NAME)
            log.debug("cont_cert_path: %s" % cont_cert_path)
            cont_key_path = os.path.join(keystore_path, CONTAINER_KEY_NAME)
            log.debug("cont_key_path: %s" % cont_key_path)
            root_cert_path = os.path.join(certstore_path, ORG_CERT_NAME)
            log.debug("root_cert_path: %s" % root_cert_path)
            if os.path.exists(cont_cert_path) and os.path.exists(cont_key_path) and os.path.exists(root_cert_path):
                with open(cont_cert_path, 'r') as f:
                    self.cont_cert = f.read()
                log.debug("cont_cert: %s" % self.cont_cert)
                self.cont_key = EVP.load_key(cont_key_path)
                with open(root_cert_path, 'r') as f:
                    self.root_cert = f.read()
                log.debug("root_cert: %s" % self.root_cert)
                self.add_to_white_list(self.root_cert)
def setUp(self):
    self.server_type = CFG.get_safe("container.datastore.default_server", "couchdb")
    if self.server_type.startswith("couch"):
        self.ds_class = CouchPyonDataStore
    elif self.server_type == "postgresql":
        self.ds_class = PostgresPyonDataStore

    # We're running outside of a container - configure the tracer
    CallTracer.configure(CFG.get_safe("container.tracer", {}))
def configure(self, config):
    if "enabled" in config:
        self.enabled = config["enabled"]
    self.enabled = self.enabled and CFG.get_safe("container.objects.validate.interceptor", True)
    self.raise_exception = CFG.get_safe("container.objects.validate.interceptor_error", False) is True
    log.debug("ValidateInterceptor enabled: %s" % self.enabled)
def cleanup_broker():
    # @Dave: This is maybe too brute force and there is maybe a better pattern...
    connect_str = "-q -H %s -P %s -u %s -p %s -V %s" % (
        CFG.get_safe('server.amqp_priv.host', CFG.get_safe('server.amqp.host', 'localhost')),
        CFG.get_safe('container.exchange.management.port', '55672'),
        CFG.get_safe('container.exchange.management.username', 'guest'),
        CFG.get_safe('container.exchange.management.password', 'guest'),
        '/')
    from putil.rabbithelper import clean_by_sysname
    clean_by_sysname(connect_str, get_sys_name())
def es_cleanup():
    es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
    es_port = CFG.get_safe('server.elasticsearch.port', '9200')

    es = ep.ElasticSearch(host=es_host, port=es_port, timeout=10)

    indexes = STD_INDEXES.keys()
    indexes.append('%s_resources_index' % get_sys_name().lower())
    indexes.append('%s_events_index' % get_sys_name().lower())

    for index in indexes:
        IndexManagementService._es_call(es.river_couchdb_delete, index)
        IndexManagementService._es_call(es.index_delete, index)
def heartbeat(self):
    """
    Returns a 3-tuple indicating everything is ok.

    Should only be called after the process has been started.
    Checks the following:
        - All attached endpoints are alive + listening (this means ready)
        - The control flow greenlet is alive + listening or processing

    @return 3-tuple indicating (listeners ok, ctrl thread ok, heartbeat status). Use all on it for a
            boolean indication of success.
    """
    listeners_ok = True
    for l in self.listeners:
        if not (l in self._listener_map and not self._listener_map[l].proc.dead and l.get_ready_event().is_set()):
            listeners_ok = False

    ctrl_thread_ok = self._ctrl_thread.running

    # are we currently processing something?
    heartbeat_ok = True
    if self._ctrl_current is not None:
        st = traceback.extract_stack(self._ctrl_thread.proc.gr_frame)

        if self._ctrl_current == self._heartbeat_op:

            if st == self._heartbeat_stack:
                self._heartbeat_count += 1      # we've seen this before! increment count

                # we've been in this for the last X ticks, or it's been X seconds, fail this part of the heartbeat
                if self._heartbeat_count > CFG.get_safe('container.timeout.heartbeat_proc_count_threshold', 30) or \
                   get_ion_ts_millis() - int(self._heartbeat_time) >= CFG.get_safe('container.timeout.heartbeat_proc_time_threshold', 30) * 1000:
                    heartbeat_ok = False
            else:
                # it's made some progress
                self._heartbeat_count = 1
                self._heartbeat_stack = st
                self._heartbeat_time = get_ion_ts()
        else:
            self._heartbeat_op = self._ctrl_current
            self._heartbeat_count = 1
            self._heartbeat_time = get_ion_ts()
            self._heartbeat_stack = st
    else:
        self._heartbeat_op = None
        self._heartbeat_count = 0

    #log.debug("%s %s %s", listeners_ok, ctrl_thread_ok, heartbeat_ok)
    return (listeners_ok, ctrl_thread_ok, heartbeat_ok)
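# Illustrative sketch (not part of the original source): consuming the heartbeat() 3-tuple described
# in the docstring above. Per the docstring, all() on the tuple gives a single pass/fail indication.
# The `proc` argument is assumed to be a started IonProcessThread; the function name is hypothetical.
def check_heartbeat(proc):
    listeners_ok, ctrl_ok, hb_ok = proc.heartbeat()
    if not all((listeners_ok, ctrl_ok, hb_ok)):
        # a False slot means a listener died, the control greenlet stopped,
        # or the current operation appears stuck past the configured thresholds
        log.warn("heartbeat failed: listeners=%s ctrl=%s op=%s", listeners_ok, ctrl_ok, hb_ok)
    return all((listeners_ok, ctrl_ok, hb_ok))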
def heartbeat(self):
    """
    Returns a tuple indicating everything is ok.

    Should only be called after the process has been started.
    Checks the following:
        - All attached endpoints are alive + listening (this means ready)
        - The control flow greenlet is alive + listening or processing

    @return 3-tuple indicating (listeners ok, ctrl thread ok, heartbeat status). Use all on it for a
            boolean indication of success.
    """
    listeners_ok = True
    for l in self.listeners:
        if not (l in self._listener_map and not self._listener_map[l].proc.dead and l.get_ready_event().is_set()):
            listeners_ok = False

    ctrl_thread_ok = self._ctrl_thread.running

    # are we currently processing something?
    heartbeat_ok = True
    if self._ctrl_current is not None:
        st = traceback.extract_stack(self._ctrl_thread.proc.gr_frame)

        if self._ctrl_current == self._heartbeat_op:

            if st == self._heartbeat_stack:
                self._heartbeat_count += 1      # we've seen this before! increment count

                # we've been in this for the last X ticks, or it's been X seconds, fail this part of the heartbeat
                if self._heartbeat_count > CFG.get_safe("cc.timeout.heartbeat_proc_count_threshold", 30) or \
                   int(get_ion_ts()) - int(self._heartbeat_time) >= CFG.get_safe("cc.timeout.heartbeat_proc_time_threshold", 30) * 1000:
                    heartbeat_ok = False
            else:
                # it's made some progress
                self._heartbeat_count = 1
                self._heartbeat_stack = st
                self._heartbeat_time = get_ion_ts()
        else:
            self._heartbeat_op = self._ctrl_current
            self._heartbeat_count = 1
            self._heartbeat_time = get_ion_ts()
            self._heartbeat_stack = st
    else:
        self._heartbeat_op = None
        self._heartbeat_count = 0

    return (listeners_ok, ctrl_thread_ok, heartbeat_ok)
def _get_management_url(self, *feats):
    """
    Builds a URL to be used with the Rabbit HTTP management API.
    """
    node = self._priv_nodes.get(ION_DEFAULT_BROKER, self.default_node)
    host = node.client.parameters.host

    mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
    mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)
    mgmt_port = get_safe(mgmt_cfg, "port") or "15672"

    url = "http://%s:%s/api/%s" % (host, mgmt_port, "/".join(feats))

    return url
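# Illustrative sketch (not part of the original source): how the URL builder above could be combined
# with the _make_management_call helper earlier in this section to query the broker. "overview" is a
# standard RabbitMQ management API path, but this particular call site is an assumption for illustration.
def _get_broker_overview(self):
    url = self._get_management_url("overview")
    return self._make_management_call(url, method="get")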
def __init__(self, target=None, listeners=None, name=None, service=None, cleanup_method=None,
             heartbeat_secs=10, **kwargs):
    """
    Constructs the control part of an ION process.
    Used by the container's IonProcessThreadManager, as part of spawn_process.

    @param  target          A callable to run in the PyonThread. If None (typical), will use the target
                            method defined in this class.
    @param  listeners       A list of listening endpoints attached to this thread.
    @param  name            The name of this ION process.
    @param  service         An instance of the BaseService derived class which contains the business logic
                            for the ION process.
    @param  cleanup_method  An optional callable to run when the process is stopping. Runs after all other
                            notify_stop calls have run. Should take one param, this instance.
    @param  heartbeat_secs  Number of seconds to wait in between heartbeats.
    """
    self._startup_listeners = listeners or []
    self.listeners = []
    self._listener_map = {}
    self.name = name
    self.service = service
    self._cleanup_method = cleanup_method

    self.thread_manager = ThreadManager(failure_notify_callback=self._child_failed)  # bubbles up to main thread manager
    self._dead_children = []        # save any dead children for forensics
    self._ctrl_thread = None
    self._ctrl_queue = Queue()
    self._ready_control = Event()
    self._errors = []
    self._ctrl_current = None       # set to the AR generated by _routing_call when in the context of a call

    # processing vs idle time (ms)
    self._start_time = None
    self._proc_time = 0             # busy time since start
    self._proc_time_prior = 0       # busy time at the beginning of the prior interval
    self._proc_time_prior2 = 0      # busy time at the beginning of 2 intervals ago
    self._proc_interval_num = 0     # interval num of last record

    # for heartbeats, used to detect stuck processes
    self._heartbeat_secs = heartbeat_secs   # amount of time to wait between heartbeats
    self._heartbeat_stack = None            # stacktrace of last heartbeat
    self._heartbeat_time = None             # timestamp of heartbeat last matching the current op
    self._heartbeat_op = None               # last operation (by AR)
    self._heartbeat_count = 0               # number of times this operation has been seen consecutively

    self._log_call_exception = CFG.get_safe("container.process.log_exceptions", False)
    self._log_call_dbstats = CFG.get_safe("container.process.log_dbstats", False)
    self._warn_call_dbstmt_threshold = CFG.get_safe("container.process.warn_dbstmt_threshold", 0)

    PyonThread.__init__(self, target=target, **kwargs)
def es_cleanup():
    es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
    es_port = CFG.get_safe('server.elasticsearch.port', '9200')

    es = ep.ElasticSearch(host=es_host, port=es_port, timeout=10)

    indexes = STD_INDEXES.keys()
    indexes.append('%s_resources_index' % get_sys_name().lower())
    indexes.append('%s_events_index' % get_sys_name().lower())

    for index in indexes:
        IndexManagementService._es_call(es.river_couchdb_delete, index)
        IndexManagementService._es_call(es.index_delete, index)
def start_listeners(self):
    """
    Starts all listeners in managed greenlets.

    Usually called by the ProcManager, unless using IonProcess manually.
    """
    try:
        # disable normal error reporting, this method should only be called from startup
        self.thread_manager._failure_notify_callback = None

        # spawn all listeners in startup listeners (from initializer, or added later)
        for listener in self._startup_listeners:
            self.add_endpoint(listener)

        with Timeout(seconds=CFG.get_safe('container.messaging.timeout.start_listener', 30)):
            gevent.wait([x.get_ready_event() for x in self.listeners])

    except Timeout:
        # remove failed endpoints before reporting failure above
        for listener, proc in self._listener_map.iteritems():
            if proc.proc.dead:
                log.info("removed dead listener: %s", listener)
                self.listeners.remove(listener)
                self.thread_manager.children.remove(proc)

        raise IonProcessError("start_listeners did not complete in expected time")

    finally:
        self.thread_manager._failure_notify_callback = self._child_failed
def exchange_auto_delete(self):
    # Fix OOIION-1710: Added because exchanges get deleted on broker restart
    if CFG.get_safe('container.exchange.names.durable', False):
        self._xs_auto_delete = False
        return False

    return self._xs_auto_delete
def exchange_durable(self):
    # Added because exchanges get deleted on broker restart
    if CFG.get_safe('container.messaging.names.durable', False):
        self._xs_durable = True
        return True

    return self._xs_durable
def create_event_xn(self, name, event_type=None, origin=None, sub_type=None, origin_type=None,
                    pattern=None, xp=None, auto_delete=None, **kwargs):
    """
    Creates an EventExchangeName suitable for listening with an EventSubscriber.

    Pass None for the name to have one automatically generated.
    If you pass a pattern, it takes precedence over making a new one from
    event_type/origin/sub_type/origin_type.
    """
    # make a name if no name exists
    name = name or create_simple_unique_id()

    # get event xp for the xs if not set
    if not xp:
        # pull from configuration
        eventxp = CFG.get_safe('exchange.core.events', DEFAULT_EVENTS_XP)
        xp = self.create_xp(eventxp)

    node = xp.node
    transport = xp._transports[0]

    xn = EventExchangeName(self, transport, node, name, xp,
                           event_type=event_type,
                           sub_type=sub_type,
                           origin=origin,
                           origin_type=origin_type,
                           pattern=pattern,
                           auto_delete=auto_delete,
                           **kwargs)

    self._register_xn(name, xn, xp)
    return xn
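# Illustrative sketch (not part of the original source): minimal use of create_event_xn per its
# docstring. `ex_manager` is assumed to be the container's ExchangeManager; the event type and origin
# values are arbitrary examples, and the function name is hypothetical.
def make_resource_event_xn(ex_manager):
    xn = ex_manager.create_event_xn(None,                       # None -> auto-generated unique name
                                    event_type="ResourceEvent",
                                    origin="some_resource_id")
    # xn is an EventExchangeName; per the docstring it is meant to be handed to an EventSubscriber
    # for listening (the exact subscriber wiring is endpoint-specific and not shown here).
    return xn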
def start(self):
    log.debug("AppManager starting ...")

    self.max_proc_replicas = int(CFG.get_safe("container.process.max_replicas", 0))

    if self.use_pd:
        from ion.core.process.pd_core import ProcessDispatcherClient
        self.pd_client = ProcessDispatcherClient(self.container)
def _spawned_proc_failed(self, gproc):
    log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

    # for now - don't worry about the mapping, if we get a failure, just kill the container.
    # leave the mapping in place for potential expansion later.

    # # look it up in mapping
    # if not gproc in self._spawned_proc_to_process:
    #     log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
    #     return
    prc = self._spawned_proc_to_process.get(gproc, None)
    #
    # # make sure prc is in our list
    # if not prc in self.procs.values():
    #     log.warn("prc %s not found in procs list", prc)
    #     return

    # stop the rest of the process
    if prc is not None:
        try:
            self.terminate_process(prc.id, False)
        except Exception as e:
            log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
        finally:
            self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
    else:
        log.warn("No ION process found for failed proc manager child: %s", gproc)

    #self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))

    # Stop the container if this was the last process
    if not self.procs and CFG.get_safe("container.processes.exit_once_empty", False):
        self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (gproc, gproc.exception))
def _sample_request(self, status, status_descr, msg, headers, response, response_headers):
    """
    Performs sFlow sampling of a completed/errored RPC request (if configured to).

    Makes two calls:
        1) get_sflow_manager (overridden at process level)
        2) make sample dict (the kwargs to sflow_manager.transaction, may be overridden where appropriate)

    Then performs the transact call if the manager says to do so.
    """
    if CFG.get_safe('container.sflow.enabled', False):
        sm = self._get_sflow_manager()
        if sm and sm.should_sample:
            app_name = self._get_sample_name()
            try:
                trans_kwargs = self._build_sample(app_name, status, status_descr, msg, headers, response, response_headers)
                sm.transaction(**trans_kwargs)

            except Exception:
                log.exception("Could not sample, ignoring")

        else:
            log.debug("No SFlowManager or it told us not to sample this transaction")
def stop(self):
    log.debug("ProcManager stopping ...")

    # Call quit on procs to give them ability to clean up in reverse order
    procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)

    for proc in procs_list:
        try:
            self.terminate_process(proc.id)
        except Exception as ex:
            log.warn("Failed to terminate process (%s): %s", proc.id, ex)

    # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
    self.proc_sup.shutdown(CFG.get_safe("container.timeout.shutdown"))

    if self.procs:
        log.warn("ProcManager procs not empty: %s", self.procs)
    if self.procs_by_name:
        log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

    # Remove Resource registration
    if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
        try:
            self.container.resource_registry.delete(self.cc_id, del_associations=True)
        except NotFound:
            # already gone, this is ok
            pass

    if self.pd_enabled:
        self._stop_process_dispatcher()

    log.debug("ProcManager stopped, OK.")
def exchange_auto_delete(self):
    # Added because exchanges get deleted on broker restart
    if CFG.get_safe("container.messaging.names.durable", False):
        self._xs_auto_delete = False
        return False

    return self._xs_auto_delete
def _get_execution_engine_config(self):
    ee_base_cfg = CFG.get_safe("container.execution_engine") or {}
    if ee_base_cfg.get("type", None) != "scioncc":
        raise ContainerConfigError("Execution engine config invalid: %s" % ee_base_cfg)

    ee_cfg = deepcopy(ee_base_cfg)

    # If we are a child process, merge in child config override
    proc_name = multiprocessing.current_process().name
    ee_cfg["container"] = dict(child_proc_name=proc_name, is_child=False)
    child_cfgs = ee_base_cfg.get("child_configs", None) or {}
    if proc_name.startswith("Container-child-"):
        ee_cfg["container"]["is_child"] = True
        if proc_name in child_cfgs:
            log.info("Applying execution engine config override for child: %s", proc_name)
            dict_merge(ee_cfg, child_cfgs[proc_name], inplace=True)
        else:
            for cfg_name, ch_cfg in child_cfgs.iteritems():
                pattern = ch_cfg.get("name_pattern", None)
                if pattern and re.match(pattern, proc_name):
                    log.info("Applying execution engine config override %s for child: %s", cfg_name, proc_name)
                    dict_merge(ee_cfg, ch_cfg, inplace=True)
                    break

    ee_cfg.pop("child_configs", None)
    return ee_cfg
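# Illustrative sketch (not part of the original source): the shape of a container.execution_engine
# config that the method above would accept, written out as a plain dict. Only "type", "child_configs"
# and "name_pattern" are keys the code actually reads; the override values shown are hypothetical.
example_ee_cfg = {
    "type": "scioncc",                          # anything else raises ContainerConfigError
    "child_configs": {
        "Container-child-1": {                  # exact child process name match
            "heap_mb": 256,                     # hypothetical override value
        },
        "workers": {                            # named override selected by regex on the process name
            "name_pattern": r"Container-child-\d+",
            "heap_mb": 128,                     # hypothetical override value
        },
    },
}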
def _load_capabilities(self):
    self._cap_initialized = []      # List of capability constants initialized in container
    self._capabilities = []         # List of capability constants active in container
    self._cap_instances = {}        # Dict mapping capability->manager instance

    self._cap_definitions = Config(["res/config/container_capabilities.yml"]).data['capabilities']

    profile_filename = CFG.get_safe("container.profile", "development")
    if not profile_filename.endswith(".yml"):
        profile_filename = "res/profile/%s.yml" % profile_filename
    log.info("Loading CC capability profile from file: %s", profile_filename)
    profile_cfg = Config([profile_filename]).data
    if not isinstance(profile_cfg, dict) or profile_cfg['type'] != "profile" or not "profile" in profile_cfg:
        raise ContainerError("Container capability profile invalid: %s" % profile_filename)
    self.cap_profile = profile_cfg['profile']

    if "capabilities" in self.cap_profile and self.cap_profile['capabilities']:
        dict_merge(self._cap_definitions, self.cap_profile['capabilities'], True)

    CCAP.clear()
    cap_list = self._cap_definitions.keys()
    CCAP.update(zip(cap_list, cap_list))

    if "config" in self.cap_profile and self.cap_profile['config']:
        log.info("Container CFG was changed based on profile: %s", profile_filename)
def __init__(self, event_type=None, xp=None, process=None, **kwargs):
    """
    Constructs a publisher of events for a specific type.

    @param  event_type  The name of the event type object
    @param  xp          Exchange (AMQP) name, can be none, will use events default.
    """
    self.event_type = event_type
    self.process = process
    self._events_xp = CFG.get_safe("exchange.core.events", DEFAULT_EVENTS_XP)

    if bootstrap.container_instance and getattr(bootstrap.container_instance, 'event_repository', None):
        self.event_repo = bootstrap.container_instance.event_repository
    else:
        self.event_repo = None

    # generate an exchange name to publish events to
    container = (hasattr(self, '_process') and hasattr(self._process, 'container') and self._process.container) or BaseEndpoint._get_container_instance()
    if container and container.has_capability(container.CCAP.EXCHANGE_MANAGER):
        # might be too early in chain
        xp = xp or container.create_xp(self._events_xp)
        to_name = xp
    else:
        xp = xp or self.get_events_exchange_point()
        to_name = (xp, None)

    Publisher.__init__(self, to_name=to_name, **kwargs)
def start_listeners(self):
    """
    Starts all listeners in managed greenlets.

    This must be called after starting this IonProcess. Currently, the Container's ProcManager
    will handle this for you, but if using an IonProcess manually, you must remember to call
    this method or no attached listeners will run.
    """
    try:
        # disable normal error reporting, this method should only be called from startup
        self.thread_manager._failure_notify_callback = None

        # spawn all listeners in startup listeners (from initializer, or added later)
        for listener in self._startup_listeners:
            self.add_endpoint(listener)

        with Timeout(seconds=CFG.get_safe('cc.timeout.start_listener', 10)):
            waitall([x.get_ready_event() for x in self.listeners])

    except Timeout:
        # remove failed endpoints before reporting failure above
        for listener, proc in self._listener_map.iteritems():
            if proc.proc.dead:
                log.info("removed dead listener: %s", listener)
                self.listeners.remove(listener)
                self.thread_manager.children.remove(proc)

        raise IonProcessError("start_listeners did not complete in expected time")

    finally:
        self.thread_manager._failure_notify_callback = self._child_failed
def __init__(self, event_type=None, xp=None, process=None, **kwargs):
    """
    Constructs a publisher of events for a specific type.

    @param  event_type  The name of the event type object
    @param  xp          Exchange (AMQP) name, can be none, will use events default.
    """
    self.event_type = event_type
    self.process = process
    self._events_xp = CFG.get_safe("exchange.core.events", DEFAULT_EVENTS_XP)

    if bootstrap.container_instance and getattr(bootstrap.container_instance, 'event_repository', None):
        self.event_repo = bootstrap.container_instance.event_repository
    else:
        self.event_repo = None

    # generate an exchange name to publish events to
    container = (hasattr(self, '_process') and hasattr(self._process, 'container') and self._process.container) or BaseEndpoint._get_container_instance()
    if container and container.has_capability(container.CCAP.EXCHANGE_MANAGER):
        # might be too early in chain
        xp = xp or container.create_xp(self._events_xp)
        to_name = xp
    else:
        xp = xp or self.get_events_exchange_point()
        to_name = (xp, None)

    Publisher.__init__(self, to_name=to_name, **kwargs)
def _new_transport(self, ch_number=None):
    """
    Creates a new AMQPTransport with an underlying Pika channel.
    """
    amq_chan = blocking_cb(self.client.channel, 'on_open_callback', channel_number=ch_number)
    if amq_chan is None:
        log.error("AMQCHAN IS NONE THIS SHOULD NEVER HAPPEN, chan number requested: %s", ch_number)
        from pyon.container.cc import Container
        if Container.instance is not None:
            Container.instance.fail_fast("AMQCHAN IS NONE, messaging has failed", True)
        raise StandardError("AMQCHAN IS NONE THIS SHOULD NEVER HAPPEN, chan number requested: %s" % ch_number)

    transport = AMQPTransport(amq_chan)

    # return the pending in collection (lets this number be assigned again later)
    self.client._pending.remove(transport.channel_number)

    # by default, everything should have a prefetch count of 1 (configurable)
    # this can be overridden by the channel get_n related methods
    transport.qos_impl(prefetch_count=CFG.get_safe('container.messaging.endpoint.prefetch_count', 1))

    return transport
def start_listeners(self):
    """
    Starts all listeners in managed greenlets.

    Usually called by the ProcManager, unless using IonProcess manually.
    """
    try:
        # disable normal error reporting, this method should only be called from startup
        self.thread_manager._failure_notify_callback = None

        # spawn all listeners in startup listeners (from initializer, or added later)
        for listener in self._startup_listeners:
            self.add_endpoint(listener)

        with Timeout(seconds=CFG.get_safe('container.messaging.timeout.start_listener', 30)):
            gevent.wait([x.get_ready_event() for x in self.listeners])

    except Timeout:
        # remove failed endpoints before reporting failure above
        for listener, proc in self._listener_map.iteritems():
            if proc.proc.dead:
                log.info("removed dead listener: %s", listener)
                self.listeners.remove(listener)
                self.thread_manager.children.remove(proc)

        raise IonProcessError("start_listeners did not complete in expected time")

    finally:
        self.thread_manager._failure_notify_callback = self._child_failed
def exchange_auto_delete(self):
    # Added because exchanges get deleted on broker restart
    if CFG.get_safe('container.messaging.names.durable', False):
        self._xs_auto_delete = False
        return False

    return self._xs_auto_delete
def _send(self, msg, headers=None, **kwargs):

    # could have a specified timeout in kwargs
    if 'timeout' in kwargs and kwargs['timeout'] is not None:
        timeout = kwargs['timeout']
    else:
        timeout = CFG.get_safe('endpoint.receive.timeout', 10)

    #log.debug("RequestEndpointUnit.send (timeout: %s)", timeout)

    ts = time.time()

    if not self._recv_greenlet:
        self.channel.setup_listener(NameTrio(self.channel._send_name.exchange))  # anon queue
        self.channel.start_consume()
        self.spawn_listener()

    self.response_queue = event.AsyncResult()
    self.message_received = lambda m, h: self.response_queue.set((m, h))

    BidirectionalEndpointUnit._send(self, msg, headers=headers)

    try:
        result_data, result_headers = self.response_queue.get(timeout=timeout)
    except Timeout:
        raise exception.Timeout('Request timed out (%d sec) waiting for response from %s' % (timeout, str(self.channel._send_name)))
    finally:
        elapsed = time.time() - ts
        # log.info("Client-side request (conv id: %s/%s, dest: %s): %.2f elapsed",
        #          headers.get('conv-id', 'NOCONVID'),
        #          headers.get('conv-seq', 'NOSEQ'),
        #          self.channel._send_name,
        #          elapsed)

    #log.debug("Response data: %s, headers: %s", result_data, result_headers)
    return result_data, result_headers
def incoming(self, invocation):
    log.debug("PolicyInterceptor.incoming: %s", invocation.get_arg_value('process', invocation))

    # If missing, default to request just to be safe
    msg_performative = invocation.get_header_value('performative', 'request')

    # TODO - This should be removed once better process security is implemented.
    # This fix infers that all messages that do not specify an actor id are TRUSTED within the system
    policy_loaded = CFG.get_safe('system.load_policy', False)

    if policy_loaded:
        actor_id = invocation.get_header_value('ion-actor-id', None)
    else:
        actor_id = invocation.get_header_value('ion-actor-id', 'anonymous')

    # No need to check policy for response or failure messages - TODO - remove the last check at some point
    if msg_performative != 'inform-result' and msg_performative != 'failure' and actor_id is not None:

        # checking policy - if needed
        receiver = invocation.get_message_receiver()

        # If this is a sub RPC request to the RR service from a higher level service that has already been
        # validated and set a token, then skip checking policy yet again - should help with performance
        # and to simplify policy
        if receiver == 'resource_registry' and self.has_valid_token(invocation, ALLOW_RESOURCE_REGISTRY_SUB_CALLS):
            return invocation

        # Annotate that the message has started policy checking
        invocation.message_annotations[GovernanceDispatcher.POLICY__STATUS_ANNOTATION] = GovernanceDispatcher.STATUS_STARTED

        ret = None

        if self.governance_controller is not None:

            # First check for Org boundary policies if the container is configured as such
            org_id = self.governance_controller.get_container_org_boundary_id()
            if org_id is not None:
                ret = self.governance_controller.policy_decision_point_manager.check_resource_request_policies(invocation, org_id)

            if str(ret) != Decision.DENY_STR:
                # Next check endpoint process specific policies
                process_type = invocation.get_invocation_process_type()
                if process_type == 'agent':
                    ret = self.governance_controller.policy_decision_point_manager.check_agent_request_policies(invocation)
                elif process_type == 'service':
                    ret = self.governance_controller.policy_decision_point_manager.check_service_request_policies(invocation)

        log.debug("Policy Decision: " + str(ret))

        # Annotate that the message has completed policy checking
        invocation.message_annotations[GovernanceDispatcher.POLICY__STATUS_ANNOTATION] = GovernanceDispatcher.STATUS_COMPLETE

        if ret is not None:
            if str(ret) == Decision.DENY_STR:
                self.annotate_denied_message(invocation)
            elif str(ret) == Decision.PERMIT:
                self.permit_registry_calls_token(invocation)

    return invocation
def __init__(self, originating_container, actor_id, requesting_message, token):
    self.originator = originating_container
    self.actor_id = actor_id
    self.requesting_message = requesting_message
    self.token = token

    timeout = CFG.get_safe('container.messaging.timeout.receive', 30)
    self.expire_time = current_time_millis() + (timeout * 1000)  # Set the expire time to current time + timeout in ms
def declare(self):
    param_kwargs = {}
    # Added because exchanges get deleted on broker restart
    if CFG.get_safe('container.messaging.names.durable', False):
        param_kwargs["durable"] = True
        param_kwargs["auto_delete"] = False

    self.declare_exchange_impl(self.exchange, **param_kwargs)
def queue_durable(self):
    if self._xn_durable is not None:
        return self._xn_durable

    if CFG.get_safe('container.exchange.names.durable', False):
        return True

    return False
def declare(self):
    param_kwargs = {}
    # Fix OOIION-1710: Added because exchanges get deleted on broker restart
    if CFG.get_safe('container.exchange.names.durable', False):
        param_kwargs["durable"] = True
        param_kwargs["auto_delete"] = False

    self.declare_exchange_impl(self.exchange, **param_kwargs)
def declare(self):
    param_kwargs = {}
    # Added because exchanges get deleted on broker restart
    if CFG.get_safe("container.messaging.names.durable", False):
        param_kwargs["durable"] = True
        param_kwargs["auto_delete"] = False

    self.declare_exchange_impl(self.exchange, **param_kwargs)
def queue_durable(self):
    if self._xn_durable is not None:
        return self._xn_durable

    if CFG.get_safe("container.messaging.names.durable", False):
        return True

    return False
def on_start(self):
    self.ION_NOTIFICATION_EMAIL_ADDRESS = CFG.get_safe('server.smtp.sender')

    # Create an event processor
    self.event_processor = EmailEventProcessor()

    # Dictionaries that maintain information about users and their subscribed notifications
    self.user_info = {}

    # The reverse_user_info is calculated from the user_info dictionary
    self.reverse_user_info = {}

    # Get the clients
    # @TODO: Why are these not dependencies in the service YML???
    self.discovery = DiscoveryServiceClient()
    self.process_dispatcher = ProcessDispatcherServiceClient()
    self.event_publisher = EventPublisher()
    self.datastore = self.container.datastore_manager.get_datastore('events')

    self.start_time = get_ion_ts()

    #------------------------------------------------------------------------------------
    # Create an event subscriber for Reload User Info events
    #------------------------------------------------------------------------------------

    def reload_user_info(event_msg, headers):
        """
        Callback method for the subscriber to ReloadUserInfoEvent
        """
        notification_id = event_msg.notification_id
        log.debug("(UNS instance) received a ReloadNotificationEvent. The relevant notification_id is %s" % notification_id)

        try:
            self.user_info = self.load_user_info()
        except NotFound:
            log.warning("ElasticSearch has not yet loaded the user_index.")

        self.reverse_user_info = calculate_reverse_user_info(self.user_info)

        log.debug("(UNS instance) After a reload, the user_info: %s" % self.user_info)
        log.debug("(UNS instance) The recalculated reverse_user_info: %s" % self.reverse_user_info)

    # the subscriber for the ReloadUserInfoEvent
    self.reload_user_info_subscriber = EventSubscriber(
        event_type=OT.ReloadUserInfoEvent,
        origin='UserNotificationService',
        callback=reload_user_info)
    self.add_endpoint(self.reload_user_info_subscriber)
def _make_management_call(self, url, use_ems=True, method="get", data=None):
    """
    Makes a call to the Rabbit HTTP management API using the passed in HTTP method.
    """
    log.debug("Calling rabbit API management (%s): %s", method, url)

    if use_ems and self._ems_available():
        log.debug("Directing call to EMS")
        content = self._ems_client.call_management(url, method, headers=self._build_security_headers())
    else:
        meth = getattr(requests, method)

        try:
            username = CFG.get_safe("container.exchange.management.username", "guest")
            password = CFG.get_safe("container.exchange.management.password", "guest")

            with gevent.timeout.Timeout(10):
                r = meth(url, auth=(username, password), data=data)
            r.raise_for_status()

            if r.content != "":
                content = json.loads(r.content)
            else:
                content = None

        except gevent.timeout.Timeout as ex:
            raise Timeout(str(ex))
        except requests.exceptions.Timeout as ex:
            raise Timeout(str(ex))
        except (requests.exceptions.ConnectionError, socket.error) as ex:
            raise ServiceUnavailable(str(ex))
        except requests.exceptions.RequestException as ex:
            # the generic base exception all requests' exceptions inherit from; raise our
            # general server error too.
            raise ServerError(str(ex))

    return content
def __init__(self):
    classes = inspect.getmembers(interface.objects, inspect.isclass)
    for name, clzz in classes:
        model_classes[name] = clzz
    classes = inspect.getmembers(interface.messages, inspect.isclass)
    for name, clzz in classes:
        message_classes[name] = clzz

    from pyon.core.bootstrap import CFG
    self.validate_setattr = CFG.get_safe('validate.setattr', False)
def _get_management_url(self, *feats):
    """
    Builds a URL to be used with the Rabbit HTTP management API.
    """
    node = self._nodes.get('priviledged', self._nodes.values()[0])
    host = node.client.parameters.host

    url = "http://%s:%s/api/%s" % (host, CFG.get_safe("container.exchange.management.port", "55672"), "/".join(feats))

    return url
def _get_management_url(self, *feats):
    """
    Builds a URL to be used with the Rabbit HTTP management API.
    """
    node = self._nodes.get('priviledged', self._nodes.values()[0])
    host = node.client.parameters.host

    url = "http://%s:%s/api/%s" % (host,
                                   CFG.get_safe("container.exchange.management.port", "55672"),
                                   "/".join(feats))

    return url
def start(self):
    if self._started:
        return

    self._clear_stats_groups()
    self._activate_collection()

    # Install the container tracer
    self.container.tracer = CallTracer
    self.container.tracer.configure(CFG.get_safe("container.tracer", {}))

    self._started = True
def __init__(self, orgname=None, datastore_manager=None, container=None):
    self.container = container or bootstrap.container_instance

    # Get an instance of datastore configured as directory.
    datastore_manager = datastore_manager or self.container.datastore_manager
    self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY, DataStore.DS_PROFILE.DIRECTORY)

    self.orgname = orgname or CFG.system.root_org
    self.is_root = (self.orgname == CFG.system.root_org)

    self.events_enabled = CFG.get_safe("service.directory.publish_events") is True  # Publish change events?
    self.event_pub = None
    self.event_sub = None
def _create_listening_endpoint(self, **kwargs):
    """
    Creates a listening endpoint for spawning processes.

    This method exists to be able to override the type created via configuration.
    By default it creates a ProcessRPCServer, or a ConversationRPCServer when RPC
    conversations are enabled.
    """
    eptypestr = CFG.get_safe('container.messaging.endpoint.proc_listening_type', None)
    if eptypestr is not None:
        module, cls = eptypestr.rsplit('.', 1)
        mod = __import__(module, fromlist=[cls])
        eptype = getattr(mod, cls)
        ep = eptype(**kwargs)
    else:
        conv_enabled = CFG.get_safe('container.messaging.endpoint.rpc_conversation_enabled', False)
        if conv_enabled:
            ep = ConversationRPCServer(**kwargs)
        else:
            ep = ProcessRPCServer(**kwargs)
    return ep
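# Illustrative sketch (not part of the original source): the dotted "module.Class" dynamic-import
# pattern used above. A real stdlib class stands in for the endpoint class so the snippet actually
# runs; in the container the string would come from container.messaging.endpoint.proc_listening_type.
eptypestr = "logging.handlers.RotatingFileHandler"   # stand-in for e.g. a custom RPC server class path
module, cls = eptypestr.rsplit('.', 1)               # -> ("logging.handlers", "RotatingFileHandler")
mod = __import__(module, fromlist=[cls])             # imports the module
eptype = getattr(mod, cls)                           # the class object, ready to instantiate with **kwargs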
def __init__(self):
    self.cont_cert = None
    self.cont_key = None
    self.root_cert = None
    self.white_list = []

    # Look for certificates and keys in "the usual places"
    certstore_path = self.certstore = CFG.get_safe('authentication.certstore', CERTSTORE_PATH)
    log.debug("certstore_path: %s" % str(certstore_path))

    keystore_path = self.keystore = CFG.get_safe('authentication.keystore', KEYSTORE_PATH)
    log.debug("keystore_path: %s" % str(keystore_path))

    if certstore_path and keystore_path:
        if certstore_path == 'directory':
            log.debug("Container.instance.directory: %s" % str(Container.instance.directory))
            Container.instance.directory.load_authentication()
        else:
            cont_cert_path = os.path.join(certstore_path, CONTAINER_CERT_NAME)
            log.debug("cont_cert_path: %s" % cont_cert_path)
            cont_key_path = os.path.join(keystore_path, CONTAINER_KEY_NAME)
            log.debug("cont_key_path: %s" % cont_key_path)
            root_cert_path = os.path.join(certstore_path, ORG_CERT_NAME)
            log.debug("root_cert_path: %s" % root_cert_path)
            if os.path.exists(cont_cert_path) and os.path.exists(cont_key_path) and os.path.exists(root_cert_path):
                with open(cont_cert_path, 'r') as f:
                    self.cont_cert = f.read()
                log.debug("cont_cert: %s" % self.cont_cert)
                self.cont_key = EVP.load_key(cont_key_path)
                with open(root_cert_path, 'r') as f:
                    self.root_cert = f.read()
                log.debug("root_cert: %s" % self.root_cert)
                self.add_to_white_list(self.root_cert)
def __init__(self):
    classes = inspect.getmembers(interface.objects, inspect.isclass)
    for name, clzz in classes:
        if clzz.__bases__[0].__name__ == "IonEnum":
            enum_classes[name] = clzz
        else:
            model_classes[name] = clzz
    classes = inspect.getmembers(interface.messages, inspect.isclass)
    for name, clzz in classes:
        message_classes[name] = clzz

    from pyon.core.bootstrap import CFG
    self.validate_setattr = CFG.get_safe('container.objects.validate.setattr', False)
def _get_node_for_xp(self, xp_name, xs_name):
    """
    Finds a node to be used by an ExchangePoint, falling back to an ExchangeSpace if none found.

    Similar to _get_node_for_xs.

    Returns a 2-tuple of name, node.
    """
    for broker_name, broker_cfg in CFG.get_safe('exchange.exchange_brokers', {}).iteritems():
        # Bug in DotList, contains not implemented correctly
        if xp_name in list(broker_cfg['join_xp']):
            return broker_name, self._priv_nodes.get(broker_name, self._nodes.get(broker_name, None))

    # @TODO: iterate exchange.exchange_spaces.<item>.exchange_points?
    return self._get_node_for_xs(xs_name)
def _ems_available(self):
    """
    Returns True if the EMS is (likely) available and the auto_register CFG entry is True.

    @TODO: make a property
    """
    if CFG.get_safe('container.exchange.auto_register', False) and self.use_ems:
        # ok now make sure it's in the directory
        exchange_service = get_service_registry().is_service_available('exchange_management')
        if exchange_service:
            return True

    return False
def __init__(self, container):
    self._container = container
    self._gl_counter = None
    self._conf_last_mod = None      # last modified time of the conf file

    sflowcfg = CFG.get_safe('container.sflow', {})
    self._counter_interval = CFG.get_safe('container.sflow.counter_interval', 30)              # number of seconds between counter pulses, 0 means don't do it
    self._hsflowd_addr = CFG.get_safe("container.sflow.hsflowd_addr", "localhost")              # host where hsflowd is running
    self._hsflowd_port = CFG.get_safe("container.sflow.hsflowd_port", 36343)                    # udp port on host where hsflowd is listening for json
    self._hsflowd_conf = CFG.get_safe("container.sflow.hsflowd_auto_file", "/etc/hsflowd.auto") # hsflowd auto-conf file, where we poll for updates (only if addr is local)
    self._trans_sample_rate = CFG.get_safe("container.sflow.trans_sample_rate", 1)              # transaction sample rate, 1 means do everything!
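# Illustrative sketch (not part of the original source): the container.sflow config block the
# constructor above reads, written out as a plain dict with the same defaults. Only the key names
# are taken from the code; the values shown are just examples.
example_sflow_cfg = {
    "enabled": True,                            # checked elsewhere (e.g. before sampling RPC requests)
    "counter_interval": 30,                     # seconds between counter pulses, 0 disables them
    "hsflowd_addr": "localhost",                # host where hsflowd runs
    "hsflowd_port": 36343,                      # UDP port hsflowd listens on for JSON
    "hsflowd_auto_file": "/etc/hsflowd.auto",   # polled for updates when hsflowd is local
    "trans_sample_rate": 1,                     # 1 = sample every transaction
}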
def exchange_durable(self):
    # TODO: Check - exchanges get deleted on broker restart
    if CFG.get_safe('container.messaging.names.durable', False):
        self._exchange_durable = True
        return True

    if self._exchange_durable is not None:
        return self._exchange_durable

    if hasattr(self, '_send_name') and hasattr(self._send_name, 'exchange_durable'):
        return self._send_name.exchange_durable
    if hasattr(self, '_recv_name') and hasattr(self._recv_name, 'exchange_durable'):
        return self._recv_name.exchange_durable

    return False
def _bootstrap_default_org(self):
    """
    Finds an Org resource to be used by create_xs.

    @TODO: create_xs is being removed, so this will not be needed
    """
    if not self.org_id:
        # find the default Org
        root_orgname = CFG.get_safe("system.root_org", "ION")
        org_ids, _ = self._rr.find_resources(RT.Org, name=root_orgname, id_only=True)
        if not org_ids or len(org_ids) != 1:
            log.warn("Could not find ION root Org")
            return None

        self.org_id = org_ids[0]
        log.debug("Bootstrapped Container exchange manager with org id: %s", self.org_id)

    return self.org_id
def send_batch_email(self, msg_body=None, msg_subject=None, msg_recipient=None, smtp_client=None):
    """
    Send the email

    @param msg_body       str
    @param msg_subject    str
    @param msg_recipient  str
    @param smtp_client    object
    """
    msg = MIMEText(msg_body)
    msg['Subject'] = msg_subject
    msg['From'] = self.ION_NOTIFICATION_EMAIL_ADDRESS
    msg['To'] = msg_recipient
    log.debug("UNS sending batch (digest) email from %s to %s",
              self.ION_NOTIFICATION_EMAIL_ADDRESS, msg_recipient)

    smtp_sender = CFG.get_safe('server.smtp.sender')
    smtp_client.sendmail(smtp_sender, [msg_recipient], msg.as_string())
def start(self):
    log.debug("Container starting...")
    if self._is_started:
        raise ContainerError("Container already started")

    start_order = self.cap_profile['start_order']

    for cap in start_order:
        if cap not in self._cap_instances:
            continue

        # First find the default enabled value if no CFG key exists
        enabled_default = self._cap_definitions.get_safe("%s.enabled_default" % cap, True)

        # Then find CFG key where enabled flag is (default or override)
        enabled_config = self._cap_definitions.get_safe("%s.enabled_config" % cap, "container.%s.enabled" % cap)

        # Then determine the enabled value
        enabled = CFG.get_safe(enabled_config, enabled_default)

        if enabled:
            log.debug("start(): Starting '%s'" % cap)
            try:
                cap_obj = self._cap_instances[cap]
                cap_obj.start()
                self._capabilities.append(cap)
            except Exception as ex:
                log.error("Container Capability %s start error: %s" % (cap, ex))
                raise
        else:
            log.debug("start(): Capability '%s' disabled by config '%s'", cap, enabled_config)

    if self.has_capability(CCAP.EVENT_PUBLISHER):
        self.event_pub.publish_event(event_type="ContainerLifecycleEvent",
                                     origin=self.id,
                                     origin_type="CapabilityContainer",
                                     sub_type="START",
                                     state=ContainerStateEnum.START)

    self._is_started = True
    self._status = RUNNING

    log.info("Container (%s) started, OK.", self.id)
def begin(self):
    self._active_queues = set()
    self._test_changes = {}
    self._queues_declared = []          # ordered list of queues declared
    self._queues = defaultdict(list)    # queue name -> list of accesses

    from pyon.ion.exchange import ExchangeManager
    from pyon.util.containers import DotDict
    from pyon.core.bootstrap import CFG
    from mock import Mock

    containermock = Mock()
    containermock.resource_registry.find_resources.return_value = ([], None)

    self.ex_manager = ExchangeManager(containermock)   # needs to be able to setattr
    self.ex_manager._nodes['priviledged'] = DotDict(
        client=DotDict(parameters=DotDict(host=CFG.get_safe('server.amqp.host', 'localhost'))))