def get_devices(self):
    """Return a copy of the tracked devices annotated with publish health.

    Every entry of the deep-copied ``self._devices`` mapping receives a
    ``health`` dictionary: BAD when its ``last_published_utc`` timestamp is
    more than five minutes behind the current UTC time, GOOD otherwise.
    When at least one device is tracked, the agent's own health status is
    updated to summarise the result.

    :return: Deep copy of the device mapping with ``health`` filled in.
    """
    devices = deepcopy(self._devices)
    stale_found = False

    for device_name, details in devices.items():
        last_published = parse_timestamp_string(details['last_published_utc'])
        now = get_aware_utc_now()
        if last_published + datetime.timedelta(minutes=5) < now:
            stale_found = True
            health = Status.build(
                BAD_STATUS,
                'Too long between publishes for {}'.format(device_name))
        else:
            health = Status.build(GOOD_STATUS)
        details['health'] = health.as_dict()

    # With no devices tracked the agent status is left untouched.
    if not devices:
        return devices

    if stale_found:
        self.vip.health.set_status(
            BAD_STATUS,
            'At least one device has not published in 5 minutes')
    else:
        self.vip.health.set_status(GOOD_STATUS,
                                   'All devices publishing normally.')
    return devices
def simple_secondary_state_machine(self, current_state):
    """State machine step for a simple secondary instance.

    Stops the target agent while the primary is communicating and starts
    it when the primary is not.  An alert is sent only when
    ``current_state`` differs from the previously recorded state.

    :param current_state: Activity flags of the remote platforms.  Only
        the first element (primary) is used; the second (Volttron Central
        status) is ignored.
    :type current_state: tuple of booleans
    """
    primary_is_up, _ = current_state
    alert_key = 'failover {}'.format(self.agent_id)

    if primary_is_up:
        template, health_level = 'Primary is active stopping agent {}', STATUS_GOOD
    else:
        template, health_level = 'Primary is inactive starting agent {}', STATUS_BAD
    context = template.format(self.agent_vip_identity)

    # Alert only on a state transition, not on every poll.
    if current_state != self._state:
        self._state = current_state
        _log.warning(context)
        self.vip.health.send_alert(
            alert_key, Status.build(health_level, context=context))

    if primary_is_up:
        self._agent_control('stop_agent')
        return

    # Diagnostic logging of what the control service currently reports.
    agents = self.vip.rpc.call(CONTROL, 'list_agents').get()
    _log.info(
        f"simple_secondary_state_machine List agents: {self.agent_uuid}, {agents}"
    )
    agents_stats = self.vip.rpc.call(CONTROL, 'status_agents').get()
    _log.info(
        f"simple_secondary_state_machine Agent stats: {self.agent_uuid}, {agents_stats}"
    )
    proc_info = self.vip.rpc.call(CONTROL, 'agent_status',
                                  self.agent_uuid).get()
    _log.info(
        f"simple_secondary_state_machine: {self.agent_uuid}, {proc_info}"
    )

    # The agent is started only when both elements of proc_info are None.
    if proc_info[0] is None and proc_info[1] is None:
        _log.info(
            f"simple_secondary_state_machine, starting agent: {self.agent_uuid}"
        )
        self._agent_control('start_agent')
def get_status(platform_uuid):
    """Return the health dictionary of the platform ``platform_uuid``.

    ``self`` is not a parameter; it is resolved from the surrounding scope.
    A platform missing from ``self._pa_agents`` is reported as BAD; a
    platform whose ``get_health`` RPC raises ``Unreachable`` is reported as
    UNKNOWN.

    :param platform_uuid: Key used to look up the platform connection.
    :return: Health status as a dictionary.
    """
    connection = self._pa_agents.get(platform_uuid)

    if connection is not None:
        try:
            _log.debug('TRYING TO REACH {}'.format(platform_uuid))
            return connection.agent.vip.rpc.call(
                VOLTTRON_CENTRAL_PLATFORM, 'get_health').get(timeout=30)
        except Unreachable:
            return Status.build(UNKNOWN_STATUS,
                                "Platform Agent Unreachable").as_dict()

    _log.debug(
        'cn is NONE so status is BAD for uuid {}'.format(platform_uuid))
    return Status.build(BAD_STATUS, "Platform Unreachable.").as_dict()
def get_status(platform_uuid):
    """Look up and return the health of one registered platform.

    ``self`` is taken from the enclosing scope rather than passed in.

    :param platform_uuid: Identifier used as key into ``self._pa_agents``.
    :return: Dictionary form of the platform health: BAD when no connection
        is registered for the uuid, UNKNOWN when the ``get_health`` RPC
        raises ``Unreachable``, otherwise whatever the RPC returned.
    """
    platform_cn = self._pa_agents.get(platform_uuid)
    if platform_cn is None:
        reason = 'cn is NONE so status is BAD for uuid {}'.format(
            platform_uuid)
        _log.debug(reason)
        unreachable = Status.build(BAD_STATUS, "Platform Unreachable.")
        return unreachable.as_dict()

    try:
        _log.debug('TRYING TO REACH {}'.format(platform_uuid))
        pending = platform_cn.agent.vip.rpc.call(VOLTTRON_CENTRAL_PLATFORM,
                                                 'get_health')
        result = pending.get(timeout=30)
    except Unreachable:
        fallback = Status.build(UNKNOWN_STATUS, "Platform Agent Unreachable")
        result = fallback.as_dict()
    return result
def send_alert(self, unseen_topics):
    """Raise a timeout alert for this group listing the missing topics.

    With ``publish_remote`` set, the alert goes to the main agent's remote
    agent; on ``gevent.Timeout`` it falls back to the main agent itself,
    and on success it is additionally sent locally when ``publish_local``
    is set.  Without ``publish_remote`` the alert is sent through the main
    agent only.

    :param unseen_topics: Topics that were expected but not received.
        Entries may be plain values or tuples (ordered by first element).
    :type unseen_topics: list
    :raises RuntimeError: If remote publishing is requested but the main
        agent has no remote agent.
    """
    def _sort_key(entry):
        # Tuples are ordered by their first element, anything else by itself.
        return entry[0] if isinstance(entry, tuple) else entry

    alert_key = "AlertAgent Timeout for group {}".format(self.group_name)
    _log.debug(f"unseen_topics {unseen_topics}")
    ordered_topics = sorted(unseen_topics, key=_sort_key)
    _log.debug(f"sorted : {ordered_topics}")
    context = "Topic(s) not published within time limit: {}".format(
        ordered_topics)
    status = Status.build(STATUS_BAD, context=context)

    if not self.publish_remote:
        self.main_agent.vip.health.send_alert(alert_key, status)
        return

    try:
        remote_agent = self.main_agent.remote_agent
        if remote_agent:
            remote_agent.vip.health.send_alert(alert_key, status)
        else:
            raise RuntimeError("Remote agent unavailable")
    except gevent.Timeout:
        # Remote delivery timed out: fall back to the local platform.
        self.main_agent.vip.health.send_alert(alert_key, status)
        return

    # Remote delivery succeeded; mirror locally only when asked to.
    if self.publish_local:
        self.main_agent.vip.health.send_alert(alert_key, status)
def send_alert(self, unseen_topics):
    """Raise a timeout alert for this group listing the missing topics.

    When ``self.main_agent`` is set, the alert is sent through its remote
    agent (an error is logged if there is none).  A ``gevent.Timeout``
    falls back to this agent's own health subsystem; on success the alert
    is also sent locally when ``publish_local`` is set.  Without a main
    agent the alert is sent locally only.

    :param unseen_topics: Topics that were expected but not received.
    :type unseen_topics: list
    """
    alert_key = "AlertAgent Timeout for group {}".format(self.group_name)
    missing = sorted(unseen_topics)
    context = "Topic(s) not published within time limit: {}".format(missing)
    status = Status.build(STATUS_BAD, context=context)

    if not self.main_agent:
        self.vip.health.send_alert(alert_key, status)
        return

    try:
        remote_agent = self.main_agent.remote_agent
        if remote_agent:
            remote_agent.vip.health.send_alert(alert_key, status)
        else:
            _log.error("Remote agent unavailable")
    except gevent.Timeout:
        # Remote delivery timed out: fall back to the local health channel.
        self.vip.health.send_alert(alert_key, status)
        return

    if self.publish_local:
        self.vip.health.send_alert(alert_key, status)
def historian_setup(self):
    """Prepare the connection used to forward data to the destination.

    When ``rmq_to_rmq_comm`` is set, no remote agent is built and
    ``_target_platform`` is cleared.  Otherwise an agent is built against
    ``destination_vip``; on ``gevent.Timeout`` the health status is set to
    BAD and an alert is sent, and ``_target_platform`` is left unchanged.
    """
    if self.rmq_to_rmq_comm:
        _log.debug("Setting up to forward to {}".format(
            self.destination_instance_name))
        self._target_platform = None
        return

    _log.debug("Setting up to forward to {}".format(self.destination_vip))
    try:
        remote = build_agent(
            address=self.destination_vip,
            serverkey=self.destination_serverkey,
            publickey=self.core.publickey,
            secretkey=self.core.secretkey,
            enable_store=False,
            identity=self.remote_identity,
            instance_name=self.destination_instance_name)
    except gevent.Timeout:
        self.vip.health.set_status(STATUS_BAD, "Timeout in setup of agent")
        try:
            current = Status.from_json(self.vip.health.get_status())
            self.vip.health.send_alert(DATAMOVER_TIMEOUT_KEY, current)
        except KeyError:
            _log.error("Error getting the health status")
        return

    self._target_platform = remote
def _alert(self, topic, threshold, data, point=''):
    """
    Raise alert for the given topic.

    The alert message states whether ``data`` is above or below
    ``threshold`` and is sent as a BAD status keyed by ``topic``.

    :param topic: Topic that has published some threshold-exceeding value.
    :type topic: str

    :param threshold: Value that has been exceeded. Used in alert message.
    :type threshold: float

    :param data: Value that is out of range. Used in alert message.
    :type data: float

    :param point: Optional point name. Used in alert message.
    :type point: str
    """
    if point:
        point = '({})'.format(point)

    custom = "above" if threshold < data else "below"

    # The space before "is" was missing, which produced messages such as
    # "value (5)is above acceptable limit (3)".
    message = ("{topic}{point} value ({data}) "
               "is {custom} acceptable limit ({threshold})").format(
        topic=topic,
        point=point,
        data=data,
        custom=custom,
        threshold=threshold)

    status = Status.build(STATUS_BAD, message)
    self.vip.health.send_alert(topic, status)
def setup(self):
    """
    Make sure the tag, tag-reference and category collections exist.

    Lists the collections of the default database and, for each of
    ``tags_collection``, ``tag_refs_collection`` and
    ``categories_collection`` that is missing, calls the matching
    ``_init_*`` helper(s) to load initial values.  If the collections cannot
    be listed, initialization is skipped entirely so that the original
    error is reported and already-populated collections are not
    re-initialized.  On any failure the agent's health is set to BAD, an
    alert is sent and the agent is stopped.

    :return:
    """
    _log.debug("Setup of mongodb tagging agent")
    err_message = ""
    collections = []
    db = None
    try:
        db = self._client.get_default_database()
        collections = db.collection_names(include_system_collections=False)
        _log.debug(collections)
    except Exception as e:
        err_message = "Unable to query list of existing tables from the " \
                      "database. Exception in init of tagging service: {}. " \
                      "Stopping tagging service agent".format(e.args)

    # Only initialize when the existing collections are known; otherwise
    # every collection would look missing and be loaded again.
    if not err_message:
        collection = ""
        try:
            collection = self.tags_collection
            if self.tags_collection in collections:
                _log.info("{} collection exists. Assuming initial values have "
                          "been loaded".format(collection))
            else:
                self._init_tags(db)
                self._init_category_tags(db)

            collection = self.tag_refs_collection
            if self.tag_refs_collection in collections:
                _log.info("{} collection exists. Assuming initial values have "
                          "been loaded".format(collection))
            else:
                self._init_tag_refs(db)

            collection = self.categories_collection
            if self.categories_collection in collections:
                _log.info("{} collection exists. Assuming initial values "
                          "have been loaded".format(collection))
            else:
                self._init_categories(db)
        except Exception as e:
            # `collection` names the collection being processed on failure.
            err_message = ("Initialization of {} collection failed with "
                           "exception: {}. Stopping tagging service "
                           "agent.").format(collection, e.args)

    if err_message:
        _log.error(err_message)
        self.vip.health.set_status(
            STATUS_BAD, "Initialization of tagging service "
            "failed")
        status = Status.from_json(self.vip.health.get_status_json())
        self.vip.health.send_alert(TAGGING_SERVICE_SETUP_FAILED, status)
        self.core.stop()
def setup(self):
    """
    Make sure the tag, tag-reference and category collections exist.

    Lists the collections of the default database and, for each of
    ``tags_collection``, ``tag_refs_collection`` and
    ``categories_collection`` that is missing, calls the matching
    ``_init_*`` helper(s) to load initial values.  If the collections cannot
    be listed, initialization is skipped entirely so that the original
    error is reported and already-populated collections are not
    re-initialized.  On any failure the agent's health is set to BAD, an
    alert is sent and the agent is stopped.

    :return:
    """
    _log.debug("Setup of mongodb tagging agent")
    err_message = ""
    collections = []
    db = None
    try:
        db = self._client.get_default_database()
        collections = db.collection_names(include_system_collections=False)
        _log.debug(collections)
    except Exception as e:
        err_message = "Unable to query list of existing tables from the " \
                      "database. Exception in init of tagging service: {}. " \
                      "Stopping tagging service agent".format(e.args)

    # Only initialize when the existing collections are known; otherwise
    # every collection would look missing and be loaded again.
    if not err_message:
        collection = ""
        try:
            collection = self.tags_collection
            if self.tags_collection in collections:
                _log.info("{} collection exists. Assuming initial values have "
                          "been loaded".format(collection))
            else:
                self._init_tags(db)
                self._init_category_tags(db)

            collection = self.tag_refs_collection
            if self.tag_refs_collection in collections:
                _log.info("{} collection exists. Assuming initial values have "
                          "been loaded".format(collection))
            else:
                self._init_tag_refs(db)

            collection = self.categories_collection
            if self.categories_collection in collections:
                _log.info("{} collection exists. Assuming initial values "
                          "have been loaded".format(collection))
            else:
                self._init_categories(db)
        except Exception as e:
            # `collection` names the collection being processed on failure.
            err_message = ("Initialization of {} collection failed with "
                           "exception: {}. Stopping tagging service "
                           "agent.").format(collection, e.args)

    if err_message:
        _log.error(err_message)
        self.vip.health.set_status(STATUS_BAD,
                                   "Initialization of tagging service "
                                   "failed")
        status = Status.from_json(self.vip.health.get_status_json())
        self.vip.health.send_alert(TAGGING_SERVICE_SETUP_FAILED, status)
        self.core.stop()
def send_alert(self, device, point=None):
    """Send a BAD-status timeout alert for a device or one of its points.

    :param device: Device name used in the alert key and message.
    :param point: Optional point name; when given it is appended to the
        device name in parentheses.
    """
    subject = device if point is None else "{}({})".format(device, point)
    alert_key = "Timeout:{}".format(subject)
    context = "{} not published within time limit".format(subject)

    self.vip.health.send_alert(
        alert_key, Status.build(STATUS_BAD, context=context))
def alert(self, message, topic):
    """
    Send a BAD-status alert keyed by ``topic``.

    :param message: Text carried by the alert status.
    :param topic: PUB/SUB topic that caused the alert; used as alert key.
    :type message: str
    :type topic: str
    """
    bad_status = Status.build(STATUS_BAD, message)
    health = self.vip.health
    health.send_alert(topic, bad_status)
def send_alarm(self, error_text):
    """Send an alarm as a BAD-status alert.

    The alert may be picked up by the emailer agent.

    :param error_text: Text of the message to be sent.
    :type error_text: str
    """
    # NOTE(review): the "{}" placeholder in the key is never formatted, so
    # the key is sent literally - confirm whether that is intended.
    alert_key = "BESSAgent Alarm {}"
    status = Status.build(
        STATUS_BAD,
        context="ERROR in BESS Agent: {}".format(error_text))
    self.vip.health.send_alert(alert_key, status)
def send_alert(self, unseen_topics):
    """Raise a timeout alert for this group listing the missing topics.

    :param unseen_topics: Topics that were expected but not received; they
        are reported in sorted order.
    :type unseen_topics: list
    """
    missing = sorted(unseen_topics)
    status = Status.build(
        STATUS_BAD,
        context="Topic(s) not published within time limit: {}".format(
            missing))
    alert_key = "AlertAgent Timeout for group {}".format(self.group_name)
    self.vip.health.send_alert(alert_key, status)
def simple_secondary_state_machine(self, current_state):
    """Function representing the state machine for a simple secondary
    instance. Starts the target agent if the simple primary is not
    communicating.

    An alert is sent only when ``current_state`` differs from the state
    recorded on the previous call.

    :param current_state: Indicates if remote platforms are active. Ignores
        the Volttron Central status.
    :type current_state: tuple of booleans
    """
    primary_is_up, _ = current_state

    alert_key = 'failover {}'.format(self.agent_id)

    if primary_is_up:
        context = 'Primary is active stopping agent {}'.format(
            self.agent_vip_identity)
        if current_state != self._state:
            self._state = current_state
            _log.warning(context)
            status = Status.build(STATUS_GOOD, context=context)
            self.vip.health.send_alert(alert_key, status)

        self._agent_control('stop_agent')

    else:
        context = 'Primary is inactive starting agent {}'.format(
            self.agent_vip_identity)
        if current_state != self._state:
            self._state = current_state
            _log.warning(context)
            status = Status.build(STATUS_BAD, context=context)
            self.vip.health.send_alert(alert_key, status)

        proc_info = self.vip.rpc.call(CONTROL,
                                      'agent_status',
                                      self.agent_uuid).get()

        # The first element may be None (see the `is None` checks used for
        # the same RPC elsewhere); comparing None with 0 raises TypeError
        # on Python 3, so guard before the numeric comparison.
        pid, return_code = proc_info[0], proc_info[1]
        is_running = pid is not None and pid > 0 and return_code is None
        if not is_running:
            self._agent_control('start_agent')
def watch_agents(self):
    """Alert when any identity on the watch list is absent from the peers.

    Fetches the current peer list and sends a single BAD-status alert
    naming every entry of ``self.watchlist`` that is not connected.
    Nothing is sent when all watched agents are present.
    """
    connected = self.vip.peerlist().get()
    missing_agents = [vip_id for vip_id in self.watchlist
                      if vip_id not in connected]
    if not missing_agents:
        return

    context = "Agent(s) expected but but not running {}".format(
        missing_agents)
    _log.warning(context)
    self.vip.health.send_alert(
        "AgentWatcher", Status.build(STATUS_BAD, context=context))
def historian_setup(self):
    """Build the agent used to reach the destination platform.

    On success the agent is stored in ``_target_platform``.  On
    ``gevent.Timeout`` the health status is set to BAD and an alert is
    sent; ``_target_platform`` is left unchanged.
    """
    _log.debug("Setting up to forward to {}".format(self.destination_vip))
    try:
        remote = build_agent(
            address=self.destination_vip,
            serverkey=self.destination_serverkey,
            publickey=self.core.publickey,
            secretkey=self.core.secretkey,
            enable_store=False)
    except gevent.Timeout:
        health = self.vip.health
        health.set_status(STATUS_BAD, "Timeout in setup of agent")
        health.send_alert(DATAMOVER_TIMEOUT_KEY,
                          Status.from_json(health.get_status()))
        return

    self._target_platform = remote
def get_devices(self):
    """Return a health-annotated deep copy of ``self._devices``.

    A device is marked BAD when its ``last_published_utc`` timestamp plus
    five minutes lies before the current UTC time, otherwise GOOD.  If any
    devices exist, the agent's own health status is set to BAD when at
    least one device is BAD and to GOOD otherwise.

    :return: The annotated copy; ``self._devices`` itself is not modified.
    """
    snapshot = deepcopy(self._devices)
    publish_window = datetime.timedelta(minutes=5)
    bad_devices = 0

    for topic, record in snapshot.items():
        published_at = parse_timestamp_string(record['last_published_utc'])
        checked_at = get_aware_utc_now()
        if published_at + publish_window < checked_at:
            record['health'] = Status.build(
                BAD_STATUS,
                'Too long between publishes for {}'.format(topic)).as_dict()
            bad_devices += 1
        else:
            record['health'] = Status.build(GOOD_STATUS).as_dict()

    if snapshot:
        summary = (
            (BAD_STATUS,
             'At least one device has not published in 5 minutes')
            if bad_devices else
            (GOOD_STATUS, 'All devices publishing normally.')
        )
        self.vip.health.set_status(*summary)

    return snapshot
def historian_setup(self):
    """Start an agent connected to ``destination_vip`` for forwarding.

    The agent is run in its own greenlet and given ten seconds to signal
    ``onstart``.  On success it is stored in ``_target_platform``.  If it
    does not start in time, the greenlet is killed, the health status is
    set to BAD and an alert is sent; ``_target_platform`` is left
    unchanged.
    """
    runner = None
    try:
        _log.debug(
            "Setting up to forward to {}".format(destination_vip))
        event = gevent.event.Event()
        agent = Agent(address=destination_vip, enable_store=False)
        agent.core.onstart.connect(lambda *a, **kw: event.set(), event)
        runner = gevent.spawn(agent.core.run)
        # Event.wait(timeout=...) returns instead of raising when the
        # timeout expires, so the handler below could never run.  A
        # gevent.Timeout context raises as intended.
        with gevent.Timeout(10):
            event.wait()
        self._target_platform = agent
    except gevent.Timeout:
        # Do not leave a half-started agent running in the background.
        if runner is not None:
            runner.kill(block=False)
        self.vip.health.set_status(STATUS_BAD, "Timeout in setup of agent")
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY, status)
def historian_setup(self):
    """Connect to the destination platform for data forwarding.

    Stores the agent built against ``destination_vip`` in
    ``_target_platform``.  When building it raises ``gevent.Timeout``, the
    health status becomes BAD and that status is sent as an alert.
    """
    destination = self.destination_vip
    _log.debug("Setting up to forward to {}".format(destination))

    connected_agent = None
    try:
        connected_agent = build_agent(address=self.destination_vip,
                                      serverkey=self.destination_serverkey,
                                      publickey=self.core.publickey,
                                      secretkey=self.core.secretkey,
                                      enable_store=False)
    except gevent.Timeout:
        self.vip.health.set_status(STATUS_BAD, "Timeout in setup of agent")
        bad_status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(DATAMOVER_TIMEOUT_KEY, bad_status)
    else:
        self._target_platform = connected_agent
def health(self):
    """
    Returns a Status object as a dictionary.

    The status is currently always rebuilt as GOOD ("Platform here!") on
    every call and cached in ``self._health``.

    :return: The health status as a dictionary.
    """
    # TODO: a staleness check used to be sketched here (report BAD when
    # more than 10 seconds have passed since
    # ``self._last_time_verified_connection``); it is disabled, so the
    # timestamp that was only computed for it is no longer fetched.
    self._health = Status.build(GOOD_STATUS, "Platform here!")
    return self._health.as_dict()
def historian_setup(self):
    """Start an agent connected to ``destination_vip`` for forwarding.

    The agent is run in its own greenlet and given ten seconds to signal
    ``onstart``.  On success it is stored in ``_target_platform``.  If it
    does not start in time, the greenlet is killed, the health status is
    set to BAD and an alert is sent; ``_target_platform`` is left
    unchanged.
    """
    runner = None
    try:
        _log.debug(
            "Setting up to forward to {}".format(destination_vip))
        event = gevent.event.Event()
        agent = Agent(address=destination_vip)
        agent.core.onstart.connect(lambda *a, **kw: event.set(), event)
        runner = gevent.spawn(agent.core.run)
        # Event.wait(timeout=...) returns instead of raising when the
        # timeout expires, so the handler below could never run.  A
        # gevent.Timeout context raises as intended.
        with gevent.Timeout(10):
            event.wait()
        self._target_platform = agent
    except gevent.Timeout:
        # Do not leave a half-started agent running in the background.
        if runner is not None:
            runner.kill(block=False)
        self.vip.health.set_status(
            STATUS_BAD, "Timeout in setup of agent")
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY, status)
def health(self):
    """
    Returns a Status object as a dictionary.

    The status is currently always rebuilt as GOOD ("Platform here!") on
    every call and cached in ``self._health``.

    :return: The health status as a dictionary.
    """
    # TODO: a staleness check used to be sketched here (report BAD when
    # more than 10 seconds have passed since
    # ``self._last_time_verified_connection``); it is disabled, so the
    # timestamp that was only computed for it is no longer fetched.
    self._health = Status.build(
        GOOD_STATUS,
        "Platform here!"
    )
    return self._health.as_dict()
def simple_primary_state_machine(self, current_state):
    """Function representing the state machine for a simple primary
    instance. Always tries to start the target agent.

    An alert is sent only when ``current_state`` differs from the state
    recorded on the previous call.

    :param current_state: Indicates if remote platforms are active.
        Ignored, apart from detecting a state change.
    :type current_state: tuple of booleans
    """
    alert_key = 'failover {}'.format(self.agent_id)
    if current_state != self._state:
        context = 'Starting agent {}'.format(self.agent_vip_identity)
        self._state = current_state
        _log.warning(context)
        status = Status.build(STATUS_GOOD, context=context)
        self.vip.health.send_alert(alert_key, status)

    proc_info = self.vip.rpc.call(CONTROL,
                                  'agent_status',
                                  self.agent_uuid).get()

    # Guard against a None first element before the numeric comparison;
    # `None > 0` raises TypeError on Python 3.
    pid, return_code = proc_info[0], proc_info[1]
    is_running = pid is not None and pid > 0 and return_code is None
    if not is_running:
        self._agent_control('start_agent')
def store_agent_config(self, session_user, params):
    """Store an agent configuration on the external platform.

    Validates ``params``, forwards them to the platform's
    ``store_agent_config`` and, for ``devices...`` configurations whose
    ``driver_type`` is ``bacnet`` or ``modbus``, announces the new device
    and records it in ``self._current_devices``.

    :param session_user: Not used by this method.
    :param params: Request parameters.  ``message_id`` is removed from the
        dictionary; ``agent_identity``, ``config_name`` and
        ``raw_contents`` are required; ``config_type``, when given, must be
        ``raw``, ``json`` or ``csv``.
    :return: A JSON-RPC result ("SUCCESS") or a JSON-RPC error response.
    """
    message_id = params.pop('message_id')

    errors = ['Missing {}'.format(field)
              for field in ('agent_identity', 'config_name', 'raw_contents')
              if field not in params]
    config_type = params.get('config_type', None)
    if config_type and config_type not in ('raw', 'json', 'csv'):
        errors.append('Invalid config_type parameter')
    if errors:
        return jsonrpc.json_error(message_id, INVALID_PARAMS,
                                  "\n".join(errors))

    try:
        self._log.debug("Calling store_agent_config on external platform.")
        self.call("store_agent_config", **params)
    except Exception as e:
        self._log.error(str(e))
        return jsonrpc.json_error(message_id, INTERNAL_ERROR, str(e))

    config_name = params.get("config_name")
    agent_identity = params.get("agent_identity")

    # Anything that is not a device configuration is done once stored.
    if not config_name.startswith("devices"):
        return jsonrpc.json_result(message_id, "SUCCESS")

    # A config under "devices" is assumed to be a master driver config.
    rawdict = jsonapi.loads(params['raw_contents'])

    # Other driver types can only be saved to and retrieved from the store.
    if rawdict.get('driver_type', None) not in ('bacnet', 'modbus'):
        return jsonrpc.json_result(message_id, "SUCCESS")

    # Registry config starts with config://
    registry_config = rawdict['registry_config'][len('config://'):]

    try:
        self._log.debug("Retrieving registry_config for new device.")
        point_config = self.call("get_agent_config", agent_identity,
                                 registry_config, raw=False)
    except Exception as e:
        self._log.error(str(e))
        return jsonrpc.json_error(
            message_id, INTERNAL_ERROR,
            "Couldn't retrieve registry_config from connection.")

    new_device = {
        'device_address': rawdict['driver_config']['device_address'],
        'device_id': rawdict['driver_config']['device_id'],
        'points': [],
        'path': config_name,
        'health': Status.build(UNKNOWN_STATUS,
                               context="Unpublished").as_dict(),
    }
    points = [entry['Volttron Point Name'] for entry in point_config]
    new_device['points'] = points
    self._vc.send_management_message("NEW_DEVICE", new_device)

    status = Status.build(UNKNOWN_STATUS,
                          context="Not published since update")
    device_no_prefix = params.get('config_name')[len('devices/'):]

    if self._current_devices.get(device_no_prefix, {}):
        # Known device: only its point list is refreshed.
        self._current_devices[device_no_prefix]['points'] = points
    else:
        self._current_devices[device_no_prefix] = {
            'last_publish_utc': None,
            'health': status.as_dict(),
            'points': points,
        }

    return jsonrpc.json_result(message_id, "SUCCESS")
def __init__(self, vc, vip_identity):
    """Build the handler for one platform connected to Volttron Central.

    Queries the platform for its addresses, instance name and devices,
    seeds ``_current_devices`` with an UNKNOWN health entry per device and
    subscribes to the platform's prefixed topics on the local bus.

    :param vc: Main agent used to talk through to the vip router.
    :param vip_identity: Identity of the vcp agent connected to the
        volttron.central instance.
    """
    self._log = logging.getLogger(self.__class__.__name__)
    self._vip_identity = vip_identity
    self._vc = vc

    # Log some information about the vcp platform.
    self._external_vip_addresses = self.call('get_vip_addresses')
    self._instance_name = self.call('get_instance_name')
    self._log.info(
        "Building handler for platform: {} from address: {}".format(
            self._instance_name, self._external_vip_addresses))

    # The _current_devices structure should be what the ui uses to display
    # its data, where devices/t/1/1 was the full topic this was published
    # to:
    #
    # "t/1/1": {
    #     "points": [
    #         "Occupied"
    #     ],
    #     "health": {
    #         "status": "GOOD",
    #         "last_updated": "2017-03-02T00:38:30.347172+00:00",
    #         "context": "Last received data on: 2017-03-02T00:38:30.347075+00:00"
    #     },
    #     "last_publish_utc": null
    # }
    self._current_devices = defaultdict(dict)

    for device_name, device in self.call('get_devices').items():
        unpublished = Status.build(UNKNOWN_STATUS,
                                   context="Unpublished").as_dict()
        entry = self._current_devices[device_name]
        entry['health'] = unpublished
        entry['points'] = list(device['points'])
        entry['last_publish_utc'] = None

    self._platform_stats = {}

    platform_prefix = "platforms/{}/".format(self.vip_identity)

    # Callbacks listening to the local bus for data from the vcp instance.
    vcp_topics = (
        # devices and status.
        ('devices/', self._on_device_message),
        # statistics for showing performance in the ui.
        ('datalogger/platform/status', self._on_platform_stats),
        # iam and configure callbacks
        ('iam/', self._on_platform_message),
        # iam and configure callbacks
        ('configure/', self._on_platform_message)
    )

    for topic, callback in vcp_topics:
        prefixed_topic = platform_prefix + topic
        self._vc.vip.pubsub.subscribe('pubsub', prefixed_topic, callback)
        self._log.info('Subscribing to {} with from vcp {}'.format(
            prefixed_topic, topic))

        # method will subscribe to devices/ on the collector and publish
        # the regular device topics with the prefix platform_prefix.
        self.call("subscribe_to_vcp", topic, platform_prefix)
def publish_to_historian(self, to_publish_list):
    """Forward cached records to the target platform's message bus.

    Publishing is skipped for 60 seconds after a timeout, when no target
    platform connection can be set up, or when a required target agent
    does not answer a ping.  Each record is published with a 30 second
    timeout; records published without error are reported as handled.  A
    timeout drops the target connection, marks the health BAD and sends
    an alert.

    :param to_publish_list: Records to forward.  Each record is a mapping
        with a ``topic`` and a ``value``; ``value`` carries ``headers``
        and ``message``.
    """
    handled_records = []

    _log.debug("publish_to_historian number of items: {}".format(
        len(to_publish_list)))
    current_time = self.timestamp()
    last_time = self._last_timeout
    _log.debug('Lasttime: {} currenttime: {}'.format(
        last_time, current_time))
    timeout_occurred = False
    if self._last_timeout:
        # if we failed we need to wait 60 seconds before we go on.
        if self.timestamp() < self._last_timeout + 60:
            _log.debug('Not allowing send < 60 seconds from failure')
            return
    if not self._target_platform:
        self.historian_setup()
    if not self._target_platform:
        _log.debug('Could not connect to target')
        return

    for vip_id in required_target_agents:
        try:
            self._target_platform.vip.ping(vip_id).get()
        except Unreachable:
            skip = "Skipping publish: Target platform not running " \
                   "required agent {}".format(vip_id)
            _log.warning(skip)
            self.vip.health.set_status(STATUS_BAD, skip)
            return
        except Exception:
            err = "Unhandled error publishing to target platform."
            _log.error(err)
            _log.error(traceback.format_exc())
            self.vip.health.set_status(STATUS_BAD, err)
            return

    for x in to_publish_list:
        topic = x['topic']
        value = x['value']
        # The cached value is used as the payload directly.
        payload = value
        headers = payload['headers']
        headers['X-Forwarded'] = True
        # Routing headers are removed before the message is forwarded.
        headers.pop('Origin', None)
        headers.pop('Destination', None)

        if gather_timing_data:
            add_timing_data_to_header(
                headers, self.core.agent_uuid or self.core.identity,
                "forwarded")

        if timeout_occurred:
            _log.error(
                'A timeout has occurred so breaking out of publishing')
            break
        with gevent.Timeout(30):
            try:
                _log.debug('debugger: {} {} {}'.format(
                    topic, headers, payload))

                self._target_platform.vip.pubsub.publish(
                    peer='pubsub',
                    topic=topic,
                    headers=headers,
                    message=payload['message']).get()
            except gevent.Timeout:
                _log.debug("Timeout occurred email should send!")
                timeout_occurred = True
                self._last_timeout = self.timestamp()
                self._num_failures += 1
                # Stop the current platform from attempting to
                # connect
                self._target_platform.core.stop()
                self._target_platform = None
                self.vip.health.set_status(STATUS_BAD, "Timeout occured")
            except Unreachable:
                _log.error(
                    "Target not reachable. Wait till it's ready!")
            except ZMQError as exc:
                if exc.errno == ENOTSOCK:
                    # Stop the current platform from attempting to
                    # connect
                    _log.error(
                        "Target disconnected. Stopping target platform agent"
                    )
                    self._target_platform = None
                    self.vip.health.set_status(
                        STATUS_BAD, "Target platform disconnected")
                else:
                    # Previously swallowed silently; the record stays
                    # unhandled, so at least leave a trace in the log.
                    _log.error(
                        "ZMQ error publishing to target platform: {}".format(
                            exc))
            except Exception:
                err = "Unhandled error publishing to target platfom."
                _log.error(err)
                _log.error(traceback.format_exc())
                self.vip.health.set_status(STATUS_BAD, err)
                # Before returning lets mark any that weren't errors
                # as sent.
                self.report_handled(handled_records)
                return
            else:
                handled_records.append(x)

    _log.debug("handled: {} number of items".format(
        len(to_publish_list)))
    self.report_handled(handled_records)

    if timeout_occurred:
        _log.debug('Sending alert from the ForwardHistorian')
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY, status)
    else:
        self.vip.health.set_status(
            STATUS_GOOD,
            "published {} items".format(len(to_publish_list)))
def publish_to_historian(self, to_publish_list):
    """Forward cached records to the target platform's message bus.

    Publishing is skipped for 60 seconds after a timeout and when no
    target platform connection can be set up.  Each record is published
    with a 30 second timeout; records published without error are
    reported as handled.  A timeout drops the target connection, marks the
    health BAD and, after the loop, sends an alert.

    :param to_publish_list: Records to forward.  Each record is indexed by
        ``topic`` and ``value``; ``value`` is indexed by ``headers`` and
        ``message``.
    """
    handled_records = []

    _log.debug("publish_to_historian number of items: {}"
               .format(len(to_publish_list)))
    # NOTE(review): `parsed` and `next_dest` are only referenced by the
    # commented-out header handling further down.
    parsed = urlparse(self.core.address)
    next_dest = urlparse(destination_vip)
    current_time = self.timestamp()
    last_time = self._last_timeout
    _log.debug('Lasttime: {} currenttime: {}'.format(last_time, current_time))
    timeout_occurred = False
    if self._last_timeout:
        # if we failed we need to wait 60 seconds before we go on.
        if self.timestamp() < self._last_timeout + 60:
            _log.debug('Not allowing send < 60 seconds from failure')
            return
    # Connect lazily; historian_setup() sets _target_platform on success.
    if not self._target_platform:
        self.historian_setup()
    if not self._target_platform:
        _log.debug('Could not connect to target')
        return

    for x in to_publish_list:
        topic = x['topic']
        value = x['value']
        # payload = jsonapi.loads(value)
        payload = value
        headers = payload['headers']
        headers['X-Forwarded'] = True
        # Routing headers are removed before the message is forwarded.
        try:
            del headers['Origin']
        except KeyError:
            pass
        try:
            del headers['Destination']
        except KeyError:
            pass
        # Disabled origin/destination header handling, kept for reference:
        # if not headers.get('Origin', None)
        # if overwrite_origin:
        #     if not include_origin_in_header:
        #         try:
        #             del headers['Origin']
        #         except KeyError:
        #             pass
        #     else:
        #         headers['Origin'] = origin
        # else:
        #     headers['Origin'] = parsed.hostname
        #     headers['Destination'] = [next_dest.scheme +
        #                               '://'+
        #                               next_dest.hostname]
        # else:
        #    headers['Destination'].append(next_dest.hostname)
        # After a timeout the target connection is gone, so the remaining
        # records are left for a later call.
        if timeout_occurred:
            _log.error(
                'A timeout has occured so breaking out of publishing')
            break
        with gevent.Timeout(30):
            try:
                _log.debug('debugger: {} {} {}'.format(topic, headers, payload))

                self._target_platform.vip.pubsub.publish(
                    peer='pubsub',
                    topic=topic,
                    headers=headers,
                    message=payload['message']).get()
            except gevent.Timeout:
                _log.debug("Timout occurred email should send!")
                timeout_occurred = True
                # Remember when the failure happened; the 60 second
                # back-off at the top of this method is based on it.
                self._last_timeout = self.timestamp()
                self._num_failures += 1
                # Stop the current platform from attempting to
                # connect
                self._target_platform.core.stop()
                self._target_platform = None
                self.vip.health.set_status(
                    STATUS_BAD, "Timout occured")
            except Exception as e:
                # Any other error is logged and the record is left
                # unhandled; the loop continues with the next record.
                _log.error(e)
            else:
                handled_records.append(x)

    _log.debug("handled: {} number of items".format(
        len(to_publish_list)))
    self.report_handled(handled_records)

    if timeout_occurred:
        _log.debug('Sending alert from the ForwardHistorian')
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY, status)
import gevent

from volttron.platform.keystore import KeyStore
from volttron.platform.messaging.health import STATUS_BAD, Status
from volttron.platform.vip.agent.utils import build_agent

# Manual test script: publishes an email request on the pubsub bus and
# raises a BAD-status alert, then shuts the temporary agent down.
test_subject = "Test subject1"
test_message = "this is a message that is sent via pubsub email"

message = dict(subject=test_subject, message=test_message)

ks = KeyStore()
agent = build_agent(identity="test.email.pubsub", enable_store=False)
try:
    agent.vip.pubsub.publish('pubsub',
                             topic="platform/send_email",
                             message=message)
    # agent.vip.health.set_status(STATUS_BAD, "It's bad man really bad!")
    agent.vip.health.send_alert("ALERT_KEY", Status.build(
        STATUS_BAD, "It's really bad again!"
    ))
    # Give the platform time to process the messages before disconnecting.
    gevent.sleep(5)
finally:
    # Always stop the agent, even when publishing or alerting fails.
    agent.core.stop()
def send_alert1(self, key, message):
    """Send ``message`` as a BAD-status alert under the alert key ``key``.

    :param key: Alert key passed to the health subsystem.
    :param message: Text carried by the BAD status.
    """
    bad_status = Status.build(STATUS_BAD, message)
    health = self.vip.health
    health.send_alert(key, bad_status)
def get_health(self):
    """Return this agent's current health status as a dictionary.

    The status is read once, so the value written to the debug log is
    exactly the one that is returned.

    :return: Dictionary form of the agent's health status.
    """
    current_status = self.vip.health.get_status()
    _log.debug("Getting health: {}".format(current_status))
    return Status.from_json(current_status).as_dict()
def list_agents(self):
    """
    List the agents that are installed on the platform.  Note this does not
    take into account agents that are connected with the instance, but only
    the ones that are installed and have a uuid.

    Each agent dictionary returned by the control service is extended with
    ``is_running``, ``process_id``, ``error_code``, ``permissions`` and
    ``health``.  Health is queried from the agent itself only when it is
    running; otherwise it is reported as UNKNOWN.

    :return: A list of agents.
    """
    agents = self.vip.rpc.call("control", "list_agents").get(timeout=30)
    status_running = self.status_agents()

    # Index the [startproc, endproc] info by uuid once instead of scanning
    # the status list for every agent.  The first entry per uuid wins.
    proc_info_by_uuid = {}
    for uuid, _name, proc_info in status_running:
        proc_info_by_uuid.setdefault(uuid, proc_info)

    uuid_to_status = {}
    for a in agents:
        agent_uuid = a['uuid']
        pinfo = proc_info_by_uuid.get(agent_uuid)

        process_id = error_code = None
        if pinfo:
            process_id, error_code = pinfo[0], pinfo[1]
        # The first element may be None; guard before comparing with 0,
        # which would raise TypeError on Python 3.
        is_running = (process_id is not None and process_id > 0
                      and error_code is None)

        agent_status = {
            'is_running': is_running,
            'process_id': process_id,
            'error_code': error_code,
            'permissions': {
                'can_stop': is_running,
                'can_start': not is_running,
                'can_restart': True,
                'can_remove': True
            },
            # The default agent is stopped health looks like this.
            'health': {
                'status': 'UNKNOWN',
                'context': None,
                'last_updated': None
            }
        }

        # Volttron Central and its platform agent may not be stopped or
        # removed.
        if 'volttroncentral' in a['name'] or 'vcplatform' in a['name']:
            agent_status['permissions']['can_stop'] = False
            agent_status['permissions']['can_remove'] = False

        if is_running:
            identity = self.vip.rpc.call('control', 'agent_vip_identity',
                                         agent_uuid).get(timeout=30)
            status = self.vip.rpc.call(identity,
                                       'health.get_status').get(timeout=30)
            agent_status['health'] = Status.from_json(status).as_dict()

        uuid_to_status[agent_uuid] = agent_status

    for a in agents:
        _log.debug('UPDATING STATUS OF: {}'.format(a['uuid']))
        a.update(uuid_to_status[a['uuid']])
    return agents
def _on_device_message(self, peer, sender, bus, topic, headers, message):
    """
    Handle device data coming from the platform represented by this
    object.

    This method only cares about the /all messages that are published to
    the message bus.  For those it records the point names, a GOOD health
    status and the publish timestamp in ``self._current_devices`` and
    sends a ``DEVICE_STATUS_UPDATED`` management message.

    :param peer: Not used.
    :param sender: Not used.
    :param bus: Not used.
    :param topic: Topic the data was published to; expected to start with
        ``platforms/<vip_identity>/``.
    :param headers: Not used.
    :param message: Published payload; the keys of ``message[0]`` are taken
        as the device's point names.
    """
    expected_prefix = "platforms/{}/".format(self.vip_identity)
    self._log.debug("TOPIC WAS: {}".format(topic))
    self._log.debug("MESSAGE WAS: {}".format(message))
    self._log.debug("Expected topic: {}".format(expected_prefix))

    # Evaluate the prefix test None-safely before it is logged; calling
    # topic.startswith() first made the None guard below unreachable.
    has_prefix = topic is not None and topic.startswith(expected_prefix)
    self._log.debug("Are Equal: {}".format(has_prefix))
    self._log.debug("topic type: {} prefix_type: {}".format(
        type(topic), type(expected_prefix)))
    if not has_prefix:
        self._log.error("INVALID DEVICE DATA FOR {}".format(
            self.vip_identity))
        return

    # Update the devices store for get_devices function call
    if not topic.endswith('/all'):
        self._log.debug("Skipping publish to {}".format(topic))
        return

    # Strip the platform prefix so that the 'devices/' slicing below works
    # on the plain device topic.
    topic = topic[len(expected_prefix):]

    self._log.debug("topic: {}, message: {}".format(topic, message))

    ts = format_timestamp(get_aware_utc_now())
    context = "Last received data on: {}".format(ts)
    status = Status.build(GOOD_STATUS, context=context)

    base_topic = topic[:-len('/all')]
    base_topic_no_prefix = base_topic[len('devices/'):]

    device_dict = self._current_devices.setdefault(base_topic_no_prefix, {})
    device_dict['points'] = list(message[0])
    device_dict['health'] = status.as_dict()
    device_dict['last_publish_utc'] = ts

    self._vc.send_management_message("DEVICE_STATUS_UPDATED",
                                     data=dict(context=context,
                                               topic=base_topic))
def _on_device_message(self, peer, sender, bus, topic, headers, message): """ Handle device data coming from the platform represented by this object. this method only cares about the /all messages that are published to the message bus. :param peer: :param sender: :param bus: :param topic: :param headers: :param message: """ expected_prefix = "platforms/{}/".format(self.vip_identity) self._log.debug("TOPIC WAS: {}".format(topic)) self._log.debug("MESSAGE WAS: {}".format(message)) self._log.debug("Expected topic: {}".format(expected_prefix)) self._log.debug("Are Equal: {}".format(topic.startswith(expected_prefix))) self._log.debug("topic type: {} prefix_type: {}".format(type(topic), type(expected_prefix))) if topic is None or not topic.startswith(expected_prefix): self._log.error("INVALID DEVICE DATA FOR {}".format(self.vip_identity)) return if topic is None or not topic.startswith(expected_prefix): self._log.error('INVALID DEVICE TOPIC/MESSAGE DETECTED ON {}'.format( self.vip_identity )) return # Update the devices store for get_devices function call if not topic.endswith('/all'): self._log.debug("Skipping publish to {}".format(topic)) return # topic = topic[len(expected_prefix):] self._log.debug("topic: {}, message: {}".format(topic, message)) ts = format_timestamp(get_aware_utc_now()) context = "Last received data on: {}".format(ts) status = Status.build(GOOD_STATUS, context=context) base_topic = topic[:-len('/all')] base_topic_no_prefix = base_topic[len('devices/'):] if base_topic_no_prefix not in self._current_devices: self._current_devices[base_topic_no_prefix] = {} device_dict = self._current_devices[base_topic_no_prefix] points = [k for k, v in message[0].items()] device_dict['points'] = points device_dict['health'] = status.as_dict() device_dict['last_publish_utc'] = ts self._vc.send_management_message( "DEVICE_STATUS_UPDATED", data=dict(context=context, topic=base_topic))
def publish_to_historian(self, to_publish_list):
    """
    Forward the given records to the target platform's message bus.

    Nothing is sent while less than 60 seconds have passed since the last
    publish timeout, when no connection to the target platform can be
    set up, or when one of the ``required_target_agents`` does not answer
    a ping.  Records that were published without an error are passed to
    ``self.report_handled``.

    :param to_publish_list: records to forward.  Each record provides a
        ``'topic'`` entry and a ``'value'`` entry; the value holds the
        ``'headers'`` and the ``'message'`` that are published.  The
        headers are modified in place.
    :return: None
    """
    handled_records = []

    _log.debug("publish_to_historian number of items: {}"
               .format(len(to_publish_list)))
    current_time = self.timestamp()
    last_time = self._last_timeout
    _log.debug('Lasttime: {} currenttime: {}'.format(last_time, current_time))
    timeout_occurred = False
    # Set when at least one record could not be delivered, so that the
    # health status is not reported as GOOD afterwards.
    publish_failed = False

    if self._last_timeout:
        # if we failed we need to wait 60 seconds before we go on.
        if self.timestamp() < self._last_timeout + 60:
            _log.debug('Not allowing send < 60 seconds from failure')
            return
    if not self._target_platform:
        self.historian_setup()
    if not self._target_platform:
        _log.debug('Could not connect to target')
        return

    for vip_id in required_target_agents:
        try:
            self._target_platform.vip.ping(vip_id).get()
        except Unreachable:
            skip = "Skipping publish: Target platform not running " \
                   "required agent {}".format(vip_id)
            _log.warning(skip)
            self.vip.health.set_status(STATUS_BAD, skip)
            return
        except Exception:
            err = "Unhandled error publishing to target platform."
            _log.error(err)
            _log.error(traceback.format_exc())
            self.vip.health.set_status(STATUS_BAD, err)
            return

    for x in to_publish_list:
        topic = x['topic']
        payload = x['value']
        headers = payload['headers']
        headers['X-Forwarded'] = True
        # Routing headers of the local platform are not forwarded.
        headers.pop('Origin', None)
        headers.pop('Destination', None)

        if gather_timing_data:
            add_timing_data_to_header(
                headers,
                self.core.agent_uuid or self.core.identity,
                "forwarded")

        if timeout_occurred:
            _log.error(
                'A timeout has occurred so breaking out of publishing')
            break
        with gevent.Timeout(30):
            try:
                _log.debug('debugger: {} {} {}'.format(topic,
                                                       headers,
                                                       payload))

                self._target_platform.vip.pubsub.publish(
                    peer='pubsub',
                    topic=topic,
                    headers=headers,
                    message=payload['message']).get()
            except gevent.Timeout:
                _log.debug("Timeout occurred email should send!")
                timeout_occurred = True
                self._last_timeout = self.timestamp()
                self._num_failures += 1
                # Stop the current platform from attempting to
                # connect
                self._target_platform.core.stop()
                self._target_platform = None
                self.vip.health.set_status(
                    STATUS_BAD, "Timeout occured")
            except Unreachable:
                publish_failed = True
                _log.error("Target not reachable. Wait till it's ready!")
            except ZMQError as exc:
                publish_failed = True
                if exc.errno == ENOTSOCK:
                    # Stop the current platform from attempting to
                    # connect
                    _log.error("Target disconnected. Stopping target "
                               "platform agent")
                    self._target_platform = None
                    self.vip.health.set_status(
                        STATUS_BAD, "Target platform disconnected")
                    # There is no connection left to publish through, so
                    # the remaining records wait for the next call.
                    break
                # Any other ZMQ error used to vanish without a trace.
                _log.error(
                    "ZMQ error publishing to target platform: {}".format(exc))
            except Exception:
                err = "Unhandled error publishing to target platfom."
                _log.error(err)
                _log.error(traceback.format_exc())
                self.vip.health.set_status(
                    STATUS_BAD, err)
                # Before returning lets mark any that weren't errors
                # as sent.
                self.report_handled(handled_records)
                return
            else:
                handled_records.append(x)

    _log.debug("handled: {} number of items".format(
        len(handled_records)))
    self.report_handled(handled_records)

    if timeout_occurred:
        _log.debug('Sending alert from the ForwardHistorian')
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY,
                                   status)
    elif not publish_failed:
        # Only claim success when every record really went out.
        self.vip.health.set_status(
            STATUS_GOOD, "published {} items".format(
                len(handled_records)))
def __init__(self, vc, vip_identity):
    """
    Build the handler for one connected platform.

    Queries the platform for its addresses, instance name and devices
    through ``self.call`` and subscribes the handler's callbacks on the
    local bus under the ``platforms/<vip_identity>/`` prefix.

    :param vc: the main agent, used to talk through to the vip router.
    :param vip_identity: identity of the vcp agent connected to the
        volttron.central instance.
    """
    self._log = logging.getLogger(self.__class__.__name__)
    # Identity of the connected vcp agent.
    self._vip_identity = vip_identity
    # Main agent used for all communication through the vip router.
    self._vc = vc

    # Log where the vcp platform lives.
    self._external_vip_addresses = self.call('get_vip_addresses')
    self._instance_name = self.call('get_instance_name')
    self._log.info(
        "Building handler for platform: {} from address: {}".format(
            self._instance_name, self._external_vip_addresses))

    # The _current_devices structure should be what the ui uses to display
    # its data, where devices/t/1/1 was the full topic this was published
    # to:
    #
    #   "t/1/1": {
    #       "points": ["Occupied"],
    #       "health": {
    #           "status": "GOOD",
    #           "last_updated": "2017-03-02T00:38:30.347172+00:00",
    #           "context": "Last received data on: "
    #                      "2017-03-02T00:38:30.347075+00:00"
    #       },
    #       "last_publish_utc": null
    #   }
    self._current_devices = defaultdict(dict)

    for device_name, device_info in self.call('get_devices').items():
        entry = self._current_devices[device_name]
        entry['health'] = Status.build(UNKNOWN_STATUS,
                                       context="Unpublished").as_dict()
        entry['points'] = list(device_info['points'])
        entry['last_publish_utc'] = None

    self._platform_stats = {}

    platform_prefix = "platforms/{}/".format(self.vip_identity)

    # Callbacks listening on the local bus for publishes from the vcp
    # instance.  platform_prefix is prepended to every topic below.
    # Device topics (device_updates, devices/update, devices/) are
    # currently not subscribed to _on_device_message here.
    vcp_topics = (
        # statistics for showing performance in the ui.
        ('datalogger/platform/status', self._on_platform_stats),
        # iam and configure callbacks
        ('iam/', self._on_platform_message),
        ('configure/', self._on_platform_message),
    )

    for topic, funct in vcp_topics:
        prefixed_topic = platform_prefix + topic
        self._vc.vip.pubsub.subscribe('pubsub', prefixed_topic, funct)
        self._log.info('Subscribing to {} with from vcp {}'.format(
            prefixed_topic, topic))

        # method will subscribe to devices/ on the collector and publish
        # the regular device topics with the prefix platform_prefix.
        self.call("subscribe_to_vcp", topic, platform_prefix)
def publish_to_historian(self, to_publish_list):
    """
    Forward the given records to the target platform's message bus.

    Nothing is sent while less than 60 seconds have passed since the last
    publish timeout or when no connection to the target platform can be
    set up.  Publishing stops at the first timeout; records that went out
    without an error are passed to ``self.report_handled``, and a timeout
    additionally raises a health alert.

    :param to_publish_list: records to forward.  Each record provides a
        ``'topic'`` entry and a ``'value'`` entry; the value holds the
        ``'headers'`` and the ``'message'`` that are published.  The
        headers are modified in place.
    :return: None
    """
    handled_records = []

    _log.debug("publish_to_historian number of items: {}".format(
        len(to_publish_list)))
    current_time = self.timestamp()
    last_time = self._last_timeout
    _log.debug('Lasttime: {} currenttime: {}'.format(
        last_time, current_time))
    timeout_occurred = False

    if self._last_timeout:
        # if we failed we need to wait 60 seconds before we go on.
        if self.timestamp() < self._last_timeout + 60:
            _log.debug('Not allowing send < 60 seconds from failure')
            return
    if not self._target_platform:
        self.historian_setup()
    if not self._target_platform:
        _log.debug('Could not connect to target')
        return

    for x in to_publish_list:
        topic = x['topic']
        payload = x['value']
        headers = payload['headers']
        headers['X-Forwarded'] = True
        # Routing headers of the local platform are not forwarded.
        headers.pop('Origin', None)
        headers.pop('Destination', None)

        if timeout_occurred:
            _log.error(
                'A timeout has occured so breaking out of publishing')
            break
        with gevent.Timeout(30):
            try:
                _log.debug('debugger: {} {} {}'.format(
                    topic, headers, payload))
                self._target_platform.vip.pubsub.publish(
                    peer='pubsub',
                    topic=topic,
                    headers=headers,
                    message=payload['message']).get()
            except gevent.Timeout:
                _log.debug("Timout occurred email should send!")
                timeout_occurred = True
                self._last_timeout = self.timestamp()
                self._num_failures += 1
                # Stop the current platform from attempting to
                # connect
                self._target_platform.core.stop()
                self._target_platform = None
                self.vip.health.set_status(STATUS_BAD,
                                           "Timout occured")
            except Exception:
                # Keep going with the next record, but record the full
                # traceback instead of only the exception text.
                _log.exception(
                    "Unhandled error publishing to target platform.")
            else:
                handled_records.append(x)

    _log.debug("handled: {} number of items".format(
        len(handled_records)))
    self.report_handled(handled_records)

    if timeout_occurred:
        _log.debug('Sending alert from the ForwardHistorian')
        status = Status.from_json(self.vip.health.get_status())
        self.vip.health.send_alert(FORWARD_TIMEOUT_KEY, status)
def store_agent_config(self, session_user, params):
    """
    Store a configuration on the connected platform and, for bacnet and
    modbus device configurations, register the device locally.

    :param session_user: unused
    :param params: request parameters; ``message_id`` is removed from the
        mapping, ``agent_identity``, ``config_name`` and ``raw_contents``
        are required and ``config_type`` is optional ('raw', 'json' or
        'csv').  The remaining entries are passed on as keyword arguments
        to the platform's ``store_agent_config``.
    :return: a jsonrpc result ("SUCCESS") or a jsonrpc error response.
    """
    message_id = params.pop('message_id')

    # Validate the request before touching the platform.
    errors = ['Missing {}'.format(name)
              for name in ('agent_identity', 'config_name', 'raw_contents')
              if name not in params]
    config_type = params.get('config_type', None)
    if config_type and config_type not in ('raw', 'json', 'csv'):
        errors.append('Invalid config_type parameter')
    if errors:
        return jsonrpc.json_error(message_id, INVALID_PARAMS,
                                  "\n".join(errors))

    try:
        self._log.debug("Calling store_agent_config on external platform.")
        self.call("store_agent_config", **params)
    except Exception as e:
        self._log.error(str(e))
        return jsonrpc.json_error(message_id, INTERNAL_ERROR, str(e))

    config_name = params.get("config_name")
    agent_identity = params.get("agent_identity")

    # Anything that is not stored under devices needs no further handling.
    if not config_name.startswith("devices"):
        return jsonrpc.json_result(message_id, "SUCCESS")

    # Since we start with devices, we assume that we are attempting
    # to save a master driver config file.
    rawdict = jsonapi.loads(params['raw_contents'])

    # For other driver types we cannot do anything more than save and
    # retrieve the configuration from the store.
    if rawdict.get('driver_type', None) not in ('bacnet', 'modbus'):
        return jsonrpc.json_result(message_id, "SUCCESS")

    # Registry config starts with config://
    registry_config = rawdict['registry_config'][len('config://'):]

    try:
        self._log.debug("Retrieving registry_config for new device.")
        point_config = self.call("get_agent_config",
                                 agent_identity,
                                 registry_config, raw=False)
    except Exception as e:
        self._log.error(str(e))
        return jsonrpc.json_error(
            message_id, INTERNAL_ERROR,
            "Couldn't retrieve registry_config from connection.")

    driver_config = rawdict['driver_config']
    new_device = {
        'device_address': driver_config['device_address'],
        'device_id': driver_config['device_id'],
        'points': [],
        'path': config_name,
        'health': Status.build(UNKNOWN_STATUS,
                               context="Unpublished").as_dict(),
    }
    points = [point['Volttron Point Name'] for point in point_config]
    new_device['points'] = points
    self._vc.send_management_message("NEW_DEVICE", new_device)

    status = Status.build(UNKNOWN_STATUS,
                          context="Not published since update")
    device_no_prefix = config_name[len('devices/'):]

    # A known device only gets its points refreshed; an unknown (or still
    # empty) one gets a complete fresh entry.
    if self._current_devices.get(device_no_prefix, {}):
        self._current_devices[device_no_prefix]['points'] = points
    else:
        self._current_devices[device_no_prefix] = dict(
            last_publish_utc=None,
            health=status.as_dict(),
            points=points)

    return jsonrpc.json_result(message_id, "SUCCESS")
def setup(self):
    """
    Read resource files and load list of valid tags, categories,
    tags grouped by categories, list of reference tags and its
    parent.

    Every tagging table that does not exist yet is initialized through its
    ``_init_*`` method.  When the existing tables cannot be listed, or an
    initialization fails, the health status is set to bad, an alert is
    sent and the agent is stopped.

    :return:
    """
    _log.debug("Setup of sqlite tagging agent")
    err_message = ""
    table_names = []
    try:
        stmt = "SELECT name FROM sqlite_master " \
               "WHERE type='table';"
        name_list = self.sqlite_utils.select(stmt, None, fetch_all=True)
        table_names = [name[0] for name in name_list]
        _log.debug(table_names)
    except Exception as e:
        err_message = "Unable to query list of existing tables from the " \
                      "database. Exception: {}. Stopping tagging " \
                      "service agent".format(e.args)

    # Without a reliable table list every table would look missing; do not
    # run the initializers then, and keep the original error intact.
    if not err_message:
        table_name = ""
        try:
            initializers = (
                (self.tags_table, self._init_tags),
                (self.tag_refs_table, self._init_tag_refs),
                (self.topic_tags_table, self._init_topic_tags),
                (self.categories_table, self._init_categories),
                (self.category_tags_table, self._init_category_tags),
            )
            # table_name always names the table being processed so that a
            # failure can be attributed to it below.
            for table_name, initialize in initializers:
                if table_name in table_names:
                    _log.info("{} table exists. Assuming initial values "
                              "have been loaded".format(table_name))
                else:
                    initialize()
        except Exception as e:
            err_message = "Initialization of {} table failed with " \
                          "exception: {}. Stopping tagging service " \
                          "agent.".format(table_name, str(e))

    if err_message:
        _log.error(err_message)
        self.vip.health.set_status(
            STATUS_BAD,
            "Initialization of tagging service failed")
        status = Status.from_json(self.vip.health.get_status_json())
        self.vip.health.send_alert(TAGGING_SERVICE_SETUP_FAILED, status)
        self.core.stop()
def setup(self):
    """
    Read resource files and load list of valid tags, categories,
    tags grouped by categories, list of reference tags and its
    parent.

    Tables that are not present in the database are created through their
    ``_init_*`` method.  If the existing tables cannot be listed or an
    initialization raises, the health status is set to bad, an alert is
    sent and the agent is stopped.

    :return:
    """
    _log.debug("Setup of sqlite tagging agent")
    err_message = ""
    table_names = []
    try:
        stmt = "SELECT name FROM sqlite_master " \
               "WHERE type='table';"
        name_list = self.sqlite_utils.select(stmt, None, fetch_all=True)
        table_names = [name[0] for name in name_list]
        _log.debug(table_names)
    except Exception as e:
        err_message = "Unable to query list of existing tables from the " \
                      "database. Exception: {}. Stopping tagging " \
                      "service agent".format(e.args)

    # If the table list could not be read, skip initialization entirely:
    # running it against an empty list would treat every table as missing
    # and could replace the real error with a follow-up failure.
    if not err_message:
        table_name = ""
        try:
            steps = (
                (self.tags_table, self._init_tags),
                (self.tag_refs_table, self._init_tag_refs),
                (self.topic_tags_table, self._init_topic_tags),
                (self.categories_table, self._init_categories),
                (self.category_tags_table, self._init_category_tags),
            )
            # The loop variable doubles as the name reported on failure.
            for table_name, init_table in steps:
                if table_name in table_names:
                    _log.info("{} table exists. Assuming initial values "
                              "have been loaded".format(table_name))
                else:
                    init_table()
        except Exception as e:
            err_message = "Initialization of {} table failed with " \
                          "exception: {}. Stopping tagging service " \
                          "agent.".format(table_name, e.args)

    if err_message:
        _log.error(err_message)
        self.vip.health.set_status(STATUS_BAD,
                                   "Initialization of tagging service "
                                   "failed")
        status = Status.from_json(self.vip.health.get_status_json())
        self.vip.health.send_alert(TAGGING_SERVICE_SETUP_FAILED, status)
        self.core.stop()
# Manual check script: publishes an email request on the message bus and
# raises a health alert through a temporary agent.
import gevent

from volttron.platform.keystore import KeyStore
from volttron.platform.messaging.health import STATUS_BAD, Status
from volttron.platform.vip.agent.utils import build_agent

test_subject = "Test subject1"
test_message = "this is a message that is sent via pubsub email"

message = dict(subject=test_subject, message=test_message)

# NOTE(review): ks is not used below; presumably constructing the KeyStore
# is wanted for its side effects - confirm before removing.
ks = KeyStore()
agent = build_agent(identity="test.email.pubsub", enable_store=False)
try:
    agent.vip.pubsub.publish('pubsub',
                             topic="platform/send_email",
                             message=message)
    # agent.vip.health.set_status(STATUS_BAD, "It's bad man really bad!")
    agent.vip.health.send_alert(
        "ALERT_KEY", Status.build(STATUS_BAD, "It's really bad again!"))
    # Give the platform time to process the publish and the alert.
    gevent.sleep(5)
finally:
    # Always shut the temporary agent down, even if a call above failed.
    agent.core.stop()