def __init__(self, hostid): """ Collects resource usage data of host and nodes. :param hostid: ROS_IP of this host :type hostid: string """ super(HostStatisticsHandler, self).__init__(hostid) self.__update_interval = float(0) self.__publish_interval = 0 self.__is_enabled = False self.__check_enabled = 0 self.__search_nodes_inv = 0 self.__init_params() self.__register_service() self.__lock = threading.Lock() self.__dict_lock = threading.Lock() self.pub = rospy.Publisher('/statistics_host', HostStatistics, queue_size=500) #: Used to store information about the host's status. self._status = HostStatus(rospy.Time.now()) #: Interface to restart and stop nodes # or executing other commands. self.__node_manager = NodeManager() #: Dictionary holding all nodes currently running on the host. self.__node_list = {} # Base-stats for I/O self.__bandwidth_base = {} self.__msg_freq_base = {} self.__disk_write_base = {} self.__disk_read_base = {} self.__set_bases()
class HostStatisticsHandler(StatisticsHandler): """ Represents a host , limited to one instance per host. Collects statistics about the current state of the host and s ends them using the publisher-subscriber mechanism. """ def __init__(self, hostid): """ Collects resource usage data of host and nodes. :param hostid: ROS_IP of this host :type hostid: string """ super(HostStatisticsHandler, self).__init__(hostid) self.__update_interval = float(0) self.__publish_interval = 0 self.__is_enabled = False self.__check_enabled = 0 self.__search_nodes_inv = 0 self.__init_params() self.__register_service() self.__lock = threading.Lock() self.__dict_lock = threading.Lock() self.pub = rospy.Publisher('/statistics_host', HostStatistics, queue_size=500) #: Used to store information about the host's status. self._status = HostStatus(rospy.Time.now()) #: Interface to restart and stop nodes # or executing other commands. self.__node_manager = NodeManager() #: Dictionary holding all nodes currently running on the host. self.__node_list = {} # Base-stats for I/O self.__bandwidth_base = {} self.__msg_freq_base = {} self.__disk_write_base = {} self.__disk_read_base = {} self.__set_bases() def __set_bases(self): """ Calculate base stats for I/O at start of the node. Used to calculate difference to get per/s stats. """ psutil.cpu_percent() psutil.cpu_percent(percpu=True) for interface in psutil.net_io_counters(True): netint = psutil.net_io_counters(True)[interface] total_bytes = netint.bytes_recv + netint.bytes_sent total_packages = netint.packets_sent + netint.packets_recv self.__bandwidth_base[interface] = total_bytes self.__msg_freq_base[interface] = total_packages dev_names = [] for disk in psutil.disk_partitions(): if all(['cdrom' not in disk.opts, 'sr' not in disk.device]): dev_names.append(disk.device) for key in psutil.disk_io_counters(True): if key in dev_names: disk = psutil.disk_io_counters(True)[key] self.__disk_read_base[key] = disk.read_bytes self.__disk_write_base[key] = disk.write_bytes def __register_service(self): """ Register all services """ ip = self._id.replace('.', '_') rospy.Service( "/execute_node_reaction/%s" % ip, NodeReaction, self.execute_reaction) def measure_status(self, event): """ Collects information about the host's current status using psutils. Triggered periodically. """ if not self.__is_enabled: pass if self.__is_enabled: self.__lock.acquire() # update node list self.__dict_lock.acquire() self.update_nodes() self.__dict_lock.release() # CPU self._status.add_cpu_usage(psutil.cpu_percent()) self._status.add_cpu_usage_core(psutil.cpu_percent(percpu=True)) # RAM self._status.add_ram_usage(psutil.virtual_memory().percent) # temp self.get_sensors() # Bandwidth and message frequency self.__measure_network_usage() # Disk usage self.__measure_disk_usage() self.__lock.release() def __measure_network_usage(self): """ measure current network_io_counters """ network_interfaces = psutil.net_io_counters(True) for key in network_interfaces: total_bytes = (network_interfaces[ key].bytes_sent + network_interfaces[key].bytes_recv) - self.__bandwidth_base[key] total_packages = (network_interfaces[ key].packets_sent + network_interfaces[key].packets_recv) - self.__msg_freq_base[key] bandwidth = total_bytes / self.__update_interval msg_frequency = total_packages / self.__update_interval self._status.add_bandwidth(key, bandwidth) self._status.add_msg_frequency(key, msg_frequency) # update base stats for next iteration self.__bandwidth_base[key] += total_bytes self.__msg_freq_base[key] += total_packages def __measure_disk_usage(self): """ measure current disk usage """ # Free Space on disks disks = psutil.disk_partitions() dev_name = [] for disk in disks: if 'cdrom' not in disk.opts and 'sr' not in disk.device: free_space = psutil.disk_usage(disk.mountpoint).free / 2 ** 20 self._status.add_drive_space(disk.device, free_space) dev_name.append(disk.device) # Drive I/O drive_io = psutil.disk_io_counters(True) for disk in dev_name: if disk in drive_io: readb = drive_io[disk].read_bytes - self.__disk_read_base[disk] writeb = drive_io[disk].write_bytes - \ self.__disk_write_base[disk] read_rate = readb / self.__update_interval write_rate = writeb / self.__update_interval self._status.add_drive_read(disk, read_rate) self._status.add_drive_write(disk, write_rate) # update base stats for next iteration self.__disk_read_base[disk] += readb self.__disk_write_base[disk] += writeb else: # No information available - push None's self._status.add_drive_read(disk, None) self._status.add_drive_write(disk, None) def publish_status(self, event): """ Publishes the current status to a topic using ROS's publisher-subscriber mechanism. Triggered periodically. """ if not self.__is_enabled: pass if self.__is_enabled: self.__lock.acquire() self._status.time_end = rospy.Time.now() stats = self.__calc_statistics() self.__dict_lock.acquire() self.__publish_nodes() self.__dict_lock.release() self.pub.publish(stats) self._status.reset() self._status.time_start = rospy.Time.now() self.__lock.release() def __publish_nodes(self): """ publishes current status of all nodes. """ for node in self.__node_list: Thread(target=self.__node_list[node].publish_status).start() def __init_params(self): """ Initializes params on the parameter server, """ self.__publish_interval = rospy.get_param('~publish_interval', 10) self.__update_interval = self.__publish_interval / \ float(rospy.get_param('~window_max_elements', 10)) self.__is_enabled = rospy.get_param('/enable_statistics', False) self.__check_enabled = rospy.get_param( '/arni/check_enabled_interval', 10) self.__search_nodes_inv = rospy.get_param('~search_nodes', 5) def __calc_statistics(self): """ Calculates statistics like mean, standard deviation and max from the status. Returns an instance of HostStatistics which can be published. :returns: HostStatistics """ stats_dict = self._status.calc_stats() host_status = HostStatistics() host_status.host = self._id host_status.window_start = self._status.time_start host_status.window_stop = self._status.time_end for v in dir(host_status): if v in stats_dict: setattr(host_status, v, stats_dict[v]) return host_status def execute_reaction(self, reaction): """ Parses through the reaction and calls the appropriate method from the NodeManager. Uses ROS Services. Returns a message about operation's success. :param reaction: Reaction to be executed and node affected. :type reaction: NodeReaction. :returns: String """ msg = '' if reaction.node not in self.__node_list: return 'Specified node is not running on this host.' if reaction.action == 'restart': node = self.__node_list[reaction.node] msg = self.__node_manager.restart_node(node) self.remove_node(reaction.node) elif reaction.action == 'stop': msg = self.__node_manager.stop_node(reaction.node) self.remove_node(reaction.node) elif reaction.action == 'command': msg = self.__node_manager.execute_command(reaction.command) else: msg = 'Failed to execute reaction, %s is no valid argument' % reaction.action rospy.loginfo('Executing reaction: %s' % msg) return msg def remove_node(self, node): """ Removes the Node with the given id from the host. :param node_id: id of the node to be removed. :type node_id: String """ if node in self.__node_list: del self.__node_list[node] def get_node_info(self, event): """ Get the list of all Nodes running on the host. Update the __node_list """ if not self.__is_enabled: pass if self.__is_enabled: nodes = [] for node_name in rosnode.get_node_names(): try: node_api = rosnode.get_api_uri(rospy.get_master(), node_name, skip_cache=True) if self._id in node_api[2]: nodes.append(node_name) except : pass for node in nodes: if node not in self.__node_list: """TODO currently not catching the exception here - master not running is a hard error so it does not make sense to continue running..""" node_api = rosnode.get_api_uri(rospy.get_master(), node, skip_cache=True) try: pid = self.get_node_pid(node_api, node) if not pid: continue node_process = psutil.Process(pid) new_node = NodeStatisticsHandler( self._id, node, node_process) self.__dict_lock.acquire(True) self.__node_list[node] = new_node self.__dict_lock.release() except psutil.NoSuchProcess: rospy.loginfo('pid of node %s could not be fetched' % node) continue self.__dict_lock.acquire() to_remove = [] for node_name in self.__node_list: if node_name not in nodes: rospy.logdebug('removing %s from host' % node_name) to_remove.append(node_name) for node_name in to_remove: self.remove_node(node_name) self.__dict_lock.release() def get_node_pid(self, node_api, node): """ Use the xmlrpclib Server Proxy to get node- pid :param node_api: API URI used for ServerProxy :type node_api: list :param node: name of the node :type node: string """ socket.setdefaulttimeout(1) try: code, msg, pid = xmlrpclib.ServerProxy( node_api[2]).getPid('/NODEINFO') return pid except socket.error: rospy.logdebug('Node %s is unreachable' % node) return False def update_nodes(self): """ update the status of each node in its own threading """ for node in self.__node_list: Thread(target=self.__node_list[node].measure_status).start() def get_sensors(self): """ collects the current temperature of CPU and each core """ sensors.init() added = [] cpu_temp_c = [] try: for chip in sensors.iter_detected_chips(): for feature in chip: if feature.name.startswith('temp'): if ((feature.label.startswith('Physical') or feature.label.startswith('CPU')) and feature.label not in added): self._status.add_cpu_temp(feature.get_value()) elif (feature.label.startswith('Core') and feature.label not in added): cpu_temp_c.append(feature.get_value()) added.append(feature.label) except sensors.SensorsError: pass if cpu_temp_c: try: self._status.add_cpu_temp_core(cpu_temp_c) except IndexError: pass sensors.cleanup() def check_enabled(self, event): """ Periodically checks if /enable_statistics is ture if false no data will be collected / published """ #rospy.logdebug('checking if enable_statistics is true') try: self.__is_enabled = rospy.get_param('/enable_statistics', False) except: pass #rospy.logdebug('enable_statistics is %s' % self.__is_enabled) @property def update_interval(self): return self.__update_interval @property def publish_interval(self): return self.__publish_interval @property def check_enabled_interval(self): return self.__check_enabled @property def search_nodes_inv(self): return self.__search_nodes_inv @property def status(self): return self._status
class HostStatisticsHandler(StatisticsHandler): """ Represents a host , limited to one instance per host. Collects statistics about the current state of the host and s ends them using the publisher-subscriber mechanism. """ def __init__(self, hostid): """ Collects resource usage data of host and nodes. :param hostid: ROS_IP of this host :type hostid: string """ super(HostStatisticsHandler, self).__init__(hostid) self.__update_interval = float(0) self.__publish_interval = 0 self.__is_enabled = False self.__check_enabled = 0 self.__search_nodes_inv = 0 self.__init_params() self.__register_service() self.__lock = threading.Lock() self.__dict_lock = threading.Lock() self.pub = rospy.Publisher('/statistics_host', HostStatistics, queue_size=500) #: Used to store information about the host's status. self._status = HostStatus(rospy.Time.now()) #: Interface to restart and stop nodes # or executing other commands. self.__node_manager = NodeManager() #: Dictionary holding all nodes currently running on the host. self.__node_list = {} # Base-stats for I/O self.__bandwidth_base = {} self.__msg_freq_base = {} self.__disk_write_base = {} self.__disk_read_base = {} self.__set_bases() def __set_bases(self): """ Calculate base stats for I/O at start of the node. Used to calculate difference to get per/s stats. """ psutil.cpu_percent() psutil.cpu_percent(percpu=True) for interface in psutil.net_io_counters(True): netint = psutil.net_io_counters(True)[interface] total_bytes = netint.bytes_recv + netint.bytes_sent total_packages = netint.packets_sent + netint.packets_recv self.__bandwidth_base[interface] = total_bytes self.__msg_freq_base[interface] = total_packages dev_names = [] for disk in psutil.disk_partitions(): if all(['cdrom' not in disk.opts, 'sr' not in disk.device]): dev_names.append(disk.device) for key in psutil.disk_io_counters(True): if key in dev_names: disk = psutil.disk_io_counters(True)[key] self.__disk_read_base[key] = disk.read_bytes self.__disk_write_base[key] = disk.write_bytes def __register_service(self): """ Register all services """ ip = self._id.replace('.', '_') rospy.Service("/execute_node_reaction/%s" % ip, NodeReaction, self.execute_reaction) def measure_status(self, event): """ Collects information about the host's current status using psutils. Triggered periodically. """ if not self.__is_enabled: pass if self.__is_enabled: self.__lock.acquire() # update node list self.__dict_lock.acquire() self.update_nodes() self.__dict_lock.release() # CPU self._status.add_cpu_usage(psutil.cpu_percent()) self._status.add_cpu_usage_core(psutil.cpu_percent(percpu=True)) # RAM self._status.add_ram_usage(psutil.virtual_memory().percent) # temp self.get_sensors() # Bandwidth and message frequency self.__measure_network_usage() # Disk usage self.__measure_disk_usage() self.__lock.release() def __measure_network_usage(self): """ measure current network_io_counters """ network_interfaces = psutil.net_io_counters(True) for key in network_interfaces: total_bytes = (network_interfaces[key].bytes_sent + network_interfaces[key].bytes_recv ) - self.__bandwidth_base[key] total_packages = (network_interfaces[key].packets_sent + network_interfaces[key].packets_recv ) - self.__msg_freq_base[key] bandwidth = total_bytes / self.__update_interval msg_frequency = total_packages / self.__update_interval self._status.add_bandwidth(key, bandwidth) self._status.add_msg_frequency(key, msg_frequency) # update base stats for next iteration self.__bandwidth_base[key] += total_bytes self.__msg_freq_base[key] += total_packages def __measure_disk_usage(self): """ measure current disk usage """ # Free Space on disks disks = psutil.disk_partitions() dev_name = [] for disk in disks: if 'cdrom' not in disk.opts and 'sr' not in disk.device: free_space = psutil.disk_usage(disk.mountpoint).free / 2**20 self._status.add_drive_space(disk.device, free_space) dev_name.append(disk.device) # Drive I/O drive_io = psutil.disk_io_counters(True) for disk in dev_name: if disk in drive_io: readb = drive_io[disk].read_bytes - self.__disk_read_base[disk] writeb = drive_io[disk].write_bytes - \ self.__disk_write_base[disk] read_rate = readb / self.__update_interval write_rate = writeb / self.__update_interval self._status.add_drive_read(disk, read_rate) self._status.add_drive_write(disk, write_rate) # update base stats for next iteration self.__disk_read_base[disk] += readb self.__disk_write_base[disk] += writeb else: # No information available - push None's self._status.add_drive_read(disk, None) self._status.add_drive_write(disk, None) def publish_status(self, event): """ Publishes the current status to a topic using ROS's publisher-subscriber mechanism. Triggered periodically. """ if not self.__is_enabled: pass if self.__is_enabled: self.__lock.acquire() self._status.time_end = rospy.Time.now() stats = self.__calc_statistics() self.__dict_lock.acquire() self.__publish_nodes() self.__dict_lock.release() self.pub.publish(stats) self._status.reset() self._status.time_start = rospy.Time.now() self.__lock.release() def __publish_nodes(self): """ publishes current status of all nodes. """ for node in self.__node_list: Thread(target=self.__node_list[node].publish_status).start() def __init_params(self): """ Initializes params on the parameter server, """ self.__publish_interval = rospy.get_param('~publish_interval', 10) self.__update_interval = self.__publish_interval / \ float(rospy.get_param('~window_max_elements', 10)) self.__is_enabled = rospy.get_param('/enable_statistics', False) self.__check_enabled = rospy.get_param('/arni/check_enabled_interval', 10) self.__search_nodes_inv = rospy.get_param('~search_nodes', 5) def __calc_statistics(self): """ Calculates statistics like mean, standard deviation and max from the status. Returns an instance of HostStatistics which can be published. :returns: HostStatistics """ stats_dict = self._status.calc_stats() host_status = HostStatistics() host_status.host = self._id host_status.window_start = self._status.time_start host_status.window_stop = self._status.time_end for v in dir(host_status): if v in stats_dict: setattr(host_status, v, stats_dict[v]) return host_status def execute_reaction(self, reaction): """ Parses through the reaction and calls the appropriate method from the NodeManager. Uses ROS Services. Returns a message about operation's success. :param reaction: Reaction to be executed and node affected. :type reaction: NodeReaction. :returns: String """ msg = '' if reaction.node not in self.__node_list: return 'Specified node is not running on this host.' if reaction.action == 'restart': node = self.__node_list[reaction.node] msg = self.__node_manager.restart_node(node) self.remove_node(reaction.node) elif reaction.action == 'stop': msg = self.__node_manager.stop_node(reaction.node) self.remove_node(reaction.node) elif reaction.action == 'command': msg = self.__node_manager.execute_command(reaction.command) else: msg = 'Failed to execute reaction, %s is no valid argument' % reaction.action rospy.loginfo('Executing reaction: %s' % msg) return msg def remove_node(self, node): """ Removes the Node with the given id from the host. :param node_id: id of the node to be removed. :type node_id: String """ if node in self.__node_list: del self.__node_list[node] def get_node_info(self, event): """ Get the list of all Nodes running on the host. Update the __node_list """ if not self.__is_enabled: pass if self.__is_enabled: nodes = [] for node_name in rosnode.get_node_names(): try: node_api = rosnode.get_api_uri(rospy.get_master(), node_name, skip_cache=True) if self._id in node_api[2]: nodes.append(node_name) except: pass for node in nodes: if node not in self.__node_list: """TODO currently not catching the exception here - master not running is a hard error so it does not make sense to continue running..""" node_api = rosnode.get_api_uri(rospy.get_master(), node, skip_cache=True) try: pid = self.get_node_pid(node_api, node) if not pid: continue node_process = psutil.Process(pid) new_node = NodeStatisticsHandler( self._id, node, node_process) self.__dict_lock.acquire(True) self.__node_list[node] = new_node self.__dict_lock.release() except psutil.NoSuchProcess: rospy.loginfo('pid of node %s could not be fetched' % node) continue self.__dict_lock.acquire() to_remove = [] for node_name in self.__node_list: if node_name not in nodes: rospy.logdebug('removing %s from host' % node_name) to_remove.append(node_name) for node_name in to_remove: self.remove_node(node_name) self.__dict_lock.release() def get_node_pid(self, node_api, node): """ Use the xmlrpclib Server Proxy to get node- pid :param node_api: API URI used for ServerProxy :type node_api: list :param node: name of the node :type node: string """ socket.setdefaulttimeout(1) try: code, msg, pid = xmlrpclib.ServerProxy( node_api[2]).getPid('/NODEINFO') return pid except socket.error: rospy.logdebug('Node %s is unreachable' % node) return False def update_nodes(self): """ update the status of each node in its own threading """ for node in self.__node_list: Thread(target=self.__node_list[node].measure_status).start() def get_sensors(self): """ collects the current temperature of CPU and each core """ sensors.init() added = [] cpu_temp_c = [] try: for chip in sensors.iter_detected_chips(): for feature in chip: if feature.name.startswith('temp'): if ((feature.label.startswith('Physical') or feature.label.startswith('CPU')) and feature.label not in added): self._status.add_cpu_temp(feature.get_value()) elif (feature.label.startswith('Core') and feature.label not in added): cpu_temp_c.append(feature.get_value()) added.append(feature.label) except sensors.SensorsError: pass if cpu_temp_c: try: self._status.add_cpu_temp_core(cpu_temp_c) except IndexError: pass sensors.cleanup() def check_enabled(self, event): """ Periodically checks if /enable_statistics is ture if false no data will be collected / published """ #rospy.logdebug('checking if enable_statistics is true') try: self.__is_enabled = rospy.get_param('/enable_statistics', False) except: pass #rospy.logdebug('enable_statistics is %s' % self.__is_enabled) @property def update_interval(self): return self.__update_interval @property def publish_interval(self): return self.__publish_interval @property def check_enabled_interval(self): return self.__check_enabled @property def search_nodes_inv(self): return self.__search_nodes_inv @property def status(self): return self._status