def _get_network_flow_data(self):
    '''
    Sum the received/transmitted byte counters of the monitored NICs.

    Reads /proc/net/dev from the guest and adds up the counters of every
    interface whose name is listed in CONF.net_card_list.

    @return: [receive_bytes, transfer_bytes]; both are 0 when the file
             cannot be read from the guest.
    '''
    receive_bytes = 0
    transfer_bytes = 0

    net_devs = self._read_file_from_guest('/proc/net/dev')
    if not net_devs:
        LOG.warn("Get network data failed, uuid: %s" %
                 utils.get_domain_uuid(self.domain))
        return [receive_bytes, transfer_bytes]

    for network_line in net_devs.splitlines():
        # The interface name may be glued to the first counter
        # ("eth0:123"), so turn the colon into a separator first.
        network_datas = network_line.replace(':', ' ').split()
        if not network_datas:
            continue
        try:
            if network_datas[0] not in CONF.net_card_list:
                continue
            # Parse both counters before touching the totals so that a
            # malformed line is never counted for one direction only.
            line_receive = int(network_datas[1])
            line_transfer = int(network_datas[9])
        except (KeyError, ValueError, IndexError, TypeError) as e:
            LOG.warn("Get invalid network data, uuid: %s, exception: %s" %
                     (utils.get_domain_uuid(self.domain), e))
            continue
        receive_bytes += line_receive
        transfer_bytes += line_transfer

    return [receive_bytes, transfer_bytes]
def _get_network_flow_rate_dict(self):
    '''
    Turn the byte counters of _get_network_flow_data() into rates.

    Each rate is the counter growth since the values cached in self.temp,
    divided by 1024 and by CONF.monitor_delay. The fresh counters are
    stored back into self.temp for the next run. When either rate comes
    out negative both are reported as 0.

    @return: {'receive_rate': 0.0, 'transfer_rate': 0.0}
    '''
    previous_rx = self.temp['network_receive_bytes']
    previous_tx = self.temp['network_transfer_bytes']
    current_rx, current_tx = self._get_network_flow_data()

    rx_rate = float(current_rx - previous_rx) / 1024.0 / CONF.monitor_delay
    tx_rate = float(current_tx - previous_tx) / 1024.0 / CONF.monitor_delay

    if rx_rate < 0 or tx_rate < 0:
        # A counter that went backwards makes the delta meaningless.
        LOG.warn("Get invalid network rate data: uuid: %s, %s, %s" %
                 (utils.get_domain_uuid(self.domain), rx_rate, tx_rate))
        rx_rate = 0
        tx_rate = 0

    self.temp['network_receive_bytes'] = current_rx
    self.temp['network_transfer_bytes'] = current_tx

    return {'receive_rate': rx_rate, 'transfer_rate': tx_rate}
def _get_memory_usage_dict(self): ''' Get memory info(MB) from /proc/meminfo. @return: {'total_memory': 1, 'free_memory': 1, 'used_memory': 1, 'memory_usage_rate': 45} free_memory = MemFree + Buffers + Cached used_memory = MemTotal - free_memory memory_usage_rate = used_memory * 100 / MemTotal ''' mem_usage = { 'total_memory': 0, 'free_memory': 0, 'used_memory': 0, 'memory_usage_rate': 0 } mem_file_read = self._read_file_from_guest('/proc/meminfo') if mem_file_read: mem_info_lines = mem_file_read.splitlines() else: LOG.warn("Mem usage get failed, uuid: %s" % utils.get_domain_uuid(self.domain)) return mem_usage mem_usage['total_memory'] = long(mem_info_lines[0].split()[1]) / 1024 mem_usage['free_memory'] = (long(mem_info_lines[1].split()[1]) + long(mem_info_lines[2].split()[1]) + long(mem_info_lines[3].split()[1])) / 1024 mem_usage['used_memory'] = (mem_usage['total_memory'] - mem_usage['free_memory']) mem_usage['memory_usage_rate'] = ((mem_usage['used_memory'] * 100) / mem_usage['total_memory']) return mem_usage
def _get_disk_data_by_proc(disks, total_disk_info):
    '''
    Accumulate I/O counters from the guest's /proc/diskstats.

    For every diskstats row whose device name (third column) is a key of
    ``disks``, the request counts, the read/write volumes (raw column
    value halved; the original doc labels the result KB) and the
    read/write delays (ms) are added to ``total_disk_info`` in place.
    Nothing is returned.
    '''
    partitions = disks.keys()
    diskstats = self._read_file_from_guest('/proc/diskstats')
    if not diskstats:
        LOG.warn("Get diskstats failed, uuid: %s" %
                 utils.get_domain_uuid(self.domain))
        return

    # (total_disk_info key, diskstats column, halve the value?)
    counters = (
        ('disk_read_request', 3, False),
        ('disk_write_request', 7, False),
        ('disk_read', 5, True),
        ('disk_write', 9, True),
        ('disk_read_delay', 6, False),
        ('disk_write_delay', 10, False),
    )
    for row in diskstats.splitlines():
        columns = row.split()
        if columns[2] not in partitions:
            continue
        for key, index, halve in counters:
            amount = long(columns[index])
            if halve:
                amount = amount / 2
            total_disk_info[key] += amount
def _get_mounted_disks():
    '''
    Map the /dev/* partitions mounted in the guest to their mount point.

    Entries are taken from /proc/mounts in file order. A partition is
    recorded when neither it nor its target has been seen yet; the first
    mount of '/' is always recorded, even for a known partition.

    @return: partition:target dict: {'vda1': '/', 'dm-0': '/mnt'}
             (empty when /proc/mounts cannot be read)
    '''
    mounted_disks = {}
    mounts_file = self._read_file_from_guest('/proc/mounts')
    if not mounts_file:
        LOG.warn("Get mounted disks failed, uuid: %s" %
                 utils.get_domain_uuid(self.domain))
        return mounted_disks

    for entry in mounts_file.splitlines():
        if not entry.startswith('/dev/'):
            continue
        fields = entry.split()
        device = fields[0]
        # Prefer the resolved path; fall back to the raw device path when
        # the guest agent gives no usable answer.
        resolved = _get_disk_realpath(device) or device
        partition = resolved.rsplit('/')[-1]
        target = fields[1]

        unseen = (partition not in mounted_disks and
                  target not in mounted_disks.values())
        first_root = target == '/' and '/' not in mounted_disks.values()
        if unseen or first_root:
            mounted_disks[partition] = target

    return mounted_disks
def get_system_usage_datas(self):
    '''
    Run every collector and flatten the results into a single dict.

    The keys of the returned dict are the metric names used by the XML
    monitor setting. The assembled dict is also logged as JSON.
    '''
    cpu = self._get_cpu_usage_dict()
    load = self._get_loadavg_dict()
    memory = self._get_memory_usage_dict()
    network = self._get_network_flow_rate_dict()
    disk = self._get_disk_usage_rate_dict()

    # diskPartition carries the partition list and its per-partition data
    # side by side.
    partition_pair = [disk['disk_partition_info'],
                      disk['disk_partition_data']]

    all_system_usage_dict = {
        'cpuUsage': cpu['cpu_usage'],
        'memUsage': memory['used_memory'],
        'networkReceive': network['receive_rate'],
        'networkTransfer': network['transfer_rate'],
        'diskUsage': disk['used_disk'],
        'diskWriteRate': disk['disk_write_rate'],
        'diskReadRate': disk['disk_read_rate'],
        'diskWriteRequest': disk['disk_write_request'],
        'diskReadRequest': disk['disk_read_request'],
        'diskWriteDelay': disk['disk_write_delay'],
        'diskReadDelay': disk['disk_read_delay'],
        'diskPartition': partition_pair,
        'loadavg_5': load['loadavg_5'],
        'memUsageRate': memory['memory_usage_rate']
    }

    uuid = utils.get_domain_uuid(self.domain)
    LOG.info("get system from uuid: (%s), Usage:%s" %
             (uuid, json.dumps(all_system_usage_dict)))
    return all_system_usage_dict
def main():
    '''
    Collect the system usage of every active domain on this host once.

    All domains reported by the hypervisor are processed. The database
    instances are still fetched, but matching domains against them (to
    attach a project id) is currently disabled.
    '''
    db_instances = instance.get_all_instances_on_host()
    # Not consulted below while the database filter stays disabled.
    db_uuids = [record['id'] for record in db_instances]

    helper = helper_.LibvirtQemuHelper()
    domains_to_monitor = []
    for hyper_dom in helper.list_all_domains():
        hyper_dom.UUIDString()
        domains_to_monitor.append(hyper_dom)

    for dom in domains_to_monitor:
        uuid = utils.get_domain_uuid(dom)
        if not uuid:
            LOG.warn("Get domain uuid failed")
            continue
        if not utils.is_active(dom):
            LOG.info("Domain is not active, uuid: %s" % uuid)
            continue

        collector = data_stat.GetSystemUsage(dom, helper)
        collector.get_system_usage_datas()
def serve(self):
    '''
    Ping each active domain through the guest agent, once per domain.

    A heartbeat is reported through self.sender for every domain that
    answers the "guest-ping" command. The loop stops early as soon as
    self.RUN_TH turns false.
    '''
    LOG.info("Heartbeat thread start")
    for dom in self.helper.list_all_domains():
        if not self.RUN_TH:
            LOG.info("Break from hearbeat thread")
            break

        uuid = utils.get_domain_uuid(dom)
        if not uuid:
            LOG.warn("Get domain uuid failed")
            continue
        if not utils.is_active(dom):
            LOG.info("domain is not active, uuid %s" % uuid)
            continue

        ping_request = json.dumps({"execute": "guest-ping"})
        reply = self.helper.exec_qga_command(
            dom, ping_request, timeout=CONF.heartbeat_cmd_timeout)
        LOG.debug("Ping command response from qga: %s" % reply)

        if not reply:
            LOG.warn("Ping command failed, uuid: %s" % uuid)
            continue
        self.sender.report_heartbeat(uuid)

    LOG.info("Heartbeat thread end")
def _get_cpu_usage_dict(self): ''' Get CPU usage(percent) by vmstat command. @return: {'cpu_usage': 0.0} ''' cpu_stat = self._read_file_from_guest('/proc/stat') if cpu_stat: cpu_read_line = cpu_stat.splitlines()[0] cpu_infos = cpu_read_line.split()[1:-1] total_cpu_time = 0L for cpu_info in cpu_infos: total_cpu_time += long(cpu_info) last_cpu_time = self.temp['total_cpu_time'] cpu_idle_time = long(cpu_infos[3]) last_cpu_idle_time = self.temp['last_cpu_idle_time'] total_cpu_period = float(total_cpu_time - last_cpu_time) idle_cpu_period = float(cpu_idle_time - last_cpu_idle_time) if total_cpu_period <= 0 or idle_cpu_period < 0: cpu_usage = 0.0 else: idle_usage = idle_cpu_period / total_cpu_period * 100 cpu_usage = round(100 - idle_usage, 2) self.temp['total_cpu_time'] = total_cpu_time self.temp['last_cpu_idle_time'] = cpu_idle_time else: LOG.warn("Cpu usage get failed, uuid: %s" % utils.get_domain_uuid(self.domain)) cpu_usage = 0.0 return {'cpu_usage': cpu_usage}
def save_temp(self):
    '''
    Persist self.temp as JSON inside the instance directory.

    The target is CONF.temp_file_name under utils.get_instance_dir(). A
    'timestamp' entry holding the current epoch seconds is set on
    self.temp right before it is written.
    '''
    instance_dir = utils.get_instance_dir(self.domain)
    temp_file = os.path.join(instance_dir, CONF.temp_file_name)
    LOG.debug("Saving temp data of instance %s to %s: %s" %
              (utils.get_domain_uuid(self.domain), temp_file, self.temp))

    with open(temp_file, 'w') as out:
        self.temp['timestamp'] = long(time.time())
        serialized = json.dumps(self.temp)
        out.write(serialized)
def _get_fs_info(path):
    """Get free/used/total space info for a filesystem

    The figures come from the guest agent's "guest-get-statvfs" reply.

    :param path: Any dirent on the filesystem
    :returns: A dict containing:

             :free: f_frsize * f_bavail (in MB)
             :used: f_frsize * (f_blocks - f_bfree) (in MB)
             :total: f_frsize * f_blocks (in MB)

             All three are 0.0 when the guest agent does not answer or
             its answer cannot be used.
    """
    def byte_to_mb(v):
        return (float(v) / 1024.0 / 1024.0)

    fs_info = {'total': 0.0, 'free': 0.0, 'used': 0.0}
    cmd_statvfs = json.dumps({"execute": "guest-get-statvfs",
                              "arguments": {"path": path}})
    response = self.helper.exec_qga_command(
        self.domain, cmd_statvfs, timeout=CONF.read_file_time_out)
    if not response:
        LOG.warn("Get statvfs failed, uuid: %s" %
                 utils.get_domain_uuid(self.domain))
        return fs_info

    LOG.debug("Get statvfs response from qga: %s" % response)
    try:
        hddinfo = json.loads(response)['return']
        frsize = hddinfo['f_frsize']
        total = byte_to_mb(frsize * hddinfo['f_blocks'])
        free = byte_to_mb(frsize * hddinfo['f_bavail'])
        used = byte_to_mb(frsize *
                          (hddinfo['f_blocks'] - hddinfo['f_bfree']))
    except (ValueError, KeyError, TypeError) as e:
        # Bail out with the zeroed result instead of going on to
        # subscript a missing or malformed reply.
        LOG.warn("Get statvfs failed, uuid: %s, exception: %s" %
                 (utils.get_domain_uuid(self.domain), e))
        return fs_info

    fs_info['total'] = total
    fs_info['free'] = free
    fs_info['used'] = used
    return fs_info
def _get_fs_info(path):
    """Get free/used/total space info for a filesystem

    The figures come from the guest agent's "guest-get-statvfs" reply.

    :param path: Any dirent on the filesystem
    :returns: A dict containing:

             :free: f_frsize * f_bavail (in MB)
             :used: f_frsize * (f_blocks - f_bfree) (in MB)
             :total: f_frsize * f_blocks (in MB)

             All three are 0.0 when the guest agent does not answer or
             its answer cannot be used.
    """
    def byte_to_mb(v):
        return (float(v) / 1024.0 / 1024.0)

    fs_info = {'total': 0.0, 'free': 0.0, 'used': 0.0}
    cmd_statvfs = json.dumps({"execute": "guest-get-statvfs",
                              "arguments": {"path": path}})
    response = self.helper.exec_qga_command(
        self.domain, cmd_statvfs, timeout=CONF.read_file_time_out)
    if not response:
        LOG.warn("Get statvfs failed, uuid: %s" %
                 utils.get_domain_uuid(self.domain))
        return fs_info

    LOG.debug("Get statvfs response from qga: %s" % response)
    try:
        hddinfo = json.loads(response)['return']
        frsize = hddinfo['f_frsize']
        total = byte_to_mb(frsize * hddinfo['f_blocks'])
        free = byte_to_mb(frsize * hddinfo['f_bavail'])
        used = byte_to_mb(frsize *
                          (hddinfo['f_blocks'] - hddinfo['f_bfree']))
    except (ValueError, KeyError, TypeError) as e:
        # Bail out with the zeroed result instead of going on to
        # subscript a missing or malformed reply.
        LOG.warn("Get statvfs failed, uuid: %s, exception: %s" %
                 (utils.get_domain_uuid(self.domain), e))
        return fs_info

    fs_info['total'] = total
    fs_info['free'] = free
    fs_info['used'] = used
    return fs_info
def _get_loadavg_dict(self): ''' Get loadavg info from /proc/loadavg. @return: {'loadavg_5': 4.32} ''' loadavg_file_read = self._read_file_from_guest('/proc/loadavg') if loadavg_file_read: loadavg_info_line = loadavg_file_read.splitlines()[0] loadavg_5 = float(loadavg_info_line.split()[1]) else: LOG.warn("Loadavg_5 get failed, uuid: %s" % utils.get_domain_uuid(self.domain)) loadavg_5 = 0.0 return {'loadavg_5': loadavg_5}
def _get_disk_realpath(path):
    '''
    Ask the guest agent ("guest-get-realpath") to resolve ``path``.

    @return: the 'return' member of the agent's JSON reply, or None when
             the agent gives no reply or the reply cannot be decoded.
    '''
    request = json.dumps({"execute": "guest-get-realpath",
                          "arguments": {"path": path}})
    reply = self.helper.exec_qga_command(
        self.domain, request, timeout=CONF.read_file_time_out)
    if not reply:
        LOG.warn("Get realpath of %s by qga failed" % path)
        return None

    LOG.debug("Get realpath response from qga: %s" % reply)
    try:
        resolved = json.loads(reply)['return']
    except (ValueError, KeyError, TypeError) as e:
        LOG.warn("get realpath failed, uuid: %s, exception: %s" %
                 (utils.get_domain_uuid(self.domain), e))
        resolved = None
    return resolved
def _get_disk_realpath(path):
    '''
    Ask the guest agent ("guest-get-realpath") to resolve ``path``.

    Example: /proc/mounts may list
    /dev/disk/by-uuid/dd3f9691-fbcb-4f54-b4ac-9e28c08ecb2d, which is a
    soft link to ../../vda1; the wanted result is /dev/vda1.

    @return: the 'return' member of the agent's JSON reply, or None when
             the agent gives no reply or the reply cannot be decoded.
    '''
    request = json.dumps({"execute": "guest-get-realpath",
                          "arguments": {"path": path}})
    reply = self.helper.exec_qga_command(
        self.domain, request, timeout=CONF.read_file_time_out)
    if not reply:
        LOG.warn("Get realpath of %s by qga failed" % path)
        return None

    LOG.debug("Get realpath response from qga: %s" % reply)
    try:
        resolved = json.loads(reply)['return']
    except (ValueError, KeyError, TypeError) as e:
        LOG.warn("get realpath failed, uuid: %s, exception: %s" %
                 (utils.get_domain_uuid(self.domain), e))
        resolved = None
    return resolved
def serve(self):
    '''
    Collect and upload monitor data once for every active domain.

    For each (domain, project_id) pair from self._update_instances() the
    usage data is gathered, a partition change is notified when the
    'diskPartition' metric is configured, the temp data is saved, and,
    if the previously saved temp data was usable, the formatted metrics
    are sent to the server. The loop stops early as soon as self.RUN_TH
    turns false.
    '''
    LOG.info("Monitor thread start")
    for (dom, project_id) in self._update_instances():
        if not self.RUN_TH:
            LOG.info("Break from monitor thread")
            break

        uuid = utils.get_domain_uuid(dom)
        if not uuid:
            LOG.warn("Get domain uuid failed")
            continue
        if not utils.is_active(dom):
            LOG.info("Domain is not active, uuid: %s" % uuid)
            continue

        info_file_dict = utils.get_info_file_dict(dom, project_id)
        if not info_file_dict:
            LOG.warn("Info file load failed, uuid: %s" % uuid)
            continue

        monitor_setting_root = utils.get_monitor_setting_root(dom)
        if not monitor_setting_root:
            LOG.warn("Monitor setting file load failed, uuid: %s" % uuid)
            continue

        # Snapshot the partition list before and after collecting so a
        # change can be detected.
        collector = GetSystemUsage(dom, self.helper)
        temp_ok = collector.load_temp()
        last_partitions = collector.temp['disk_partition_info']
        all_usage_dict = collector.get_system_usage_datas()
        new_partitions = collector.temp['disk_partition_info']

        metrics = utils.get_monitor_metrics(info_file_dict,
                                            monitor_setting_root)
        metric_names = [metric.attrib.get('name') for metric in metrics]
        LOG.debug("Metric names of %s: %s" % (uuid, metric_names))

        identify_id = utils.get_identify_id(info_file_dict, uuid)

        # FIXME(wangpan): hardcode here the 'diskPartition' metric
        if 'diskPartition' in metric_names:
            if last_partitions != new_partitions:
                LOG.info("Notifing partitions change of %s, "
                         "old: %s, new: %s" %
                         (uuid, last_partitions, new_partitions))
                notified = sender.notify_platform_partition_change(
                    new_partitions, info_file_dict,
                    monitor_setting_root, identify_id)
                if not notified:
                    LOG.warn("Notifing partitions change failed")
                    # Keep the old list so the change is seen again on
                    # the next run.
                    collector.temp['disk_partition_info'] = last_partitions

        collector.save_temp()

        if not temp_ok:
            LOG.info("First start or temp file is expired, %s" % uuid)
            continue

        metric_datas = DataFormater().format_data(
            metrics, all_usage_dict, monitor_setting_root,
            info_file_dict, identify_id)
        uploader = sender.SendRequest(info_file_dict,
                                      json.dumps(metric_datas))
        response = uploader.send_request_to_server()
        if response and response.status_code == 200:
            LOG.debug("Send monitor data of %s successfully" % uuid)
        else:
            LOG.error("Send monitor data of %s faild" % uuid)

    LOG.info("Monitor thread end")