def get_memory_info(self):
    """Collect physical-memory statistics via psutil.

    Returns the errcode result dict extended with:
      utc          -- seconds since the Unix epoch (int)
      total        -- total physical memory in bytes
      available    -- available memory in bytes
      utilization  -- used-memory percentage
    On any failure returns a "GetMemoryInfoFailure" error result.
    """
    try:
        resp = errcode.get_error_result()
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        resp['utc'] = utc
        mem_info = psutil.virtual_memory()
        resp['total'] = mem_info.total
        resp['available'] = mem_info.available
        resp['utilization'] = mem_info.percent
        # NOTE(review): a disabled redis-insert snippet used to sit here as a
        # no-op triple-quoted string; removed as dead code (see VCS history).
        return resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        resp = errcode.get_error_result(error="GetMemoryInfoFailure")
        return resp
def get_service_info(self):
    """Report the systemd status of every monitored service.

    Returns a dict with 'utc', 'type', 'node_uuid' and a 'data' mapping of
    service name -> status string; services absent from the
    `systemctl list-units` output are reported as 'not found'.
    On failure returns a "GetServiceInfoFailure" error result.
    """
    try:
        epoch = dt.datetime.utcfromtimestamp(0)
        info = {}
        info['data'] = {}
        info['utc'] = int((dt.datetime.utcnow() - epoch).total_seconds())
        info['type'] = 'service'
        info['node_uuid'] = self.node_uuid
        pattern = '|'.join(self.services)
        lines = os.popen('systemctl list-units|grep -E "%s"' % pattern).readlines()
        for line in lines:
            # "<name>.<unit-type> loaded active running ..." -- split on the
            # first dot, unit name is the last token before it.
            name = line.split('.')[0].split()[-1].strip()
            status = line.split('.')[1].split()[3].strip()
            info['data'][name] = status
        for missing in (s for s in self.services if s not in info['data']):
            info['data'][missing] = 'not found'
        return info
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        return errcode.get_error_result(error="GetServiceInfoFailure")
def setup_and_start(app):
    """Instantiate the monitoring task objects bound to *app*.

    Returns the list of task instances. Raises Exception("load task error")
    (chained to the original failure) if any constructor fails.
    """
    try:
        # Tasks were once discovered dynamically by scanning this package;
        # that commented-out loader was dead code and has been removed in
        # favour of the explicit list below.
        tasks = [ResourceTask(app), ServiceTask(app), StatisticTask(app)]
        return tasks
    except Exception as err:
        print('err {}'.format(err))
        logger.error('err {}'.format(err))
        logger.error(''.join(traceback.format_exc()))
        current_app.logger.error(err)
        # Chain the cause so the original traceback is preserved.
        raise Exception("load task error") from err
def get_diskio_info(self):
    """Collect read/write byte counters for physical block devices.

    Virtual devices (those listed under /sys/devices/virtual/block/) are
    excluded. Returns the errcode result dict with 'utc' plus one
    {'read_bytes', 'write_bytes'} entry per physical device; on failure
    returns a "GetDiskIoInfoFailure" error result.
    """
    try:
        resp = errcode.get_error_result()
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        resp['utc'] = utc
        diskio_parts = psutil.disk_io_counters(perdisk=True)
        virtual_block_device = os.listdir('/sys/devices/virtual/block/')
        physical_block_device = [
            dev for dev in diskio_parts if dev not in virtual_block_device
        ]
        for diskio in physical_block_device:
            resp[diskio] = {
                'read_bytes': diskio_parts[diskio].read_bytes,
                'write_bytes': diskio_parts[diskio].write_bytes
            }
        # NOTE(review): a disabled redis-insert snippet used to sit here as a
        # no-op triple-quoted string; removed as dead code (see VCS history).
        return resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        resp = errcode.get_error_result(error="GetDiskIoInfoFailure")
        return resp
def get_bonds_dict(self):
    """Read network-bonding topology from sysfs.

    Returns {'bond_masters': [...], 'bond_slaves': [...]} on success, or
    None when the master file is missing/empty or any per-bond slave file
    is missing/empty.
    """
    if not os.path.exists(self.bond_master_file):
        return None
    with open(self.bond_master_file) as fh:
        masters_raw = fh.read()
    if not masters_raw:
        logger.warning('{} bond slave file is null!!!'.format(
            self.bond_master_file))
        return None
    # Content is space-separated with a trailing newline; drop the last char.
    masters = masters_raw[:-1].split(' ')
    slaves = []
    for master in masters:
        slave_path = self.bond_slave_file % master
        if not os.path.exists(slave_path):
            logger.error(
                '{} bond slave file not exists!!!'.format(slave_path))
            return None
        with open(slave_path) as fh:
            slaves_raw = fh.read()
        if not slaves_raw:
            logger.error('{} bond slave file is null!!!'.format(slave_path))
            return None
        slaves.extend(slaves_raw[:-1].split(' '))
    return {'bond_masters': masters, 'bond_slaves': slaves}
def get_networkio_info(self):
    """Collect sent/received byte counters for physical NICs.

    Virtual interfaces (those listed under /sys/devices/virtual/net/) are
    excluded. Returns the errcode result dict with 'utc' plus one
    {'bytes_send', 'bytes_recv'} entry per physical NIC; on failure returns
    a "GetNetworkIoInfoFailure" error result.
    """
    try:
        resp = errcode.get_error_result()
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        resp['utc'] = utc
        nics_io = psutil.net_io_counters(pernic=True)
        virtual_net_device = os.listdir('/sys/devices/virtual/net/')
        physical_net_device = [
            dev for dev in nics_io if dev not in virtual_net_device
        ]
        for nic in physical_net_device:
            # Key is 'bytes_send' (sic) for wire-format compatibility.
            resp[nic] = {
                'bytes_send': nics_io[nic].bytes_sent,
                'bytes_recv': nics_io[nic].bytes_recv
            }
        # NOTE(review): a disabled redis-insert snippet used to sit here as a
        # no-op triple-quoted string; removed as dead code (see VCS history).
        return resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        resp = errcode.get_error_result(error="GetNetworkIoInfoFailure")
        return resp
def get_perf_info(self):
    """Aggregate cpu/memory/disk/diskio/networkio stats into one payload.

    Returns the merged dict ({'type', 'node_uuid', 'utc', 'data': {...}})
    on success, the failing sub-collector's error dict when one reports a
    non-zero code, or None on unexpected errors (callers treat None as
    "skip this reporting cycle").
    """
    try:
        merge_resp = {}
        merge_resp['type'] = 'resource'
        merge_resp['node_uuid'] = self.node_uuid
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        merge_resp['utc'] = utc
        call_func_list = [
            self.get_cpu_info, self.get_memory_info, self.get_disk_info,
            self.get_diskio_info, self.get_networkio_info
        ]
        merge_resp['data'] = {}
        for func in call_func_list:
            resp = func()
            if resp['code'] != 0:
                # Propagate the first failing sub-result unchanged.
                return resp
            # Strip envelope keys before merging (pop with default replaces
            # the old membership-test-then-pop pattern).
            for key in ['utc', 'code', 'msg']:
                resp.pop(key, None)
            # e.g. get_cpu_info -> data key 'cpu'.
            add_key_name = func.__name__.split('_')[1]
            merge_resp['data'][add_key_name] = resp
        return merge_resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        # Fixed: the old code built an unused error dict here and then
        # returned None anyway; the dead assignment is gone.
        return None
def get_cpu_info(self):
    """Collect CPU count and utilization percentage via psutil.

    Returns the errcode result dict extended with 'utc', 'numbers'
    (logical CPU count) and 'utilization' (percent); on failure returns a
    "GetCpuInfoFailure" error result.
    """
    try:
        resp = errcode.get_error_result()
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        resp['utc'] = utc
        cpu_numbers = psutil.cpu_count()
        resp['numbers'] = cpu_numbers
        cpu_utilization = psutil.cpu_percent()
        resp['utilization'] = cpu_utilization
        # NOTE(review): a disabled redis-insert snippet used to sit here as a
        # no-op triple-quoted string; removed as dead code (see VCS history).
        return resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        resp = errcode.get_error_result(error="GetCpuInfoFailure")
        return resp
def request(self, **kwargs):
    """POST *kwargs* to self.server_url.

    Returns (resp, body) from the HTTP client, or (None, None) when the
    request raised (the error is logged, never re-raised).
    """
    client = HttpClient()
    resp, body = None, None
    try:
        resp, body = client.post(url=self.server_url, **kwargs)
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
    return resp, body
def ping_server(self):
    """Return True when redis answers a ping; otherwise log and return False."""
    try:
        if not self.ping():
            logger.error('redis ping error, please check redis service!!!')
            return False
        return True
    except Exception as err:
        logger.error(err)
        return False
def get_nic_util(self):
    """Sample per-NIC tx/rx byte counters into the app's rolling statistics.

    Monitors physical NICs (excluding sysfs-virtual devices and tap*
    interfaces) plus bond masters, dropping each bond's slave NICs.
    Appends bytes_sent/bytes_recv into bounded lists (self.record_cnt
    entries) under current_app.statistic['nic_util'][nic]. Errors are
    logged, never raised.
    """
    try:
        nics_io = psutil.net_io_counters(pernic=True)
        # Virtual devices live under /sys/devices/virtual/net/.
        virtual_net_device = os.listdir('/sys/devices/virtual/net/')
        physical_net_device = [
            dev for dev in nics_io
            if dev not in virtual_net_device and not dev.startswith("tap")
        ]
        monitor_net_devices = physical_net_device
        # add bond, drop bond's slave physical nic
        bond_info = self.get_bonds_dict()
        if bond_info:
            bond_masters = bond_info.get("bond_masters", [])
            bond_slaves = bond_info.get("bond_slaves", [])
            monitor_net_devices.extend(bond_masters)
            monitor_net_devices = [
                dev for dev in monitor_net_devices if dev not in bond_slaves
            ]
        with self.app.app_context():
            for nic in monitor_net_devices:
                bytes_send = nics_io[nic].bytes_sent
                bytes_recv = nics_io[nic].bytes_recv
                # First sighting of this NIC: initialize its entry with the
                # first IPv4 address (if any) and empty history lists.
                if nic not in current_app.statistic['nic_util'].keys():
                    current_app.statistic['nic_util'][nic] = {}
                    current_app.statistic['nic_util'][nic]['ip'] = ""
                    if (nic in psutil.net_if_addrs()) and (
                            psutil.net_if_addrs()[nic][0].family ==
                            socket.AF_INET):
                        current_app.statistic['nic_util'][nic][
                            'ip'] = psutil.net_if_addrs()[nic][0].address
                    current_app.statistic['nic_util'][nic][
                        'read_bytes'] = []
                    current_app.statistic['nic_util'][nic][
                        'write_bytes'] = []
                # Keep at most self.record_cnt samples: drop the oldest
                # before appending the new counter value.
                if len(current_app.statistic['nic_util'][nic]
                       ['write_bytes']) == self.record_cnt:
                    current_app.statistic['nic_util'][nic][
                        'write_bytes'].pop(0)
                current_app.statistic['nic_util'][nic][
                    'write_bytes'].append(bytes_send)
                if len(current_app.statistic['nic_util'][nic]
                       ['read_bytes']) == self.record_cnt:
                    current_app.statistic['nic_util'][nic][
                        'read_bytes'].pop(0)
                current_app.statistic['nic_util'][nic][
                    'read_bytes'].append(bytes_recv)
            logger.debug(current_app.statistic['nic_util'])
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_services(self):
    """Append the service names flagged "true" in config/monitor_services.ini
    to self.services. Errors are logged, never raised.
    """
    try:
        conf_dir = os.path.join(BASE_DIR, 'config')
        parser = configparser.ConfigParser()
        parser.read('{}/monitor_services.ini'.format(conf_dir))
        section = parser['SERVICES']
        self.services.extend(
            name for name in section if section[name] == "true")
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_cpu_util(self):
    """Sample CPU utilization into the app's bounded rolling history.

    Appends psutil.cpu_percent() to current_app.statistic['cpu_util'],
    keeping at most self.record_cnt samples. A zero/falsy sample is
    skipped. Errors are logged, never raised.
    """
    try:
        sample = psutil.cpu_percent()
        with self.app.app_context():
            if sample:
                history = current_app.statistic['cpu_util']
                if len(history) == self.record_cnt:
                    history.pop(0)
                history.append(sample)
                logger.debug(current_app.statistic['cpu_util'])
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def process(self):
    """Refresh the shared statistic timestamp and run every collector.

    Sets current_app.statistic['utc'] to the current epoch second, then
    invokes the cpu/memory/disk/nic/diskio samplers in order. Errors are
    logged, never raised.
    """
    try:
        epoch = dt.datetime.utcfromtimestamp(0)
        now_utc = int((dt.datetime.utcnow() - epoch).total_seconds())
        with self.app.app_context():
            current_app.statistic['utc'] = now_utc
            for collect in (self.get_cpu_util, self.get_memory_util,
                            self.get_disk_util, self.get_nic_util,
                            self.get_diskio_util):
                collect()
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def process(self):
    """Collect the resource performance payload and POST it to the controller.

    Updates self.now_date (YYYYMMDD) first; when collection returns None the
    POST is skipped for this cycle. Errors are logged, never raised.
    """
    try:
        self.now_date = dt.datetime.now().strftime('%Y%m%d')
        resource_info = self.get_perf_info()
        logger.info(
            json.dumps(resource_info,
                       sort_keys=True,
                       indent=4,
                       separators=(', ', ': ')))
        if resource_info is None:
            return
        resp, body = self.request(headers={}, body=resource_info)
        logger.info('resp = {}, body = {}'.format(resp, body))
    except Exception as err:
        logger.error(err)
        # Added for consistency with the other task handlers, which also
        # log the full stack trace.
        logger.error(''.join(traceback.format_exc()))
def process(self):
    """Collect the service-status payload and POST it to the controller.

    When collection returns None the POST is skipped for this cycle.
    Errors are logged, never raised.
    """
    try:
        payload = self.get_service_info()
        logger.info(
            json.dumps(payload,
                       sort_keys=True,
                       indent=4,
                       separators=(', ', ': ')))
        if payload is not None:
            resp, body = self.request(headers={}, body=payload)
            logger.info('resp = {}, body = {}'.format(resp, body))
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_config(self):
    """Read the controller address from config/monitor_server.ini.

    Side effect: sets self.node_uuid when both CONTROLLER options are
    present. Returns the server url, or None when the options are missing
    or reading fails (errors are logged).
    """
    try:
        conf_dir = os.path.join(BASE_DIR, 'config')
        parser = configparser.ConfigParser()
        parser.read('{}/monitor_server.ini'.format(conf_dir))
        url = None
        if (parser.has_option('CONTROLLER', 'addr')
                and parser.has_option('CONTROLLER', 'node_uuid')):
            url = parser.get('CONTROLLER', 'addr')
            self.node_uuid = parser.get('CONTROLLER', 'node_uuid')
        return url
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        return None
def get_disk_io_ticks(self, dev_name):
    """Return the "use" tick counter for *dev_name* from /proc/diskstats.

    Each line has 14 fields:
      major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq
    The "use" field is the second-to-last. Returns 0 when the device is not
    listed or the file cannot be read.
    """
    disk_io_use_ticks = 0
    try:
        with open('/proc/diskstats', 'r') as f:
            for line in f:
                fields = line.split()
                # BUG FIX: the old code rebound the *dev_name* parameter to
                # the line's device field, so `dev_name == dev_name` was
                # always true and the first line always matched.
                if fields[2] == dev_name:
                    disk_io_use_ticks = int(fields[-2])
                    break
    except Exception as err:
        logger.error("exception: %s", err, exc_info=True)
    return disk_io_use_ticks
def init_config(self):
    """Load redis connection settings from config/monitor_server.ini.

    Options present in the [REDIS] section override the corresponding
    attribute defaults; missing options leave them untouched. Errors are
    logged, never raised.
    """
    try:
        conf_dir = os.path.join(BASE_DIR, 'config')
        conf = configparser.ConfigParser()
        conf.read('{}/monitor_server.ini'.format(conf_dir))
        if conf.has_option('REDIS', 'redis_host'):
            self._host = conf.get('REDIS', 'redis_host')
        # Numeric settings are parsed as ints (was conf.get, which returned
        # strings for port/db/ttl even though they are used numerically).
        if conf.has_option('REDIS', 'redis_port'):
            self._port = conf.getint('REDIS', 'redis_port')
        if conf.has_option('REDIS', 'redis_password'):
            self._password = conf.get('REDIS', 'redis_password')
        if conf.has_option('REDIS', 'redis_db'):
            self._db = conf.getint('REDIS', 'redis_db')
        if conf.has_option('REDIS', 'redis_ttl'):
            # TTL in seconds, used as the expiry for inserted keys.
            self.live_seconds = conf.getint('REDIS', 'redis_ttl')
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_memory_util(self):
    """Sample memory usage into the app's bounded rolling histories.

    Appends virtual_memory().used and .percent to the 'used' and 'percent'
    lists under current_app.statistic['memory_util'], keeping at most
    self.record_cnt samples each. Errors are logged, never raised.
    """
    try:
        snapshot = psutil.virtual_memory()
        with self.app.app_context():
            if snapshot.percent:
                stats = current_app.statistic['memory_util']
                # Lazily create the history lists on first use.
                if not stats.keys():
                    stats['used'] = []
                    stats['percent'] = []
                if len(stats['percent']) == self.record_cnt:
                    stats['percent'].pop(0)
                    stats['used'].pop(0)
                stats['percent'].append(snapshot.percent)
                stats['used'].append(snapshot.used)
                logger.debug(current_app.statistic['memory_util'])
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_disk_util(self):
    """Record current usage (rate/used/total) for every mounted partition.

    Overwrites (not appends) the per-mountpoint values under
    current_app.statistic['disk_util'] on each call. Errors are logged,
    never raised.
    """
    try:
        partitions = psutil.disk_partitions()
        with self.app.app_context():
            table = current_app.statistic['disk_util']
            for part in partitions:
                mount = part.mountpoint
                usage = psutil.disk_usage(mount)
                if mount not in table.keys():
                    table[mount] = {}
                # Percentage is stored as a 2-decimal string.
                table[mount]['rate'] = "%0.2f" % usage.percent
                table[mount]['used'] = usage.used
                table[mount]['total'] = usage.total
            logger.debug(current_app.statistic['disk_util'])
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_diskio_util(self):
    """Sample per-disk IO counters into the app's rolling statistics.

    For each disk in self.get_disk_list(), appends read_bytes/write_bytes
    (from psutil) and the /proc/diskstats "use" tick counter into bounded
    lists (self.record_cnt entries) under
    current_app.statistic['disk_io_util'][disk]. Errors are logged, never
    raised.
    """
    try:
        with self.app.app_context():
            disks_io = psutil.disk_io_counters(perdisk=True)
            for disk_name in self.get_disk_list():
                # disk_io = disks_io[disk_name]
                write_bytes = disks_io[disk_name].write_bytes
                read_bytes = disks_io[disk_name].read_bytes
                io_use_ticks = self.get_disk_io_ticks(disk_name)
                # First sighting of this disk: create its three empty
                # history lists.
                if disk_name not in current_app.statistic[
                        'disk_io_util'].keys():
                    current_app.statistic['disk_io_util'][disk_name] = {}
                    current_app.statistic['disk_io_util'][disk_name][
                        'read_bytes'] = []
                    current_app.statistic['disk_io_util'][disk_name][
                        'write_bytes'] = []
                    current_app.statistic['disk_io_util'][disk_name][
                        'io_use_ticks'] = []
                # Keep at most self.record_cnt samples: drop the oldest
                # entry from each list before appending the new counters.
                if len(current_app.statistic['disk_io_util'][disk_name]
                       ['write_bytes']) == self.record_cnt:
                    current_app.statistic['disk_io_util'][disk_name][
                        'write_bytes'].pop(0)
                    current_app.statistic['disk_io_util'][disk_name][
                        'read_bytes'].pop(0)
                    current_app.statistic['disk_io_util'][disk_name][
                        'io_use_ticks'].pop(0)
                current_app.statistic['disk_io_util'][disk_name][
                    'write_bytes'].append(write_bytes)
                current_app.statistic['disk_io_util'][disk_name][
                    'read_bytes'].append(read_bytes)
                current_app.statistic['disk_io_util'][disk_name][
                    'io_use_ticks'].append(io_use_ticks)
            logger.debug(current_app.statistic['disk_io_util'])
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
def get_disk_info(self):
    """Collect per-mountpoint disk usage via psutil.

    Returns the errcode result dict with 'utc' plus one entry per
    mountpoint containing 'type' (SSD-check result from self.checkSsd —
    presumably a flag; confirm against the helper), 'total', 'used',
    'free' and 'utilization'. On failure returns a "GetDiskInfoFailure"
    error result.
    """
    try:
        resp = errcode.get_error_result()
        # Seconds since the Unix epoch, via naive-UTC datetime arithmetic.
        utc = int((dt.datetime.utcnow() -
                   dt.datetime.utcfromtimestamp(0)).total_seconds())
        resp['utc'] = utc
        disk_parts = psutil.disk_partitions()
        for disk in disk_parts:
            disk_mountpoint = disk.mountpoint
            disk_usage = psutil.disk_usage(disk_mountpoint)
            resp[disk_mountpoint] = {
                'type': self.checkSsd(disk.device),
                'total': disk_usage.total,
                'used': disk_usage.used,
                'free': disk_usage.free,
                'utilization': disk_usage.percent
            }
        # NOTE(review): a disabled redis-insert snippet used to sit here as a
        # no-op triple-quoted string; removed as dead code (see VCS history).
        return resp
    except Exception as err:
        logger.error(err)
        logger.error(''.join(traceback.format_exc()))
        resp = errcode.get_error_result(error="GetDiskInfoFailure")
        return resp
def monitor_timer(msq, handlers):
    """Timer control loop for the task handlers.

    On startup pauses every timer except the 'statistic' one. Then loops
    forever: every 5 seconds it drains one command from *msq* (expected to
    be one of 'pause' / 'resume' / 'update') and invokes that method on
    every handler. An empty queue is normal; other errors are logged.
    """
    # Default: pause all timer handlers except the statistic timer.
    for timer in handlers:
        if timer.name == "statistic":
            continue
        logger.debug('timer = {}'.format(timer.name))
        timer.pause()
    while True:
        # 1. get queue msg ['pause', 'resume', 'update']
        try:
            cmd_msg = msq.get(block=False)
            for timer in handlers:
                # Dispatch the command name to the matching timer method.
                getattr(timer, cmd_msg)()
            logger.info('monitor_timer exec: {}'.format(cmd_msg))
        except queue.Empty:
            # No pending command — nothing to do this cycle.
            # (Replaces the old `type(err) is queue.Empty` check inside a
            # broad except clause.)
            pass
        except Exception as err:
            logger.error('monitor_timer error: {}'.format(err))
        # 2. exec thread process
        time.sleep(5)