class Monitor():
    """Check process/port health of installed packages and report the result.

    ``run`` spawns one gevent greenlet per package; each greenlet executes
    ``monitorPkg``, which inspects the package, reports its status and, when
    the package configuration asks for it, triggers a restart or a custom
    repair through ``pkgOp``.
    """

    def __init__(self, app_base, install_path=None, debug=False):
        """Store the monitor settings.

        Args:
            app_base: value handed to ``pkgOp`` as its first argument.
            install_path: ``'all'`` to process every package returned by
                ``PkgCommon.getPkgList``; any other value is looked up with
                ``PkgCommon.getPkgId``.
            debug: when true the per-package logger is configured with
                level ``'DEBUG'``, otherwise ``'INFO'``.
        """
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        """Monitor the selected package(s) concurrently.

        Returns:
            ``(1, "<install_path> not a valid package")`` when a single
            install path was requested and ``getPkgId`` returned a falsy
            value; otherwise ``(0, "ok")`` once every greenlet has finished.
        """
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if not pkg_info:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [pkg_info]}

        # Only packages listed under the 'pkg' type are handled.
        pkg_info_list = info_list['pkg'] if 'pkg' in info_list else []
        t_list = [gevent.spawn(self.monitorPkg, pkg_info)
                  for pkg_info in pkg_info_list]
        gevent.joinall(t_list)
        return 0, "ok"

    def _formatProcs(self, proc_names, proc_conf, proc_num_list):
        """Render processes as ``name:min,max|current`` joined by ``##``.

        Args:
            proc_names: iterable of process names to render.
            proc_conf: mapping of process name to its ``proc_list`` entry.
            proc_num_list: mapping of process name to the value returned for
                it by ``PkgCommon.getProcNum``.

        Returns:
            The joined string, or ``""`` when ``proc_names`` is empty.
        """
        entries = []
        for proc_name in proc_names:
            conf = proc_conf[proc_name]
            entries.append("%s:%s,%s|%s" % (
                proc_name,
                conf.get('proc_num_min', 0),
                conf.get('proc_num_max', 0),
                proc_num_list[proc_name]))
        return '##'.join(entries)

    def reportStatus(self, pkg_id, pkg_conf_path, inst_info, status,
                     err_info=None):
        """Build the status record of one package and hand it to ``report``.

        Args:
            pkg_id: package id, reported as a dimension.
            pkg_conf_path: directory containing ``package.conf.yaml``.
            inst_info: instance info mapping; ``inst_info['installPath']``
                is reported as a dimension.
            status: package status value, reported as-is.
            err_info: optional mapping with the keys ``err_proc``,
                ``ok_proc``, ``err_port`` and ``ok_port``. Missing keys are
                treated as empty. Defaults to no information at all.

        The alert status is 1 when ``err_proc`` or ``err_port`` is
        non-empty and 0 otherwise. Nothing is returned.
        """
        # A ``{}`` default would be one dict object shared by every call.
        err_info = err_info or {}

        conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
        # NOTE(review): ``or []`` assumes getConfig may yield None/empty when
        # 'proc_list' is absent -- confirm against common.getConfig.
        proc_conf = {}
        for proc in common.getConfig(conf_file, "proc_list") or []:
            proc_conf[proc['proc_name']] = proc
        proc_num_list = self.pkgCom.getProcNum()

        has_error = bool(err_info.get('err_proc') or err_info.get('err_port'))

        dims = {
            'process.process.package_id': pkg_id,
            'process.process.version_id': "",
            'process.process.install_path': inst_info['installPath'],
        }
        vals = {
            'process.process.package_status': status,
            'process.process.alert_status': 1 if has_error else 0,
            'process.process.normal_processes': self._formatProcs(
                err_info.get('ok_proc', []), proc_conf, proc_num_list),
            'process.process.abnormal_processes': self._formatProcs(
                err_info.get('err_proc', []), proc_conf, proc_num_list),
            'process.process.normal_ports': "##".join(
                map(str, err_info.get('ok_port', []))),
            'process.process.abnormal_ports': "##".join(
                map(str, err_info.get('err_port', []))),
        }

        # ``report`` is a module-level name; presumably falsy when no
        # reporting backend is available -- TODO confirm.
        if report:
            report(data_id=3000, dims=dims, vals=vals)

    def _loadYaml(self, path):
        """Parse the YAML file at ``path`` and return the loaded object."""
        with open(path, 'r') as fp:
            # NOTE(security): yaml.load without an explicit Loader can build
            # arbitrary Python objects. These files are presumably written by
            # this tool itself; consider yaml.safe_load if that is not
            # guaranteed.
            return yaml.load(fp)

    @catch_except  # keeps one package's failure from affecting the others
    def monitorPkg(self, pkg_info):
        """Check one package and apply its configured guard action.

        Args:
            pkg_info: mapping with the keys ``packageId``, ``confPath`` and
                ``installPath``.

        Returns:
            ``(0, 'ok')`` when the package has no ``package.status`` file or
            its status is ``'stopped'``; ``(2008, "get lock error")`` when
            the package lock cannot be obtained; otherwise ``(code, msg)``
            where the pair comes from ``pkgOp.stopStart`` / ``pkgOp.resolve``
            when a guard action ran, and is ``(0, <message>)`` when nothing
            had to be done or the guard is not configured.
        """
        import random
        # Random start delay of 0-15 s (presumably to stagger the greenlets).
        gevent.sleep(random.randint(0, 15))

        pkg_id = pkg_info['packageId']
        pkg_conf_path = pkg_info['confPath']
        pkg_install_path = pkg_info['installPath']

        # Initialise the log configuration.
        st = time.time()
        mLog = self.pkgCom.configLog(pkg_install_path, 'monitor',
                                     self.log_level)
        mLog.info('check process start: %s' % pkg_install_path)

        # Read the instance information (the last entry of the file).
        inst_conf_file = os.path.join(pkg_conf_path, 'instance.conf.yaml')
        inst_info = self._loadYaml(inst_conf_file).pop()

        # Check whether the package has been started.
        status_conf_file = os.path.join(pkg_conf_path, 'package.status')
        if not os.path.isfile(status_conf_file):
            return 0, 'ok'
        start_status = self._loadYaml(status_conf_file)['status']
        if start_status == 'stopped':
            self.reportStatus(pkg_id, pkg_conf_path, inst_info,
                              status=start_status)
            mLog.info('package status is stopped: %s %s' %
                      (pkg_install_path, round(time.time() - st, 4)))
            return 0, 'ok'

        # Take the file lock before checking. Fail fast: the process check
        # itself is scheduled once a minute anyway.
        if not self.pkgCom.getLock(pkg_conf_path, timeout=10):
            mLog.error("get lock error")
            return 2008, "get lock error"

        try:
            # Check process and port state against the package configuration.
            conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
            err_proc, ok_proc = self.pkgCom.checkProcStatus(
                conf_file, install_path=inst_info.get('installPath'))
            err_port, ok_port = self.pkgCom.checkPort(
                conf_file, install_path=inst_info.get('installPath'))
            proc_config = common.getConfig(conf_file, "proc_guard")
            port_config = common.getConfig(conf_file, "port_guard")

            err_info = {
                'err_proc': err_proc,
                'ok_proc': ok_proc,
                'err_port': err_port,
                'ok_port': ok_port
            }
            self.reportStatus(pkg_id, pkg_conf_path, inst_info,
                              status=start_status, err_info=err_info)

            code = 0
            msg = 'ok'
            err_msg = ""
            if err_proc:
                err_msg += ",error process:" + ",".join(err_proc)
            if err_port:
                err_msg += ",error port:" + ",".join(map(str, err_port))

            # Package operation object.
            op = pkgOp(self.APP_BASE, inst_info['installPath'])
            if (err_proc and proc_config == 'stopStart') or \
                    (err_port and port_config == 'stopStart'):
                msg = "process error,monitor run stopStart:" + err_msg
                mLog.info(msg)
                code, msg = op.stopStart(inst_info['packageId'],
                                         inst_info['installPath'])
            elif (err_proc and proc_config == 'custom') or \
                    (err_port and port_config == 'custom'):
                msg = "process error,monitor run custom script:" + err_msg
                mLog.info(msg)
                code, msg = op.resolve(inst_info['packageId'],
                                       inst_info['installPath'])
            elif err_proc or err_port:
                msg = "process error,do nothing:" + err_msg
                mLog.info(msg)
        finally:
            # Always release the lock, even when a check or a guard action
            # raised; otherwise later runs would fail with "get lock error".
            self.pkgCom.unLock(pkg_conf_path)

        mLog.info('check process end: %s %s' %
                  (pkg_install_path, round(time.time() - st, 4)))
        return code, msg
class Clear():
    """Apply the ``clear_file`` rules of installed packages.

    ``run`` spawns one gevent greenlet per package; each greenlet executes
    ``clear_pkg``, which reads the package's rules and dispatches every rule
    to ``delete_file``, ``clear_file`` or ``tar_file``.
    """

    def __init__(self, app_base, install_path=None, debug=False):
        """Store the cleaner settings.

        Args:
            app_base: stored as ``APP_BASE``.
            install_path: ``'all'`` to process every package returned by
                ``PkgCommon.getPkgList``; any other value is looked up with
                ``PkgCommon.getPkgId``.
            debug: when true the per-package logger is configured with
                level ``'DEBUG'``, otherwise ``'INFO'``.
        """
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        """Run the clean-up of the selected package(s) concurrently.

        Returns:
            ``(1, "<install_path> not a valid package")`` when a single
            install path was requested and ``getPkgId`` returned a falsy
            value; otherwise ``(0, "ok")`` once every greenlet has finished.
        """
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if not pkg_info:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [pkg_info]}

        # Only packages listed under the 'pkg' type are handled.
        pkg_info_list = info_list['pkg'] if 'pkg' in info_list else []
        t_list = [gevent.spawn(self.clear_pkg, pkg_info)
                  for pkg_info in pkg_info_list]
        gevent.joinall(t_list)
        return 0, "ok"

    @catch_except
    def clear_pkg(self, pkg_info):
        """Apply every clean-up rule configured for one package.

        Args:
            pkg_info: mapping with the keys ``confPath`` and ``installPath``.

        Each rule from ``get_config`` is read as
        ``path limit cmd param target``. A relative ``path`` is resolved
        against the package install path. ``cmd`` selects the handler
        (``delete``, ``clear`` or ``tar``); rules with any other command are
        skipped.

        Returns:
            ``(0, 'ok')``.
        """
        # Random start delay of 0-15 s (presumably to stagger the greenlets).
        gevent.sleep(random.randint(0, 15))

        pkg_conf_path = pkg_info['confPath']
        install_path = pkg_info['installPath']

        st = time.time()
        cLog = self.pkgCom.configLog(install_path, 'clear', self.log_level)
        cLog.info('clear file start: %s' % install_path)

        conf_file = os.path.join(pkg_conf_path, "package.conf.yaml")
        for config in self.get_config(conf_file):
            path, limit, cmd, param, target = config[:5]
            if not os.path.isabs(path):
                path = os.path.join(install_path, path)

            # Handlers are looked up lazily so that a command that is never
            # configured does not need its handler to exist.
            if cmd == "delete":
                self.delete_file(path=path, target=target, param=param,
                                 limit=limit, log=cLog)
            elif cmd == "clear":
                self.clear_file(path=path, target=target, param=param,
                                limit=limit, log=cLog)
            elif cmd == "tar":
                # TODO: not implemented yet (per the original author's note).
                self.tar_file(path=path, target=target, param=param,
                              limit=limit, log=cLog)

        cLog.info('clear file end: %s %s' %
                  (install_path, round(time.time() - st, 4)))
        return 0, 'ok'

    def get_config(self, config_file):
        """Parse the ``clear_file`` section into a list of rule fields.

        The section is read as text, one rule per line. Lines whose first
        non-blank character is ``#`` are ignored, and a trailing ``# ...``
        comment (outside quotes) is cut off. Each remaining line is split on
        whitespace; lines with fewer than five fields are dropped.

        Args:
            config_file: path passed to ``common.getConfig``.

        Returns:
            A list of field lists, each with at least five items. Empty when
            the section is missing or empty.
        """
        clear_conf = common.getConfig(config_file, 'clear_file')
        if not clear_conf:
            # No rules configured: nothing to parse.
            return []

        comment_line = re.compile(r"\s*#")
        trailing_comment = re.compile(
            r"^((\"[^\"]*\"|'[^']*'|[^'\"#])*)(#.*)$")

        real_conf = []
        for line in clear_conf.splitlines():
            # Skip lines that are entirely a comment.
            if comment_line.match(line) is not None:
                continue
            # Strip a comment at the end of the line.
            found = trailing_comment.match(line)
            conf_line = found.group(1) if found is not None else line
            # str.split() ignores leading/trailing whitespace. Splitting with
            # re.split(r'\s+') produced empty fields there, which could turn
            # a four-field line into a rule with an empty target that matches
            # every file name.
            conf = conf_line.split()
            if len(conf) < 5:
                continue
            real_conf.append(conf)
        return real_conf

    def delete_file(self, path, target, param, limit, log):
        """Delete files under ``path`` that match ``target`` and are too old.

        Args:
            path: directory to walk recursively.
            target: file-name pattern. Every ``*`` is replaced by ``.*`` and
                the result is used as a regular expression matched at the
                start of the file name.
            param: maximum age. A trailing ``h``, ``m`` or ``d`` selects
                hours, minutes or days; a bare number means days.
            limit: passed to ``check_limit`` together with ``path``.
            log: logger used for progress messages.

        Returns:
            The ``(code, msg)`` pair from ``check_limit`` when its code is 0
            (no deletion is attempted in that case); otherwise ``(0, 'ok')``.

        Raises:
            ValueError: when the numeric part of ``param`` is not an integer.
        """
        code, msg = self.check_limit(path, limit)
        if code == 0:
            return code, msg

        # NOTE(review): other regex metacharacters in ``target`` (e.g. '.')
        # are not escaped and the match is not anchored at the end, so
        # "*.log" also matches "a.log.1" -- confirm this is intended.
        name_re = re.compile(target.replace('*', '.*'))

        unit_seconds = {'h': 3600, 'm': 60, 'd': 24 * 3600}
        unit = param[-1:]
        if unit in unit_seconds:
            max_age = int(param[:-1]) * unit_seconds[unit]
        else:
            max_age = int(param) * 24 * 3600
        limit_mtime = time.time() - max_age

        for root, _dirs, files in os.walk(path):
            for name in files:
                if not name_re.match(name):
                    continue
                filepath = os.path.join(root, name)
                try:
                    mtime = os.stat(filepath).st_mtime
                except OSError:
                    # File vanished (or is a dangling link) since the walk.
                    continue
                if mtime >= limit_mtime:
                    continue

                log.info("begin delete, file:[%s]" % (filepath))
                try:
                    os.remove(filepath)
                except OSError as e:
                    # Best effort: record why and carry on with the rest.
                    log.info("delete failed, file:[%s], error:[%s]" %
                             (filepath, e))
                else:
                    log.info("delete success, file:[%s]" % (filepath))
        return 0, 'ok'