Пример #1
0
class Monitor():
    """Health monitor for installed packages.

    For every selected package it checks the configured processes and ports,
    reports the outcome through the module-level ``report`` callable and,
    when something is wrong, runs the recovery action selected by the
    package's ``proc_guard`` / ``port_guard`` settings.
    """

    def __init__(self, app_base, install_path=None, debug=False):
        """Store the monitor settings.

        Args:
            app_base: Base path handed to ``pkgOp`` when a recovery action
                has to be run.
            install_path: Install path of the package to check, or ``'all'``
                to check every package returned by ``getPkgList``.
            debug: When true, per-package loggers are configured at DEBUG
                instead of INFO.
        """
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        """Check the selected package(s) concurrently.

        One gevent task is spawned per package and all of them are joined
        before returning; the individual ``monitorPkg`` results are not
        inspected here.

        Returns:
            ``(1, message)`` when ``install_path`` does not resolve to a
            package, otherwise ``(0, "ok")``.
        """
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if not pkg_info:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [pkg_info]}

        # Only packages whose type is 'pkg' are handled.
        pkg_info_list = info_list['pkg'] if 'pkg' in info_list else []
        t_list = [gevent.spawn(self.monitorPkg, pkg_info)
                  for pkg_info in pkg_info_list]
        gevent.joinall(t_list)
        return 0, "ok"

    def _formatProcs(self, proc_names, proc_list, proc_num_list):
        """Render processes as '##'-joined ``name:min,max|num`` entries.

        ``min``/``max`` come from the process' ``proc_num_min`` /
        ``proc_num_max`` settings (0 when absent) and ``num`` is the entry
        for that process in ``proc_num_list``.
        """
        entries = []
        for proc_name in proc_names:
            proc = proc_list[proc_name]
            entries.append("%s:%s,%s|%s" % (proc_name,
                                            proc.get('proc_num_min', 0),
                                            proc.get('proc_num_max', 0),
                                            proc_num_list[proc_name]))
        return '##'.join(entries)

    def reportStatus(self,
                     pkg_id,
                     pkg_conf_path,
                     inst_info,
                     status,
                     err_info=None):
        """Build the status record of one package and send it via ``report``.

        Args:
            pkg_id: Package id, reported as ``process.process.package_id``.
            pkg_conf_path: Directory that contains ``package.conf.yaml``.
            inst_info: Mapping with at least the key ``'installPath'``.
            status: Value reported as ``process.process.package_status``.
            err_info: Optional mapping with the keys ``'err_proc'``,
                ``'ok_proc'``, ``'err_port'`` and ``'ok_port'``; missing keys
                (or ``None``) are treated as empty.

        The alert status is 1 as soon as there is a failing process or port,
        otherwise 0. Nothing is sent when the module-level ``report`` is
        falsy.
        """
        # A None default avoids sharing one dict object between calls.
        if err_info is None:
            err_info = {}

        has_error = bool(err_info.get('err_proc') or err_info.get('err_port'))
        dims = {
            'process.process.package_id': pkg_id,
            'process.process.version_id': "",
            'process.process.install_path': inst_info['installPath'],
        }
        vals = {
            'process.process.package_status': status,
            'process.process.alert_status': 1 if has_error else 0,
        }

        # Index the configured processes by name to look up their limits.
        conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
        proc_config = common.getConfig(conf_file, "proc_list")
        proc_list = {}
        for proc in proc_config:
            proc_list[proc['proc_name']] = proc
        proc_num_list = self.pkgCom.getProcNum()

        vals['process.process.normal_processes'] = self._formatProcs(
            err_info.get('ok_proc', []), proc_list, proc_num_list)
        vals['process.process.abnormal_processes'] = self._formatProcs(
            err_info.get('err_proc', []), proc_list, proc_num_list)
        vals['process.process.normal_ports'] = "##".join(
            map(str, err_info.get('ok_port', [])))
        vals['process.process.abnormal_ports'] = "##".join(
            map(str, err_info.get('err_port', [])))

        if report:
            report(data_id=3000, dims=dims, vals=vals)

    @catch_except  # makes sure packages do not affect each other
    def monitorPkg(self, pkg_info):
        """Check one package and run its configured recovery action.

        Args:
            pkg_info: Mapping with the keys ``'packageId'``, ``'confPath'``
                and ``'installPath'``.

        Returns:
            ``(code, msg)``:
            ``(0, 'ok')`` when the package has no status file, is stopped,
            or is healthy; ``(2008, 'get lock error')`` when the package
            lock cannot be obtained; the result of ``pkgOp.stopStart`` /
            ``pkgOp.resolve`` when a recovery action was run; and
            ``(0, <description>)`` when errors were found but no action is
            configured.
        """
        import random
        # Random 0-15s start delay; presumably spreads the checks of many
        # packages over time -- TODO confirm intent.
        gevent.sleep(random.randint(0, 15))

        pkg_id = pkg_info['packageId']
        pkg_conf_path = pkg_info['confPath']
        pkg_install_path = pkg_info['installPath']

        # Initialise the log configuration.
        st = time.time()
        mLog = self.pkgCom.configLog(pkg_install_path, 'monitor',
                                     self.log_level)
        mLog.info('check process start: %s' % pkg_install_path)

        # Read the instance information.
        inst_conf_file = os.path.join(pkg_conf_path, 'instance.conf.yaml')
        with open(inst_conf_file, 'r') as fp:
            # NOTE(review): yaml.load can construct arbitrary objects;
            # consider yaml.safe_load if this file is not fully trusted.
            conf_info = yaml.load(fp)
        inst_info = conf_info.pop()

        # Check whether the package has been started.
        status_conf_file = os.path.join(pkg_conf_path, 'package.status')
        if not os.path.isfile(status_conf_file):
            return 0, 'ok'
        with open(status_conf_file, 'r') as fp:
            # NOTE(review): same yaml.load caveat as above.
            status_info = yaml.load(fp)
        start_status = status_info['status']

        if start_status == 'stopped':
            self.reportStatus(pkg_id,
                              pkg_conf_path,
                              inst_info,
                              status=start_status)
            mLog.info('package status is stopped: %s %s' %
                      (pkg_install_path, round(time.time() - st, 4)))
            return 0, 'ok'

        # Take the file lock before checking. Fail fast, because the process
        # check itself is executed once a minute.
        ret = self.pkgCom.getLock(pkg_conf_path, timeout=10)
        if not ret:
            mLog.error("get lock error")
            return 2008, "get lock error"

        try:
            # Check process and port state according to the package config.
            conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
            err_proc, ok_proc = self.pkgCom.checkProcStatus(
                conf_file, install_path=inst_info.get('installPath'))
            err_port, ok_port = self.pkgCom.checkPort(
                conf_file, install_path=inst_info.get('installPath'))
            proc_config = common.getConfig(conf_file, "proc_guard")
            port_config = common.getConfig(conf_file, "port_guard")
            err_info = {
                'err_proc': err_proc,
                'ok_proc': ok_proc,
                'err_port': err_port,
                'ok_port': ok_port
            }
            self.reportStatus(pkg_id,
                              pkg_conf_path,
                              inst_info,
                              status=start_status,
                              err_info=err_info)

            code = 0
            msg = 'ok'

            err_msg = ""
            if err_proc:
                err_msg += ",error process:" + ",".join(err_proc)
            if err_port:
                err_msg += ",error port:" + ",".join(map(str, err_port))

            # Package operation object.
            op = pkgOp(self.APP_BASE, inst_info['installPath'])
            want_restart = ((err_proc and proc_config == 'stopStart')
                            or (err_port and port_config == 'stopStart'))
            want_custom = ((err_proc and proc_config == 'custom')
                           or (err_port and port_config == 'custom'))
            if want_restart:
                msg = "process error,monitor run stopStart:" + err_msg
                mLog.info(msg)
                code, msg = op.stopStart(inst_info['packageId'],
                                         inst_info['installPath'])
            elif want_custom:
                msg = "process error,monitor run custom script:" + err_msg
                mLog.info(msg)
                code, msg = op.resolve(inst_info['packageId'],
                                       inst_info['installPath'])
            elif err_proc or err_port:
                msg = "process error,do nothing:" + err_msg
                mLog.info(msg)
        finally:
            # Always release the lock, even when a check or a recovery
            # action raises; otherwise later runs cannot obtain it.
            self.pkgCom.unLock(pkg_conf_path)

        mLog.info('check process end: %s %s' %
                  (pkg_install_path, round(time.time() - st, 4)))
        return code, msg
Пример #2
0
class Clear():
    def __init__(self, app_base, install_path=None, debug=False):
        """Remember the cleaner settings.

        ``install_path`` selects the package to clean (``'all'`` means every
        package, see ``run``); ``debug`` switches the per-package log level
        from INFO to DEBUG.
        """
        level = 'INFO'
        if debug:
            level = 'DEBUG'

        self.APP_BASE, self.CONF_BASE = app_base, _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = level

    def run(self):
        """Clean the selected package(s), one gevent task per package.

        Returns ``(1, message)`` when ``install_path`` does not resolve to a
        package; otherwise waits for all tasks and returns ``(0, "ok")``.
        """
        if self.install_path != 'all':
            single = self.pkgCom.getPkgId(self.install_path)
            if not single:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [single]}
        else:
            info_list = self.pkgCom.getPkgList()

        # Only packages whose type is 'pkg' are handled.
        targets = info_list['pkg'] if 'pkg' in info_list else []
        jobs = [gevent.spawn(self.clear_pkg, item) for item in targets]
        gevent.joinall(jobs)
        return 0, "ok"

    @catch_except
    def clear_pkg(self, pkg_info):
        """Apply every cleanup rule of one package.

        Each rule from ``get_config`` is ``[path, limit, cmd, param,
        target, ...]``; relative paths are resolved against the package's
        install path and ``cmd`` picks the handler (``delete``, ``clear`` or
        ``tar``). Rules with any other command are skipped. Always returns
        ``(0, 'ok')``.
        """
        gevent.sleep(random.randint(0, 15))

        pkg_id = pkg_info['packageId']
        pkg_conf_path = pkg_info['confPath']
        install_path = pkg_info['installPath']

        started = time.time()
        cLog = self.pkgCom.configLog(install_path, 'clear', self.log_level)
        cLog.info('clear file start: %s' % install_path)

        # Command -> handler method name. The method is resolved only when
        # its command is actually requested ("tar" is marked as not yet
        # implemented in the original notes).
        handler_names = {
            "delete": "delete_file",
            "clear": "clear_file",
            "tar": "tar_file",
        }

        conf_file = os.path.join(pkg_conf_path, "package.conf.yaml")
        for rule in self.get_config(conf_file):
            path, limit, cmd, param, target = (rule[0], rule[1], rule[2],
                                               rule[3], rule[4])
            if not os.path.isabs(path):
                path = os.path.join(install_path, path)
            if cmd not in handler_names:
                continue
            handler = getattr(self, handler_names[cmd])
            handler(path=path,
                    target=target,
                    param=param,
                    limit=limit,
                    log=cLog)

        elapsed = round(time.time() - started, 4)
        cLog.info('clear file end: %s %s' % (install_path, elapsed))
        return 0, 'ok'

    def get_config(self, config_file):
        """Parse the ``clear_file`` section of a package config.

        The section is read as text, one rule per line. Lines whose first
        non-blank character is ``#`` are ignored and a trailing ``# ...``
        comment (outside of quotes) is cut off. The remainder is split on
        whitespace; lines with fewer than five fields are dropped.

        Fields are split with ``str.split()`` so that leading or trailing
        whitespace never produces empty fields. With a regex split, a
        four-field line followed by a space or a comment would gain an empty
        fifth field (an empty target pattern matches every file name), and an
        indented line would have all its fields shifted by one.

        Args:
            config_file: Path of the package config file.

        Returns:
            A list of field lists, each with at least five entries
            (path, limit, command, parameter, target). Empty when the
            section is missing or empty.
        """
        clear_conf = common.getConfig(config_file, 'clear_file')
        if not clear_conf:
            return []

        # Lines that start with '#' (after optional whitespace).
        comment_line = re.compile(r"\s*#")
        # Group 1 is everything before a '#' that is not inside quotes.
        trailing_comment = re.compile(
            r"^((\"[^\"]*\"|'[^']*'|[^'\"#])*)(#.*)$")

        real_conf = []
        for line in clear_conf.splitlines():
            if comment_line.match(line):
                continue
            found = trailing_comment.match(line)
            conf_line = found.group(1) if found else line
            conf = conf_line.split()
            if len(conf) < 5:
                continue
            real_conf.append(conf)

        return real_conf

    def delete_file(self, path, target, param, limit, log):
        """Delete old files below ``path`` whose names match ``target``.

        Args:
            path: Directory that is walked recursively.
            target: File-name pattern. Every ``*`` is replaced by ``.*`` and
                the result is applied with ``re.match``, i.e. anchored at the
                start of the name only.
            param: Age threshold: an integer followed by ``h`` (hours),
                ``m`` (minutes) or ``d`` (days); a bare integer means days.
            limit: Passed to ``self.check_limit`` together with ``path``.
            log: Logger used for the per-file messages.

        Returns:
            The ``(code, msg)`` of ``check_limit`` when its code is 0 (no
            file is touched in that case), otherwise ``(0, 'ok')`` once the
            walk is finished. A file that cannot be removed is logged and
            skipped.
        """
        code, msg = self.check_limit(path, limit)
        if code == 0:
            return code, msg

        # NOTE(review): only '*' is translated; other regex metacharacters
        # in target (e.g. '.') keep their regex meaning -- confirm intended.
        name_pattern = re.compile(target.replace('*', '.*'))

        # Translate the age threshold into seconds; default unit is days.
        amount = param
        unit_seconds = 24 * 3600
        for unit, seconds in (('h', 3600), ('m', 60), ('d', 24 * 3600)):
            if param.endswith(unit):
                amount = param.strip(unit)
                unit_seconds = seconds
                break
        limit_mtime = time.time() - int(amount) * unit_seconds

        for root, dirs, files in os.walk(path):
            for name in files:
                filepath = os.path.join(root, name)
                try:
                    mtime = os.stat(filepath).st_mtime
                except OSError:
                    # Vanished since the directory listing (or a broken
                    # symlink): nothing to delete, keep walking.
                    continue
                if mtime >= limit_mtime or not name_pattern.match(name):
                    continue
                log.info("begin delete, file:[%s]" % (filepath))
                try:
                    os.remove(filepath)
                    log.info("delete success, file:[%s]" % (filepath))
                except Exception as e:
                    log.info("delete failed, file:[%s], err:[%s]" %
                             (filepath, e))
        return 0, 'ok'