def master_slave_analysis(log_file, log_level, seconds_behind_master, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select role, slave_io_thread, slave_sql_thread, seconds_behind_master, slave_io_state, slave_sql_state from mysql_slave, mysql_role where mysql_role.record_time=mysql_slave.record_time"
    data = db.query_one(sql)
    conn_msg = "slave_conn"
    delay_msg = "slave_delay"
    if data is not None and data[0] == "slave":
        logger.logger.debug("Analyzing MySQL master-slave status...")
        if data[1].lower() == data[2].lower() == "yes":
            conn_flag = 0
            delay_flag = 1 if data[3] >= seconds_behind_master else 0
        else:
            conn_flag = 1
            delay_flag = None
        for flag, msg in [(conn_flag, conn_msg), (delay_flag, delay_msg)]:
            if flag is not None:
                warning_flag = warning.warning(logger, db, flag, "mysql", msg, warning_interval)
                if warning_flag:
                    warning_msg = "MySQL warning:\n" \
                                  "MySQL master-slave connection:\n" \
                                  f"Slave_IO_Running: {data[1]}\n" \
                                  f"Slave_SQL_Running: {data[2]}\n" \
                                  f"Slave_IO_State: {data[4]}\n" \
                                  f"Slave_SQL_Running_State: {data[5]}\n" \
                                  f"Seconds_Behind_Master: {data[3]}"
                    notification.send(logger, warning_msg, notify_dict, msg=msg)
def matching_analysis(log_file, log_level, warning_interval, matching_dict, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = """select record_time, matching_context from matching
             where record_time=(
                 select max(record_time) from matching
                 where matching_context!=? and matching_file=? and matching_key=?
             )"""
    for matching_file in matching_dict:
        data = db.query_one(sql, ("all", matching_file, matching_dict[matching_file]))
        logger.logger.debug("Analyzing log keyword matches...")
        if data is not None:
            if data[1] != 'Nothing':
                warning_msg = f"Line \"{data[1].strip()}\" in file \"{matching_file}\" contains the keyword \"{matching_dict[matching_file]}\""
                msg = f"{matching_file}_{matching_dict[matching_file]}"
                warning_flag = warning.non_remedial_warning(
                    logger, db, "matching", msg, warning_msg, data[0], warning_interval)
                if warning_flag:
                    warning_msg = f"Log analysis warning:\n{warning_msg}\n"
                    notification.send(logger, warning_msg, notify_dict, msg=msg)
def running_analysis(log_file, log_level, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("Analyzing Redis running status...")
    db = database.db()
    sql = "select port, pid from redis_constant where record_time=(select max(record_time) from redis_constant)"
    port, pid = db.query_one(sql)
    flag = 1 if pid == 0 else 0  # whether to warn
    warning_flag = warning.warning(logger, db, flag, "redis", "running", warning_interval)
    if warning_flag:
        warning_msg = f"Redis warning:\nRedis({port}) is not running"
        notification.send(logger, warning_msg, notify_dict, msg='redis_running')
def jvm_analysis(log_file, log_level, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("Analyzing JVM memory status...")
    java_version = db.query_one("select version from tomcat_java_version")[0]
    table_name = f"tomcat_jstat{java_version}"
    sql = f"select port, ygc, ygct, fgc, fgct from {table_name} where record_time=(select max(record_time) from {table_name})"
    data = db.query_all(sql)
    ygc_warning_time = 1    # warn when the average young GC time reaches 1s
    fgc_warning_time = 10   # warn when the average full GC time reaches 10s
    # ygc_warning_time = 0.01
    # fgc_warning_time = 0
    for i in data:
        port = i[0]
        # average GC time = total GC time / GC count, guarding against a zero count
        ygc_time = 0 if i[1] == 0 else i[2] / i[1]
        fgc_time = 0 if i[3] == 0 else i[4] / i[3]

        ygc_flag = 0
        if ygc_time >= ygc_warning_time:
            ygc_flag = 1
            logger.logger.warning(f"Average YGC time of Tomcat({port}): {ygc_time}")
        warning_flag = warning.warning(logger, db, ygc_flag, port, "ygc", warning_interval)
        if warning_flag:
            warning_msg = f"Tomcat warning:\nAverage YGC time of Tomcat({port}) is {ygc_time}\n"
            notification.send(logger, warning_msg, notify_dict, msg=f'tomcat{port}_ygc')

        fgc_flag = 0
        if fgc_time >= fgc_warning_time:
            fgc_flag = 1
            logger.logger.warning(f"Average FGC time of Tomcat({port}): {fgc_time}")
        warning_flag = warning.warning(logger, db, fgc_flag, port, "fgc", warning_interval)
        if warning_flag:
            warning_msg = f"Tomcat warning:\nAverage FGC time of Tomcat({port}) is {fgc_time}\n"
            notification.send(logger, warning_msg, notify_dict, msg=f'tomcat{port}_fgc')
def memory_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select record_time, used_percent from memory order by record_time desc"
    data = db.query_one(sql)
    mem_used_percent = float(data[1])
    logger.logger.debug("Analyzing memory...")
    flag = 0  # whether there is a warning
    if mem_used_percent > warning_percent:
        flag = 1
        logger.logger.warning(f"Memory usage has reached {mem_used_percent}%")
    warning_flag = warning.warning(logger, db, flag, "mem", "used_percent", warning_interval)
    if warning_flag:
        warning_msg = f"Memory warning:\nMemory usage has reached {mem_used_percent}%"
        notification.send(logger, warning_msg, notify_dict, msg='mem_used_percent')
def disk_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select record_time, name, used_percent, mounted from disk where record_time=(select max(record_time) from disk)"
    data = db.query_all(sql)
    logger.logger.debug("Analyzing disk...")
    for i in data:
        flag = 0  # whether there is a warning
        if i[2] >= warning_percent:
            flag = 1
            logger.logger.warning(f"Mount point {i[3]} ({i[1]}) is {i[2]}% used")
        warning_flag = warning.warning(logger, db, flag, "disk", i[3], warning_interval)
        if warning_flag:
            warning_msg = f"Disk warning:\nMount point {i[3]} ({i[1]}) is {i[2]}% used\n"
            notification.send(logger, warning_msg, notify_dict, msg=i[3])
def cpu_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select record_time, cpu_used_percent from cpu order by record_time desc"
    data = db.query_one(sql)
    cpu_used_percent = float(data[1])
    logger.logger.debug("Analyzing CPU...")
    flag = 0  # whether there is a warning
    if cpu_used_percent >= warning_percent:
        flag = 1
        logger.logger.warning(f"CPU usage has reached {cpu_used_percent}%")
    warning_flag = warning.warning(logger, db, flag, "cpu", "used_percent", warning_interval)
    if warning_flag:
        warning_msg = f"CPU warning:\nCPU usage has reached {cpu_used_percent}%"
        notification.send(logger, warning_msg, notify_dict, msg='cpu_used_percent')
def tablespace_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select tablespace_name, used_percent from oracle where record_time=(select max(record_time) from oracle)"
    data = db.query_all(sql)
    logger.logger.debug("Analyzing tablespaces...")
    for i in data:
        flag = 0  # whether there is a warning
        if i[1] >= warning_percent:
            flag = 1
            logger.logger.warning(f"Tablespace {i[0]} is {i[1]}% used")
        warning_flag = warning.warning(logger, db, flag, "oracle", i[0], warning_interval)
        if warning_flag:
            warning_msg = f"Oracle tablespace warning:\nTablespace {i[0]} is {i[1]}% used"
            notification.send(logger, warning_msg, notify_dict, msg=i[0])
def analysis(log_file, log_level, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("Analyzing user resource limits...")
    sql = "select user, nofile, nproc from users_limit where record_time=(select max(record_time) from users_limit)"
    data = db.query_all(sql)
    min_limit = 5000
    for i in data:
        # nofile (max open files) limit
        flag = 0
        arg = "nofile"
        if i[1].isdigit():
            if int(i[1]) < min_limit:
                flag = 1
                cmd = f"echo '{i[0]} - {arg} 65536' >> /etc/security/limits.conf"
                warning_msg = "User resource limit warning:\n" \
                              f"The {arg} value ({i[1]}) of user {i[0]} is too low.\n" \
                              f"Please run as root: {cmd}, then log in to that user again and restart the software running under it"
            warning_flag = warning.warning(logger, db, flag, f"{i[0]}_limit", arg, warning_interval)
            if warning_flag:
                notification.send(logger, warning_msg, notify_dict, msg=f"{i[0]}_limit nofile")
        # nproc (max user processes) limit
        flag = 0
        arg = "nproc"
        if i[2].isdigit():
            if int(i[2]) < min_limit:
                flag = 1
                cmd = f"echo '{i[0]} - {arg} 65536' >> /etc/security/limits.conf"
                warning_msg = "User resource limit warning:\n" \
                              f"The {arg} value ({i[2]}) of user {i[0]} is too low.\n" \
                              f"Please run as root: {cmd}, then log in to that user again and restart the software running under it"
            warning_flag = warning.warning(logger, db, flag, f"{i[0]}_nproc_limit", arg, warning_interval)
            if warning_flag:
                notification.send(logger, warning_msg, notify_dict, msg=f"{i[0]}_limit nproc")
import datetime  # needed for the current-date comparison below


def analysis(log_file, log_level, directory, warning_interval, notify_dict):
    """Generate warnings for backup files:
    1. warn if the backup directory does not exist
    2. warn if today's backup file has not been generated
    3. warn if today's backup file is smaller than 99% of the previous one
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.info(f"Analyzing backup files in directory {directory}...")
    # fetch the two most recent records for this directory (newest first)
    sql = "select record_time, directory, filename, size, ctime from backup where directory=? order by record_time desc, ctime desc limit 2"
    data = db.query_all(sql, (directory, ))
    now_time = datetime.datetime.now().strftime("%Y-%m-%d")
    flag = 0  # whether there is a warning
    value = None
    if len(data) < 2:
        if data[0][2] is None:
            flag = 1
            value = "dir_is_None"
            warning_msg = f"Backup warning:\nBackup directory ({directory}) does not exist"
        else:
            if now_time not in data[0][4]:
                flag = 1
                warning_msg = f"Backup warning:\nToday's backup file in directory ({directory}) has not been generated"
                value = "file_is_None"
    else:
        if now_time not in data[0][4]:
            flag = 1
            warning_msg = f"Backup warning:\nToday's backup file in directory ({directory}) has not been generated"
            value = "file_is_None"
        elif data[0][3] < data[1][3] * 0.99:
            flag = 1
            warning_msg = f"Backup warning:\nToday's backup file in directory ({directory}) ({format_size(data[0][3])}) is noticeably smaller than the previous one ({format_size(data[1][3])})"
            value = "file_is_small"
    warning_flag = warning.warning(logger, db, flag, f"backup {directory}", value, warning_interval)
    if warning_flag:
        notification.send(logger, warning_msg, notify_dict, msg=f"{directory} {value}")
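# The message above relies on a format_size() helper that is not shown in this
# excerpt. Below is a minimal sketch of what such a helper might look like,
# assuming the size column stores a byte count (both the behaviour and the
# unit are assumptions, not taken from this module).
def format_size(size):
    """Render a byte count as a human-readable string, e.g. 1536 -> '1.50KB' (sketch only)."""
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if size < 1024 or unit == "TB":
            return f"{size:.2f}{unit}"
        size /= 1024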
def running_analysis(log_file, log_level, warning_interval, notify_dict):
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("Analyzing Tomcat running status...")
    db = database.db()
    # sql = "select record_time, port, pid from tomcat_constant where (port, record_time) in (select port, max(record_time) from tomcat_constant group by port)"
    # Older sqlite3 versions do not support multi-column IN queries, unfortunately...
    sql = "select port, pid from tomcat_constant where record_time=(select max(record_time) from tomcat_constant)"
    data = db.query_all(sql)
    for i in data:
        flag = 0
        if i[1] == 0:
            flag = 1
        warning_flag = warning.warning(logger, db, flag, i[0], "running", warning_interval)
        if warning_flag:
            warning_msg = f"Tomcat warning:\nTomcat({i[0]}) is not running\n"
            notification.send(logger, warning_msg, notify_dict, msg=f'tomcat{i[0]}_running')
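# Hypothetical usage sketch (not part of the original code): how a scheduler
# entry point might invoke a few of these checks. The log path, thresholds,
# intervals and the notify_dict layout below are illustrative assumptions,
# not values taken from this module.
if __name__ == "__main__":
    notify = {"mail": ["ops@example.com"]}   # assumed notification target layout
    cpu_analysis("autocheck.log", "info", warning_percent=90,
                 warning_interval=3600, notify_dict=notify)
    memory_analysis("autocheck.log", "info", warning_percent=90,
                    warning_interval=3600, notify_dict=notify)
    disk_analysis("autocheck.log", "info", warning_percent=85,
                  warning_interval=3600, notify_dict=notify)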