def insertMonitor(ip, cmd, monitime):
    '''
    Store one monitoring record in the osa_monitor table.

    @ip: IP of the monitored host
    @cmd: command sent to the host (includes the command name)
    @monitime: value written to the oMonTime column

    The host id comes from getIdByIp() and the command id from
    addCmdgetId(); either falls back to 0 when it cannot be resolved.
    The stored text is the second '!'-separated field of the remote
    reply, or 'null' when the host did not answer.

    If issuing the insert through _get_pcon() raises TypeError, the
    insert is retried on a connection from _get_con(), the lock file is
    removed, a WARNING is logged and sys.exit() is called.
    '''
    # Hosts that cannot be resolved are recorded with id 0.
    oipid = getIdByIp(ip)
    if oipid == 'null' or not oipid:
        oipid = 0

    # Fetch data from the remote server only when its port answers.
    oMonTextRemote = False
    if hostSocket.PortIsAlive(ip, SOCKET['REMOTE_PORT']):
        oMonTextRemote = hostSocket.proSocket(ip, SOCKET['REMOTE_PORT'], cmd)
    if oMonTextRemote:
        oMonText = oMonTextRemote.split('!')
    else:
        oMonText = ['mon_all_stat', 'null']

    # Take the lock used to avoid contention on the database.
    lock = setLock()

    # Resolve the command id; retry once after a random 0-10 s pause.
    try:
        cmdid = addCmdgetId(cmd)
        if cmdid == 'null' or not cmdid:
            time.sleep(random.randint(0, 10))
            cmdid = addCmdgetId(cmd)
    except Exception as e:
        time.sleep(random.randint(0, 10))
        cmdid = 0
        save_log('ERROR', 'cmdid error:' + str(e) + ',' + cmd)

    cmdtext = oMonText[1]

    # Build the statement before the try block: the TypeError handler
    # re-executes it, so it must already exist when the handler runs.
    # NOTE(review): monitime and cmdtext are interpolated directly into
    # the SQL text and cmdtext originates from the remote host; switch to
    # the driver's parameter binding once the DB-API module is confirmed.
    sql = ("insert into osa_monitor(oIpid, oCmdid, oMonTime, oMonText) "
           "values (%(oipid)d, %(cmdid)d, '%(monitime)s','%(cmdtext)s')") % {
        'oipid': oipid,
        'cmdid': cmdid,
        'monitime': monitime,
        'cmdtext': cmdtext,
    }

    try:
        con = _get_pcon()
        cur = con.cursor()
        cur.execute(sql)
    except TypeError as e:
        # Fall back to a connection from _get_con(), then release the
        # lock file and stop this thread.
        con = _get_con()
        cur = con.cursor()
        cur.execute(sql)
        os.system('rm -f ' + lock)
        save_log('WARNING', 'insertMonitor : ' + str(e))
        sys.exit()
    # NOTE(review): on the success path the visible code neither commits
    # nor removes the lock file; confirm nothing was lost from this
    # function.
def insertMonitor(ip, cmd, monitime):
    '''
    Store one monitoring record in the osa_monitor table.

    @ip: IP of the monitored host
    @cmd: command sent to the host (includes the command name)
    @monitime: value written to the oMonTime column

    The host id comes from getIdByIp() and the command id from
    addCmdgetId(); either falls back to 0 when it cannot be resolved.
    The stored text is the second '!'-separated field of the remote
    reply, or 'null' when the host did not answer.

    If issuing the insert through _get_pcon() raises TypeError, the
    insert is retried on a connection from _get_con(), the lock file is
    removed, a WARNING is logged and sys.exit() is called.
    '''
    # Hosts that cannot be resolved are recorded with id 0.
    oipid = getIdByIp(ip)
    if oipid == 'null' or not oipid:
        oipid = 0

    # Fetch data from the remote server only when its port answers.
    oMonTextRemote = False
    if hostSocket.PortIsAlive(ip, SOCKET['REMOTE_PORT']):
        oMonTextRemote = hostSocket.proSocket(ip, SOCKET['REMOTE_PORT'], cmd)
    if oMonTextRemote:
        oMonText = oMonTextRemote.split('!')
    else:
        oMonText = ['mon_all_stat', 'null']

    # Take the lock used to avoid contention on the database.
    lock = setLock()

    # Resolve the command id; retry once after a random 0-10 s pause.
    try:
        cmdid = addCmdgetId(cmd)
        if cmdid == 'null' or not cmdid:
            time.sleep(random.randint(0, 10))
            cmdid = addCmdgetId(cmd)
    except Exception as e:
        time.sleep(random.randint(0, 10))
        cmdid = 0
        save_log('ERROR', 'cmdid error:' + str(e) + ',' + cmd)

    cmdtext = oMonText[1]

    # Build the statement before the try block: the TypeError handler
    # re-executes it, so it must already exist when the handler runs.
    # NOTE(review): monitime and cmdtext are interpolated directly into
    # the SQL text and cmdtext originates from the remote host; switch to
    # the driver's parameter binding once the DB-API module is confirmed.
    sql = ("insert into osa_monitor(oIpid, oCmdid, oMonTime, oMonText) "
           "values (%(oipid)d, %(cmdid)d, '%(monitime)s','%(cmdtext)s')") % {
        'oipid': oipid,
        'cmdid': cmdid,
        'monitime': monitime,
        'cmdtext': cmdtext,
    }

    try:
        con = _get_pcon()
        cur = con.cursor()
        cur.execute(sql)
    except TypeError as e:
        # Fall back to a connection from _get_con(), then release the
        # lock file and stop this thread.
        con = _get_con()
        cur = con.cursor()
        cur.execute(sql)
        os.system('rm -f ' + lock)
        save_log('WARNING', 'insertMonitor : ' + str(e))
        sys.exit()
    # NOTE(review): on the success path the visible code neither commits
    # nor removes the lock file; confirm nothing was lost from this
    # function.
def serverMonitor(serverip, entry, x):
    '''
    Thread entry function for server information monitoring.

    @serverip: IP of the server being checked
    @entry: one monitoring-item row; it is copied to a list and read by
            index: [4] holds the item configuration (evaluated into a
            dict with at least 'alarmcmd', plus 'portlist' for port
            checks), [0], [1], [7], [8] and [9] are forwarded to the
            alarm, mail and osa_serverinfo helpers
    @x: value that is only used to build the thread log tag

    Returns the result of serverChooseAlarm() or DataBaseCheck() for the
    item types they handle; every other path ends in sys.exit().
    '''
    entry_list = list(entry)

    # Item configuration.
    # NOTE(review): eval() executes whatever expression is stored in
    # entry[4]; if that column can be written by anything untrusted,
    # replace it with ast.literal_eval or a JSON parser.
    oItemConfig = eval(entry_list[4])
    alarmcmd = oItemConfig['alarmcmd']

    # Tag shared by every thread-log line written below.
    tag = serverip + '_' + str(x) + '_' + alarmcmd

    save_Thread_log('MONTHREAD', tag, 'subThread start now!------------------------------')
    save_Thread_log('MONTHREAD', tag, 'subThread name:' + alarmcmd)

    # Command used to fetch the monitoring information.
    cmd = 'SYSTEM_RUN_COMMAND!{"mon_all_stat":""}'
    save_Thread_log('MONTHREAD', tag, 'subThread oMonText begin,Cmd is :' + cmd)

    if alarmcmd in ('portstatcheck', 'databasecheck'):
        # Port and database checks do not need information from the client.
        oMonText = '1!1'
        save_Thread_log('MONTHREAD', tag, 'subThread subtype is portstatcheck or databasecheck ,subtype is :' + alarmcmd)
    elif PortIsAlive(serverip, SOCKET['REMOTE_PORT']) == False:
        # The port is unreachable, so the server is treated as abnormal.
        each_ipinfo = None
        # NOTE(review): serverip is concatenated into the SQL text; use the
        # helper's parameter binding if it offers one.
        each_ipinfo_list = select("SELECT * FROM osa_ipinfo where oIp = '" + serverip + "'")
        if each_ipinfo_list:
            each_ipinfo = each_ipinfo_list[0]
        else:
            save_log('INFO', 'ip not found:' + str(serverip))
            sys.exit()
        AllUserEmailAddress = GetUserEmailAddress('ALL')
        save_Thread_log('MONTHREAD', tag, 'subThread server PortIsAlive == false,exit now!,IP is :' + serverip + ',each_info is:' + str(each_ipinfo) + ',AllUserEmailAddress is :' + str(AllUserEmailAddress))
        # Check whether this single server is healthy and needs an alarm.
        try:
            ThreadForEachServer(each_ipinfo, AllUserEmailAddress)
        except Exception as e:
            save_log('ERROR', 'ThreadForEachServer run fail first:' + str(e))
            sys.exit()
        sys.exit()
    else:
        # Random 0-9.9 s pause before contacting the client.
        rtime = round(float(random.randrange(0, 100, 1)) / 10, 2)
        time.sleep(rtime)
        oMonText = proSocket(serverip, SOCKET['REMOTE_PORT'], cmd, type=None)
        if oMonText:
            oMonText = oMonText.split('!')
            save_Thread_log('MONTHREAD', tag, 'subThread get oMonText over!,oMonText is :' + str(oMonText))
        else:
            save_Thread_log('MONTHREAD', tag, 'subThread get oMonText faild ,exit now!')
            # No data came back, so the server is treated as abnormal.
            each_ipinfo = None
            # NOTE(review): same string-built SQL as above.
            each_ipinfo_list = select("SELECT * FROM osa_ipinfo where oIp = '" + serverip + "'")
            if each_ipinfo_list:
                each_ipinfo = each_ipinfo_list[0]
            AllUserEmailAddress = GetUserEmailAddress('ALL')
            save_Thread_log('MONTHREAD', tag, 'subThread server PortIsAlive == false,exit now!,IP is :' + serverip + ',each_info is:' + str(each_ipinfo) + ',AllUserEmailAddress is :' + str(AllUserEmailAddress))
            # Check whether this single server is healthy and needs an alarm.
            try:
                ThreadForEachServer(each_ipinfo, AllUserEmailAddress)
            except Exception as e:
                save_log('ERROR', 'ThreadForEachServer run fail again:' + str(e))
                sys.exit()
            sys.exit()

    save_log('INFO', 'serverMonitor oMonText : ' + str(oMonText))

    # Per-item settings for the checks handled by serverChooseAlarm().
    serverChooseDict = {
        'diskspacecheck': {'oMonText': 'diskstat', 'oItemConfig': 'percentage', 'strrecov': '的磁盘空间率已经恢复!', 'errorstr1': '的磁盘使用率过高,分区 ', 'errorstr2': '当前使用率为:'},
        'topstatcheck': {'oMonText': 'loadstat', 'oItemConfig': 'topvalue', 'strrecov': '的负载已经恢复正常!', 'errorstr1': '的负载状态过高。当前负载值:', 'errorstr2': ''},
        'loginusercheck': {'oMonText': 'login', 'oItemConfig': 'usernum', 'strrecov': '的登录用户数量已经恢复正常!', 'errorstr1': '登录用户过多。当前用户数:', 'errorstr2': ''},
        'networkcheck': {'oMonText': 'network', 'oItemConfig': 'topvalue', 'strrecov': '的网络流量已经恢复正常!', 'errorstr1': '流量过载。网卡 ', 'errorstr2': ',当前进出流量峰值(MB): '},
    }

    # Dispatch to the function that completes this kind of check.
    if alarmcmd in serverChooseDict:
        save_Thread_log('MONTHREAD', tag, 'subThread serverChoose now!')
        return serverChooseAlarm(alarmcmd, serverChooseDict, serverip, entry, oMonText, x)

    # Database alarm.
    if alarmcmd == 'databasecheck':
        save_Thread_log('MONTHREAD', tag, 'subThread databasecheck now!')
        return DataBaseCheck(entry, serverip, x)

    # Port alarm.
    if alarmcmd == 'portstatcheck':
        a = IsPortAlive(serverip, oItemConfig['portlist'])
        save_Thread_log('MONTHREAD', tag, 'subThread portstatcheck func now!status is :' + str(a[0]))

        if a[0] == True:
            # This check came back healthy.
            result = ExistInserverinfo(entry_list[0], serverip)
            save_Thread_log('MONTHREAD', tag, 'subThread portstatcheck exit now!result is :' + str(result))
            if result:
                if result[3] == '0':
                    # Previously abnormal: announce the recovery.
                    if entry_list[8] == '1':
                        SendMail('RECOVERNOTIFY', entry_list[9], serverip, 'portstatcheck')
                    # NOTE(review): nesting reconstructed from collapsed
                    # source; the alarm message is assumed not to depend
                    # on the entry[8] mail switch - confirm.
                    CreateAlarmMsg(entry_list[1], entry_list[0], serverip, "服务器" + serverip + "的端口访问已经恢复正常:" + oItemConfig['portlist'], 0)
                    result[3] = '1'
                    result[4] = 0
                    Update_osa_serverinfo(result)
                    sys.exit()
            if result is None:
                InsertInto_osa_serverinfo(entry_list[0], serverip, '1', 0)
                sys.exit()

        if a[0] == False:
            result = ExistInserverinfo(entry_list[0], serverip)
            save_Thread_log('MONTHREAD', tag, 'subThread portstatcheck exit now!result is :' + str(result))
            if result:
                if result[3] == '1':
                    # First failure after a healthy state.
                    CreateAlarmMsg(entry_list[1], entry_list[0], serverip, "服务器" + serverip + "端口:" + a[1] + "异常", 3)
                    SendMail('STATUS_EXCEPTION', entry_list[9], serverip, 'portstatcheck')
                    result[4] = result[4] + 1
                    result[3] = '0'
                    Update_osa_serverinfo(result)
                    sys.exit()
                if result[3] == '0':
                    # Still failing: mail again only while the counter in
                    # result[4] is below entry[7].
                    if result[4] < entry_list[7]:
                        SendMail('STATUS_EXCEPTION', entry_list[9], serverip, 'portstatcheck')
                        result[4] = result[4] + 1
                        Update_osa_serverinfo(result)
                        sys.exit()
            # No osa_serverinfo record yet: raise the alarm and add one.
            if result is None:
                # Send the exception notification and add the alarm record.
                CreateAlarmMsg(entry_list[1], entry_list[0], serverip, "服务器" + serverip + '端口:' + a[1] + ' 未存活!', 3)
                SendMail('STATUS_EXCEPTION', entry_list[9], serverip, alarmcmd)
                # Record status '0' with an alarm count of 1.
                InsertInto_osa_serverinfo(entry_list[0], serverip, '0', 1)
                sys.exit()

    sys.exit()
def socketDo(connection, addr):
    '''
    Server-side command handling for one connection.

    @connection: connection object the command arrived on; replies are
                 written with send() and it is closed before the
                 function ends
    @addr: peer address, forwarded to socketRev()

    Every path shown ends in sys.exit(), so nothing is returned.
    '''
    try:
        rev = socketRev(connection, addr)
        if isConseResult(rev):
            connection.send(encode('result_send_ok'))
            # Process the result information passed over from unctrlpy.
            try:
                chooseResultDef(rev)
            except Exception as e:
                save_log('ERROR', 'chooseResultDef error:' + str(e))

        cmdstr, ip = ayCmdToConsole(rev)
        if not ip:
            save_log('ERROR', 'IP IS NULL')
            connection.send('Ip is null.')
            connection.close()
            sys.exit()
        save_log('INFO', str(cmdstr.split('!')))

        # Is the command meant for this server itself?
        cname = isMySelfCmd(cmdstr)
        # Is it a batch command?
        batch = isBatchCmd(cmdstr)

        toPhpData = ""
        if cname:
            try:
                toPhpData = OsaChooseDef(defname=cname, ip=ip, fromPhpCmd=cmdstr)
            except Exception as e:
                save_log('ERROR', str(e))
        elif batch:
            toPhpData = 'BATCH_CMD_OK'
        else:
            # Anything else is forwarded to the remote host.
            toPhpData = hostSocket.proSocket(ip, SOCKET['REMOTE_PORT'], cmdstr)
            save_log('DEBUG', 'cmd info:' + str(encode(cmdstr)))
            save_log('DEBUG', 'cmd info:' + str(cmdstr))

        # Explicit comparison on purpose: an empty string (left over when
        # OsaChooseDef failed) is sent back as-is, not replaced.
        if toPhpData == False:
            toPhpData = 'Data_receive_Failed!'
        connection.send('%s!%s' % (toPhpData, ip))
        # Reply and close first; the batch work below runs afterwards.
        connection.close()

        if toPhpData == 'BATCH_CMD_OK':
            try:
                chooseBatchDef(cmdstr, rev)
            except Exception as e:
                save_log('ERROR', 'BATCH chooseBatchDef error:' + str(e))
        sys.exit()
    except queueEmptyException as value:
        save_log('ERROR', 'server socketDo:' + str(value))
        connection.close()
        sys.exit()
    finally:
        # Safety net: make sure the connection is released even when an
        # unexpected exception escapes. Assumes close() may be called
        # more than once, as it can on a standard socket.
        connection.close()
def socketDo(connection, addr):
    """
    Server-side command handling for one connection.

    @connection: connection object the command arrived on; replies are
                 written with send() and it is closed before the
                 function ends
    @addr: peer address, forwarded to socketRev()

    Every path shown ends in sys.exit(), so nothing is returned.
    """
    try:
        rev = socketRev(connection, addr)
        if isConseResult(rev):
            connection.send(encode("result_send_ok"))
            # Process the result information passed over from unctrlpy.
            try:
                chooseResultDef(rev)
            except Exception as e:
                save_log("ERROR", "chooseResultDef error:" + str(e))

        cmdstr, ip = ayCmdToConsole(rev)
        if not ip:
            save_log("ERROR", "IP IS NULL")
            connection.send("Ip is null.")
            connection.close()
            sys.exit()
        save_log("INFO", str(cmdstr.split("!")))

        # Is the command meant for this server itself?
        cname = isMySelfCmd(cmdstr)
        # Is it a batch command?
        batch = isBatchCmd(cmdstr)

        toPhpData = ""
        if cname:
            try:
                toPhpData = OsaChooseDef(defname=cname, ip=ip, fromPhpCmd=cmdstr)
            except Exception as e:
                save_log("ERROR", str(e))
        elif batch:
            toPhpData = "BATCH_CMD_OK"
        else:
            # Anything else is forwarded to the remote host.
            toPhpData = hostSocket.proSocket(ip, SOCKET["REMOTE_PORT"], cmdstr)
            save_log("DEBUG", "cmd info:" + str(encode(cmdstr)))
            save_log("DEBUG", "cmd info:" + str(cmdstr))

        # Explicit comparison on purpose: an empty string (left over when
        # OsaChooseDef failed) is sent back as-is, not replaced.
        if toPhpData == False:
            toPhpData = "Data_receive_Failed!"
        connection.send("%s!%s" % (toPhpData, ip))
        # Reply and close first; the batch work below runs afterwards.
        connection.close()

        if toPhpData == "BATCH_CMD_OK":
            try:
                chooseBatchDef(cmdstr, rev)
            except Exception as e:
                save_log("ERROR", "BATCH chooseBatchDef error:" + str(e))
        sys.exit()
    except queueEmptyException as value:
        save_log("ERROR", "server socketDo:" + str(value))
        connection.close()
        sys.exit()
    finally:
        # Safety net: make sure the connection is released even when an
        # unexpected exception escapes. Assumes close() may be called
        # more than once, as it can on a standard socket.
        connection.close()