Пример #1
0
def DataBaseCheck(entry,serverip,x):
	'''
	Entry point of the database check for one monitor item.

	Probes the database with IsDataBaseNormal, looks up the previously
	recorded state with ExistInserverinfo and then records alarm messages,
	sends mail and updates/inserts the stored state depending on the
	transition between the recorded state and the current one.

	Args:
		entry: monitor item row. Fields read here: [0] and [1] (passed to the
			alarm/state helpers), [4] config text, [7] alarm limit,
			[8] recover-notify flag ('1' sends a recovery mail) and
			[9] (passed to SendMail).
		serverip: address of the database host.
		x: value used only to build the thread-log key.

	Raises:
		SystemExit: always; every path ends in sys.exit().
	'''
	entry_list = list(entry)
	# NOTE(review): eval() executes whatever is stored in the config field.
	# If that text can be influenced by users, switch to a literal-only parser.
	oItemConfig = eval(entry_list[4])
	database_name = serverip+":"+str(oItemConfig['port'])
	status,detail = IsDataBaseNormal(oItemConfig,serverip)

	# Stored record for this item/database: [3] is the last state
	# ('1' normal, '0' abnormal), [4] the number of alarms already sent.
	result = ExistInserverinfo(entry_list[0],database_name)

	# str(status) keeps the log call from raising if the checker ever
	# returns a non-string status.
	save_Thread_log('MONTHREAD',serverip+'_'+str(x)+'_'+oItemConfig['alarmcmd'],'subThread databasecheck func now!status is :'+str(status)+',result is:'+str(result))

	if status == '1':
		# The database is healthy on this run.
		if result is None:
			# First observation: just remember the healthy state.
			InsertInto_osa_serverinfo(entry_list[0],database_name,'1',0)
		elif result and result[3] == '0':
			# Transition abnormal -> normal: notify and reset the counter.
			if entry_list[8] == '1':
				SendMail('RECOVERNOTIFY',entry_list[9],database_name,'databasecheck')
			# NOTE(review): the recovery message passes serverip as third
			# argument while the failure message passes database_name;
			# confirm which one CreateAlarmMsg expects.
			CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"数据库"+database_name+"已经恢复正常:",0)
			result[3] = '1'
			result[4] = 0
			Update_osa_serverinfo(result)
	elif status == '0':
		# The database is abnormal on this run.
		if result is None:
			# First observation is already a failure. Record the alarm message
			# as well (serverMonitor and serverChooseAlarm do the same); later
			# failures only send mail, so without this no alarm record would
			# ever be created for this outage.
			CreateAlarmMsg(entry_list[1],entry_list[0],database_name,"数据库:"+database_name+"出现异常:"+str(detail),3)
			SendMail('STATUS_EXCEPTION',entry_list[9],database_name,'databasecheck')
			InsertInto_osa_serverinfo(entry_list[0],database_name,'0',1)
		elif result:
			if result[3] == '1':
				# Transition normal -> abnormal: first alarm of this outage.
				CreateAlarmMsg(entry_list[1],entry_list[0],database_name,"数据库:"+database_name+"出现异常:"+str(detail),3)
				SendMail('STATUS_EXCEPTION',entry_list[9],database_name,'databasecheck')
				result[4] = result[4]+1
				result[3] = '0'
				Update_osa_serverinfo(result)
			elif result[3] == '0':
				# Still abnormal: keep mailing only while fewer alarms than the
				# configured limit were sent. Strict '<' matches the port and
				# client-metric checks; '<=' sent one mail too many.
				if result[4] < entry_list[7]:
					SendMail('STATUS_EXCEPTION',entry_list[9],database_name,'databasecheck')
					result[4] = result[4]+1
					Update_osa_serverinfo(result)
	# Single exit point: the worker thread always terminates here.
	sys.exit()
Пример #2
0
def _alarm_unreachable_server(serverip,log_key,reason,fail_tag):
	'''
	Run the whole-server check for a host that delivered no monitor data.

	Args:
		serverip: address of the host.
		log_key: thread-log key of the calling monitor thread.
		reason: short text put into the thread log to say why we got here.
		fail_tag: word appended to the error log if ThreadForEachServer raises.

	Returns without doing anything else when the ip has no osa_ipinfo row.
	'''
	# NOTE(review): serverip is concatenated into the SQL text. If it can ever
	# come from outside the database, use a parameterized query instead.
	each_ipinfo_list = select("SELECT * FROM osa_ipinfo where oIp = '"+serverip+"'")
	if not each_ipinfo_list:
		save_log('INFO','ip not found:'+str(serverip))
		return
	each_ipinfo = each_ipinfo_list[0]
	AllUserEmailAddress = GetUserEmailAddress('ALL')

	save_Thread_log('MONTHREAD',log_key,'subThread server '+reason+',exit now!,IP is :'+serverip+',each_info is:'+str(each_ipinfo)+',AllUserEmailAddress is :'+str(AllUserEmailAddress))

	# Decide whether the server as a whole is down and needs an alarm.
	try:
		ThreadForEachServer(each_ipinfo,AllUserEmailAddress)
	except Exception as e:
		save_log('ERROR','ThreadForEachServer run fail '+fail_tag+':'+str(e))


def _port_stat_check(entry_list,oItemConfig,serverip,log_key):
	'''
	Check the configured ports and update alarm state for the port monitor item.

	Args:
		entry_list: monitor item row as a list (fields [0], [1], [7], [8], [9]
			are read here).
		oItemConfig: parsed item config; 'portlist' and 'alarmcmd' are used.
		serverip: address of the host whose ports are probed.
		log_key: thread-log key of the calling monitor thread.
	'''
	a = IsPortAlive(serverip,oItemConfig['portlist'])
	save_Thread_log('MONTHREAD',log_key,'subThread portstatcheck func now!status is :'+str(a[0]))

	if a[0] == True:
		port_ok = True
	elif a[0] == False:
		port_ok = False
	else:
		# Neither True nor False: nothing is recorded.
		return

	# Stored record: [3] last state ('1' normal, '0' abnormal), [4] alarms sent.
	result = ExistInserverinfo(entry_list[0],serverip)
	save_Thread_log('MONTHREAD',log_key,'subThread portstatcheck exit now!result is :'+str(result))

	if port_ok:
		if result is None:
			# First observation: remember the healthy state.
			InsertInto_osa_serverinfo(entry_list[0],serverip,'1',0)
		elif result and result[3] == '0':
			# Transition abnormal -> normal.
			if entry_list[8] == '1':
				SendMail('RECOVERNOTIFY',entry_list[9],serverip,'portstatcheck')
			CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+"的端口访问已经恢复正常:"+oItemConfig['portlist'],0)
			result[3] = '1'
			result[4] = 0
			Update_osa_serverinfo(result)
		return

	if result is None:
		# No record yet and the first observation is a failure:
		# raise the alarm and store state '0' with one alarm sent.
		CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+'端口:'+a[1]+' 未存活!',3)
		SendMail('STATUS_EXCEPTION',entry_list[9],serverip,oItemConfig['alarmcmd'])
		InsertInto_osa_serverinfo(entry_list[0],serverip,'0',1)
	elif result:
		if result[3] == '1':
			# Transition normal -> abnormal.
			CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+"端口:"+a[1]+"异常",3)
			SendMail('STATUS_EXCEPTION',entry_list[9],serverip,'portstatcheck')
			result[4] = result[4]+1
			result[3] = '0'
			Update_osa_serverinfo(result)
		elif result[3] == '0':
			# Still abnormal: mail again only below the configured alarm limit.
			if result[4] < entry_list[7]:
				SendMail('STATUS_EXCEPTION',entry_list[9],serverip,'portstatcheck')
				result[4] = result[4]+1
				Update_osa_serverinfo(result)


def serverMonitor(serverip,entry,x):
	'''
	Thread entry point that monitors one item of one server.

	Fetches the client statistics (except for port/database items, which need
	none), falls back to the whole-server check when the client is unreachable
	or silent, and then dispatches to the check selected by the item's
	'alarmcmd'.

	Args:
		serverip: address of the monitored host.
		entry: monitor item row; field [4] holds the config text.
		x: value used only to build the thread-log key.

	Raises:
		SystemExit: on every path handled here; the dispatched checks
			(serverChooseAlarm, DataBaseCheck) terminate the same way.
	'''
	entry_list = list(entry)
	# Item configuration.
	# NOTE(review): eval() executes whatever is stored in the config field.
	oItemConfig = eval(entry_list[4])
	alarmcmd = oItemConfig['alarmcmd']
	log_key = serverip+'_'+str(x)+'_'+alarmcmd

	save_Thread_log('MONTHREAD',log_key,'subThread start now!------------------------------')
	save_Thread_log('MONTHREAD',log_key,'subThread name:' + alarmcmd)

	# Command that asks the client for all statistics.
	cmd = 'SYSTEM_RUN_COMMAND!{"mon_all_stat":""}'
	save_Thread_log('MONTHREAD',log_key,'subThread oMonText begin,Cmd is :'+cmd)

	if alarmcmd == 'portstatcheck' or alarmcmd == 'databasecheck':
		# Port and database checks do not need any client data.
		oMonText = '1!1'
		save_Thread_log('MONTHREAD',log_key,'subThread subtype is portstatcheck or databasecheck ,subtype is :'+alarmcmd)
	elif PortIsAlive(serverip,SOCKET['REMOTE_PORT']) == False:
		# The client port is closed, so the server itself is in trouble.
		_alarm_unreachable_server(serverip,log_key,'PortIsAlive == false','first')
		sys.exit()
	else:
		# Random 0-9.9s delay before querying the client.
		rtime = round(float(random.randrange(0, 100, 1))/10,2)
		time.sleep(rtime)
		oMonText = proSocket(serverip, SOCKET['REMOTE_PORT'], cmd, type=None)

	if not oMonText:
		# No data came back although the port was open: treat the server as
		# abnormal. This path now has the same "ip not found" guard as the
		# closed-port path and logs its real reason.
		save_Thread_log('MONTHREAD',log_key,'subThread get oMonText faild ,exit now!')
		_alarm_unreachable_server(serverip,log_key,'oMonText is empty','again')
		sys.exit()

	oMonText = oMonText.split('!')
	save_Thread_log('MONTHREAD',log_key,'subThread get oMonText over!,oMonText is :'+str(oMonText))
	save_log('INFO','serverMonitor oMonText : '+str(oMonText))

	# Per-check settings: key inside the client data, key inside the item
	# config, and the message fragments for recovery and failure.
	serverChooseDict={

		'diskspacecheck' : {'oMonText' : 'diskstat','oItemConfig' : 'percentage', 'strrecov' : '的磁盘空间率已经恢复!' , 'errorstr1' : '的磁盘使用率过高,分区 ' , 'errorstr2' : '当前使用率为:'},
		'topstatcheck' : {'oMonText' : 'loadstat','oItemConfig' : 'topvalue', 'strrecov' : '的负载已经恢复正常!' , 'errorstr1':'的负载状态过高。当前负载值:','errorstr2' : ''},
		'loginusercheck' : {'oMonText' : 'login','oItemConfig' : 'usernum', 'strrecov' : '的登录用户数量已经恢复正常!' , 'errorstr1':'登录用户过多。当前用户数:','errorstr2':''},
		'networkcheck' : {'oMonText' : 'network','oItemConfig' : 'topvalue', 'strrecov' : '的网络流量已经恢复正常!' , 'errorstr1':'流量过载。网卡 ','errorstr2':',当前进出流量峰值(MB): '}

	}

	# Client-metric checks.
	if alarmcmd in serverChooseDict:
		save_Thread_log('MONTHREAD',log_key,'subThread serverChoose now!')
		return serverChooseAlarm(alarmcmd,serverChooseDict,serverip,entry,oMonText,x)

	# Database check.
	if alarmcmd == 'databasecheck':
		save_Thread_log('MONTHREAD',log_key,'subThread databasecheck now!')
		return DataBaseCheck(entry,serverip,x)

	# Port check.
	if alarmcmd == 'portstatcheck':
		_port_stat_check(entry_list,oItemConfig,serverip,log_key)

	sys.exit()
Пример #3
0
def serverChooseAlarm(alarmcmd,serverChooseDict,serverip,entry,oMonText,x):
	'''
	Run the client-metric check named by alarmcmd and do the alarm bookkeeping.

	Args:
		alarmcmd: check name; must be a key of serverChooseDict.
		serverChooseDict: per-check settings ('oMonText', 'oItemConfig',
			'strrecov', 'errorstr1', 'errorstr2').
		serverip: address of the monitored host.
		entry: monitor item row; fields [0], [1], [4], [7], [8], [9] are read.
		oMonText: split client reply; element [1] is the text of the data dict.
		x: value used only to build the thread-log key.

	Raises:
		SystemExit: always; every path ends in sys.exit().
	'''
	save_Thread_log('MONTHREAD',serverip+'_'+str(x)+'_'+alarmcmd,'subThread in serverChooseAlarm,clientdata now!! oMonText is: '+str(oMonText))

	settings = serverChooseDict[alarmcmd]

	# Key of this check inside the client data, e.g. 'diskstat'.
	data_key = settings['oMonText']

	try:
		client_dict = eval(oMonText[1])
	except Exception as e:
		save_log('ERROR','oMonTextDic is error:'+str(e)+',ip is:'+str(serverip)+', str : '+str(oMonText))
		sys.exit()

	if data_key not in client_dict:
		save_Thread_log('MONTHREAD',serverip+'_'+str(x)+'_'+alarmcmd,'subThread in serverChooseAlarm,clientdata error exit now!! oMonText is '+str(oMonText))
		sys.exit(0)
	clientdata = client_dict[data_key]

	entry_list = list(entry)

	# Key of the threshold inside the item config.
	threshold_key = settings['oItemConfig']

	oItemConfig = eval(entry_list[4])

	# One checker per supported check name.
	checkers = {
		'diskspacecheck' : IsDiskSpaceNormal,
		'topstatcheck' : IsLoadStatNormal,
		'loginusercheck' : IsLoginUserOver,
		'networkcheck' : IsNetworkTrafficNormal,
	}
	checker = checkers.get(alarmcmd)
	if checker is None:
		sys.exit()
	a = checker(clientdata,int(oItemConfig[threshold_key]))

	# Text used when the item recovers.
	recover_text = settings['strrecov']

	# Text used when the item is abnormal.
	error_text = settings['errorstr1'] + str(a[1])
	if len(a) > 2:
		error_text = error_text + settings['errorstr2'] + str(a[2])

	log_key = serverip+'_'+str(x)+'_'+oItemConfig['alarmcmd']
	save_Thread_log('MONTHREAD',log_key,'subThread in serverChoose func now! status is '+str(a[0]))

	if a[0] == True:
		# This run found the item healthy.
		result = ExistInserverinfo(entry_list[0],serverip)
		save_Thread_log('MONTHREAD',log_key,'subThread in serverChoose func exit now! result is '+str(result))

		if result is None:
			# No stored record yet: create one in the healthy state.
			InsertInto_osa_serverinfo(entry_list[0],serverip,'1',0)
		elif result and result[3] == '0':
			# The stored state was abnormal, so this is a recovery.
			# Mail only when the recover-notify flag is '1'.
			if entry_list[8] == '1':
				SendMail('RECOVERNOTIFY',entry_list[9],serverip,alarmcmd)

			# Add the recovery entry to the alarm messages.
			CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+recover_text,0)

			# Update the stored record for the recovery.
			DatabaseUpdateWhenRecovey(result,entry_list)

	elif a[0] == False:
		# This run found the item abnormal.
		result = ExistInserverinfo(entry_list[0],serverip)
		save_Thread_log('MONTHREAD',log_key,'subThread in serverChoose func exit now! result is '+str(result))

		if result is None:
			# No stored record yet: raise the alarm and store state '0'
			# with an alarm count of 1.
			CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+error_text,3)
			SendMail('STATUS_EXCEPTION',entry_list[9],serverip,alarmcmd)
			InsertInto_osa_serverinfo(entry_list[0],serverip,'0',1)
		elif result:
			previous_state = result[3]
			if previous_state == '1':
				# Was healthy before: raise the alarm, bump the alarm count
				# and mark the record abnormal.
				CreateAlarmMsg(entry_list[1],entry_list[0],serverip,"服务器"+serverip+error_text,3)
				SendMail('STATUS_EXCEPTION',entry_list[9],serverip,alarmcmd)
				result[4] += 1
				result[3] = '0'
				Update_osa_serverinfo(result)
			elif previous_state == '0':
				# Already abnormal: mail again only while the alarm count is
				# below the limit configured for the item.
				if result[4] < entry_list[7]:
					SendMail('STATUS_EXCEPTION',entry_list[9],serverip,alarmcmd)
					result[4] += 1
					Update_osa_serverinfo(result)

	# Single exit point for every path that got this far.
	sys.exit()