def create_event_host_failed_clear(region, host_name, eth_name):
    """Send a CLEARED "Ethernet Port Fault" event for one SR-IOV port.

    The alarm source string is built from *region*, *host_name* and
    *eth_name*; the resulting FmEvent is handed to a new EventSender.
    """
    alarm_source = (
        "Region=%s,CeeFunction=1,Node=%s,Network=SR-IOV,Aggregator=sriov,EthernetPort=%s"
        % (region, host_name, eth_name))
    # The literal "$" in front of the port name is emitted as-is.
    port_text = "Network=SR-IOV,Aggregator=sriov,EthernetPort=${}".format(
        eth_name)
    cleared_event = FmEvent(
        True,
        str(alarm_source),
        193,
        2031681,
        enums.FM_ACTIVE_SEVERITY.CLEARED,
        enums.FM_EVENT_TYPE.communicationsAlarm,
        enums.FM_PROBABLE_CAUSE.m3100LossOfSignal,
        "Ethernet Port Fault",
        port_text)
    EventSender().create_new_fm_event(cleared_event)
def watchmenAlarm(self, boole, region, tenant_id, uuid, nodeName, minor_id,
                  fm_active_severity, sp):
    """Build a communications-alarm FmEvent for one VM and send it.

    *boole*, *minor_id*, *fm_active_severity* and *sp* are forwarded to
    FmEvent unchanged; *region*, *tenant_id* and *uuid* form the alarm
    source, and *nodeName*/*uuid* are embedded in the event text.
    """
    sender = EventSender()
    vm_source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
        region, tenant_id, uuid)
    # "$myReason" is sent literally; nothing in this function substitutes it.
    detail = "VM %s(%s):$myReason" % (nodeName, uuid)
    alarm = FmEvent(
        boole,
        vm_source,
        193,
        minor_id,
        fm_active_severity,
        enums.FM_EVENT_TYPE.communicationsAlarm,
        enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
        sp,
        None,
        detail)
    sender.create_new_fm_event(alarm)
def watchmenAlarm():
    """Send a hard-coded CRITICAL "Virtual Machine OS Fault" test alarm.

    The tenant, VM uuid and VM name are fixed test values; ``region`` is
    read from the enclosing (module) scope.

    Fix: the alarm source previously used the uuid
    ``1693537ff-98f4-...`` (nine hex digits in the first group, so not a
    valid UUID) while the event text used ``1693537f-98f4-...``. Both now
    come from the same constant, so source and text name the same VM.
    """
    tenant = '689c319ac7a14aca86312c3e1036b275'
    vm_uuid = '1693537f-98f4-4e5b-b425-a5075189fd37'
    vm_name = 'test_vm_02'

    sender = EventSender()
    source_one = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
        region, tenant, vm_uuid)
    event = FmEvent(
        True,
        source_one,
        193,
        2032696,
        enums.FM_ACTIVE_SEVERITY.CRITICAL,
        enums.FM_EVENT_TYPE.communicationsAlarm,
        enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
        "Virtual Machine OS Fault",
        None,
        "VM %s(%s)" % (vm_name, vm_uuid))
    sender.create_new_fm_event(event)
def watchmenAlarm():
    """Send a hard-coded CLEARED "VM External Storage Fault" test event.

    The tenant and VM uuid are fixed test values; ``region`` is read from
    the enclosing (module) scope. Prints ``watchmenalarmend`` once the
    event has been handed to the sender.

    Fix: the VM uuid literal had nine hex digits in its first group
    (``1693537ff-...``), which is not a valid UUID; the extra ``f`` is
    removed.
    """
    tenant = '689c319ac7a14aca86312c3e1036b275'
    vm_uuid = '1693537f-98f4-4e5b-b425-a5075189fd37'

    sender = EventSender()
    source_one = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
        region, tenant, vm_uuid)
    event = FmEvent(
        True,
        source_one,
        193,
        2032707,
        enums.FM_ACTIVE_SEVERITY.CLEARED,
        enums.FM_EVENT_TYPE.equipmentAlarm,
        enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
        "VM External Storage Fault",
        None,
        "Pyhsical Storage Network Fault on compute-1209-2")
    sender.create_new_fm_event(event)
    print('watchmenalarmend')
def create_event_host_failed_clear(region, host_name, eth_name):
    """Send a CLEARED "Ethernet Port Fault" event for one SR-IOV port.

    The trailing ``-xx`` comments are the flag names the original author
    attached to each positional FmEvent argument.
    """
    alarm_source = (
        "Region=%s,CeeFunction=1,Node=%s,Network=SR-IOV,Aggregator=sriov,EthernetPort=%s"
        % (region, host_name, eth_name))
    cleared_event = FmEvent(
        True,
        str(alarm_source),  # -src
        193,  # -ma
        2031681,  # -mi
        enums.FM_ACTIVE_SEVERITY.CLEARED,  # -s
        enums.FM_EVENT_TYPE.communicationsAlarm,  # -e
        enums.FM_PROBABLE_CAUSE.m3100LossOfSignal,  # -p
        # -sp; when a -t value follows, add a ',' and write that value as
        # a quoted string on the next line.
        "Ethernet Port Fault",
        "Network=SR-IOV,Aggregator=sriov,EthernetPort=${}".format(
            eth_name))  # -t
    EventSender().create_new_fm_event(cleared_event)
def raise_alarm(self):
    """Raise a MAJOR "VM External Storage Fault" alarm per cached VM.

    Iterates ``self.last_cache``; each entry supplies ``vm_uuid``,
    ``tenant_id`` and ``compute`` through ``.get``. ``region`` is read
    from the enclosing (module) scope. The whole cache is printed on
    every iteration as debug output.
    """
    for cached_vm in self.last_cache:
        print('raise_alarm函数得到的缓存是')
        print(self.last_cache)
        vm_uuid = cached_vm.get('vm_uuid')
        tenant = cached_vm.get('tenant_id')
        compute_host = cached_vm.get('compute')

        sender = EventSender()
        vm_source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant, vm_uuid)
        fault_text = str("Pyhsical Storage Network Fault on %s" %
                         (compute_host))
        alarm = FmEvent(
            True,
            str(vm_source),
            193,
            2032707,
            enums.FM_ACTIVE_SEVERITY.MAJOR,
            enums.FM_EVENT_TYPE.equipmentAlarm,
            enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
            "VM External Storage Fault",
            None,
            fault_text)
        sender.create_new_fm_event(alarm)
def clear_alarm(self, tenant_id, vm_uuid, compute):
    """Send the CLEARED "VM External Storage Fault" event for one VM.

    Prints each argument (and the module-level ``region``) as debug
    output, then builds the event from them and sends it.
    """
    for label, value in (('clear_alarm函数得到的tenant_id', tenant_id),
                         ('clear_alarm函数得到的vm_uuid', vm_uuid),
                         ('clear_alarm函数得到的compute', compute)):
        print(label)
        print(value)

    sender = EventSender()
    print('clear_alarm函数得到的region')
    print(region)

    vm_source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
        region, tenant_id, vm_uuid)
    fault_text = str("Pyhsical Storage Network Fault on %s" % (compute))
    cleared = FmEvent(
        True,
        str(vm_source),
        193,
        2032707,
        enums.FM_ACTIVE_SEVERITY.CLEARED,
        enums.FM_EVENT_TYPE.equipmentAlarm,
        enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
        "VM External Storage Fault",
        None,
        fault_text)
    sender.create_new_fm_event(cleared)
def raise_alert(self, service):
    """Poll for *service* forever; raise "Service stopped" while it is down.

    Each pass runs ``ps -ef | grep <service> | grep -v grep``. When the
    pipeline reports no matching process a MAJOR "Service stopped" event
    is sent and the loop waits 120 seconds; otherwise it re-checks after
    one second. ``region`` and ``NODE`` are read from the enclosing
    (module) scope. This function never returns.

    Fix: the original appended the (always empty, because stdout goes to
    /dev/null) command output to a list and alerted while the list had
    fewer than two items. That alerted unconditionally on the first pass
    and never again, whatever the state of the service. The decision now
    uses the pipeline's exit status.

    Args:
        service: process name pattern passed to ``grep``.
    """
    # NOTE(review): *service* is interpolated into a shell command
    # unquoted; confirm it only ever comes from trusted configuration.
    check_cmd = "ps -ef | grep %s | grep -v grep > /dev/null" % (service)
    while True:
        # The pipeline's status is that of the final grep, which exits
        # non-zero when no process line survives the filters.
        status, _ = commands.getstatusoutput(check_cmd)
        if status != 0:
            print("raise alert")
            sender = EventSender()
            source_one = "Region=%s,CeeFunction=1,Node=%s,Service=%s" % (
                region, NODE, service)
            event = FmEvent(
                False,
                source_one,
                193,
                2031710,
                enums.FM_ACTIVE_SEVERITY.MAJOR,
                enums.FM_EVENT_TYPE.other,
                enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
                "Service stopped",
                None,
                "On node:%s service: %s has been stopped." % (NODE, service))
            sender.create_new_fm_event(event)
            time.sleep(120)
        else:
            time.sleep(1)
def keep_alive(a):
    """Try to recover VM *a* and raise "VM Restore Failed" when that fails.

    The VM's current status is read with ``nova list``:

    * ``SHUTOFF`` -> ``nova start`` (one attempt plus up to 3 retries);
    * ``ERROR``   -> wait 50 s, then ``nova reset-state --active``
      (one attempt plus up to 2 retries);
    * anything else -> nothing is attempted.

    If the last recovery command still exits non-zero, a WARNING event
    (minor id 2032702) is sent for the VM.

    Fixes:
    * ``retValue`` was unbound when the VM was neither SHUTOFF nor ERROR,
      so the final check raised an error; it now defaults to 0.
    * the probable cause was spelled
      ``enums.FM_PROBABLE_CAUSE.enums.FM_PROBABLE_CAUSE.m3100Indeterminate``;
      the duplicated segment is removed.
    * the duplicated retry loops are folded into one local helper.

    Args:
        a: uuid of the VM to recover.
    """

    def run_with_retries(command, retries):
        """Run *command*; re-run up to *retries* times while it fails.

        Sleeps 5 s after every re-run and returns the last exit status.
        """
        status = commands.getstatusoutput(command)[0]
        attempts = 0
        while attempts < retries and status != 0:
            status = commands.getstatusoutput(command)[0]
            attempts += 1
            time.sleep(5)
        return status

    vm_uuid = a
    region = commands.getstatusoutput(
        "grep \"region_name\" /etc/watchmen/watchmen-producer.conf | cut -d\"=\" -f2"
    )[1]
    # NOTE(review): this runs in its own shell, so it cannot export
    # credentials to the later nova commands; kept as in the original.
    commands.getstatusoutput("source /root/openrc")

    vm_curr_status = commands.getstatusoutput(
        "nova list --all-tenants --fields status | grep %s | awk -F\"|\" '{print $3}' | tr -d ' '"
        % (vm_uuid))[1]

    retValue = 0  # stays 0 when no recovery is attempted
    if vm_curr_status == "SHUTOFF":
        print("start vm to keep it alive")
        retValue = run_with_retries("nova start {}".format(vm_uuid), 3)
    elif vm_curr_status == "ERROR":
        print("restore vm to keep it alive")
        time.sleep(50)
        retValue = run_with_retries(
            "nova reset-state --active {}".format(vm_uuid), 2)

    if retValue != 0:
        # Recovery failed: raise an alarm for the VM.
        print("restore failed , raise event")
        tenant_uuid = commands.getstatusoutput(
            "nova list --all-tenants --fields tenant_id | grep %s | awk -F\"|\" '{print $3}' | tr -d ' '"
            % (vm_uuid))[1]
        sender = EventSender()
        source_one = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant_uuid, vm_uuid)
        event = FmEvent(
            True,
            source_one,
            193,
            2032702,
            enums.FM_ACTIVE_SEVERITY.WARNING,
            enums.FM_EVENT_TYPE.other,
            enums.FM_PROBABLE_CAUSE.m3100Indeterminate,
            "VM Restore Failed",
        )
        sender.create_new_fm_event(event)
def on_message(self, body, message):
    """Watch nova VM states forever, raising/clearing FM alarms on changes.

    An initial server listing (``curl_get_kpi()``) is mirrored into the
    ``om_datafree.vm_monitor`` MySQL table. The function then polls the
    listing in an endless loop and, comparing each poll with the previous
    one:

    * deletes rows of VMs that disappeared;
    * prints VMs that appeared;
    * for ACTIVE->ERROR / ACTIVE->SHUTOFF raises a CRITICAL alarm and
      attempts recovery; for ERROR->ACTIVE / SHUTOFF->ACTIVE sends the
      matching CLEARED event;
    * writes the new status of every changed VM to the table.

    Fixes relative to the original:
    * probable cause no longer uses the duplicated
      ``enums.FM_PROBABLE_CAUSE.enums.FM_PROBABLE_CAUSE`` chain;
    * the SHUTOFF->ACTIVE clear now uses minor id 2032693, the id the
      shutoff alarm is raised with (it used 2032692);
    * the status UPDATE is issued per changed VM instead of once with
      whatever VM the listing loop visited last;
    * each alarm carries the tenant of its own VM instead of the first
      tenant of the initial listing (which also failed on an empty list);
    * SQL values are bound as parameters and connections are always
      closed; ``cmp(a, b) != 0`` is replaced by direct comparisons.

    Args:
        body: mapping whose ``'oslo.message'`` entry is a JSON document;
            only its ``event_type`` field is read.
        message: not used.

    This function never returns.
    """
    mysql_vip = "192.168.42.28"
    use_db = ("use om_datafree;", None)
    region = commands.getstatusoutput(
        "grep \"region_name\" /etc/watchmen/watchmen-producer.conf | cut -d\"=\" -f2"
    )[1]
    # NOTE(review): runs in its own shell and cannot export credentials
    # into this process; kept as in the original.
    commands.getstatusoutput("source /root/openrc")
    tenant_of = {}  # vm uuid -> tenant id, refreshed on every listing

    def snapshot():
        """Return {vm_uuid: status} for the current listing; record tenants."""
        statuses = {}
        for server in curl_get_kpi()["servers"]:
            uuid = str(server.get('id'))
            statuses[uuid] = server.get('status')
            tenant_of[uuid] = server.get('tenant_id')
        return statuses

    def run_sql(statements):
        """Execute (sql, params) pairs on a fresh connection, then commit."""
        db = MySQLdb.connect(mysql_vip, "root", get_mysql_password())
        try:
            cursor = db.cursor()
            for sql, params in statements:
                cursor.execute(sql, params)
            db.commit()
        finally:
            db.close()

    def send_event(vm_uuid, minor_id, severity, event_kind, problem, *extra):
        """Build the FmEvent for *vm_uuid* and send it with a new sender."""
        source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant_of.get(vm_uuid), vm_uuid)
        EventSender().create_new_fm_event(
            FmEvent(True, source, 193, minor_id, severity, event_kind,
                    enums.FM_PROBABLE_CAUSE.m3100Indeterminate, problem,
                    *extra))

    a = snapshot()
    setup = [
        ("create database if not exists om_datafree;", None),
        use_db,
        ("""create table if not exists vm_monitor (vm_uuid varchar(128) not null unique, vm_status varchar(32)) ENGINE=InnoDB;""",
         None),
    ]
    # Seed the table with the initial snapshot.
    setup.extend(("insert ignore into vm_monitor values(%s,%s);",
                  (uuid, status)) for uuid, status in a.items())
    run_sql(setup)

    # *body* does not change while we loop, so decode it once.
    event_type = json.loads(body['oslo.message']).get('event_type')

    # NOTE(review): the loop polls back-to-back with no delay.
    while True:
        b = snapshot()  # all current vm uuids and statuses

        # VMs present before but gone now: drop them from the table.
        removed = [uuid for uuid in a if uuid not in b]
        if removed:
            for uuid in removed:
                print('%s:%s deleted to nova' % (time.ctime(), uuid))
            run_sql([use_db] + [
                ("DELETE FROM vm_monitor where vm_uuid=%s;", (uuid, ))
                for uuid in removed
            ])

        for uuid in b:
            if uuid not in a:
                print('%s:%s added to nova' % (time.ctime(), uuid))

        changed = [uuid for uuid in a if uuid in b and a[uuid] != b[uuid]]
        for k in changed:
            transition = (a[k], b[k])
            if transition == ("ACTIVE", "ERROR"):
                print('raise event -to error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became error", None,
                           "VM %s has changed to status error" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        # NOTE(review): this greps the uuid, not the
                        # metadata, so it cannot match. The original
                        # comment says this restore path is to be
                        # reworked once an ERROR state can be produced;
                        # left unchanged.
                        auto_restore = commands.getstatusoutput(
                            "echo %s | grep \"Auto_Restore:true\"" % (k))[1]
                        if auto_restore:
                            keep_alive(k)  # restore vm
            elif transition == ("ACTIVE", "SHUTOFF"):
                print('raise event -to shutoff %s' % k)
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became shutoff", None,
                           "VM %s has changed to status shutoff" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        keep_alive_flag = commands.getstatusoutput(
                            "echo %s | grep \"Keep_Alive:true\"" %
                            (metadata))[1]
                        if keep_alive_flag:
                            print("start vm to keep it alive")
                            nova_start(k)
                            # NOTE(review): event_type is fixed for the
                            # lifetime of this call, so unless it already
                            # equals power_on.end the retries always run
                            # out and the failure event is sent.
                            count = 0
                            while count <= 2 and event_type != 'compute.instance.power_on.end':
                                nova_start(k)
                                count += 1
                                time.sleep(5)
                            if count > 2:
                                # Original author's note: which alarm to
                                # raise here still needs confirming.
                                send_event(k, 2032702,
                                           enums.FM_ACTIVE_SEVERITY.WARNING,
                                           enums.FM_EVENT_TYPE.other,
                                           "VM Restore Failed")
            elif transition == ("ERROR", "ACTIVE"):
                print('clear event -error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became ACTIVE", None,
                           "VM %s has changed to status ACTIVE" % (k))
            elif transition == ("SHUTOFF", "ACTIVE"):
                print('clear event - shutoff %s' % k)
                # 2032693 is the id the shutoff alarm was raised with.
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became ACTIVE", None,
                           "VM %s has changed to status ACTIVE" % (k))

        if changed:
            run_sql([use_db] + [
                ("update vm_monitor set vm_status=%s where vm_uuid=%s;",
                 (b[uuid], uuid)) for uuid in changed
            ])
        a = b
def main():
    """Watch nova VM states forever, raising/clearing FM alarms on changes.

    An initial server listing (``curl_get_kpi()``) is written to the
    ``om_datafree.vm_monitor`` MySQL table (insert, or update when the
    uuid already exists). The function then polls the listing in an
    endless loop and, comparing each poll with the previous one:

    * prints VMs that appeared;
    * for ACTIVE->ERROR / ACTIVE->SHUTOFF raises a CRITICAL alarm and
      triggers recovery (``keep_alive.main`` or ``evacuate_vm.main``
      depending on the VM metadata); for ERROR->ACTIVE / SHUTOFF->ACTIVE
      sends the matching CLEARED event;
    * writes the new status of every changed VM to the table.

    Fixes relative to the original:
    * the INSERT ... ON DUPLICATE KEY UPDATE and the UPDATE statements
      were invalid SQL (unquoted status value, ``UPDATE <value>`` without
      a column); values are now bound as parameters and the update clause
      names ``vm_status``;
    * the status UPDATE is issued per changed VM instead of once with
      whatever VM the listing loop visited last;
    * probable cause no longer uses the duplicated
      ``enums.FM_PROBABLE_CAUSE.enums.FM_PROBABLE_CAUSE`` chain;
    * the SHUTOFF->ACTIVE clear now uses minor id 2032693, the id the
      shutoff alarm is raised with (it used 2032692);
    * each alarm carries the tenant of its own VM instead of the first
      tenant of the initial listing (which also failed on an empty list);
    * connections are always closed; ``cmp(a, b) != 0`` is gone.

    This function never returns.
    """
    mysql_vip = "192.168.42.28"
    use_db = ("use om_datafree;", None)
    region = commands.getstatusoutput(
        "grep \"region_name\" /etc/watchmen/watchmen-producer.conf | cut -d\"=\" -f2"
    )[1]
    # NOTE(review): runs in its own shell and cannot export credentials
    # into this process; kept as in the original.
    commands.getstatusoutput("source /root/openrc")
    tenant_of = {}  # vm uuid -> tenant id, refreshed on every listing

    def snapshot():
        """Return {vm_uuid: status} for the current listing; record tenants."""
        statuses = {}
        for server in curl_get_kpi()["servers"]:
            uuid = server.get('id', '')
            statuses[uuid] = server.get('status', '')
            tenant_of[uuid] = server.get('tenant_id', '')
        return statuses

    def run_sql(statements):
        """Execute (sql, params) pairs on a fresh connection, then commit."""
        db = MySQLdb.connect(mysql_vip, "root", get_mysql_password())
        try:
            cursor = db.cursor()
            for sql, params in statements:
                cursor.execute(sql, params)
            db.commit()
        finally:
            db.close()

    def send_event(vm_uuid, minor_id, severity, problem, text):
        """Build the equipment-alarm FmEvent for *vm_uuid* and send it."""
        source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant_of.get(vm_uuid), vm_uuid)
        EventSender().create_new_fm_event(
            FmEvent(True, source, 193, minor_id, severity,
                    enums.FM_EVENT_TYPE.equipmentAlarm,
                    enums.FM_PROBABLE_CAUSE.m3100Indeterminate, problem,
                    None, text))

    def metadata_has(metadata, marker):
        """Return the output of grepping the echoed *metadata* for *marker*."""
        return commands.getstatusoutput("echo %s | grep \"%s\"" %
                                        (metadata, marker))[1]

    a = snapshot()  # cached status per VM
    setup = [
        # Make sure the database and table exist.
        ("create database if not exists om_datafree;", None),
        use_db,
        ("""create table if not exists vm_monitor (vm_uuid varchar(64) not null unique, vm_status varchar(16) not null);""",
         None),
    ]
    # Insert each VM, or update its status when the row already exists.
    setup.extend((
        "insert into vm_monitor values(%s, %s) ON DUPLICATE KEY UPDATE vm_status=%s;",
        (uuid, status, status)) for uuid, status in a.items())
    run_sql(setup)

    # NOTE(review): the loop polls back-to-back with no delay.
    while True:
        b = snapshot()

        for uuid in b:
            if uuid not in a:
                # newly added vm
                print('%s:%s added to nova' % (time.ctime(), uuid))

        changed = [uuid for uuid in a if uuid in b and a[uuid] != b[uuid]]
        for k in changed:
            transition = (a[k], b[k])
            if transition == ("ACTIVE", "ERROR"):
                print('raise event -to error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           "VM status became error",
                           "VM %s has changed to status error" % (k))
                # VM rescue part.
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        # NOTE(review): this greps the uuid, not the
                        # metadata, so it cannot match; confirm intent
                        # before enabling the restore path.
                        if metadata_has(k, "Auto_Restore:true"):
                            keep_alive.main(k)
            elif transition == ("ACTIVE", "SHUTOFF"):
                print('raise event -to shutoff %s' % k)
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           "VM status became shutoff",
                           "VM %s has changed to status shutoff" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata == '':
                        continue
                    keep_alive_flag = metadata_has(metadata,
                                                   "Keep_Alive:true")
                    migration = metadata_has(metadata,
                                             "Alive_Policy:migration")
                    evacuation = metadata_has(metadata,
                                              "Alive_Policy:evacuation")
                    if not keep_alive_flag:
                        continue
                    if migration:
                        # migrate and keep alive
                        keep_alive.main(k)
                    elif evacuation:
                        # evacuate the vm for a DOWN host
                        evacuate_vm.main(k)
                    else:
                        # keep alive
                        keep_alive.main(k)
            elif transition == ("ERROR", "ACTIVE"):
                print('clear event -error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           "VM status became ACTIVE",
                           "VM %s has changed to status ACTIVE" % (k))
            elif transition == ("SHUTOFF", "ACTIVE"):
                print('clear event - shutoff %s' % k)
                # 2032693 is the id the shutoff alarm was raised with.
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           "VM status became ACTIVE",
                           "VM %s has changed to status ACTIVE" % (k))

        # Persist the new status of every VM that changed.
        if changed:
            run_sql([use_db] + [
                ("update vm_monitor set vm_status=%s where vm_uuid=%s;",
                 (b[uuid], uuid)) for uuid in changed
            ])
        a = b  # the current poll becomes the cache
def on_message(self, body, message):
    """Watch nova VM states forever, raising/clearing FM alarms on changes.

    An initial server listing (``curl_get_kpi()``) is mirrored into the
    ``om_datafree.vm_monitor`` MySQL table. The function then polls the
    listing in an endless loop and, comparing each poll with the previous
    one:

    * for VMs that disappeared: deletes their row and, when their last
      known state was ERROR or SHUTOFF, sends the matching CLEARED event;
    * prints VMs that appeared;
    * for ACTIVE->ERROR / ACTIVE->SHUTOFF raises a CRITICAL alarm and
      attempts recovery; for ERROR->ACTIVE / SHUTOFF->ACTIVE sends the
      matching CLEARED event;
    * writes the new status of every changed VM through ``mysql_crud``.

    Fixes relative to the original:
    * the status UPDATE is issued per changed VM instead of once with
      whatever VM the listing loop visited last;
    * each alarm carries the tenant of its own VM instead of the first
      tenant of the initial listing (which also failed on an empty list);
    * the setup connection is always closed and its INSERT binds values
      as parameters; ``cmp(...) != 0`` is gone;
    * the two copies of the recovery/retry code share one local helper.

    Args:
        body: mapping whose ``'oslo.message'`` entry is a JSON document;
            only its ``event_type`` field is read.
        message: not used.

    This function never returns.
    """
    region = commands.getstatusoutput(
        "grep \"region_name\" /etc/watchmen/watchmen-producer.conf | cut -d\"=\" -f2"
    )[1]
    # NOTE(review): runs in its own shell and cannot export credentials
    # into this process; kept as in the original.
    commands.getstatusoutput("source /root/openrc")
    tenant_of = {}  # vm uuid -> tenant id, refreshed on every listing

    def snapshot():
        """Return {vm_uuid: status} for the current listing; record tenants."""
        statuses = {}
        for server in curl_get_kpi()["servers"]:
            uuid = str(server.get('id'))
            statuses[uuid] = server.get('status')
            tenant_of[uuid] = server.get('tenant_id')
        return statuses

    def send_event(vm_uuid, minor_id, severity, event_kind, problem, *extra):
        """Build the FmEvent for *vm_uuid* and send it with a new sender."""
        source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant_of.get(vm_uuid), vm_uuid)
        EventSender().create_new_fm_event(
            FmEvent(True, source, 193, minor_id, severity, event_kind,
                    enums.FM_PROBABLE_CAUSE.m3100Indeterminate, problem,
                    *extra))

    def recover(action, vm_uuid, settle=0):
        """Call *action* on the VM, retry it, and alarm when retries run out.

        NOTE(review): event_type is fixed for the lifetime of this call,
        so unless it already equals power_on.end the retries always run
        out and the failure event is sent.
        """
        action(vm_uuid)
        if settle:
            time.sleep(settle)
        count = 0
        while count <= 2 and event_type != 'compute.instance.power_on.end':
            action(vm_uuid)
            count += 1
            time.sleep(5)
        if count > 2:
            send_event(vm_uuid, 2032702, enums.FM_ACTIVE_SEVERITY.WARNING,
                       enums.FM_EVENT_TYPE.other, "VM Restore Failed")

    last_cache = snapshot()

    # Create the database/table if needed and seed it with the snapshot.
    db = MySQLdb.connect("192.168.42.28", "root", get_mysql_password())
    try:
        cursor = db.cursor()
        cursor.execute("""create database if not exists om_datafree;""")
        cursor.execute("""use om_datafree;""")
        cursor.execute(
            """create table if not exists vm_monitor (vm_uuid varchar(128) not null unique, vm_status varchar(32)) ENGINE=InnoDB;"""
        )
        for uuid, status in last_cache.items():
            cursor.execute("insert ignore into vm_monitor values(%s,%s);",
                           (uuid, status))
        db.commit()
    finally:
        db.close()

    # *body* does not change while we loop, so decode it once.
    event_type = json.loads(body['oslo.message']).get('event_type')

    # NOTE(review): the loop polls back-to-back with no delay.
    while True:
        curr_cache = snapshot()  # all current vm uuids and statuses

        for key_uuid in [x for x in last_cache if x not in curr_cache]:
            print('%s:%s deleted to nova' % (time.ctime(), key_uuid))
            del_sql = """DELETE FROM vm_monitor where vm_uuid=\'%s\';""" % (
                key_uuid)
            mysql_crud(del_sql)
            # A deleted VM can no longer recover, so clear its alarm.
            if last_cache[key_uuid] == 'ERROR':
                send_event(key_uuid, 2032692,
                           enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became error", None,
                           "VM %s has changed to status error" % (key_uuid))
            if last_cache[key_uuid] == 'SHUTOFF':
                send_event(key_uuid, 2032693,
                           enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became shutoff", None,
                           "VM %s has changed to status shutoff" %
                           (key_uuid))

        for key_uuid in curr_cache:
            if key_uuid not in last_cache:
                print('%s:%s added to nova' % (time.ctime(), key_uuid))

        changed = [
            x for x in last_cache
            if x in curr_cache and last_cache[x] != curr_cache[x]
        ]
        for k in changed:
            transition = (last_cache[k], curr_cache[k])
            if transition == ("ACTIVE", "ERROR"):
                print('raise event -to error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became error", None,
                           "VM %s has changed to status error" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        # NOTE(review): this greps the uuid, not the
                        # metadata, so it cannot match. Per the original
                        # comment the ERROR restore path is placeholder
                        # data, to be reworked once an ERROR state can be
                        # produced; left unchanged.
                        retValue = commands.getstatusoutput(
                            "echo %s | grep \"Auto_Restore:true\"" % (k))[1]
                        if retValue:
                            print("restore vm to keep it alive")
                            # nova reset, then the 50 s wait the original
                            # script used.
                            recover(keep_alive, k, settle=50)
            elif transition == ("ACTIVE", "SHUTOFF"):
                print('raise event -to shutoff %s' % k)
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became shutoff", None,
                           "VM %s has changed to status shutoff" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        retValue = commands.getstatusoutput(
                            "echo %s | grep \"Keep_Alive:true\"" %
                            (metadata))[1]
                        if retValue:
                            print("start vm to keep it alive")
                            recover(nova_start, k)
            elif transition == ("ERROR", "ACTIVE"):
                print('clear event -error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became error", None,
                           "VM %s has changed to status error" % (k))
            elif transition == ("SHUTOFF", "ACTIVE"):
                print('clear event - shutoff %s' % k)
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           enums.FM_EVENT_TYPE.equipmentAlarm,
                           "VM status became shutoff", None,
                           "VM %s has changed to status shutoff" % (k))

        # Persist the new status of every VM that changed.
        for k in changed:
            update_sql = """update vm_monitor set vm_status=\'%s\' where vm_uuid=\'%s\';""" % (
                curr_cache[k], k)
            mysql_crud(update_sql)
        last_cache = curr_cache
def main():
    """Watch nova VM states forever, raising/clearing FM alarms on changes.

    An initial server listing (``curl_get_kpi()``) is mirrored into the
    ``om_datafree.vm_monitor`` MySQL table. The function then polls the
    listing in an endless loop and, comparing each poll with the previous
    one:

    * prints VMs that appeared;
    * for ACTIVE->ERROR / ACTIVE->SHUTOFF raises a CRITICAL alarm and
      may call ``keep_alive.main``; for ERROR->ACTIVE / SHUTOFF->ACTIVE
      sends the matching CLEARED event;
    * writes the new status of every changed VM to the table.

    Fixes relative to the original:
    * probable cause no longer uses the duplicated
      ``enums.FM_PROBABLE_CAUSE.enums.FM_PROBABLE_CAUSE`` chain;
    * the SHUTOFF->ACTIVE clear now uses minor id 2032693, the id the
      shutoff alarm is raised with (it used 2032692);
    * the status UPDATE is issued per changed VM instead of once with
      whatever VM the listing loop visited last;
    * each alarm carries the tenant of its own VM instead of the first
      tenant of the initial listing (which also failed on an empty list);
    * SQL values are bound as parameters and connections are always
      closed; ``cmp(a, b) != 0`` is gone.

    This function never returns.
    """
    mysql_vip = "192.168.42.28"
    use_db = ("use om_datafree;", None)
    region = commands.getstatusoutput(
        "grep \"region_name\" /etc/watchmen/watchmen-producer.conf | cut -d\"=\" -f2"
    )[1]
    # NOTE(review): runs in its own shell and cannot export credentials
    # into this process; kept as in the original.
    commands.getstatusoutput("source /root/openrc")
    tenant_of = {}  # vm uuid -> tenant id, refreshed on every listing

    def snapshot():
        """Return {vm_uuid: status} for the current listing; record tenants."""
        statuses = {}
        for server in curl_get_kpi()["servers"]:
            uuid = str(server.get('id'))
            statuses[uuid] = server.get('status')
            tenant_of[uuid] = server.get('tenant_id')
        return statuses

    def run_sql(statements):
        """Execute (sql, params) pairs on a fresh connection, then commit."""
        db = MySQLdb.connect(mysql_vip, "root", get_mysql_password())
        try:
            cursor = db.cursor()
            for sql, params in statements:
                cursor.execute(sql, params)
            db.commit()
        finally:
            db.close()

    def send_event(vm_uuid, minor_id, severity, problem, text):
        """Build the equipment-alarm FmEvent for *vm_uuid* and send it."""
        source = "Region=%s,CeeFunction=1,Tenant=%s,VM=%s" % (
            region, tenant_of.get(vm_uuid), vm_uuid)
        EventSender().create_new_fm_event(
            FmEvent(True, source, 193, minor_id, severity,
                    enums.FM_EVENT_TYPE.equipmentAlarm,
                    enums.FM_PROBABLE_CAUSE.m3100Indeterminate, problem,
                    None, text))

    a = snapshot()
    setup = [
        ("create database if not exists om_datafree;", None),
        use_db,
        ("""create table if not exists vm_monitor (vm_uuid varchar(128) not null unique, vm_status varchar(32)) ENGINE=InnoDB;""",
         None),
    ]
    # Seed the table with the initial snapshot.
    setup.extend(("insert ignore into vm_monitor values(%s,%s);",
                  (uuid, status)) for uuid, status in a.items())
    run_sql(setup)

    # NOTE(review): the loop polls back-to-back with no delay.
    while True:
        b = snapshot()

        for uuid in b:
            if uuid not in a:
                print('%s:%s added to nova' % (time.ctime(), uuid))

        changed = [uuid for uuid in a if uuid in b and a[uuid] != b[uuid]]
        for k in changed:
            transition = (a[k], b[k])
            if transition == ("ACTIVE", "ERROR"):
                print('raise event -to error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           "VM status became error",
                           "VM %s has changed to status error" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        # NOTE(review): this greps the uuid, not the
                        # metadata, so it cannot match; confirm intent
                        # before enabling the restore path.
                        auto_restore = commands.getstatusoutput(
                            "echo %s | grep \"Auto_Restore:true\"" % (k))[1]
                        if auto_restore:
                            keep_alive.main(k)  # restore vm
            elif transition == ("ACTIVE", "SHUTOFF"):
                print('raise event -to shutoff %s' % k)
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CRITICAL,
                           "VM status became shutoff",
                           "VM %s has changed to status shutoff" % (k))
                for server in curl_get_kpi_show(k)["servers"]:
                    metadata = server.get('metadata', '')
                    if metadata != '':
                        keep_alive_flag = commands.getstatusoutput(
                            "echo %s | grep \"Keep_Alive:true\"" %
                            (metadata))[1]
                        if keep_alive_flag:
                            keep_alive.main(k)
            elif transition == ("ERROR", "ACTIVE"):
                print('clear event -error %s' % k)
                send_event(k, 2032692, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           "VM status became ACTIVE",
                           "VM %s has changed to status ACTIVE" % (k))
            elif transition == ("SHUTOFF", "ACTIVE"):
                print('clear event - shutoff %s' % k)
                # 2032693 is the id the shutoff alarm was raised with.
                send_event(k, 2032693, enums.FM_ACTIVE_SEVERITY.CLEARED,
                           "VM status became ACTIVE",
                           "VM %s has changed to status ACTIVE" % (k))

        # Persist the new status of every VM that changed.
        if changed:
            run_sql([use_db] + [
                ("update vm_monitor set vm_status=%s where vm_uuid=%s;",
                 (b[uuid], uuid)) for uuid in changed
            ])
        a = b