def insert_notification_list_db(self, jsonData, recover_by, session):
    """
    Insert into notification_list DB from notification JSON.

    :param jsonData: notification JSON data; values are read with ``get``.
    :param recover_by: node recover(0)/VM recover(1)/process error(2)
    :param session: DB session passed through to the ``dbapi`` calls.
    :return: ret_dic, the information that was registered to the
             notification_list table, in the dictionary type. Its
             ``notification_time`` / ``notification_startTime`` entries
             hold the raw JSON values, whereas the DB row receives the
             parsed datetime values.
    :raises Exception: any error raised while preparing or inserting the
             record is written to syslog and then re-raised.
    """

    def strp_time(u_time):
        """
        Convert unicode time with format '%Y%m%d%H%M%S' to
        datetime format. A missing or malformed value is logged as a
        warning and converted to None.
        """
        try:
            return datetime.datetime.strptime(u_time, '%Y%m%d%H%M%S')
        except (ValueError, TypeError) as e:
            self.rc_util.syslogout(e, syslog.LOG_WARNING)
            return None

    def log_failure(message_id, exc):
        """Write the message ID plus the type, value and traceback of the
        exception currently being handled to syslog."""
        self.rc_util.syslogout_ex(message_id, syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in traceback.format_tb(traceback_):
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        # ``message`` only exists on Python 2 exceptions; reading it
        # unconditionally raises AttributeError on Python 3 and would hide
        # the real error, so fall back to the exception object itself.
        self.rc_util.syslogout(getattr(exc, "message", exc), syslog.LOG_ERR)

    # NOTE: The notification item 'endTime' may have a NULL value.
    #       reference : The Notification Spec for RecoveryController.
    #       JSON decoder perform null -> None translation
    try:
        end_time = jsonData.get("endTime")
        if not end_time:
            j_endTime = None
        else:
            # Unlike 'time'/'startTime', a malformed 'endTime' is an error.
            j_endTime = datetime.datetime.strptime(
                end_time, '%Y%m%d%H%M%S')

        # update and deleted :not yet
        create_at = datetime.datetime.now()
        update_at = None
        delete_at = None
        deleted = 0
        # progress 0:not yet
        progress = 0
        # From /etc/hosts
        # NOTE: Hosts hostname suffix is
        #       undetermined("_data_line","_control_line")
        iscsi_ip = None
        # PF9 change: the hostname is no longer resolved, so controle_ip
        # is always stored as None.
        controle_ip = None
        # PF9 change: the reserve node is not looked up here any more
        # (previously done for recover_by == 0, setting progress to 3 when
        # none was found), so recover_to is always None.
        recover_to = None

        notification_time = strp_time(jsonData.get("time"))
        notification_startTime = strp_time(jsonData.get("startTime"))
    except Exception as e:
        log_failure("RecoveryControllerUtilDb_0005", e)
        # Bare raise keeps the original traceback intact.
        raise

    # Todo: (sampath) correct the exceptions catching
    # Insert to notification_list DB.
    try:
        hostname = jsonData.get("hostname")
        ret_dic = {
            "create_at": create_at,
            "update_at": update_at,
            "delete_at": delete_at,
            "deleted": deleted,
            "notification_id": jsonData.get("id"),
            "notification_type": jsonData.get("type"),
            "notification_regionID": jsonData.get("regionID"),
            "notification_hostname": hostname,
            "notification_uuid": jsonData.get("uuid"),
            "notification_time": jsonData.get("time"),
            "notification_eventID": jsonData.get("eventID"),
            "notification_eventType": jsonData.get("eventType"),
            "notification_detail": jsonData.get("detail"),
            "notification_startTime": jsonData.get("startTime"),
            "notification_endTime": j_endTime,
            "notification_tzname": jsonData.get("tzname"),
            "notification_daylight": jsonData.get("daylight"),
            "notification_cluster_port": jsonData.get("cluster_port"),
            "progress": progress,
            "recover_by": recover_by,
            "iscsi_ip": iscsi_ip,
            "controle_ip": controle_ip,
            "recover_to": recover_to,
        }
        # The DB row uses the same fields, except that the two timestamps
        # are stored as parsed datetime values instead of the raw strings.
        db_values = dict(ret_dic,
                         notification_time=notification_time,
                         notification_startTime=notification_startTime)
        dbapi.add_notification_list(session, **db_values)

        self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0006",
                                  syslog.LOG_INFO)

        # If the notifying host is itself registered in reserve_list,
        # flag those entries as deleted.
        cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
            session, hostname)
        if len(cnt) > 0:
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, hostname, datetime.datetime.now())

        return ret_dic
    except Exception as e:
        log_failure("RecoveryControllerUtilDb_0007", e)
        raise
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """
    Node recover start thread :
        This thread starts the VM recover execution thread,
        only the number of existing vm in the recovery target node.

    :param notification_id: The notification ID included in the
                            notification
    :param notification_hostname: The host name of the failure node that
                                  is included in the notification
    :param notification_cluster_port: Cluster port passed to the
        reserve_list lookup that is used when no non-deleted reserve_list
        entry is returned for the notification's ``recover_to`` host.
    :param retry_mode: When ``False`` the reserve host is resolved and a
        recovery_instance thread is started for every VM. When ``True``
        the reserve handling is skipped and no thread is started; the
        vm_list records are only created and logged.
    :return: None. Exceptions are logged and swallowed here.
    """

    def start_recovery_thread(vm_uuid, primary_id, semaphore):
        """Log and start one rc_worker.recovery_instance thread."""
        LOG.info("Run thread rc_worker.recovery_instance."
                 + " vm_uuid=" + vm_uuid
                 + " primary_id=" + str(primary_id))
        thread_name = self.rc_util.make_thread_name(VM_LIST, primary_id)
        threading.Thread(
            target=self.rc_worker.recovery_instance,
            name=thread_name,
            args=(vm_uuid, primary_id, semaphore)).start()

    try:
        self.rc_config.set_request_context()
        db_engine = dbapi.get_engine(self.rc_config)
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Count vm_list
        if len(vm_list) == 0:
            LOG.info("There is no instance in "
                     + notification_hostname + ".")
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        LOG.info("Do get_all_notification_list_by_id_for_update.")
        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        LOG.info("Succeeded in "
                 "get_all_notification_list_by_id_for_update. "
                 "Return_value = " + str(result))
        recover_to = result.pop().recover_to

        if retry_mode is False:
            LOG.info("Do get_all_reserve_list_by_hostname_not_deleted.")
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            LOG.info("Succeeded in "
                     "get_all_reserve_list_by_hostname_not_deleted. "
                     "Return_value = " + str(cnt))

            if not cnt:
                # Nothing usable for the recorded recover_to host: fall
                # back to a lookup by cluster port.
                LOG.info(
                    "Do get_one_reserve_list_by_cluster_port_for_update.")
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session,
                    notification_cluster_port,
                    notification_hostname)
                LOG.info("Succeeded in "
                         "get_one_reserve_list_by_cluster_port_for_update. "
                         "Return_value = " + str(cnt))

                if not cnt:
                    LOG.warning("The reserve node not exist in "
                                "reserve_list DB, "
                                "so do not recover instances.")
                    # The session must be passed first, like the other
                    # update_notification_list_db calls; without it the
                    # call failed and progress was never set to 3.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                recover_to = cnt.pop().hostname
                update_at = datetime.datetime.now()
                LOG.info("Do "
                         "update_notification_list_by_notification_id"
                         "_recover_to.")
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to)
                LOG.info("Succeeded in "
                         "update_notification_list_by_notification_id"
                         "_recover_to.")

            delete_at = datetime.datetime.now()
            LOG.info("Do update_reserve_list_by_hostname_as_deleted.")
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)
            LOG.info("Succeeded in "
                     "update_reserve_list_by_hostname_as_deleted.")

        # create semaphore (Multiplicity is get from config.)
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        in_retry_mode = retry_mode is True
        incomplete_list = []
        for _ in range(int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    if in_retry_mode:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        LOG.info("RETRY MODE. Skip recovery_instance thread"
                                 + " vm_uuide=" + vm_uuid
                                 + " notification_id=" + notification_id)
                    else:
                        start_recovery_thread(
                            vm_uuid, primary_id, sem_recovery_instance)
                elif not in_retry_mode:
                    # No vm_list record could be created; try again on
                    # the next pass.
                    incomplete_list.append(vm_uuid)

            if not incomplete_list:
                break
            vm_list = incomplete_list
            greenthread.sleep(int(recovery_retry_interval))

        # VMs still incomplete after the last pass get a vm_list record
        # inserted directly, and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)
            start_recovery_thread(
                vm_uuid, primary_id, sem_recovery_instance)

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)

    except Exception:
        # Log and swallow any error. ``Exception`` replaces the former
        # bare ``except:`` so SystemExit/KeyboardInterrupt are no longer
        # trapped; it also covers the old, identical KeyError handler.
        error_type, error_value, traceback_ = sys.exc_info()
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in traceback.format_tb(traceback_):
            LOG.error(tb)
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """
    Node recover start thread :
        This thread starts the VM recover execution thread,
        only the number of existing vm in the recovery target node.

    :param notification_id: The notification ID included in the
                            notification
    :param notification_hostname: The host name of the failure node that
                                  is included in the notification
    :param notification_cluster_port: Cluster port passed to the
        reserve_list lookup that is used when no non-deleted reserve_list
        entry is returned for the notification's ``recover_to`` host.
    :param retry_mode: When ``False`` the reserve host is resolved and a
        recovery_instance thread is started for every VM. When equal to
        ``True`` no thread is started; the vm_list records are only
        created and logged.
    :return: None. Exceptions are written to syslog and swallowed here.
    """

    def log_failure(message_id):
        """Write the message ID plus the type, value and traceback of the
        exception currently being handled to syslog."""
        self.rc_util.syslogout_ex(message_id, syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in traceback.format_tb(traceback_):
            self.rc_util.syslogout(tb, syslog.LOG_ERR)

    def start_recovery_thread(vm_uuid, primary_id, semaphore):
        """Log and start one rc_worker.recovery_instance thread."""
        msg = "Run thread rc_worker.recovery_instance." \
            + " vm_uuid=" + vm_uuid \
            + " primary_id=" + str(primary_id)
        self.rc_util.syslogout(msg, syslog.LOG_INFO)
        threading.Thread(
            target=self.rc_worker.recovery_instance,
            args=(vm_uuid, primary_id, semaphore)).start()

    try:
        db_engine = dbapi.get_engine()
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Count vm_list
        if len(vm_list) == 0:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0014",
                                      syslog.LOG_INFO)
            msg = "There is no instance in " + notification_hostname + "."
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)

            if not cnt:
                # Nothing usable for the recorded recover_to host: fall
                # back to a lookup by cluster port.
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session,
                    notification_cluster_port,
                    notification_hostname)

                if not cnt:
                    self.rc_util.syslogout_ex(
                        "RecoveryControllerStarter_0022",
                        syslog.LOG_WARNING)
                    msg = "The reserve node not exist in " \
                          "reserve_list DB, " \
                          "so do not recover instances."
                    self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                    # The session must be passed first, like the other
                    # update_notification_list_db calls; without it the
                    # call failed and progress was never set to 3.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                recover_to = cnt.pop().hostname
                update_at = datetime.datetime.now()
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to)
                self.rc_util.syslogout_ex(
                    "RecoveryControllerStarter_0024", syslog.LOG_INFO)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0015",
                                      syslog.LOG_INFO)
            delete_at = datetime.datetime.now()
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)

        # create semaphore (Multiplicity is get from config.)
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        # Equality (not identity) is kept on purpose so that a caller
        # passing 1 is still treated as retry mode, as before.
        in_retry_mode = retry_mode == True  # noqa: E712
        incomplete_list = []
        for _ in range(int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    if in_retry_mode:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = "RETRY MODE. Skip recovery_instance thread" \
                            + " vm_uuide=" + vm_uuid \
                            + " notification_id=" + notification_id
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    else:
                        start_recovery_thread(
                            vm_uuid, primary_id, sem_recovery_instance)
                elif not in_retry_mode:
                    # No vm_list record could be created; try again on
                    # the next pass.
                    incomplete_list.append(vm_uuid)

            if not incomplete_list:
                break
            vm_list = incomplete_list
            greenthread.sleep(int(recovery_retry_interval))

        # VMs still incomplete after the last pass get a vm_list record
        # inserted directly, and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0031",
                                      syslog.LOG_INFO)
            start_recovery_thread(
                vm_uuid, primary_id, sem_recovery_instance)

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)

    except KeyError:
        log_failure("RecoveryControllerStarter_0017")
    except Exception:
        # Log and swallow any other error. ``Exception`` replaces the
        # former bare ``except:`` so SystemExit/KeyboardInterrupt are no
        # longer trapped.
        log_failure("RecoveryControllerStarter_0018")
def insert_notification_list_db(self, jsonData, recover_by, session):
    """
    Insert into notification_list DB from notification JSON.

    :param jsonData: notification JSON data; values are read with ``get``.
    :param recover_by: node recover(0)/VM recover(1)/process error(2)
    :param session: DB session passed through to the ``dbapi`` calls.
    :return: ret_dic, the information that was registered to the
             notification_list table, in the dictionary type. Its
             ``notification_time`` / ``notification_startTime`` entries
             hold the raw JSON values, whereas the DB row receives the
             parsed datetime values.
    :raises Exception: any error raised while preparing or inserting the
             record (including a failed hostname resolution) is logged
             and then re-raised.
    """

    def strp_time(u_time):
        """
        Convert unicode time with format '%Y%m%d%H%M%S' to
        datetime format. A missing or malformed value is logged as a
        warning and converted to None.
        """
        try:
            return datetime.datetime.strptime(u_time, '%Y%m%d%H%M%S')
        except (ValueError, TypeError) as e:
            LOG.warning(e)
            return None

    def log_failure(exc):
        """Log the type, value and traceback of the exception currently
        being handled."""
        error_type, error_value, traceback_ = sys.exc_info()
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in traceback.format_tb(traceback_):
            LOG.error(tb)
        # ``message`` only exists on Python 2 exceptions; reading it
        # unconditionally raises AttributeError on Python 3 and would hide
        # the real error, so fall back to the exception object itself.
        LOG.error(getattr(exc, "message", exc))

    # NOTE: The notification item 'endTime' may have a NULL value.
    #       reference : The Notification Spec for RecoveryController.
    #       JSON decoder perform null -> None translation
    try:
        end_time = jsonData.get("endTime")
        if not end_time:
            j_endTime = None
        else:
            # Unlike 'time'/'startTime', a malformed 'endTime' is an error.
            j_endTime = datetime.datetime.strptime(
                end_time, '%Y%m%d%H%M%S')

        # update and deleted :not yet
        create_at = datetime.datetime.now()
        update_at = None
        delete_at = None
        deleted = 0
        # progress 0:not yet
        progress = 0
        # From /etc/hosts
        # NOTE: Hosts hostname suffix is
        #       undetermined("_data_line","_control_line")
        iscsi_ip = None
        controle_ip = socket.gethostbyname(jsonData.get("hostname"))
        recover_to = None

        if recover_by == 0:
            recover_to = self._get_reserve_node_from_reserve_list_db(
                jsonData.get("cluster_port"),
                jsonData.get("hostname"),
                session)
            # If reserve node is None, set progress 3.
            if recover_to is None:
                progress = 3

        notification_time = strp_time(jsonData.get("time"))
        notification_startTime = strp_time(jsonData.get("startTime"))
    except Exception as e:
        log_failure(e)
        # Bare raise keeps the original traceback intact.
        raise

    # Todo: (sampath) correct the exceptions catching
    # Insert to notification_list DB.
    try:
        hostname = jsonData.get("hostname")
        ret_dic = {
            "create_at": create_at,
            "update_at": update_at,
            "delete_at": delete_at,
            "deleted": deleted,
            "notification_id": jsonData.get("id"),
            "notification_type": jsonData.get("type"),
            "notification_regionID": jsonData.get("regionID"),
            "notification_hostname": hostname,
            "notification_uuid": jsonData.get("uuid"),
            "notification_time": jsonData.get("time"),
            "notification_eventID": jsonData.get("eventID"),
            "notification_eventType": jsonData.get("eventType"),
            "notification_detail": jsonData.get("detail"),
            "notification_startTime": jsonData.get("startTime"),
            "notification_endTime": j_endTime,
            "notification_tzname": jsonData.get("tzname"),
            "notification_daylight": jsonData.get("daylight"),
            "notification_cluster_port": jsonData.get("cluster_port"),
            "progress": progress,
            "recover_by": recover_by,
            "iscsi_ip": iscsi_ip,
            "controle_ip": controle_ip,
            "recover_to": recover_to,
        }
        # The DB row uses the same fields, except that the two timestamps
        # are stored as parsed datetime values instead of the raw strings.
        db_values = dict(ret_dic,
                         notification_time=notification_time,
                         notification_startTime=notification_startTime)

        LOG.info("Do add_notification_list.")
        result = dbapi.add_notification_list(session, **db_values)
        LOG.info("Succeeded in add_notification_list. "
                 "Return_value = " + str(result))

        # If the notifying host is itself registered in reserve_list,
        # flag those entries as deleted.
        LOG.info("Do get_all_reserve_list_by_hostname_not_deleted.")
        cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
            session, hostname)
        LOG.info("Succeeded in get_all_reserve_list_by_hostname_not_deleted. "
                 "Return_value = " + str(cnt))
        if len(cnt) > 0:
            LOG.info("Do update_reserve_list_by_hostname_as_deleted.")
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, hostname, datetime.datetime.now())
            LOG.info("Succeeded in "
                     "update_reserve_list_by_hostname_as_deleted.")

        return ret_dic
    except Exception as e:
        log_failure(e)
        raise
def add_failed_host(self, notification_id, notification_hostname, notification_cluster_port, retry_mode):
    """
    Node recover start thread :
        This thread starts the VM recover execution thread,
        only the number of existing vm in the recovery target node.

    :param notification_id: The notification ID included in the
                            notification
    :param notification_hostname: The host name of the failure node that
                                  is included in the notification
    :param notification_cluster_port: Cluster port passed to the
        reserve_list lookup that is used when no non-deleted reserve_list
        entry is returned for the notification's ``recover_to`` host.
    :param retry_mode: When ``False`` the reserve host is resolved and a
        recovery_instance thread is started for every VM. When equal to
        ``True`` no thread is started; the vm_list records are only
        created and logged.
    :return: None. Exceptions are written to syslog and swallowed here.
    """

    def log_failure(message_id):
        """Write the message ID plus the type, value and traceback of the
        exception currently being handled to syslog."""
        self.rc_util.syslogout_ex(message_id, syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in traceback.format_tb(traceback_):
            self.rc_util.syslogout(tb, syslog.LOG_ERR)

    def start_recovery_thread(vm_uuid, primary_id, semaphore):
        """Log and start one rc_worker.recovery_instance thread."""
        msg = (
            "Run thread rc_worker.recovery_instance."
            + " vm_uuid="
            + vm_uuid
            + " primary_id="
            + str(primary_id)
        )
        self.rc_util.syslogout(msg, syslog.LOG_INFO)
        threading.Thread(
            target=self.rc_worker.recovery_instance,
            args=(vm_uuid, primary_id, semaphore),
        ).start()

    try:
        db_engine = dbapi.get_engine()
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value("recover_starter")
        recovery_max_retry_cnt = conf_dict.get("recovery_max_retry_cnt")
        recovery_retry_interval = conf_dict.get("recovery_retry_interval")

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(notification_hostname)

        # Count vm_list
        if len(vm_list) == 0:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0014", syslog.LOG_INFO)
            msg = "There is no instance in " + notification_hostname + "."
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)
            return

        result = dbapi.get_all_notification_list_by_id_for_update(session, notification_id)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(session, recover_to)

            if not cnt:
                # Nothing usable for the recorded recover_to host: fall
                # back to a lookup by cluster port.
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session, notification_cluster_port, notification_hostname
                )

                if not cnt:
                    self.rc_util.syslogout_ex("RecoveryControllerStarter_0022", syslog.LOG_WARNING)
                    msg = "The reserve node not exist in " "reserve_list DB, " "so do not recover instances."
                    self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                    # The session must be passed first, like the other
                    # update_notification_list_db calls; without it the
                    # call failed and progress was never set to 3.
                    self.rc_util_db.update_notification_list_db(session, "progress", 3, notification_id)
                    return

                recover_to = cnt.pop().hostname
                update_at = datetime.datetime.now()
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to
                )
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0024", syslog.LOG_INFO)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0015", syslog.LOG_INFO)
            delete_at = datetime.datetime.now()
            dbapi.update_reserve_list_by_hostname_as_deleted(session, recover_to, delete_at)

        # create semaphore (Multiplicity is get from config.)
        sem_recovery_instance = threading.Semaphore(int(conf_dict.get("semaphore_multiplicity")))

        # Equality (not identity) is kept on purpose so that a caller
        # passing 1 is still treated as retry mode, as before.
        in_retry_mode = retry_mode == True  # noqa: E712
        incomplete_list = []
        for _ in range(int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(session, notification_id, vm_uuid)

                if primary_id:
                    if in_retry_mode:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = (
                            "RETRY MODE. Skip recovery_instance thread"
                            + " vm_uuide="
                            + vm_uuid
                            + " notification_id="
                            + notification_id
                        )
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    else:
                        start_recovery_thread(vm_uuid, primary_id, sem_recovery_instance)
                elif not in_retry_mode:
                    # No vm_list record could be created; try again on
                    # the next pass.
                    incomplete_list.append(vm_uuid)

            if not incomplete_list:
                break
            vm_list = incomplete_list
            greenthread.sleep(int(recovery_retry_interval))

        # VMs still incomplete after the last pass get a vm_list record
        # inserted directly, and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(session, notification_id, vm_uuid, 0)
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0031", syslog.LOG_INFO)
            start_recovery_thread(vm_uuid, primary_id, sem_recovery_instance)

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)

    except KeyError:
        log_failure("RecoveryControllerStarter_0017")
    except Exception:
        # Log and swallow any other error. ``Exception`` replaces the
        # former bare ``except:`` so SystemExit/KeyboardInterrupt are no
        # longer trapped.
        log_failure("RecoveryControllerStarter_0018")
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """
    Node recover start thread :
        This thread starts the VM recover execution thread,
        only the number of existing vm in the recovery target node.

    :param notification_id: The notification ID included in the
                            notification
    :param notification_hostname: The host name of the failure node that
                                  is included in the notification
    :param notification_cluster_port: Cluster port passed to the
        reserve_list lookup that is used when no non-deleted reserve_list
        entry is returned for the notification's ``recover_to`` host.
    :param retry_mode: When ``False`` the reserve host is resolved and a
        recovery_instance thread is started for every VM. When ``True``
        the reserve handling is skipped and no thread is started; the
        vm_list records are only created and logged.
    :return: None. Exceptions are logged and swallowed here.
    """

    def start_recovery_thread(vm_uuid, primary_id, semaphore):
        """Log and start one rc_worker.recovery_instance thread."""
        LOG.info("Run thread rc_worker.recovery_instance."
                 + " vm_uuid=" + vm_uuid
                 + " primary_id=" + str(primary_id))
        thread_name = self.rc_util.make_thread_name(VM_LIST, primary_id)
        threading.Thread(
            target=self.rc_worker.recovery_instance,
            name=thread_name,
            args=(vm_uuid, primary_id, semaphore)).start()

    try:
        self.rc_config.set_request_context()
        db_engine = dbapi.get_engine(self.rc_config)
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Count vm_list
        if len(vm_list) == 0:
            LOG.info("There is no instance in "
                     + notification_hostname + ".")
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        LOG.info("Do get_all_notification_list_by_id_for_update.")
        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        LOG.info("Succeeded in "
                 "get_all_notification_list_by_id_for_update. "
                 "Return_value = " + str(result))
        recover_to = result.pop().recover_to

        if retry_mode is False:
            LOG.info("Do get_all_reserve_list_by_hostname_not_deleted.")
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            LOG.info("Succeeded in "
                     "get_all_reserve_list_by_hostname_not_deleted. "
                     "Return_value = " + str(cnt))

            if not cnt:
                # Nothing usable for the recorded recover_to host: fall
                # back to a lookup by cluster port.
                LOG.info(
                    "Do get_one_reserve_list_by_cluster_port_for_update.")
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session,
                    notification_cluster_port,
                    notification_hostname)
                LOG.info("Succeeded in "
                         "get_one_reserve_list_by_cluster_port_for_update. "
                         "Return_value = " + str(cnt))

                if not cnt:
                    LOG.warning("The reserve node not exist in "
                                "reserve_list DB, "
                                "so do not recover instances.")
                    # The session must be passed first, like the other
                    # update_notification_list_db calls; without it the
                    # call failed and progress was never set to 3.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                recover_to = cnt.pop().hostname
                update_at = datetime.datetime.now()
                LOG.info("Do "
                         "update_notification_list_by_notification_id"
                         "_recover_to.")
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to)
                LOG.info("Succeeded in "
                         "update_notification_list_by_notification_id"
                         "_recover_to.")

            delete_at = datetime.datetime.now()
            LOG.info("Do update_reserve_list_by_hostname_as_deleted.")
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)
            LOG.info("Succeeded in "
                     "update_reserve_list_by_hostname_as_deleted.")

        # create semaphore (Multiplicity is get from config.)
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        in_retry_mode = retry_mode is True
        incomplete_list = []
        for _ in range(int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    if in_retry_mode:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        LOG.info("RETRY MODE. Skip recovery_instance thread"
                                 + " vm_uuide=" + vm_uuid
                                 + " notification_id=" + notification_id)
                    else:
                        start_recovery_thread(
                            vm_uuid, primary_id, sem_recovery_instance)
                elif not in_retry_mode:
                    # No vm_list record could be created; try again on
                    # the next pass.
                    incomplete_list.append(vm_uuid)

            if not incomplete_list:
                break
            vm_list = incomplete_list
            greenthread.sleep(int(recovery_retry_interval))

        # VMs still incomplete after the last pass get a vm_list record
        # inserted directly, and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)
            start_recovery_thread(
                vm_uuid, primary_id, sem_recovery_instance)

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)

    except Exception:
        # Log and swallow any error. ``Exception`` replaces the former
        # bare ``except:`` so SystemExit/KeyboardInterrupt are no longer
        # trapped; it also covers the old, identical KeyError handler.
        error_type, error_value, traceback_ = sys.exc_info()
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in traceback.format_tb(traceback_):
            LOG.error(tb)