def get_cpu_info(self, add_overall_usage=False):
    """Update and return CPU information in specific format.

    Args:
        add_overall_usage (bool): When True, return a one-element list
            wrapping per-CPU data together with overall usage, CPU count
            and a last-updated timestamp; otherwise return only the
            per-CPU health list.

    Returns:
        list: Per-CPU health dicts, or the aggregated wrapper list.
    """
    per_cpu_data = []
    cpu_present = self.get_cpu_list("present")
    cpu_online = self.get_cpu_list("online")
    # Usage samples are reported only for online CPUs, so key the
    # usage map by online CPU id.
    cpu_usage = self.get_cpu_usage(percpu=True)
    cpu_usage_dict = dict(zip(cpu_online, cpu_usage))
    overall_cpu_usage = list(psutil.getloadavg())
    cpu_count = len(cpu_present)
    overall_usage = {
        "current": self.get_cpu_usage(percpu=False),
        "1_min_avg": overall_cpu_usage[0],
        "5_min_avg": overall_cpu_usage[1],
        "15_min_avg": overall_cpu_usage[2]
    }
    # NOTE(review): assumes present CPUs are numbered 0..cpu_count-1;
    # a sparse "present" list would mislabel CPUs here — confirm.
    for cpu_id in range(cpu_count):
        uid = f"CPU-{cpu_id}"
        cpu_dict = self.get_health_template(uid, is_fru=False)
        online_status = "Online" if cpu_id in cpu_online else "Offline"
        health_status = "OK" if online_status == "Online" else "NA"
        # Guard against a CPU reported online without a usage sample
        # (avoids KeyError on a mismatched online/usage snapshot).
        usage = "NA" if health_status == "NA" \
            else cpu_usage_dict.get(cpu_id, "NA")
        specifics = [{"cpu_usage": usage, "state": online_status}]
        self.set_health_data(cpu_dict, status=health_status,
                             specifics=specifics)
        per_cpu_data.append(cpu_dict)
    cpu_data = [{
        "overall_usage": overall_usage,
        "cpu_count": cpu_count,
        "last_updated": int(time.time()),
        "cpus": per_cpu_data
    }]
    if not add_overall_usage:
        cpu_data = per_cpu_data
    logger.debug(self.log.svc_log(f"CPU Health Data:{cpu_data}"))
    return cpu_data
def check_and_conclude_initialization(self): logger.debug("Begin {}.conclude_initializatio()".format( self.__class__.__name__)) # Check that self.lock is held by the caller if self.lock.acquire(blocking=False): self.lock.release() logger.error("SensorThread.check_and_conclude_initialization() called"\ " without acquiring lock. Returning immediately") return if self.status != SensorThreadState.WAITING: return # It is possible that self.event() is not called at all. if self.num_failed_dependees: self.deps_status = DependencyState.DEPS_FAILED elif not self.remaining_dependees: self.deps_status = DependencyState.DEPS_SUCCESS definitely_failed = \ self.deps_status == DependencyState.DEPS_FAILED or \ self.init_status == InitState.INIT_FAILED or \ self.has_timed_out definitely_succeeded = \ self.deps_status == DependencyState.DEPS_SUCCESS and \ self.init_status == InitState.INIT_SUCCESS if (definitely_failed): self.status = SensorThreadState.FAILED elif (definitely_succeeded): self.status = SensorThreadState.RUNNING # else it remains as waiting if self.status != SensorThreadState.WAITING: for d in self.waiting_dependers: d.event(self, self.status == SensorThreadState.RUNNING) logger.debug("End {}.conclude_initializatio() with state {}".format( self.__class__.__name__, self.status))
def connect_to_prop_changed_signal(self, service):
    """
    Bind the service to a signal('PropertiesChanged').

    Fetch the service unit from systemd and its state, substate,
    pid etc. Bind the service to the signal which will be triggered
    whenever the service changes its state/substate. Also raise an
    alert if service is in failed/inactive state.

    Returns:
        None on success, or the DBusException on failure.
    """
    try:
        # Snapshot of current status from systemd; also yields the unit
        # object used for the signal subscription below.
        unit, _, state, substate, pid = self.get_service_status(
            service=service)
        self.update_status_local_cache(service, state, substate, pid)
        Iunit2 = Interface(
            unit,
            dbus_interface='org.freedesktop.systemd1.Manager')
        # Default arg p=unit binds the unit at definition time so each
        # callback reports against its own unit object (avoids the
        # late-binding closure pitfall).
        Iunit2.connect_to_signal(
            'PropertiesChanged',
            lambda a, b, c, p=unit: self.on_prop_changed(a, b, c, p),
            dbus_interface=PROPERTIES_IFACE)
        logger.debug(f"{service}({pid}) state is {state}:{substate}")
        # Transitional states: remember for later re-checking.
        if state in ["activating", "reloading", "deactivating"]:
            self.not_active_services[service] = \
                [self.current_time(), "N/A", "N/A"]
        elif state != "active":
            # Service already failed/inactive at startup: alert now.
            self.failed_services.append(service)
            self.raise_alert(service, "N/A", state, "N/A", substate,
                             "N/A", pid, 0)
            logger.error(
                f"{service} is not active initially. state = {state}:{substate}"
            )
        return None
    except DBusException as err:
        return err
def get_fanmodules_info(self):
    """Return fan modules health data.

    Returns:
        list: One health template dict per fan module; empty when the
        enclosure gives no fan-module response.
    """
    response = []
    fanmodules_data = self.get_realstor_encl_data("fan-modules")
    if fanmodules_data:
        for fan_module in fanmodules_data:
            uid = fan_module.get('durable-id', 'NA')
            health = fan_module.get('health')
            fan_module_resp = self.get_health_template(uid, is_fru=True)
            # Tolerate a module entry without a 'fan' list instead of
            # failing the whole response with a KeyError (all sibling
            # lookups here already use .get defaults).
            specifics = [
                self.get_fan_specfics(fan)
                for fan in fan_module.get('fan', [])
            ]
            self.set_health_data(fan_module_resp, health,
                                 specifics=specifics)
            response.append(fan_module_resp)
        logger.debug(
            self.log.svc_log(f"Fan modules health Data:{response}"))
    else:
        logger.error(
            self.log.svc_log("No response received from fan modules"))
    return response
def run(self): """Run the sensor on its own thread""" # Check for debug mode being activated self._read_my_msgQ_noWait() try: with self._iem_log_file_lock: self._iem_logs = open(self._log_file_path) self._create_file(self._timestamp_file_path) with open(self._timestamp_file_path, "r") as timestamp_file: last_processed_log_timestamp = timestamp_file.read().strip() # Read and send unprocessed messages with self._iem_log_file_lock: for iem_log in self._iem_logs: log = iem_log.rstrip() log_timestamp = log[:log.index(" ")] if not last_processed_log_timestamp or log_timestamp > last_processed_log_timestamp: self._process_iem(log) # Reset debug mode if persistence is not enabled self._disable_debug_if_persist_false() # Read new messages self._read_iem() except IOError as io_error: if io_error.errno == errno.ENOENT: logger.debug(f"IEMSensor, self.run, {io_error.args} {io_error.filename}") elif io_error.errno == errno.EACCES: logger.error(f"IEMSensor, self.run, {io_error.args} {io_error.filename}") else: logger.error(f"IEMSensor, self.run, {io_error.args} {io_error.filename}") self._scheduler.enter(10, self._priority, self.run, ()) except Exception as exception: logger.error(f"IEMSensor, self.run, {exception.args}") self._scheduler.enter(10, self._priority, self.run, ())
def _process_msg(self, jsonMsg):
    """Parses the incoming message and handles appropriately"""
    self._log_debug(f"RealStorActuatorMsgHandler, _process_msg, jsonMsg: {jsonMsg}")
    # Accept either a dict or a JSON-encoded string.
    if isinstance(jsonMsg, dict) is False:
        jsonMsg = json.loads(jsonMsg)
    # Parse out the uuid so that it can be sent back in Ack message
    uuid = None
    if jsonMsg.get("sspl_ll_msg_header").get("uuid") is not None:
        uuid = jsonMsg.get("sspl_ll_msg_header").get("uuid")
    self._log_debug(f"_processMsg, uuid: {uuid}")
    logger.debug(f"RealStorActuatorMsgHandler: _process_msg: jsonMsg: {jsonMsg}")
    if jsonMsg.get("actuator_request_type").get("storage_enclosure").get("enclosure_request") is not None:
        enclosure_request = jsonMsg.get("actuator_request_type").get("storage_enclosure").get("enclosure_request")
        self._log_debug(f"_processMsg, enclosure_request: {enclosure_request}")
        logger.debug(f"RealStorActuatorMsgHandler: _process_msg: INSIDE: jsonMsg: {jsonMsg}")
        # Parse out the request field in the enclosure_request;
        # form is "<request>:<fru>".
        (request, fru) = enclosure_request.split(":", 1)
        request = request.strip()
        fru = fru.strip()
        # Lazily import/instantiate the actuator on first use.
        if self._real_stor_actuator is None:
            try:
                from actuators.impl.generic.realstor_encl import RealStorActuator
                self._real_stor_actuator = RealStorActuator()
            except ImportError as e:
                logger.warn("RealStor Actuator not loaded")
                return
        # Perform the request and get the response
        real_stor_response = self._real_stor_actuator.perform_request(jsonMsg)
        self._log_debug(f"_process_msg, RealStor response: {real_stor_response}")
        # Ack carries the original uuid so the requester can correlate.
        json_msg = RealStorActuatorMsg(real_stor_response, uuid).getJson()
        self._write_internal_msgQ(RabbitMQegressProcessor.name(), json_msg)
def on_prop_changed(self, interface, changed_properties,
                    invalidated_properties, unit):
    """Handler to process the service state change signal."""
    # Fetch the service's current status snapshot from systemd.
    _, service, state, substate, pid = self.get_service_status(unit=unit)
    cached = self.service_status[service]
    prev_state = cached["state"]
    prev_substate = cached["substate"]
    prev_pid = cached["pid"]
    logger.debug(f"Event for {service}, properties changed from "\
        f"{prev_state}:{prev_substate} to {state}:{substate}")
    # Substate-only changes are ignored; act only on state transitions.
    if prev_state == state:
        return
    logger.info(f"{service} changed state from " + \
        f"{prev_state}:{prev_substate} to {state}:{substate}")
    self.update_status_local_cache(service, state, substate, pid)
    self.action_per_transition(service, prev_state, state,
                               prev_substate, substate, prev_pid, pid)
def get_disks_info(self):
    """Update and return server drive information in specific format.

    Returns:
        list: Health template dicts for every server disk, sorted by
        serial number.
    """
    disks = []
    for disk in Disk.get_disks():
        # Prefer the device path as uid; fall back to the disk id.
        uid = disk.path if disk.path else disk.id
        disk_health = self.get_health_template(uid, True)
        health_data = disk.get_health()
        health = "OK" if (health_data['SMART_health'] == "PASSED") else "Fault"
        # Serial number is the last '-'-separated token of the disk id.
        serial_number = disk.id.split("-")[-1] if disk.id else "NA"
        health_data.update({"serial_number": serial_number})
        self.set_health_data(disk_health, health,
                             specifics=[{"SMART": health_data}])
        disks.append(disk_health)
    # Sort disk list by serial_number.  (Removed a dead
    # `sort_key_path = None` initialization that was immediately
    # overwritten here.)
    sort_key_path = self.resource_indexing_map["hw"]["disk"]
    disks = MonUtils.sort_by_specific_kv(disks, sort_key_path, self.log)
    logger.debug(self.log.svc_log(f"Disk Health Data:{disks}"))
    return disks
def _get_nwalert(self, interfaces): """ Get network interfaces with fault/OK state for each interface. Parameters: interfaces(list) : List of availabel network interfaces Returns: Dictionary of network interfaces having key as interface name and value as fault state. Return type: dict """ nw_alerts = {} try: for interface in interfaces: interface_name = interface.get("ifId") nw_status = interface.get("nwStatus") logger.debug("{0}:{1}".format(interface_name, nw_status)) # fault detected (Down/UNKNOWN, Up/UNKNOWN to Down, Up/Down to UNKNOWN) if nw_status == 'DOWN' or nw_status == 'UNKNOWN': if self.prev_nw_status.get(interface_name) != nw_status: if self.prev_nw_status.get(interface_name) and self.prev_nw_status.get(interface_name) == 'UP': logger.warning(f"Network connection fault is detected for interface:'{interface_name}'") nw_alerts[interface_name] = self.FAULT self.prev_nw_status[interface_name] = nw_status # fault resolved (Down to Up) elif nw_status == 'UP': if self.prev_nw_status.get(interface_name) != nw_status: if self.prev_nw_status.get(interface_name): logger.info(f"Network connection fault is resolved for interface:'{interface_name}'") nw_alerts[interface_name] = self.FAULT_RESOLVED self.prev_nw_status[interface_name] = nw_status else: logger.warning(f"Network connection state is:'{nw_status}', for interface:'{interface_name}'") except Exception as e: logger.error(f"Exception occurs while checking for network alert condition:'{e}'") logger.debug("nw_alerts existed for:{}".format(nw_alerts)) return nw_alerts
def get_psu_info(self):
    """Update and return PSUs information in specific format."""
    data = []
    for psu in self.get_realstor_encl_data("power-supplies"):
        uid = psu.get("durable-id")
        status = psu.get("health", "NA")
        description = psu.get("description")
        recommendation = psu.get("health-recommendation")
        # Location plus electrical readings, "NA" when a key is absent.
        keys = ("location", "dc12v", "dc5v", "dc33v",
                "dc12i", "dc5i", "dctemp")
        specifics = [{key: psu.get(key, "NA") for key in keys}]
        psu_dict = self.get_health_template(uid, is_fru=True)
        self.set_health_data(psu_dict, status, description,
                             recommendation, specifics)
        data.append(psu_dict)
    logger.debug(self.log.svc_log(f"PSU Health Data:{data}"))
    return data
def _update_raid_device_file(self, device):
    """Write 'check' into the RAID device's sync_action file.

    Retries up to MAX_RETRIES times with a 1s pause; returns "success"
    once the write goes through, "failed" otherwise.  Re-raises any
    unexpected exception after logging it.
    """
    try:
        result = "failed"
        attempt = 0
        raid_dir = RaidDataConfig.DIR.value
        sync_action_file = RaidDataConfig.SYNC_ACTION_FILE.value
        max_retries = RaidDataConfig.MAX_RETRIES.value
        # The command is invariant across retries, so build it once.
        check_cmd = ("echo 'check' |sudo tee " + raid_dir + device
                     + sync_action_file + " > /dev/null")
        while attempt <= max_retries:
            logger.debug(f'Executing CHECK_COMMAND:{check_cmd}')
            response, error = self._run_command(check_cmd)
            if not error:
                logger.debug("RAID device state is changed to 'check' "
                             f"with response : {response}")
                result = "success"
                break
            logger.warn(f"Failed in executing command:{error}.")
            attempt += 1
            time.sleep(1)  # brief back-off before retrying
        return result
    except Exception as ae:
        logger.error(f"Failed to update RAID File. ERROR:{str(ae)}")
        raise
def get_controllers_info(self):
    """Update and return controller information in specific format"""
    data = []
    controllers = self.get_realstor_encl_data("controllers")
    for controller in controllers:
        uid = controller.get("durable-id")
        status = controller.get("health", "NA")
        description = controller.get("description")
        recommendation = controller.get("health-recommendation")
        # (output key, response key) pairs; values default to "NA".
        field_map = (
            ("serial-number", "serial-number"),
            ("disks", "disks"),
            ("virtual-disks", "virtual-disks"),
            ("model", "model"),
            ("part-number", "part-number"),
            ("fw", "sc-fw"),
            ("location", "position"),
        )
        specifics = [{out_key: controller.get(src_key, "NA")
                      for out_key, src_key in field_map}]
        controller_dict = self.get_health_template(uid, is_fru=True)
        self.set_health_data(controller_dict, status, description,
                             recommendation, specifics)
        data.append(controller_dict)
    logger.debug(self.log.svc_log(f"Contollers Health Data:{data}"))
    return data
def _run_command(self, command): """Run the command and get the response and error returned""" logger.debug(f"_run_command: {command}") process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) response, error = process.communicate() if response: logger.debug(f"_run_command, response: {str(response)}") if error: logger.debug(f"_run_command: error: {str(error)}") return response.decode().rstrip('\n'), error.decode().rstrip('\n')
def _check_mismatch_count(self, device):
    """Read the RAID mismatch_cnt file for `device` and classify it.

    Returns "success" when the count equals the expected 'no mismatch'
    response (clearing any previously persisted fault and restoring the
    configured scan frequency), "failed" otherwise.  Re-raises any
    unexpected exception after logging it.
    """
    try:
        status = None
        mismatch_cnt_file = RaidDataConfig.MISMATCH_COUNT_FILE.value
        MISMATCH_COUNT_COMMAND = 'cat ' + self.raid_dir + device +\
            mismatch_cnt_file
        logger.debug('Executing MISMATCH_CNT_COMMAND:{}'.format(
            MISMATCH_COUNT_COMMAND))
        response, error = self._run_command(MISMATCH_COUNT_COMMAND)
        if error:
            logger.error("Error in cmd{} in raid health monitor".format(
                MISMATCH_COUNT_COMMAND))
        if response == RaidDataConfig.MISMATCH_COUNT_RESPONSE.value:
            logger.debug("No mismatch count is found")
            status = "success"
            # Record the clean result for later inspection.
            with open(self.output_file, 'a') as raid_file:
                raid_file.write(
                    RaidDataConfig.MISMATCH_COUNT_RESPONSE.value)
            # If a mismatch fault was persisted earlier for this device,
            # raise the corresponding fault-resolved alert and restore
            # the configured (slower) scan frequency.
            fault_status_file = self.DEFAULT_RAID_DATA_PATH + device + "_" + RaidDataConfig.RAID_MISMATCH_FAULT_STATUS.value
            if os.path.exists(fault_status_file):
                with open(fault_status_file, 'r') as fs:
                    data = fs.read().rstrip()
                # Persisted record format is "<device>:<state>".
                if self.FAULT in data:
                    faulty_device = data.split(":")[0].rstrip()
                    if device == faulty_device:
                        self.alert_type = self.FAULT_RESOLVED
                        self._alert_msg = "RAID disks present in %s RAID array are synchronized." % device
                        self._send_json_msg(self.alert_type, device,
                                            self._alert_msg)
                        self._update_fault_state_file(
                            device, self.FAULT_RESOLVED,
                            fault_status_file)
                        self._scan_frequency = Conf.get(
                            SSPL_CONF,
                            f"{self.RAIDIntegritySensor}>{self.SCAN_FREQUENCY}",
                            self.DEFAULT_SCAN_FREQUENCY)
                        # Never scan more slowly than the floor allows.
                        self._scan_frequency = max(
                            self._scan_frequency, self.MIN_SCAN_FREQUENCY)
        else:
            status = "failed"
            logger.debug(
                "Mismatch found in {} file in raid_integrity_data!".format(
                    mismatch_cnt_file))
        return status
    except Exception as ae:
        logger.error(
            "Failed in checking mismatch_cnt in RAID file. ERROR:{}".
            format(str(ae)))
        raise
def run(self):
    """Run the sensor on its own thread"""
    logger.debug("Consul accumulated messages processing started")
    if not self._is_my_msgQ_empty():
        # Check for shut down message from sspl_ll_d and set a flag to shutdown
        # once our message queue is empty
        self._jsonMsg, _ = self._read_my_msgQ()
        if self._jsonMsg.get("message").get(
                "actuator_response_type") is not None and \
            self._jsonMsg.get("message").get(
                "actuator_response_type").get(
                "thread_controller") is not None and \
            self._jsonMsg.get("message").get(
                "actuator_response_type").get("thread_controller").get(
                "thread_response") == \
                "SSPL-LL is shutting down":
            logger.info("EgressAccumulatedMsgsProcessor, run, received"
                        "global shutdown message from sspl_ll_d")
            self.shutdown()
    try:
        # TODO : Fix accumulated message processor when message bus changes are available to
        # error out in case of failure (EOS-17626)
        if not self.store_queue.is_empty():
            logger.debug(
                "Found accumulated messages, trying to send again")
            while not self.store_queue.is_empty():
                message = self.store_queue.get()
                # Stored messages may come back as bytes; normalize.
                if isinstance(message, bytes):
                    message = message.decode()
                dict_msg = json.loads(message)
                if "actuator_response_type" in dict_msg["message"]:
                    # Drop actuator responses older than MSG_TIMEOUT —
                    # they are no longer useful to the requester.
                    event_time = dict_msg["message"] \
                        ["actuator_response_type"]["info"]["event_time"]
                    time_diff = int(time.time()) - int(event_time)
                    if time_diff > self.MSG_TIMEOUT:
                        continue
                if "sensor_response_type" in dict_msg["message"]:
                    logger.info(f"Publishing Accumulated Alert: {message}")
                # NOTE(review): send reconstructed as unconditional for
                # every non-expired message (matching the RabbitMQ
                # variant of this processor) — confirm nesting.
                self._producer.send([message])
    except MessageBusError as e:
        logger.error("EgressAccumulatedMsgsProcessor, run, %r" % e)
    except Exception as e:
        logger.error(e)
    finally:
        logger.debug("Consul accumulated processing ended")
        # Re-run this processor every 30 seconds.
        self._scheduler.enter(30, self._priority, self.run, ())
def run(self):
    """Run the sensor on its own thread"""
    logger.debug("Consul accumulated messages processing started")
    if not self._is_my_msgQ_empty():
        # Check for shut down message from sspl_ll_d and set a flag to shutdown
        # once our message queue is empty
        self._jsonMsg, _ = self._read_my_msgQ()
        if self._jsonMsg.get("message").get("actuator_response_type") is not None and \
            self._jsonMsg.get("message").get("actuator_response_type").get("thread_controller") is not None and \
            self._jsonMsg.get("message").get("actuator_response_type").get("thread_controller").get("thread_response") == \
                "SSPL-LL is shutting down":
            logger.info("RabbitMQEgressAccumulatedMsgsProcessor, run, received" \
                "global shutdown message from sspl_ll_d")
            self.shutdown()
    try:
        if not self.store_queue.is_empty():
            logger.debug(
                "Found accumulated messages, trying to send again")
            # (Re)connect once per drain pass and reuse the channel.
            self._connection._establish_connection()
            msg_props = pika.BasicProperties()
            msg_props.content_type = "text/plain"
            while not self.store_queue.is_empty():
                message = self.store_queue.get()
                dict_msg = json.loads(message)
                if "actuator_response_type" in dict_msg["message"]:
                    # Drop actuator responses older than MSG_TIMEOUT.
                    event_time = dict_msg["message"][
                        "actuator_response_type"]["info"]["event_time"]
                    time_diff = int(time.time()) - int(event_time)
                    if time_diff > self.MSG_TIMEOUT:
                        continue
                self._connection.publish(exchange=self._exchange_name,
                                         routing_key=self._routing_key,
                                         properties=msg_props,
                                         body=message)
                if "sensor_response_type" in dict_msg["message"]:
                    logger.info(f"Publishing Accumulated Alert: {message}")
            # NOTE(review): cleanup reconstructed as running after the
            # drain loop, inside the same branch that established the
            # connection — confirm nesting.
            self._connection.cleanup()
    except connection_exceptions as e:
        logger.error(connection_error_msg.format(e))
    except Exception as e:
        logger.error(e)
    finally:
        logger.debug("Consul accumulated processing ended")
        # Re-run this processor every 30 seconds.
        self._scheduler.enter(30, self._priority, self.run, ())
def get_effective_monitored_services():
    """Get platform type based monitored services."""
    # Align node type as it is given in sspl.conf SERVICEMONITOR section
    node_type = Conf.get(GLOBAL_CONF, NODE_TYPE_KEY).lower()
    if node_type in ("virtual", "vm"):
        node_type = "vm"
    else:
        node_type = "hw"
    monitored = Conf.get(
        SSPL_CONF, f'{SERVICEMONITOR}>{MONITORED_SERVICES}', [])
    excluded = Conf.get(
        SSPL_CONF, f'{SERVICEMONITOR}>{EXCLUDED_SERVICES}>{node_type}', [])
    # Effective set = monitored minus the platform-specific exclusions.
    effective = list(set(monitored) - set(excluded))
    logger.debug("Monitored services list, %s" % monitored)
    logger.debug("Excluded monitored services list, " \
        "%s for environment %s" %(excluded, node_type))
    logger.debug("Effective monitored services list, " \
        "%s" % effective)
    return effective
def _raid_health_monitor(self):
    """Run one health pass over all RAID devices.

    For each device: trigger a 'check' scrub, wait for the array to be
    idle, then inspect the mismatch count, raising/persisting a fault
    alert when disks need synchronization.

    Raises:
        Exception: wrapping any failure in the monitoring pass.
    """
    try:
        devices = self._get_devices()
        if len(devices) == 0:
            return
        logger.debug("Fetched devices:{}".format(devices))
        for device in devices:
            # Update the state as 'check' for RAID device file
            result = self._update_raid_device_file(device)
            if result == "failed":
                self._retry_execution(self._update_raid_device_file,
                                      device)
            logger.info("RAID device state is changed to 'check'")
            # Check RAID device array state is 'idle' or not
            result = self._check_raid_state(device)
            if result == "failed":
                logger.warn(
                    "'Idle' state not found for RAID device:{}".format(
                        device))
                # Retry to check RAID state
                self._retry_execution(self._check_raid_state, device)
            logger.info(
                "'idle' state is found in Raid device:{}.".format(device))
            # Check Mismatch count in RAID device files.
            result = self._check_mismatch_count(device)
            if result == "failed":
                # Persist RAID device fault state and send alert
                fault_status_file = (
                    self.DEFAULT_RAID_DATA_PATH + device + "_"
                    + RaidDataConfig.RAID_MISMATCH_FAULT_STATUS.value)
                if os.path.exists(fault_status_file):
                    with open(fault_status_file, 'r') as fs:
                        data = fs.read().rstrip()
                    # Re-raise only if the persisted state says the
                    # previous fault was resolved (avoid duplicates).
                    if self.FAULT_RESOLVED in data:
                        self._raise_mismatch_fault(device,
                                                   fault_status_file)
                else:
                    self._raise_mismatch_fault(device, fault_status_file)
                # Retry to check mismatch_cnt
                self._retry_execution(self._check_mismatch_count, device)
            logger.debug(
                "No mismatch count is found in Raid device:{}".format(
                    device))
    except Exception as ae:
        raise Exception(f"Failed in monitoring RAID health, {ae}")

def _raise_mismatch_fault(self, device, fault_status_file):
    """Send a mismatch fault alert for `device`, persist the fault state
    and switch to the fast scan frequency.  (Extracted from the two
    duplicated branches; the alert text was corrupted by a line break in
    the original second branch and is restored to match the first.)"""
    self.alert_type = self.FAULT
    self._alert_msg = "RAID disks present in %s RAID array"\
        ", needs synchronization. If fault persists for "\
        "more than 2 days, Please contact Seagate support."%device
    self._send_json_msg(self.alert_type, device, self._alert_msg)
    self._update_fault_state_file(device, self.FAULT, fault_status_file)
    self._scan_frequency = self.MIN_SCAN_FREQUENCY
def action_per_transition(self, service, prev_state, state,
                          prev_substate, substate, prev_pid, pid):
    """Take action according to the state change of the service."""
    # alert_info_index : index pointing to alert_info table from
    # ServiceMonitor:raise_alerts() representing alert
    # description, type, impact etc. to be sent.
    # -1 = no alert, 0 = fault, 2 = fault resolved, 3 = unhandled.
    alert_info_index = -1
    logger.debug(f"ServiceMonitor:action_per_transition for {service} : " + \
        f"({prev_state}:{prev_substate}) -> ({state}:{substate})")
    if prev_state in ["active", "reloading"]:
        if state == "active":
            # reloading -> active
            # NOTE(review): unconditional pop() raises KeyError if the
            # service is not tracked — confirm callers guarantee this.
            self.not_active_services.pop(service)
            if service in self.failed_services:
                self.failed_services.remove(service)
                alert_info_index = 2
        elif state != "failed":
            # active -> deactivating/inactive/reloading/activating
            # or
            # reloading -> deactivating/inactive/activating
            self.not_active_services[service] = \
                [self.current_time(), prev_state, prev_substate]
        elif state == "failed":
            # active/reloading -> failed
            if service not in self.failed_services:
                self.failed_services.append(service)
                alert_info_index = 0
    elif prev_state == "deactivating":
        if state in ["inactive", "activating"]:
            # deactivating -> inactive/activating
            if service not in self.not_active_services:
                self.not_active_services[service] = \
                    [self.current_time(), prev_state, prev_substate]
        elif state == "failed":
            # deactivating -> failed
            if service not in self.failed_services:
                self.failed_services.append(service)
                alert_info_index = 0
        elif state == "active":
            # deactivating -> active
            if service in self.not_active_services:
                self.not_active_services.pop(service)
            if service in self.failed_services:
                self.failed_services.remove(service)
                alert_info_index = 2
        else:
            alert_info_index = 3
    elif prev_state in ["inactive", "failed"]:
        if state == "activating":
            # inactive/failed -> activating
            if service not in self.not_active_services:
                self.not_active_services[service] = \
                    [self.current_time(), prev_state, prev_substate]
        elif state == "active":
            # inactive/failed -> active
            if service in self.failed_services:
                self.failed_services.remove(service)
                alert_info_index = 2
            if service in self.not_active_services:
                self.not_active_services.pop(service)
        elif state == "failed":
            # inactive -> failed
            if service not in self.failed_services:
                self.failed_services.append(service)
                alert_info_index = 0
        else:
            alert_info_index = 3
    elif prev_state == "activating":
        if service in self.not_active_services:
            self.not_active_services.pop(service)
        if state in ["inactive", "deactivating"]:
            # activating -> inactive/deactivating
            self.failed_services.append(service)
            alert_info_index = 0
        elif state == "active":
            # activating -> active
            if service in self.failed_services:
                self.failed_services.remove(service)
                alert_info_index = 2
            else:
                # its a restart.
                pass
        elif state == "failed":
            # activating -> failed
            if service not in self.failed_services:
                self.failed_services.append(service)
                alert_info_index = 0
        else:
            alert_info_index = 3
    if alert_info_index == 3:
        logger.warning(f"{service} service state transition from "\
            f"{prev_state} to {state} is not handled.")
    if alert_info_index != -1:
        self.raise_alert(service, prev_state, state,
                         prev_substate, substate, prev_pid, pid,
                         alert_info_index)
def run(self):
    """Register services for state-change signals and run the dbus loop.

    Services whose systemd Unit cannot be found at startup raise an
    alert and are retried every polling interval.
    """
    logger.info(f"Monitoring Services : {self.services_to_monitor}")
    try:
        # Register all the services to signal of 'PropertiesChanged' and
        # raise an alert if some service is not active on initially or if
        # Unit is not found for the service
        services_to_monitor_copy = self.services_to_monitor.copy()
        for service in services_to_monitor_copy:
            err = self.connect_to_prop_changed_signal(service)
            if err:
                self.raise_alert(service, "N/A", "N/A", "N/A", "N/A",
                                 "N/A", "N/A", 0)
                logger.error(
                    f"{service} is not active initially. \n Error {err}")
            else:
                self.services_to_monitor.remove(service)
        logger.debug(f"failed_services : {self.failed_services}")
        logger.debug(f"services_to_monitor : {self.services_to_monitor}")
        # Retrieve the main loop which will be called in the run method
        self._loop = GLib.MainLoop()
        # Initialize the gobject threads and get its context
        GLib.threads_init()
        context = self._loop.get_context()
        time_to_check_lists = self.current_time() + self.polling_frequency
        # WHILE LOOP FUNCTION : every second we check for
        # properties change event if any generated (using context
        # iteration) and after a delay of polling frequency we
        # check for inactive processes.
        while self.is_running():
            # At interval of 'thread_sleep' check for events occured for
            # registered services and process them(call on_pro_changed())
            context.iteration(False)
            time.sleep(self.thread_sleep)
            # At interval of 'polling_freqency' process unregistered
            # services and services with not-active (intermidiate) state.
            if time_to_check_lists <= self.current_time():
                time_to_check_lists = self.current_time() + \
                    self.polling_frequency
                # Try to bind the enabled services on the node to the
                # signal whose Unit was earlier not found. On successfully
                # registering for service state change signal, remove from
                # local list as monitoring enabled through SystemD
                # and to avoid re-registration.
                services_to_monitor_copy = self.services_to_monitor.copy()
                for service in services_to_monitor_copy:
                    if not self.connect_to_prop_changed_signal(service):
                        self.services_to_monitor.remove(service)
                # Check for services in intermidiate state(not active)
                self.check_notactive_services()
        logger.info("ServiceMonitor gracefully breaking out " +\
            "of dbus Loop, not restarting.")
    except GLib.Error as err:
        raise ThreadException(
            self.SENSOR_NAME,
            "Ungrecefully breaking out of GLib.MainLoop() with error: %s"
            % err)
    except DBusException as err:
        raise ThreadException(
            self.SENSOR_NAME,
            "Ungracefully breaking out of dbus loop with error: %s" % err)
    except Exception as err:
        raise ThreadException(self.SENSOR_NAME,
            "Ungracefully breaking out of ServiceMonitor:run() "\
            "with error: %s" % err)
def _process_msg(self, jsonMsg):
    """Parses the incoming message and hands off to the appropriate logger
    """
    logger.debug(f"_process_msg, jsonMsg: {jsonMsg}")
    # Accept either a dict or a JSON-encoded string.
    if isinstance(jsonMsg, dict) is False:
        jsonMsg = json.loads(jsonMsg)
    # Parse out the uuid so that it can be sent back in Ack message
    uuid = None
    if jsonMsg.get("sspl_ll_msg_header") is not None and \
       jsonMsg.get("sspl_ll_msg_header").get("uuid") is not None:
        uuid = jsonMsg.get("sspl_ll_msg_header").get("uuid")
    logger.debug(f"_processMsg, uuid: {uuid}")
    # Handle service start, stop, restart, status requests
    if "actuator_request_type" in jsonMsg and \
       "service_controller" in jsonMsg["actuator_request_type"]:
        logger.debug("_processMsg, msg_type: service_controller")
        service_name = jsonMsg.get("actuator_request_type") \
            .get("service_controller").get("service_name")
        service_request = jsonMsg.get("actuator_request_type") \
            .get("service_controller").get("service_request")
        request = f"{service_request}:{service_name}"
        # Reject requests for services outside the monitored set.
        if service_name not in self.monitored_services:
            logger.error(f"{service_name} - service not monitored")
            msg = ("Check if supplied service name is valid, %s is not "
                   "monitored or managed." % service_name)
            self.send_error_response(service_request, service_name,
                                     msg, errno.EINVAL)
            return
        elif service_request not in ["disable", "enable"]:
            # Any other request requires the service to be enabled first.
            status = self._dbus_service.is_enabled(service_name)
            if status == "disabled":
                logger.error(f"{service_name} - service is disabled")
                msg = ("%s is disabled, enable request needed before "
                       "current - %s request can be processed."
                       % (service_name, service_request))
                self.send_error_response(service_request, service_name,
                                         msg, errno.EPERM)
                return
        # If the state is INITIALIZED, We can assume that actuator is
        # ready to perform operation.
        if actuator_state_manager.is_initialized("Service"):
            logger.debug(f"_process_msg, service_actuator name: \
                {self._service_actuator.name()}")
            self._execute_request(self._service_actuator, jsonMsg, uuid)
        # If the state is INITIALIZING, need to send message
        elif actuator_state_manager.is_initializing("Service"):
            # This state will not be reached. Kept here for consistency.
            logger.info("Service actuator is initializing")
            self.send_error_response(service_request, service_name, \
                "BUSY - Service actuator is initializing.", errno.EBUSY)
        elif actuator_state_manager.is_imported("Service"):
            # This case will be for first request only. Subsequent
            # requests will go to INITIALIZED state case.
            logger.info("Service actuator is imported and initializing")
            from actuators.IService import IService
            actuator_state_manager.set_state(
                "Service", actuator_state_manager.INITIALIZING)
            service_actuator_class = self._query_utility(IService)
            if service_actuator_class:
                # NOTE: Instantiation part should not time consuming
                # otherwise ServiceMsgHandler will get block and will
                # not be able serve any subsequent requests. This applies
                # to instantiation of evey actuator.
                self._service_actuator = service_actuator_class()
                logger.info(f"_process_msg, service_actuator name: \
                    {self._service_actuator.name()}")
                self._execute_request(self._service_actuator, jsonMsg,
                                      uuid)
                actuator_state_manager.set_state(
                    "Service", actuator_state_manager.INITIALIZED)
            else:
                logger.info("Service actuator is not instantiated")
        # If there is no entry for actuator in table, We can assume
        # that it is not loaded for some reason.
        else:
            logger.warn("Service actuator is not loaded or not supported")
    # Handle events generated by the service monitor
    elif "sensor_request_type" in jsonMsg and \
         "service_status_alert" in jsonMsg["sensor_request_type"]:
        logger.debug(f"Received alert from ServiceMonitor : {jsonMsg}")
        jsonMsg1 = ServiceMonitorMsg(
            jsonMsg["sensor_request_type"]).getJson()
        self._write_internal_msgQ("EgressProcessor", jsonMsg1)
def get_disk_groups_info(self):
    """Update and return disk-group information in specific format."""
    dg_data = []
    dg_vol_map = {}
    diskgroups = self.get_realstor_encl_data("disk-groups")
    # Mapping logical volumes with disk group.
    logicalvolumes = self.get_realstor_encl_data("volumes")
    if logicalvolumes:
        for logicalvolume in logicalvolumes:
            pool_serial = logicalvolume.get("container-serial", "NA")
            volume_uid = logicalvolume.get("volume-name", "NA")
            dg_vol_map.setdefault(pool_serial, []).append(
                {"volume_uid": volume_uid})
    if diskgroups:
        for dg in diskgroups:
            uid = dg.get("name", "NA")
            health = dg.get("health", "NA")
            pool_sr_no = dg.get("pool-serial-number", "NA")
            # None when no logical volumes map to this disk group.
            volumes = dg_vol_map.get(pool_sr_no)
            recommendation = dg.get("health-recommendation", "NA")
            specifics = [{
                "class": dg.get("storage-type", "NA"),
                "disks": dg.get("diskcount", "NA"),
                "size": dg.get("size", "NA"),
                "free": dg.get("freespace", "NA"),
                "status": dg.get("status", "NA"),
                "current_job": dg.get("current-job", "NA"),
                "current_job_completion": dg.get(
                    "current-job-completion", "NA"),
                "tier": dg.get("storage-tier", "NA"),
                "pool": dg.get("pool", "NA"),
                "blocksize": dg.get("blocksize", "NA"),
                "chunksize": dg.get("chunksize", "NA"),
                "volumes": volumes
            }]
            dg_dict = self.get_health_template(uid, is_fru=False)
            self.set_health_data(dg_dict, health,
                                 recommendation=recommendation,
                                 specifics=specifics)
            dg_data.append(dg_dict)
    logger.debug(self.log.svc_log(f"disk-group Health Data:{dg_data}"))
    return dg_data
def get_system_status(self):
    """Retrieve realstor system state info using cli api /show/system.

    Rate-limited to the configured polling frequency. On success the
    system section is cached in ``self.memcache_system`` and the fault
    caches (``self.latest_faults``/``self.memcache_faults``) are
    refreshed. Returns None in all cases; results are communicated
    through instance attributes.
    """
    # poll system would get invoked through multiple realstor sensors
    # with less frequency compared to configured polling frequency
    # adding check to comply with polling frequency
    elapsed = time.time() - self.poll_system_ts

    if elapsed < self.pollfreq:
        logger.warn("/show/system request came in {0} seconds,"
                    "while configured polling frequency is {1} seconds,"
                    "ignoring".format(elapsed, self.pollfreq))
        return

    system = None

    # make ws request
    url = self.build_url(self.URI_CLIAPI_SHOWSYSTEM)
    response = self.ws_request(url, self.ws.HTTP_GET)

    if not response:
        logger.warn("System status unavailable as ws request failed")
        return

    if response.status_code != self.ws.HTTP_OK:
        logger.info("{0}:: http request {1} polling system status failed"
                    " with http err {2}".format(self.LDR_R1_ENCL, url,
                                                response.status_code))
        return

    self.poll_system_ts = time.time()

    # BUGFIX: jresponse was only bound inside the try block, so a
    # mal-formed json payload raised UnboundLocalError at the check
    # below instead of being handled gracefully.
    jresponse = None
    try:
        jresponse = json.loads(response.content)
    except ValueError as badjson:
        logger.error("%s returned mal-formed json:\n%s" % (url, badjson))

    if jresponse:
        api_resp = self.get_api_status(jresponse['status'])

        # Some controller firmwares omit the api status object even on a
        # successful request; trust the HTTP 200 in that case.
        if ((api_resp == -1) and
                (response.status_code == self.ws.HTTP_OK)):
            logger.warn("/show/system api response unavailable, "
                        "marking success as http code is 200")
            api_resp = 0

        if api_resp == 0:
            system = jresponse['system'][0]
            self.memcache_system = system

            if system:
                # Check if fault exists (membership test on the dict
                # itself, per the old TODO).
                if self.FAULT_KEY not in system:
                    logger.debug("{0} Healthy, no faults seen".format(
                        self.LDR_R1_ENCL))
                    self.latest_faults = {}
                    return

                # Extract system faults
                self.latest_faults = system[self.FAULT_KEY]

                # If no in-memory fault cache built yet!
                if not self.memcache_faults:
                    # build from persistent cache if available
                    logger.info(
                        "No cached faults, building from persistent cache {0}"
                        .format(self.faults_persistent_cache))
                    self.memcache_faults = store.get(
                        self.faults_persistent_cache)

                    # still if none, build from latest faults & persist
                    if not self.memcache_faults:
                        logger.info("No persistent faults cache, building "
                                    "cache from latest faults")
                        self.memcache_faults = self.latest_faults

                        # On SSPL boot, run through existing faults as no
                        # cache to verify with for new faults
                        self.existing_faults = True
                        store.put(self.memcache_faults,
                                  self.faults_persistent_cache)
                else:
                    # Reset flag as existing faults processed by now
                    # and cached faults are built already
                    self.existing_faults = False
        else:
            logger.error("poll system failed with err %d" % api_resp)
def _transmit_msg_on_exchange(self):
    """Transmit json message onto RabbitMQ exchange.

    Routes ``self._jsonMsg`` to the message-bus producer based on the
    message type: actuator ACK / thread-controller responses, IEM
    routing requests, or regular alerts. Regular alerts are parked in
    the persistent store queue when the accumulated queue is non-empty
    or when publishing fails, so ordering is preserved.
    """
    self._log_debug("_transmit_msg_on_exchange, jsonMsg: %s" % self._jsonMsg)
    try:
        # Check for shut down message from sspl_ll_d and set a flag to shutdown
        # once our message queue is empty
        if self._jsonMsg.get("message").get(
                "actuator_response_type") is not None and \
                self._jsonMsg.get("message").get(
                    "actuator_response_type").get(
                        "thread_controller") is not None and \
                self._jsonMsg.get("message").get(
                    "actuator_response_type").get("thread_controller").get(
                        "thread_response") == \
                "SSPL-LL is shutting down":
            # NOTE(review): the two adjacent literals concatenate without a
            # space ("received" + "global"); preserved byte-for-byte.
            logger.info(
                "RabbitMQegressProcessor, _transmit_msg_on_exchange, received"
                "global shutdown message from sspl_ll_d")
            self._request_shutdown = True

        # Publish json message to the correct channel
        # NOTE: We need to route ThreadController messages to ACK channel.
        # We can't modify schema as it will affect other modules too. As a
        # temporary solution we have added a extra check to see if actuator_response_type
        # is "thread_controller".
        # TODO: Find a proper way to solve this issue. Avoid changing
        # core egress processor code
        if self._jsonMsg.get("message").get(
                "actuator_response_type") is not None and \
                (self._jsonMsg.get("message").get(
                    "actuator_response_type").get("ack") is not None or
                 self._jsonMsg.get("message").get(
                     "actuator_response_type").get(
                         "thread_controller") is not None):
            self._add_signature()
            self._producer.send([json.dumps(self._jsonMsg)])
            logger.debug(
                "_transmit_msg_on_exchange, Successfully Sent: %s"
                % self._jsonMsg)

        # Routing requests for IEM msgs sent from the LoggingMsgHandler
        elif self._jsonMsg.get("message").get("IEM_routing") is not None:
            log_msg = self._jsonMsg.get("message").get("IEM_routing").get(
                "log_msg")
            if self._iem_route_addr != "":
                self._producer.send([json.dumps(self._jsonMsg)])
            else:
                logger.warn(
                    "RabbitMQegressProcessor, Attempted to route IEM without a valid 'iem_route_addr' set.")
            # NOTE(review): this "Successfully Sent" debug also fires when
            # the IEM was NOT sent (empty _iem_route_addr) — confirm intent.
            logger.debug(
                "_transmit_msg_on_exchange, Successfully Sent: %s" % log_msg)
        else:
            # Regular alert: sign it, then publish directly only when the
            # accumulated queue is empty, otherwise append to the queue so
            # message ordering is preserved.
            self._add_signature()
            jsonMsg = json.dumps(self._jsonMsg)
            try:
                if self.store_queue.is_empty():
                    self._producer.send([jsonMsg])
                    logger.info(f"Published Alert: {jsonMsg}")
                else:
                    logger.info("'Accumulated msg queue' is not Empty." +
                                " Adding the msg to the end of the queue")
                    self.store_queue.put(jsonMsg)
            except MessageBusError as e:
                # Producer failure: persist the alert for later replay.
                logger.error(
                    f"RabbitMQegressProcessor, _transmit_msg_on_exchange, error {e} in producing message,\
 adding message to consul {self._jsonMsg}")
                self.store_queue.put(jsonMsg)
            except Exception as err:
                # Unknown failure: persist the alert for later replay.
                logger.error(
                    f'RabbitMQegressProcessor, _transmit_msg_on_exchange, Unknown error {err} while publishing the message, adding to persistent store {self._jsonMsg}')
                self.store_queue.put(jsonMsg)

        # If event is added by sensors, set it
        if self._event:
            self._event.set()
    except Exception as ex:
        logger.error(
            f'RabbitMQegressProcessor, _transmit_msg_on_exchange, problem while publishing the message:{ex}, adding message to consul: {self._jsonMsg}')
def perform_request(self, jsonMsg):
    """Performs the service request.

    Dispatches the systemd operation named in *jsonMsg*
    (restart/start/stop/status/enable/disable) against the service it
    names, then (for non-error paths) collects the service's final
    state after a settling delay.

    :param jsonMsg: actuator request containing either a
        "service_controller" or "service_watchdog_controller" section
        with "service_name" and "service_request" fields.
    :return: tuple (service_name, result, is_err_response) where result
        is a status dict on success or an error string on failure.
    """
    self._check_debug(jsonMsg)

    # Parse out the service name and request to perform on it
    if jsonMsg.get("actuator_request_type").get("service_controller") \
            is not None:
        self._service_name = jsonMsg.get("actuator_request_type").\
            get("service_controller").get("service_name")
        self._service_request = jsonMsg.get("actuator_request_type").\
            get("service_controller").get("service_request")
    else:
        self._service_name = jsonMsg.get("actuator_request_type").\
            get("service_watchdog_controller").get("service_name")
        self._service_request = jsonMsg.get("actuator_request_type").\
            get("service_watchdog_controller").get("service_request")

    logger.debug("perform_request, service_name: %s, service_request: %s" %
                 (self._service_name, self._service_request))

    try:
        # Load the systemd unit for the service
        systemd_unit = self._manager.LoadUnit(self._service_name)

        # Get a proxy to systemd for accessing properties of units
        self._proxy = self._bus.get_object("org.freedesktop.systemd1",
                                           str(systemd_unit))

        # The returned result of the desired action
        result = {}
        is_err_response = False
        if self._service_request in ['restart', 'start']:
            # Before restart/start the service, check service state.
            # If it is not active or activating then only process
            # restart/start request.
            service_state = self._service.get_state(self._service_name)
            state = service_state.state
            if state not in ['active', 'activating']:
                if self._service_request == "restart":
                    self._service.restart(self._service_name)
                elif self._service_request == "start":
                    self._service.start(self._service_name)

                # Ensure we get an "active" state and not "activating"
                service_state = self._service.get_state(self._service_name)
                state = service_state.state
                max_wait = 0
                # Poll once per second, giving up after ~20 polls.
                while state != "active":
                    logger.debug(
                        "%s status is activating, needs 'active' "
                        "state after %s request has been processed, retrying"
                        % (self._service_name, self._service_request))
                    time.sleep(1)
                    max_wait += 1
                    if max_wait > 20:
                        logger.debug("maximum wait - %s seconds, for "
                                     "service restart reached." % max_wait)
                        break
                    service_state = self._service.get_state(
                        self._service_name)
                    state = service_state.state
            else:
                # Service already active/activating: reject the request.
                is_err_response = True
                err_msg = (
                    "Can not process %s request, for %s, as service "
                    "is already in %s state." %
                    (self._service_request, self._service_name, state))
                logger.error(err_msg)
                return (self._service_name, err_msg, is_err_response)
        elif self._service_request == "stop":
            self._service.stop(self._service_name)
        elif self._service_request == "status":
            # Return the status below
            service_status = self._service.get_state(self._service_name)
        # TODO: Use cortx.utils Service class methods for
        # enable/disable services.
        elif self._service_request == "enable":
            service_list = []
            service_list.append(self._service_name)

            # EnableUnitFiles() function takes second argument as boolean.
            # 'True' will enable a service for runtime only(creates symlink
            # in /run/.. directory) 'False' will enable a service
            # persistently (creates symlink in /etc/.. directory)
            _, dbus_result = self._manager.EnableUnitFiles(
                service_list, False, True)
            res = parse_enable_disable_dbus_result(dbus_result)
            result.update(res)
            logger.debug("perform_request, result for enable request: "
                         "result: %s" % (result))
        elif self._service_request == "disable":
            service_list = []
            service_list.append(self._service_name)

            # DisableUnitFiles() function takes second argument as boolean.
            # 'True' will disable a service for runtime only(removes symlink
            # from /run/.. directory) 'False' will disable a service
            # persistently(removes symlink from /etc/.. directory)
            dbus_result = self._manager.DisableUnitFiles(
                service_list, False)
            res = parse_enable_disable_dbus_result(dbus_result)
            result.update(res)
            logger.debug(
                "perform_request, result for disable request: %s" % result)
        else:
            logger.error("perform_request, Unknown service request - %s "
                         "for service - %s" %
                         (self._service_request, self._service_name))
            is_err_response = True
            return (self._service_name, "Unknown service request",
                    is_err_response)
    except debus_exceptions.DBusException as error:
        is_err_response = True
        logger.exception("DBus Exception: %r" % error)
        return (self._service_name, str(error), is_err_response)
    except Exception as ae:
        logger.exception("SystemD Exception: %r" % ae)
        is_err_response = True
        return (self._service_name, str(ae), is_err_response)

    # Give the unit some time to finish starting/stopping to get final status
    time.sleep(5)

    # Get the current status of the process and return it back:
    service_status = self._service.get_state(self._service_name)
    pid = service_status.pid
    state = service_status.state
    substate = service_status.substate
    status = self._service.is_enabled(self._service_name)
    uptime = get_service_uptime(self._service_name)

    # Parse dbus output to fetch command line path with args.
    command_line = service_status.command_line_path
    command_line_path_with_args = []
    for field in list(command_line[0][1]):
        command_line_path_with_args.append(str(field))
    result["pid"] = pid
    result["state"] = state
    result["substate"] = substate
    result["status"] = status
    result["uptime"] = uptime
    result["command_line_path"] = command_line_path_with_args

    logger.debug("perform_request, state: %s, substate: %s" %
                 (str(state), str(substate)))

    return (self._service_name, result, is_err_response)
def _run_ipmitool_subcommand(self, subcommand, grep_args=None):
    """Run an ipmitool sub-command and optionally filter its output.

    :param subcommand: ipmitool sub-command string to execute.
    :param grep_args: optional regex; when given and the command
        succeeds, only output lines matching it are kept.
    :return: tuple of (stdout, stderr, return code).
    """
    self.ACTIVE_IPMI_TOOL = self.IPMITOOL
    host_conf_cmd = ""

    # Switch to the IPMI simulator when its activation marker exists and
    # the simulator responds to a probe command.
    if os.path.exists(f"{DATA_PATH}/server/activate_ipmisimtool"):
        probe_cmd = self.IPMISIMTOOL + " sel info"
        _, _, probe_rc = SimpleProcess(probe_cmd).run()
        if probe_rc in [0, 2]:
            self.ACTIVE_IPMI_TOOL = self.IPMISIMTOOL
            logger.debug("IPMI simulator is activated.")

    # Read configured/active BMC channel information.
    _channel_interface = Conf.get(
        SSPL_CONF, "%s>%s" % (BMC_INTERFACE, BMC_CHANNEL_IF))
    _active_interface = store.get(BMCInterface.ACTIVE_BMC_IF.value, None)
    if isinstance(_active_interface, bytes):
        _active_interface = _active_interface.decode()

    # For a LAN interface (and a real ipmitool), build the
    # host/credential arguments; the BMC secret is decrypted first.
    use_lan = (self.ACTIVE_IPMI_TOOL != self.IPMISIMTOOL
               and _active_interface in BMCInterface.LAN_IF.value)
    if use_lan:
        bmc_ip = Conf.get(GLOBAL_CONF, BMC_IP_KEY, '')
        bmc_user = Conf.get(GLOBAL_CONF, BMC_USER_KEY, 'ADMIN')
        bmc_secret = Conf.get(GLOBAL_CONF, BMC_SECRET_KEY, 'ADMIN')
        decryption_key = encryptor.gen_key(
            MACHINE_ID, ServiceTypes.SERVER_NODE.value)
        bmc_pass = encryptor.decrypt(decryption_key, bmc_secret, self.NAME)
        host_conf_cmd = BMCInterface.LAN_CMD.value.format(
            _active_interface, bmc_ip, bmc_user, bmc_pass)

    # Assemble the final command line and execute it.
    full_cmd = shlex.split(
        " ".join([self.ACTIVE_IPMI_TOOL, host_conf_cmd, subcommand]))
    out, error, retcode = SimpleProcess(full_cmd).run()

    # Normalize byte output to str.
    if not isinstance(out, str):
        out = out.decode(self.IPMI_ENCODING)
    if not isinstance(error, str):
        error = error.decode(self.IPMI_ENCODING)

    # On success, keep only the output lines matching grep_args.
    if grep_args is not None and retcode == 0:
        out = '\n'.join(line for line in out.split('\n')
                        if re.search(grep_args, line) is not None)

    # On failure with empty stderr, treat stdout as the error text.
    if retcode and not error:
        out, error = error, out

    # Flatten the error text for matching against known error strings.
    if error:
        error = error.replace('\n', '')

    return out, error, retcode
def get_platform_sensors_info(self):
    """Return health data for the platform temperature, current and
    voltage sensors."""
    sensor_data = self.build_encl_platform_sensors_data(
        ['temperature', 'current', 'voltage'])
    logger.debug(
        self.log.svc_log(f"Platform Sensors Health Data:{sensor_data}"))
    return sensor_data
def get_drives_info(self):
    """Update and return drives information in specific format.

    Builds one manifest entry per physical drive reported by the
    enclosure, skipping drives without a valid slot, and returns the
    list sorted by the configured disk resource-index key.
    """
    data = []
    drives = ENCL.get_realstor_encl_data("drives")
    for drive in drives:
        slot = drive.get("slot", -1)
        # Skip entries without a valid slot number.
        if slot == -1:
            continue
        drive_dict = {
            "uid": drive.get("durable-id", "NA"),
            "type": drive.get("type", "NA"),
            "description": drive.get("description", "NA"),
            "product": drive.get("object-name", "NA"),
            "manufacturer": drive.get("vendor", "NA"),
            "serial_number": drive.get("serial-number", "NA"),
            "version": drive.get("hardware-version", "NA"),
            "part_number": drive.get("part-number", "NA"),
            "last_updated": int(time.time()),
            "specifics": [{
                # BUGFIX: default to "NA" so a drive payload without a
                # serial-number no longer raises TypeError on the slice
                # (None[:8]); every sibling lookup already defaults.
                "drive-serial-number": drive.get("serial-number", "NA")[:8],
                "model": drive.get("model", "NA"),
                "slot": slot,
                "architecture": drive.get("architecture", "NA"),
                "interface": drive.get("interface", "NA"),
                "usage": drive.get("usage", "NA"),
                "current_job_completion": drive.get(
                    "current-job-completion", "NA"),
                "speed": drive.get("speed", "NA"),
                "size": drive.get("size", "NA"),
                "enclosure_wwn": drive.get("enclosure-wwn", "NA"),
                "status": drive.get("status", "NA"),
                "ssd_life_left": drive.get("ssd-life-left", "NA"),
                "led_status": drive.get("led-status", "NA"),
                "temperature": drive.get("temperature", "NA"),
                "location": drive.get("location", "NA")
            }]
        }
        data.append(drive_dict)
    logger.debug(self.log.svc_log(f"Drive Manifest Data:{data}"))
    sort_key_path = self.resource_indexing_map["hw"]["disk"]
    data = MonUtils.sort_by_specific_kv(data, sort_key_path, self.log)
    return data
def get_drives_info(self):
    """Update and return drives information in specific format.

    Builds one health entry per physical drive reported by the
    enclosure, skipping drives without a valid slot.
    """
    drive_data = []
    drives = self.get_realstor_encl_data("drives")
    for drive in drives:
        slot = drive.get("slot", -1)
        # Skip entries without a valid slot number.
        if slot == -1:
            continue
        # CONSISTENCY: default uid to "NA" like every other lookup here,
        # so a missing durable-id does not propagate None as the uid.
        uid = drive.get("durable-id", "NA")
        status = drive.get("health", "NA")
        description = drive.get("description", "NA")
        recommendation = drive.get("health-recommendation", "NA")
        specifics = [{
            "serial-number": drive.get("serial-number", "NA"),
            "model": drive.get("model", "NA"),
            "size": drive.get("size", "NA"),
            "temperature": drive.get("temperature", "NA"),
            "disk-group": drive.get("disk-group", "NA"),
            "storage-pool-name": drive.get("storage-pool-name", "NA"),
            "location": drive.get("location", "NA"),
            "enclosure-id": drive.get("enclosure-id", "NA"),
            "drawer-id": drive.get("drawer-id", "NA"),
            "slot": drive.get("slot", "NA"),
            "port": drive.get("port", "NA"),
            "scsi-id": drive.get("scsi-id", "NA"),
            "blocksize": drive.get("blocksize", "NA"),
            "blocks": drive.get("blocks", "NA"),
            "vendor": drive.get("vendor", "NA"),
            "revision": drive.get("revision", "NA"),
            "architecture": drive.get("architecture", "NA"),
            "interface": drive.get("interface", "NA"),
            "type": drive.get("type", "NA"),
            "blink": drive.get("blink", "NA"),
            "locator-led": drive.get("locator-led", "NA"),
            "enclosure-wwn": drive.get("enclosure-wwn", "NA"),
            "virtual-disk-serial": drive.get("virtual-disk-serial", "NA"),
            "led-status": drive.get("led-status", "NA"),
            "power-on-hours": drive.get("power-on-hours", "NA")
        }]
        drives_dict = self.get_health_template(uid, is_fru=True)
        self.set_health_data(drives_dict, status, description,
                             recommendation, specifics)
        drive_data.append(drives_dict)
    logger.debug(self.log.svc_log(f"disk Health data:{drive_data}"))
    return drive_data
def get_versions_info(self):
    """Update and return versions information in specific format."""
    data = []
    # Attribute names in the "specifics" payload are the controller API
    # keys with hyphens replaced by underscores, listed in output order.
    # (Note: "host-channel_revision"/"disk-channel_revision" really do
    # mix hyphen and underscore in the API.)
    specific_keys = (
        "sc-cpu-type", "bundle-version", "bundle-base-version",
        "build-date", "sc-fw", "sc-baselevel", "sc-memory",
        "sc-fu-version", "sc-loader", "capi-version", "mc-fw",
        "mc-loader", "mc-base-fw", "fw-default-platform-brand", "ec-fw",
        "pld-rev", "prm-version", "hw-rev", "him-rev", "him-model",
        "backplane-type", "host-channel_revision",
        "disk-channel_revision", "mrc-version", "ctk-version")
    versions = ENCL.get_realstor_encl_data("versions")
    for version in versions:
        entry = {
            "uid": version.get("object-name", "NA"),
            "type": version.get("sc-cpu-type", "NA"),
            "description": version.get("description", "NA"),
            "product": version.get("object-name", "NA"),
            "manufacturer": version.get("vendor", "NA"),
            "serial_number": version.get("serial-number", "NA"),
            "version": version.get("bundle-version", "NA"),
            "part_number": version.get("part-number", "NA"),
            "last_updated": int(time.time()),
            "specifics": [{
                key.replace("-", "_"): version.get(key, "NA")
                for key in specific_keys
            }]
        }
        data.append(entry)
    logger.debug(
        self.log.svc_log(f"Controller firmware Manifest Data:{data}"))
    return data