Exemplo n.º 1
0
 def __init__(self):
     """Initialize server."""
     super().__init__()
     self.log = CustomLog(const.HEALTH_SVC_NAME)
     server_type = Conf.get(GLOBAL_CONF, NODE_TYPE_KEY)
     Platform.validate_server_type_support(self.log, ResourceMapError,
                                           server_type)
     self.sysfs = ToolFactory().get_instance('sysfs')
     self.sysfs.initialize()
     self.sysfs_base_path = self.sysfs.get_sysfs_base_path()
     self.cpu_path = self.sysfs_base_path + const.CPU_PATH
     hw_resources = {
         'cpu': self.get_cpu_info,
         'platform_sensor': self.get_platform_sensors_info,
         'memory': self.get_mem_info,
         'fan': self.get_fans_info,
         'nw_port': self.get_nw_ports_info,
         'sas_hba': self.get_sas_hba_info,
         'sas_port': self.get_sas_ports_info,
         'disk': self.get_disks_info,
         'psu': self.get_psu_info
     }
     sw_resources = {
         'cortx_sw_services': self.get_cortx_service_info,
         'external_sw_services': self.get_external_service_info,
         'raid': self.get_raid_info
     }
     self.server_resources = {"hw": hw_resources, "sw": sw_resources}
     self._ipmi = IpmiFactory().get_implementor("ipmitool")
     self.platform_sensor_list = ['Temperature', 'Voltage', 'Current']
     self.service = Service()
     self.resource_indexing_map = ServerResourceMap.resource_indexing_map\
         ["health"]
Exemplo n.º 2
0
 def __init__(self):
     """Initialize server manifest."""
     super().__init__()
     self.log = CustomLog(MANIFEST_SVC_NAME)
     server_type = Conf.get(GLOBAL_CONF, NODE_TYPE_KEY)
     # import pdb; pdb.set_trace()
     Platform.validate_server_type_support(self.log, ResourceMapError,
                                           server_type)
     self.field_mapping = {
         'id': 'uid',
         'class': 'type',
         'description': 'description',
         'product': 'product',
         'serial': 'serial_number',
         'vendor': 'manufacturer',
         'part_number': 'part_number',
         'model_number': 'model_number',
         'physid': 'physical_id',
         'version': 'version',
         'logicalname': 'logical_name'
     }
     self.class_mapping = {
         'memory': 'hw>memory[%s]>%s',
         'disk': 'hw>disk[%s]>%s',
         'storage': 'hw>storage[%s]>%s',
         'system': 'hw>system[%s]>%s',
         'processor': 'hw>cpu[%s]>%s',
         'network': 'hw>nw_port[%s]>%s',
         'power': 'hw>psu[%s]>%s',
         'volume': 'hw>volume[%s]>%s',
         'bus': 'hw>bus[%s]>%s',
         'bridge': 'hw>bridge[%s]>%s',
         'display': 'hw>display[%s]>%s',
         'input': 'hw>input[%s]>%s',
         'generic': 'hw>generic[%s]>%s'
     }
     self.kv_dict = {}
     sw_resources = {
         'os': self.get_os_server_info,
         'cortx_sw_services': self.get_cortx_service_info,
         'external_sw_services': self.get_external_service_info
     }
     fw_resources = {'bmc': self.get_bmc_version_info}
     # Extracting resource type for 'self.class_mapping' dictionary values
     # and adding to hw_resources for function mapping.
     hw_resources = {value[len('hw>'):-len('[%s]>%s')]: \
         self.get_hw_resources_info for value in self.class_mapping.values()}
     self.server_resources = {
         "hw": hw_resources,
         "sw": sw_resources,
         "fw": fw_resources
     }
     self.service = Service()
     self.platform = Platform()
     self.resource_indexing_map = ServerResourceMap.resource_indexing_map\
         ["manifest"]
Exemplo n.º 3
0
    def __init__(self):
        """Initialize the relevant datastructures."""
        super(ServiceMonitor, self).__init__(self.SENSOR_NAME, self.PRIORITY)

        self.services_to_monitor = set(
            Platform.get_effective_monitored_services())

        self.services = {}

        self.thread_sleep = int(
            Conf.get(SSPL_CONF, f"{self.SERVICEMONITOR}>{self.THREAD_SLEEP}",
                     "1"))

        self.polling_frequency = int(
            Conf.get(SSPL_CONF,
                     f"{self.SERVICEMONITOR}>{self.POLLING_FREQUENCY}", "30"))
Exemplo n.º 4
0
    def get_server_health_info(self):
        """Returns overall server information"""
        unhealthy_resource_found = False
        server_details = Platform().get_server_details()
        # Currently only one instance of server is considered
        server = []
        info = {}
        info["make"] = server_details["Board Mfg"]
        info["model"] = server_details["Product Name"]
        try:
            build_instance = BuildInfo()
            info["product_family"] = build_instance.get_attribute("NAME")
            info["version"] = build_instance.get_attribute("VERSION")
            info["build"] = build_instance.get_attribute("BUILD")
        except Exception as err:
            logger.error(
                self.log.svc_log(f"Unable to get build info due to {err}"))
        info["resource_usage"] = {}
        info["resource_usage"]["cpu_usage"] = self.get_cpu_overall_usage()
        info["resource_usage"]["disk_usage"] = self.get_disk_overall_usage()
        info["resource_usage"]["memory_usage"] = self.get_memory_overall_usage(
        )

        for res_type in self.server_resources:
            info.update({res_type: {}})
            for fru, method in self.server_resources[res_type].items():
                try:
                    info[res_type].update({fru: method()})
                    unhealthy_resource_found = self._is_any_resource_unhealthy(
                        fru, info[res_type])
                except Exception as err:
                    logger.error(
                        self.log.svc_log(f"{err.__class__.__name__}:{err}"))
                    info[res_type].update({fru: None})

        info["uid"] = socket.getfqdn()
        info["last_updated"] = int(time.time())
        info["health"] = {}
        info["health"][
            "status"] = "OK" if not unhealthy_resource_found else "Degraded"
        health_desc = 'good' if info["health"]["status"] == 'OK' else 'bad'
        info["health"]["description"] = f"Server is in {health_desc} health."
        info["health"]["recommendation"] = const.DEFAULT_RECOMMENDATION \
            if info["health"]["status"] != "OK" else "NA"
        info["health"]["specifics"] = []
        server.append(info)
        return server
Exemplo n.º 5
0
    def initialize(self, conf_reader, msgQlist, product):
        """initialize configuration reader and internal msg queues"""
        # Initialize ScheduledMonitorThread
        super(ServiceMsgHandler, self).initialize(conf_reader)

        # Initialize internal message queues for this module
        super(ServiceMsgHandler, self).initialize_msgQ(msgQlist)

        self._import_products(product)

        self.host_id = socket.getfqdn()
        self.site_id = Conf.get(GLOBAL_CONF, SITE_ID_KEY, "DC01")
        self.rack_id = Conf.get(GLOBAL_CONF, RACK_ID_KEY, "RC01")
        self.node_id = Conf.get(GLOBAL_CONF, NODE_ID_KEY, "SN01")
        self.cluster_id = Conf.get(GLOBAL_CONF, CLUSTER_ID_KEY, "CC01")
        self.storage_set_id = Conf.get(GLOBAL_CONF, STORAGE_SET_ID_KEY, "ST01")
        self.monitored_services = Platform.get_effective_monitored_services()
Exemplo n.º 6
0
class ServerManifest():
    """
    ServerManifest class provides resource map and related information
    like health.
    """

    name = "server_manifest"

    def __init__(self):
        """Initialize server manifest."""
        super().__init__()
        self.log = CustomLog(MANIFEST_SVC_NAME)
        server_type = Conf.get(GLOBAL_CONF, NODE_TYPE_KEY)
        # import pdb; pdb.set_trace()
        Platform.validate_server_type_support(self.log, ResourceMapError,
                                              server_type)
        self.field_mapping = {
            'id': 'uid',
            'class': 'type',
            'description': 'description',
            'product': 'product',
            'serial': 'serial_number',
            'vendor': 'manufacturer',
            'part_number': 'part_number',
            'model_number': 'model_number',
            'physid': 'physical_id',
            'version': 'version',
            'logicalname': 'logical_name'
        }
        self.class_mapping = {
            'memory': 'hw>memory[%s]>%s',
            'disk': 'hw>disk[%s]>%s',
            'storage': 'hw>storage[%s]>%s',
            'system': 'hw>system[%s]>%s',
            'processor': 'hw>cpu[%s]>%s',
            'network': 'hw>nw_port[%s]>%s',
            'power': 'hw>psu[%s]>%s',
            'volume': 'hw>volume[%s]>%s',
            'bus': 'hw>bus[%s]>%s',
            'bridge': 'hw>bridge[%s]>%s',
            'display': 'hw>display[%s]>%s',
            'input': 'hw>input[%s]>%s',
            'generic': 'hw>generic[%s]>%s'
        }
        self.kv_dict = {}
        sw_resources = {
            'os': self.get_os_server_info,
            'cortx_sw_services': self.get_cortx_service_info,
            'external_sw_services': self.get_external_service_info
        }
        fw_resources = {'bmc': self.get_bmc_version_info}
        # Extracting resource type for 'self.class_mapping' dictionary values
        # and adding to hw_resources for function mapping.
        hw_resources = {value[len('hw>'):-len('[%s]>%s')]: \
            self.get_hw_resources_info for value in self.class_mapping.values()}
        self.server_resources = {
            "hw": hw_resources,
            "sw": sw_resources,
            "fw": fw_resources
        }
        self.service = Service()
        self.platform = Platform()
        self.resource_indexing_map = ServerResourceMap.resource_indexing_map\
            ["manifest"]

    def get_data(self, rpath):
        """Fetch manifest information for given rpath."""
        logger.info(self.log.svc_log(f"Get Manifest data for rpath:{rpath}"))
        info = {}
        resource_found = False
        nodes = rpath.strip().split(">")
        leaf_node, _ = ServerResourceMap.get_node_info(nodes[-1])

        # Fetch manifest information for all sub nodes
        if leaf_node == "server":
            # Example rpath: 'node>server[0]'
            server_hw_data = self.get_server_hw_info()
            info = self.get_server_info(server_hw_data)
            resource_found = True
        elif leaf_node == "hw":
            # Example rpath: 'node>server[0]>hw'
            server_hw_data = self.get_server_hw_info()
            info = self.get_hw_resources_info(server_hw_data, "hw")["hw"]
            resource_found = True
        elif leaf_node in ["sw", "fw"]:
            # Example rpath: 'node>server[0]>fw' or sw
            for resource, method in self.server_resources[leaf_node].items():
                try:
                    info.update({resource: method()})
                    resource_found = True
                except Exception as err:
                    logger.error(
                        self.log.svc_log(f"{err.__class__.__name__}: {err}"))
                    info = None
        else:
            # Example rpath: 'node>server[0]>hw>disk'
            server_hw_data = self.get_server_hw_info()
            for node in nodes:
                resource, _ = ServerResourceMap.get_node_info(node)
                for res_type in self.server_resources:
                    method = self.server_resources[res_type].get(resource)
                    if not method:
                        logger.error(
                            self.log.svc_log(
                                f"No mapping function found for {res_type}"))
                        continue
                    try:
                        if res_type == "hw":
                            info = method(server_hw_data, resource)
                        else:
                            info = method()
                        resource_found = True
                    except Exception as err:
                        logger.error(
                            self.log.svc_log(
                                f"{err.__class__.__name__}: {err}"))
                        info = None
                if resource_found:
                    break

        if not resource_found:
            msg = f"Invalid rpath or manifest provider doesn't have support for'{rpath}'."
            logger.error(self.log.svc_log(f"{msg}"))
            raise ResourceMapError(errno.EINVAL, msg)

        info = MonUtils.normalize_kv(info, HEALTH_UNDESIRED_VALS,
                                     "Not Available")
        return info

    def get_server_info(self, server_hw_data):
        """Get server manifest information."""
        server = []
        info = {}
        for res_type in self.server_resources:
            info.update({res_type: {}})
            for fru, method in self.server_resources[res_type].items():
                try:
                    if res_type == "hw":
                        info[res_type].update(
                            {fru: method(server_hw_data, fru)})
                    else:
                        info[res_type].update({fru: method()})
                except Exception as err:
                    logger.error(
                        self.log.svc_log(f"{err.__class__.__name__}:{err}"))
                    info[res_type].update({fru: []})
        info["last_updated"] = int(time.time())
        server.append(info)
        return server

    def get_server_hw_info(self):
        """Get server hw information."""
        cls_res_cnt = {}
        lshw_data = {}
        data, output_file = self.set_lshw_input_data()
        for kv_key in data.get_keys():
            if kv_key.endswith('class'):
                r_spec = data.get(kv_key)
                if r_spec in cls_res_cnt:
                    cls_res_cnt[r_spec] += 1
                else:
                    cls_res_cnt[r_spec] = 0
                if r_spec in self.class_mapping.keys():
                    for field in self.field_mapping.keys():
                        manifest_key = self.class_mapping[r_spec] % (
                            cls_res_cnt[r_spec], self.field_mapping[field])
                        self.map_manifest_server_data(field, manifest_key,
                                                      data, kv_key)
        # Adding data to kv
        output_file.set(self.kv_dict.keys(), self.kv_dict.values())
        lshw_data = self.get_manifest_output_data()
        # Removing out storage enclosure (RBOD/JBOD) drives from disk server data
        # as they are coming from lshw , as those need to be represented
        # separately & uniquely
        if "disk" in lshw_data["hw"]:
            lshw_data["hw"]["disk"] = self.get_local_disk(
                lshw_data["hw"]["disk"])
        # Sort list by serial_number
        eth_ctrl = []
        for resource, sort_key_path in self.resource_indexing_map["hw"].items(
        ):
            if resource in lshw_data["hw"]:
                if resource == "nw_port":
                    # Separating out ethernet controller and ethernet interface
                    # data for sorting.
                    eth_ctrl = [eth_ctr for eth_ctr in lshw_data["hw"][resource] \
                        if eth_ctr['logical_name']=='NA']
                    lshw_data["hw"][resource] = [eth_interface for eth_interface \
                        in lshw_data["hw"][resource] if eth_interface[
                            'logical_name']!='NA']
                sorted_data = MonUtils.sort_by_specific_kv(
                    lshw_data["hw"][resource], sort_key_path, self.log)
                lshw_data["hw"][resource] = sorted_data
            if resource == "nw_port" and eth_ctrl:
                lshw_data["hw"][resource] += eth_ctrl
        return lshw_data

    def get_hw_resources_info(self, server_hw_data, resource=False):
        """Get server hw resource information."""
        server = {}
        if resource == "hw" and "hw" in server_hw_data:
            server.update({"hw": {}})
            # Sorting output dictionary according to data priority.
            for server_type in self.server_resources["hw"].keys():
                if server_type in server_hw_data["hw"]:
                    server["hw"][server_type] = server_hw_data["hw"][
                        server_type]
        else:
            server = server_hw_data["hw"].get(resource, [])
        return server

    def set_lshw_input_data(self):
        """
        KvStoreFactory can not accept a dictionary as direct input and output
        It will support only JSON, YAML, TOML, INI, PROPERTIES files. So here
        we are fetching the lshw data and adding that to a file for further
        execution.
        """
        input_file = None
        output_file = None
        response, err, returncode = SimpleProcess("lshw -json").run()
        if returncode:
            msg = f"Failed to capture Node support data. Error:{str(err)}"
            logger.error(self.log.svc_log(msg))
            raise ResourceMapError(errno.EINVAL, msg)
        try:
            with open(LSHW_FILE, 'w+') as fp:
                json.dump(json.loads(response.decode("utf-8")), fp, indent=4)
            with open(MANIFEST_OUTPUT_FILE, 'w+') as fp:
                json.dump({}, fp, indent=4)
            input_file = KvStoreFactory.get_instance(
                f'json://{LSHW_FILE}').load()
            output_file = KvStoreFactory.get_instance(
                f'json://{MANIFEST_OUTPUT_FILE}')
        except Exception as e:
            msg = "Error in getting {0} file: {1}".format(LSHW_FILE, e)
            logger.error(self.log.svc_log(msg))
            raise ResourceMapError(errno.EINVAL, msg)
        return input_file, output_file

    def map_manifest_server_data(self, field, manifest_key, data, kv_key):
        """Mapping actual lshw output data with standard structured manifest data."""
        parent_id = ""
        base_key = '>'.join(kv_key.split('>')[:-1])
        if base_key:
            value = data.get(base_key + '>' + field)
        else:
            value = data.get(field)
        if isinstance(value, list):
            value = ','.join(value)
        value = value.replace(" (To be filled by O.E.M.)", "") \
            if value else 'NA'
        if field == 'id' and '>' in kv_key:
            parent_key = '>'.join(kv_key.split('>')[:-2])
            field = '>' + field if parent_key else field
            parent_id = data.get(parent_key + field) + "-"
        self.kv_dict[manifest_key] = parent_id + value

    def get_manifest_output_data(self):
        """Returns JSON data in the manifest output file."""
        data = {}
        try:
            with open(MANIFEST_OUTPUT_FILE) as json_file:
                data = json.loads(json_file.read())
        except Exception as e:
            msg = "Error in getting {0} file: {1}".format(
                MANIFEST_OUTPUT_FILE, e)
            logger.error(self.log.svc_log(msg))
            raise ResourceMapError(errno.EINVAL, msg)
        try:
            if os.path.exists(LSHW_FILE):
                os.remove(LSHW_FILE)
            if os.path.exists(MANIFEST_OUTPUT_FILE):
                os.remove(MANIFEST_OUTPUT_FILE)
        except OSError as ex:
            msg = f"Failed in manifest tmp files cleanup. Error:{str(ex)}"
            logger.warn(self.log.svc_log(msg))
        return data

    def get_cortx_service_info(self):
        """Get cortx service info in required format."""
        cortx_services = self.service.get_cortx_service_list()
        cortx_service_info = self.get_service_info(cortx_services)
        sort_key_path = self.resource_indexing_map["sw"]["cortx_sw_services"]
        cortx_service_info = MonUtils.sort_by_specific_kv(
            cortx_service_info, sort_key_path, self.log)
        return cortx_service_info

    def get_external_service_info(self):
        """Get external service info in required format."""
        external_services = self.service.get_external_service_list()
        external_service_info = self.get_service_info(external_services)
        sort_key_path = self.resource_indexing_map["sw"][
            "external_sw_services"]
        external_service_info = MonUtils.sort_by_specific_kv(
            external_service_info, sort_key_path, self.log)
        return external_service_info

    def get_service_info(self, services):
        """Returns node server services info."""
        services_info = []
        for service in services:
            response = self.service.get_systemd_service_info(self.log, service)
            if response is not None:
                uid, _, health_description, _, specifics = response
                service_info = {
                    "uid": uid,
                    "type": "software",
                    "description": health_description,
                    "product": specifics[0].pop("service_name"),
                    "manufacturer": "Not Applicable",
                    "serial_number": "Not Applicable",
                    "part_number": "Not Applicable",
                    "version": specifics[0].pop("version"),
                    "last_updated": int(time.time()),
                    "specifics": specifics
                }
                services_info.append(service_info)
        return services_info

    def get_bmc_version_info(self):
        """Returns node server bmc info."""
        bmc_data = []
        specifics = self.platform.get_bmc_info()
        if specifics:
            bmc = {
                "uid": 'bmc',
                "type": "firmware",
                "description": "BMC and IPMI version information",
                "product": specifics.get("product_name", "NA"),
                "manufacturer": specifics.get("manufacturer_name", "NA"),
                "serial_number": "Not Applicable",
                "part_number": "Not Applicable",
                "version": specifics.get("firmware_revision", "NA"),
                "last_updated": int(time.time()),
                "specifics": [specifics]
            }
            bmc_data.append(bmc)
        return bmc_data

    def get_os_server_info(self):
        """Returns node server os info."""
        os_data = []
        specifics = self.platform.get_os_info()
        if specifics:
            os_info = {
                "uid":
                specifics.get("id", "NA"),
                "type":
                "software",
                "description":
                "OS information",
                "product":
                specifics.get("pretty_name", "NA"),
                "manufacturer":
                specifics.get("manufacturer_name", "Not Applicable"),
                "serial_number":
                "Not Applicable",
                "part_number":
                "Not Applicable",
                "version":
                specifics.get("version", "NA"),
                "last_updated":
                int(time.time()),
                "specifics": [specifics]
            }
            os_data.append(os_info)
        return os_data

    @staticmethod
    def get_local_disk(disks):
        #TODO: We need better logic to identify server local disks.
        local_disk = []
        for disk in disks:
            if disk.get("version") in ["G265", "G280"]:
                continue
            else:
                local_disk.append(disk)
        return local_disk