def _inspect_hardware(node):
    """Collect the essential hardware properties and MACs of a node.

    The iRMC report is fetched with ``irmc_common.get_irmc_report``, the
    essential properties are extracted from it by
    ``scci.get_essential_properties`` and the MAC addresses come from
    ``_get_mac_addresses``.

    :param node: node object.
    :raises: HardwareInspectionFailure, if any of the calls above raises
        SCCIInvalidInputError, SCCIClientError or SNMPFailure.
    :returns: a pair of dictionary and list, the dictionary contains
        keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its inspected
        values, the list contains mac addresses.
    """
    try:
        irmc_report = irmc_common.get_irmc_report(node)
        essential_props = scci.get_essential_properties(
            irmc_report, IRMCInspect.ESSENTIAL_PROPERTIES)
        mac_addresses = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as exc:
        # Wrap the low-level failure into the inspection-specific error,
        # keeping the node UUID and the original error text in the message.
        details = {'node_id': node.uuid, 'error': exc}
        message = _("Inspection failed for node %(node_id)s "
                    "with the following error: %(error)s") % details
        raise exception.HardwareInspectionFailure(error=message)

    return (essential_props, mac_addresses)
def _inspect_hardware(node, **kwargs):
    """Inspect the node and get hardware information.

    Besides the essential properties, the capabilities named in
    CAPABILITIES_PROPERTIES are queried through
    ``scci.get_capabilities_properties`` and merged with the node's
    remaining capabilities by ``utils.get_updated_capabilities``.

    Side effect: when the node already has a 'capabilities' property, it is
    rewritten with every entry whose key is in CAPABILITIES_PROPERTIES
    removed, before the inspection starts.

    :param node: node object.
    :param kwargs: the dictionary of additional parameters, forwarded
        unchanged to ``scci.get_capabilities_properties``.
    :raises: HardwareInspectionFailure, if unable to get essential hardware
        properties.
    :returns: a pair of dictionary and list, the dictionary contains
        keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its inspected
        values (plus a 'capabilities' key when the merged capabilities
        are non-empty), the list contains mac addresses.
    """
    capabilities_props = set(CAPABILITIES_PROPERTIES)

    # Remove all capabilities item which will be inspected in the existing
    # capabilities of node. The filtered list is built in one pass instead
    # of calling remove() on the list being iterated, which skipped the
    # entry following each removal and left duplicate keys behind.
    if 'capabilities' in node.properties:
        existing_cap = node.properties['capabilities'].split(',')
        node.properties['capabilities'] = ",".join(
            prop for prop in existing_cap
            if prop.split(':')[0] not in capabilities_props)

    # get gpu_ids in ironic configuration
    values = [gpu_id.lower() for gpu_id in CONF.irmc.gpu_ids]

    # if gpu_ids = [], pci_gpu_devices will not be inspected.
    # discard() keeps this a no-op when the property is not in the set.
    if not values:
        capabilities_props.discard('pci_gpu_devices')

    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(
            report, IRMCInspect.ESSENTIAL_PROPERTIES)
        d_info = irmc_common.parse_driver_info(node)
        capabilities = scci.get_capabilities_properties(
            d_info, capabilities_props, values, **kwargs)
        if capabilities:
            # A zero GPU count and a False trusted_boot flag are not
            # reported as capabilities at all.
            if capabilities.get('pci_gpu_devices') == 0:
                capabilities.pop('pci_gpu_devices')

            if capabilities.get('trusted_boot') is False:
                capabilities.pop('trusted_boot')
            capabilities = utils.get_updated_capabilities(
                node.properties.get('capabilities'), capabilities)
            if capabilities:
                props['capabilities'] = capabilities

        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as e:
        error = (_("Inspection failed for node %(node_id)s "
                   "with the following error: %(error)s") %
                 {'node_id': node.uuid, 'error': e})
        raise exception.HardwareInspectionFailure(error=error)

    return (props, macs)
def _get_sensors_data(task):
    """Read the node's sensor data via SCCI and reshape it into a dict.

    Each sensor record returned by ``scci.get_sensor_data`` is searched for
    its type name/number and entity name/ID; records missing any of the
    four are skipped. The remaining ones are grouped by sensor type.

    :param task: A TaskManager instance.
    :raises: FailedToGetSensorData when getting the sensor data fails.
    :returns: Returns a consistent formatted dict of sensor data grouped
        by sensor type, which can be processed by Ceilometer.
    """

    def _text_or_none(element):
        # A missing element is reported as the literal string "None".
        if element is None:
            return "None"
        return str(element.text)

    try:
        irmc_report = irmc_common.get_irmc_report(task.node)
        sensor_records = scci.get_sensor_data(irmc_report)
    except (exception.InvalidParameterValue,
            exception.MissingParameterValue,
            scci.SCCIInvalidInputError,
            scci.SCCIClientError) as exc:
        LOG.error(_LE("SCCI get sensor data failed for node %(node_id)s "
                      "with the following error: %(error)s"),
                  {'node_id': task.node.uuid, 'error': exc})
        raise exception.FailedToGetSensorData(
            node=task.node.uuid, error=exc)

    sensors_data = {}
    for record in sensor_records:
        type_name = record.find('./Data/Decoded/Sensor/TypeName')
        type_number = record.find('./Data/Decoded/Sensor/Type')
        entity_name = record.find('./Data/Decoded/Entity/Name')
        entity_id = record.find('./Data/Decoded/Entity/ID')

        # All four identifying elements are mandatory for a usable entry.
        if None in (type_name, type_number, entity_name, entity_id):
            continue

        sensor_type = '%s (%s)' % (type_name.text, type_number.text)
        sensor_id = '%s (%s)' % (entity_name.text, entity_id.text)

        value_text = _text_or_none(
            record.find('./Data/Decoded/Sensor/Thresholds/*/Normalized'))
        units_text = _text_or_none(
            record.find('./Data/Decoded/Sensor/BaseUnitName'))

        entry = {
            'Sensor Reading': '%s %s' % (value_text, units_text),
            'Sensor ID': sensor_id,
            'Units': units_text,
        }
        per_type = sensors_data.setdefault(sensor_type, {})
        per_type[sensor_id] = entry

    return sensors_data
def test_get_irmc_report(self, mock_scci):
    """get_irmc_report() must call get_report() with the driver info.

    ``mock_scci`` is the stand-in whose ``get_report`` is expected to be
    invoked; its canned return value has to come back to the caller
    unchanged.
    """
    self.info.update({
        'irmc_port': 80,
        'irmc_auth_method': 'digest',
        'irmc_client_timeout': 60,
    })
    mock_scci.get_report.return_value = 'get_report'

    actual_report = irmc_common.get_irmc_report(self.node)

    # Address and credentials are positional, the rest goes by keyword.
    mock_scci.get_report.assert_called_with(
        self.info['irmc_address'],
        self.info['irmc_username'],
        self.info['irmc_password'],
        port=self.info['irmc_port'],
        auth_method=self.info['irmc_auth_method'],
        client_timeout=self.info['irmc_client_timeout'])
    self.assertEqual('get_report', actual_report)
def test_get_irmc_report(self, mock_scci):
    """Verify the arguments get_irmc_report() passes to get_report().

    The port, auth method and client timeout are overridden in
    ``self.info`` first, then the call recorded on ``mock_scci.get_report``
    is compared against the values held in ``self.info``.
    """
    overrides = (
        ("irmc_port", 80),
        ("irmc_auth_method", "digest"),
        ("irmc_client_timeout", 60),
    )
    for key, value in overrides:
        self.info[key] = value

    mock_scci.get_report.return_value = "get_report"
    result = irmc_common.get_irmc_report(self.node)

    expected_args = (
        self.info["irmc_address"],
        self.info["irmc_username"],
        self.info["irmc_password"],
    )
    expected_kwargs = {
        "port": self.info["irmc_port"],
        "auth_method": self.info["irmc_auth_method"],
        "client_timeout": self.info["irmc_client_timeout"],
    }
    mock_scci.get_report.assert_called_with(*expected_args,
                                            **expected_kwargs)
    self.assertEqual("get_report", result)
def _get_sensors_data(task):
    """Fetch sensor data over SCCI and convert it to a nested dict.

    Any exception raised while fetching the report or extracting the
    sensor data is logged and re-raised as FailedToGetSensorData.

    :param task: A TaskManager instance.
    :raises: FailedToGetSensorData when getting the sensor data fails.
    :returns: Returns a consistent formatted dict of sensor data grouped
        by sensor type, which can be processed by Ceilometer.
    """
    try:
        sensor_records = scci.get_sensor_data(
            irmc_common.get_irmc_report(task.node))
    except Exception as exc:
        LOG.error(
            _LE("SCCI get sensor data failed for node %(node_id)s "
                "with the following error: %(error)s"),
            {"node_id": task.node.uuid, "error": exc},
        )
        raise exception.FailedToGetSensorData(node=task.node.uuid,
                                              error=exc)

    sensors_data = {}
    for record in sensor_records:
        type_name = record.find("./Data/Decoded/Sensor/TypeName")
        type_number = record.find("./Data/Decoded/Sensor/Type")
        entity_name = record.find("./Data/Decoded/Entity/Name")
        entity_id = record.find("./Data/Decoded/Entity/ID")
        identifying = (type_name, type_number, entity_name, entity_id)
        # Records lacking any identifying element cannot be keyed; skip.
        if None in identifying:
            continue

        sensor_type = "%s (%s)" % (type_name.text, type_number.text)
        sensor_id = "%s (%s)" % (entity_name.text, entity_id.text)

        # Missing reading/unit elements are rendered as the string "None".
        value_elem = record.find(
            "./Data/Decoded/Sensor/Thresholds/*/Normalized")
        if value_elem is None:
            value_text = "None"
        else:
            value_text = str(value_elem.text)

        units_elem = record.find("./Data/Decoded/Sensor/BaseUnitName")
        if units_elem is None:
            units_text = "None"
        else:
            units_text = str(units_elem.text)

        readings_for_type = sensors_data.setdefault(sensor_type, {})
        readings_for_type[sensor_id] = {
            "Sensor Reading": "%s %s" % (value_text, units_text),
            "Sensor ID": sensor_id,
            "Units": units_text,
        }

    return sensors_data
def _inspect_hardware(node):
    """Gather essential properties and MAC addresses for a node.

    :param node: node object.
    :raises: HardwareInspectionFailure, if unable to get essential hardware
        properties (an SCCIInvalidInputError, SCCIClientError or
        SNMPFailure was raised during inspection).
    :returns: a pair of dictionary and list, the dictionary contains
        keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its inspected
        values, the list contains mac addresses.
    """
    try:
        props = scci.get_essential_properties(
            irmc_common.get_irmc_report(node),
            IRMCInspect.ESSENTIAL_PROPERTIES)
        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as err:
        # Report the failure in inspection terms, naming the node.
        template = _("Inspection failed for node %(node_id)s "
                     "with the following error: %(error)s")
        failure_text = template % {'node_id': node.uuid, 'error': err}
        raise exception.HardwareInspectionFailure(error=failure_text)
    return (props, macs)
def _inspect_hardware(node, existing_traits=None, **kwargs):
    """Inspect the node and get hardware information.

    Besides the essential properties, the capabilities named in
    CAPABILITIES_PROPERTIES are queried through
    ``scci.get_capabilities_properties`` and merged with the node's
    remaining capabilities by ``utils.get_updated_capabilities``. The
    'cpu_fpga' result is never stored as a capability; it only decides
    whether the 'CUSTOM_CPU_FPGA' trait is present in the returned traits.

    Side effect: when the node already has a 'capabilities' property, it is
    rewritten with every entry whose key is in CAPABILITIES_PROPERTIES
    removed, before the inspection starts.

    :param node: node object.
    :param existing_traits: existing traits list. It is copied, never
        modified in place.
    :param kwargs: the dictionary of additional parameters, forwarded
        unchanged to ``scci.get_capabilities_properties``.
    :raises: HardwareInspectionFailure, if unable to get essential hardware
        properties.
    :returns: a tuple ``(props, macs, new_traits)``: the dictionary
        contains keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its
        inspected values (plus a 'capabilities' key when the merged
        capabilities are non-empty), the first list contains mac
        addresses, the second list is the updated copy of
        ``existing_traits``.
    """
    capabilities_props = set(CAPABILITIES_PROPERTIES)
    new_traits = list(existing_traits) if existing_traits else []

    # Remove all capabilities item which will be inspected in the existing
    # capabilities of node. The filtered list is built in one pass instead
    # of calling remove() on the list being iterated, which skipped the
    # entry following each removal and left duplicate keys behind.
    if 'capabilities' in node.properties:
        existing_cap = node.properties['capabilities'].split(',')
        node.properties['capabilities'] = ",".join(
            prop for prop in existing_cap
            if prop.split(':')[0] not in capabilities_props)

    # get gpu_ids, fpga_ids in ironic configuration
    gpu_ids = [gpu_id.lower() for gpu_id in CONF.irmc.gpu_ids]
    fpga_ids = [fpga_id.lower() for fpga_id in CONF.irmc.fpga_ids]

    # if gpu_ids = [], pci_gpu_devices will not be inspected.
    # discard() keeps this a no-op when the property is not in the set.
    if not gpu_ids:
        capabilities_props.discard('pci_gpu_devices')

    # if fpga_ids = [], cpu_fpga will not be inspected
    if not fpga_ids:
        capabilities_props.discard('cpu_fpga')

    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(
            report, IRMCInspect.ESSENTIAL_PROPERTIES)
        d_info = irmc_common.parse_driver_info(node)
        capabilities = scci.get_capabilities_properties(
            d_info, capabilities_props, gpu_ids, fpga_ids=fpga_ids,
            **kwargs)
        if capabilities:
            # A zero GPU count is not reported as a capability.
            if capabilities.get('pci_gpu_devices') == 0:
                capabilities.pop('pci_gpu_devices')

            # cpu_fpga is translated into a trait; a missing value counts
            # as zero, which drops the trait if it was present.
            cpu_fpga = capabilities.pop('cpu_fpga', 0)
            if cpu_fpga == 0 and 'CUSTOM_CPU_FPGA' in new_traits:
                new_traits.remove('CUSTOM_CPU_FPGA')
            elif cpu_fpga != 0 and 'CUSTOM_CPU_FPGA' not in new_traits:
                new_traits.append('CUSTOM_CPU_FPGA')

            # A False trusted_boot flag is not reported as a capability.
            if capabilities.get('trusted_boot') is False:
                capabilities.pop('trusted_boot')
            capabilities = utils.get_updated_capabilities(
                node.properties.get('capabilities'), capabilities)
            if capabilities:
                props['capabilities'] = capabilities

        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as e:
        error = (_("Inspection failed for node %(node_id)s "
                   "with the following error: %(error)s") %
                 {'node_id': node.uuid, 'error': e})
        raise exception.HardwareInspectionFailure(error=error)

    return props, macs, new_traits
def _query_raid_config_fgi_status(self, manager, context):
    """Periodic tasks to check the progress of running RAID config.

    Iterates the unreserved, non-maintenance nodes in CLEANWAIT, and for
    each one driven by IRMCRAID with a target RAID config and no recorded
    'fgi_status' yet, reads the iRMC report and either marks cleaning as
    failed, resumes cleaning, or leaves the node for the next run.

    :param manager: object providing ``iter_nodes(fields=, filters=)``.
    :param context: request context handed to ``task_manager.acquire``.
    """

    def _fail_raid(task, node, raid_config, reason):
        # Record RAID_FAILED in raid_config, hand it to
        # raid_common.update_raid_info and flag the clean step as failed.
        raid_config.update({'fgi_status': RAID_FAILED})
        raid_common.update_raid_info(node, raid_config)
        self._set_clean_failed(task, reason)

    node_filters = {
        'reserved': False,
        'provision_state': states.CLEANWAIT,
        'maintenance': False,
    }
    node_iter = manager.iter_nodes(fields=['raid_config'],
                                   filters=node_filters)
    for (node_uuid, _driver, _conductor_group, raid_config) in node_iter:
        try:
            purpose = 'checking async RAID configuration tasks'
            with task_manager.acquire(context, node_uuid,
                                      purpose=purpose,
                                      shared=True) as task:
                node = task.node
                node_uuid = task.node.uuid
                if not isinstance(task.driver.raid, IRMCRAID):
                    continue
                if task.node.target_raid_config is None:
                    continue
                if not raid_config or raid_config.get('fgi_status'):
                    continue

                task.upgrade_lock()
                # Re-check the state now that the lock has been upgraded.
                if node.provision_state != states.CLEANWAIT:
                    continue
                # Avoid hitting clean_callback_timeout expiration
                node.touch_provisioning()

                try:
                    report = irmc_common.get_irmc_report(node)
                except client.scci.SCCIInvalidInputError:
                    _fail_raid(task, node, raid_config, RAID_FAILED)
                    continue
                except client.scci.SCCIClientError:
                    _fail_raid(task, node, raid_config, RAID_FAILED)
                    continue

                fgi_statuses = _get_fgi_status(report, node_uuid)
                # Note(trungnv): Allow to check until RAID mechanism to be
                # completed with RAID information in report.
                if fgi_statuses == 'completing':
                    continue
                if not fgi_statuses:
                    # The (empty) status itself is passed on as the reason.
                    _fail_raid(task, node, raid_config, fgi_statuses)
                    continue

                all_idle = all(status == 'Idle'
                               for status in fgi_statuses.values())
                if all_idle:
                    raid_config.update({'fgi_status': RAID_COMPLETED})
                    LOG.info('RAID configuration has completed on '
                             'node %(node)s with fgi_status is %(fgi)s',
                             {'node': node_uuid, 'fgi': RAID_COMPLETED})
                    self._resume_cleaning(task)

        except exception.NodeNotFound:
            LOG.info('During query_raid_config_job_status, node '
                     '%(node)s was not found raid_config and presumed '
                     'deleted by another process.', {'node': node_uuid})
        except exception.NodeLocked:
            LOG.info('During query_raid_config_job_status, node '
                     '%(node)s was already locked by another process. '
                     'Skip.', {'node': node_uuid})
def _query_raid_config_fgi_status(self, manager, context):
    """Periodic tasks to check the progress of running RAID config.

    For every unreserved, non-maintenance node in CLEANWAIT returned by
    ``manager.iter_nodes``, the node is locked and, if it uses IRMCRAID,
    has a target RAID config and no 'fgi_status' recorded yet, the FGI
    status from the iRMC report decides whether cleaning is failed,
    resumed, or checked again on a later run.

    :param manager: object providing ``iter_nodes(fields=, filters=)``.
    :param context: request context handed to ``task_manager.acquire``.
    """
    candidates = manager.iter_nodes(
        fields=['raid_config'],
        filters={'reserved': False,
                 'provision_state': states.CLEANWAIT,
                 'maintenance': False})

    for (node_uuid, _driver, _conductor_group, raid_config) in candidates:
        try:
            with task_manager.acquire(
                    context, node_uuid,
                    purpose='checking async RAID configuration tasks',
                    shared=True) as task:
                node = task.node
                node_uuid = task.node.uuid

                # Skip nodes that are not ours or have nothing pending.
                if (not isinstance(task.driver.raid, IRMCRAID)
                        or task.node.target_raid_config is None
                        or not raid_config
                        or raid_config.get('fgi_status')):
                    continue

                task.upgrade_lock()
                # Re-check the state now that the lock has been upgraded.
                if node.provision_state != states.CLEANWAIT:
                    continue
                # Avoid hitting clean_callback_timeout expiration
                node.touch_provisioning()

                try:
                    report = irmc_common.get_irmc_report(node)
                except (client.scci.SCCIInvalidInputError,
                        client.scci.SCCIClientError):
                    # Both SCCI errors are handled identically.
                    raid_config.update({'fgi_status': RAID_FAILED})
                    raid_common.update_raid_info(node, raid_config)
                    self._set_clean_failed(task, RAID_FAILED)
                    continue

                fgi_status_by_key = _get_fgi_status(report, node_uuid)
                # Note(trungnv): Allow to check until RAID mechanism to be
                # completed with RAID information in report.
                if fgi_status_by_key == 'completing':
                    continue

                if not fgi_status_by_key:
                    raid_config.update({'fgi_status': RAID_FAILED})
                    raid_common.update_raid_info(node, raid_config)
                    # The (empty) status itself is passed on as the reason.
                    self._set_clean_failed(task, fgi_status_by_key)
                    continue

                if all(state == 'Idle'
                       for state in fgi_status_by_key.values()):
                    raid_config.update({'fgi_status': RAID_COMPLETED})
                    LOG.info('RAID configuration has completed on '
                             'node %(node)s with fgi_status is %(fgi)s',
                             {'node': node_uuid, 'fgi': RAID_COMPLETED})
                    self._resume_cleaning(task)

        except exception.NodeNotFound:
            LOG.info('During query_raid_config_job_status, node '
                     '%(node)s was not found raid_config and presumed '
                     'deleted by another process.', {'node': node_uuid})
        except exception.NodeLocked:
            LOG.info('During query_raid_config_job_status, node '
                     '%(node)s was already locked by another process. '
                     'Skip.', {'node': node_uuid})