def _update_nvml_static_info():
    """Query NVML and store the GPU inventory in ``_static_info``.

    Reads the driver version, the NVML version and, for every device index
    below ``nvmlDeviceGetCount()``, the device name and total memory.

    Side effects on the module-level ``_static_info`` mapping:

    * ``_static_info['public']['gpu']`` is set to a dict with the keys
      ``driver``, ``nvml`` and ``devices``; each device entry has the form
      ``{'index': int, 'name': str, 'memory': {'total': ...}}``.
    * ``_static_info['private']['gpu']`` is set to ``{'handles': [...]}``,
      the NVML device handles in index order.

    NOTE(review): this function does not call ``nvmlInit``; presumably the
    caller has already initialised NVML -- confirm.
    """

    def _text(value):
        # Older pynvml releases return bytes from the string getters while
        # newer ones return str; calling .decode() on a str would raise
        # AttributeError, so only decode when it is actually bytes.
        return value.decode() if isinstance(value, bytes) else value

    driver_version = _text(pynvml.nvmlSystemGetDriverVersion())
    nvml_version = _text(pynvml.nvmlSystemGetNVMLVersion())

    devices = []
    devices_handles = []
    for index in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        name = _text(pynvml.nvmlDeviceGetName(handle))
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        devices.append({
            'index': index,
            'name': name,
            'memory': {'total': mem_info.total},
        })
        devices_handles.append(handle)

    # update() with a single 'gpu' key replaces any previous 'gpu' entry
    # wholesale rather than merging into it.
    _static_info['public'].update({
        'gpu': {
            'driver': driver_version,
            'nvml': nvml_version,
            'devices': devices,
        }
    })
    _static_info['private'].update({'gpu': {'handles': devices_handles}})
def initialize(self, **kwargs) -> None:
    """Initialise NVML, log the driver and NVML versions, then detect devices.

    Args:
        **kwargs: Accepted for interface compatibility; not used here.

    Any ``NVMLError`` raised by the NVML calls or by ``self.detect_devices()``
    is handed to ``self.raise_nvml_error``.
    """

    def _text(value):
        # The NVML string getters return bytes in older bindings and str in
        # newer ones; only decode when the value really is bytes.
        return value.decode("UTF-8") if isinstance(value, bytes) else value

    try:
        nvmlInit()
        driver_version = _text(nvmlSystemGetDriverVersion())
        nvml_version = _text(nvmlSystemGetNVMLVersion())
        self.logger.info(f"NVML initialized, driver version: {driver_version}, NVML version: {nvml_version}")
        self.detect_devices()
    except NVMLError as error:
        self.raise_nvml_error(error)
def get_nv_info():
    """Collect NVIDIA driver and per-device information through NVML.

    Returns:
        dict: On success contains ``_Driver_Version``, ``_NVML_Version``,
        ``Device_Count`` and ``Devices`` (one dict per device with ``_Name``,
        ``Total_Memory``, ``Free_Memory``, ``Used_Memory``,
        ``GPU_Utilization_Rate`` and ``Memory_Utilization_Rate``).
        If anything fails, the keys gathered so far are kept and the error
        text is stored under ``Exception``; this function never raises for
        ordinary ``Exception`` subclasses.
    """

    def _text(value):
        # Older bindings return bytes, newer ones return str.  Calling
        # str(<str>, errors=...) raises TypeError, so only decode bytes.
        if isinstance(value, bytes):
            return str(value, errors="ignore")
        return str(value)

    nv_info = dict()
    initialized = False
    try:
        nvmlInit()
        initialized = True

        nv_info["_Driver_Version"] = _text(nvmlSystemGetDriverVersion())
        nv_info["_NVML_Version"] = _text(nvmlSystemGetNVMLVersion())

        device_count = nvmlDeviceGetCount()
        nv_info["Device_Count"] = device_count

        devices = []
        for i in range(device_count):
            dev_info = dict()
            handle = nvmlDeviceGetHandleByIndex(i)
            dev_info["_Name"] = _text(nvmlDeviceGetName(handle))

            memory_info = nvmlDeviceGetMemoryInfo(handle)
            dev_info["Total_Memory"] = memory_info.total
            dev_info["Free_Memory"] = memory_info.free
            dev_info["Used_Memory"] = memory_info.used

            util_rates = nvmlDeviceGetUtilizationRates(handle)
            dev_info["GPU_Utilization_Rate"] = util_rates.gpu
            dev_info["Memory_Utilization_Rate"] = util_rates.memory

            devices.append(dev_info)
        nv_info["Devices"] = devices
    except Exception as e:
        nv_info["Exception"] = str(e)
    finally:
        # Shut NVML down whenever nvmlInit() succeeded, including when a
        # later query failed; skip it when initialisation itself failed.
        if initialized:
            try:
                nvmlShutdown()
            except Exception as e:
                # Keep the first recorded error if there already is one.
                nv_info.setdefault("Exception", str(e))
    return nv_info
def test_nvmlSystemGetNVMLVersion(nvml):
    """Check that NVML reports a version greater than ``"0.0"``.

    The ``nvml`` argument is not referenced in the body.
    NOTE(review): presumably a pytest fixture that initialises NVML for the
    duration of the test -- confirm against the fixture definition.
    """
    vsn = pynvml.nvmlSystemGetNVMLVersion()
    # Older bindings return bytes, newer ones return str; only decode bytes.
    if isinstance(vsn, bytes):
        vsn = vsn.decode()
    print("[NVML Version: " + vsn + "]", end=" ")
    # Compare two LooseVersion objects explicitly instead of relying on the
    # reflected str-vs-LooseVersion comparison.
    assert LooseVersion(vsn) > LooseVersion("0.0")
return { 'index': index, 'name': pynvml.nvmlDeviceGetName(handle).decode(), 'utilization': pynvml.nvmlDeviceGetUtilizationRates(handle).gpu, 'uuid': pynvml.nvmlDeviceGetUUID(handle).decode(), } if __name__ == '__main__': pynvml.nvmlInit() try: print('NVIDIA Driver version %s' % pynvml.nvmlSystemGetDriverVersion().decode()) print('NVML API %s initialized' % pynvml.nvmlSystemGetNVMLVersion().decode()) print() config = parse_arguments() device_count = pynvml.nvmlDeviceGetCount() devices = [query_device(i) for i in range(0, device_count)] if not (config.device_index in range(0, device_count)): print('Available devices:') list( map( lambda device: print('Device #%d:\t%s\t%d%%\t%s' % (device[ 'index'], device['name'], device[ 'utilization'], device['uuid'])), devices)) exit(1)
def test_nvmlSystemGetNVMLVersion(nvml):
    """Check that NVML reports a version greater than ``"0.0"``.

    The ``nvml`` argument is not referenced in the body.
    NOTE(review): presumably a pytest fixture that initialises NVML for the
    duration of the test -- confirm against the fixture definition.
    """
    vsn = pynvml.nvmlSystemGetNVMLVersion()
    # Older bindings return bytes, newer ones return str; only decode bytes.
    if isinstance(vsn, bytes):
        vsn = vsn.decode()
    print('[NVML Version: ' + vsn + ']', end=' ')
    # Compare two LooseVersion objects explicitly instead of relying on the
    # reflected str-vs-LooseVersion comparison.
    assert LooseVersion(vsn) > LooseVersion("0.0")