def collect_impl(self):
    """Take one nvidia-smi reading, publish it, and convert it to metrics.

    The reading is obtained from ``nvidia.nvidia_smi`` using the
    ``GpuCollector.cmd_histogram`` and ``GpuCollector.cmd_timeout`` class
    attributes, logged at debug level, and stored in ``self.gpu_info_ref``
    together with the current local time. The same timestamp is then used
    to fetch a value from ``self.zombie_info_ref``.

    Returns:
        The result of ``GpuCollector.convert_to_metrics`` when the reading
        is not ``None``; otherwise ``None``.
    """
    smi_reading = nvidia.nvidia_smi(
        GpuCollector.cmd_histogram, GpuCollector.cmd_timeout)
    logger.debug("get gpu_info %s", smi_reading)

    # One timestamp is shared by the store and the lookup below.
    # The reading is stored even when it is None.
    sampled_at = datetime.datetime.now()
    self.gpu_info_ref.set(smi_reading, sampled_at)
    zombies = self.zombie_info_ref.get(sampled_at)

    if smi_reading is None:
        return None

    return GpuCollector.convert_to_metrics(
        smi_reading,
        zombies,
        GpuCollector.get_container_id,
        self.mem_leak_thrashold,
    )
def collect_impl(self):
    """Take one nvidia-smi reading and expose per-device utilisation gauges.

    The reading is obtained from ``nvidia.nvidia_smi`` using the
    ``GpuCollector.cmd_histogram`` and ``GpuCollector.cmd_timeout`` class
    attributes, logged at debug level, and handed to
    ``self.gpu_info_ref.get_and_set``.

    Returns:
        ``[core_gauge, mem_gauge]`` when the reading is not ``None``. Each
        entry of the reading contributes its ``"gpu_util"`` value to the
        first gauge and its ``"gpu_mem_util"`` value to the second, both
        labelled with the entry's key. Returns ``None`` when the reading
        is ``None``.
    """
    smi_reading = nvidia.nvidia_smi(
        GpuCollector.cmd_histogram, GpuCollector.cmd_timeout)
    logger.debug("get gpu_info %s", smi_reading)

    # The reading is handed over even when it is None.
    self.gpu_info_ref.get_and_set(smi_reading)

    if smi_reading is None:
        return None

    core_gauge = gen_gpu_util_gauge()
    mem_gauge = gen_gpu_mem_util_gauge()

    for device_minor, device_stats in smi_reading.items():
        label_values = [device_minor]
        core_gauge.add_metric(label_values, device_stats["gpu_util"])
        # A fresh label list per call, as each gauge may keep a reference.
        mem_gauge.add_metric([device_minor], device_stats["gpu_mem_util"])

    return [core_gauge, mem_gauge]