示例#1
0
    def collect_impl(self):
        """Sample nvidia-smi once and turn the result into metrics.

        The sample is stored in ``self.gpu_info_ref`` together with the
        current timestamp on every call, including when the sample is
        ``None``. The same timestamp is used to read from
        ``self.zombie_info_ref``.

        Returns:
            The value produced by ``GpuCollector.convert_to_metrics`` when
            a sample is available, otherwise ``None``.
        """
        smi_sample = nvidia.nvidia_smi(
            GpuCollector.cmd_histogram, GpuCollector.cmd_timeout)
        logger.debug("get gpu_info %s", smi_sample)

        # One timestamp is shared by the store and the lookup below.
        timestamp = datetime.datetime.now()
        self.gpu_info_ref.set(smi_sample, timestamp)
        zombies = self.zombie_info_ref.get(timestamp)

        # Nothing to convert when no sample came back.
        if smi_sample is None:
            return None

        return GpuCollector.convert_to_metrics(
            smi_sample,
            zombies,
            GpuCollector.get_container_id,
            self.mem_leak_thrashold)
示例#2
0
文件: collector.py 项目: zmoon111/pai
    def collect_impl(self):
        """Sample nvidia-smi once and report per-GPU utilization gauges.

        The sample is handed to ``self.gpu_info_ref.get_and_set`` on every
        call, including when the sample is ``None``.

        Returns:
            A two-element list ``[core gauge, memory gauge]`` built from
            ``gen_gpu_util_gauge()`` and ``gen_gpu_mem_util_gauge()``, each
            holding one entry per key of the sample, or ``None`` when no
            sample is available.
        """
        smi_sample = nvidia.nvidia_smi(
            GpuCollector.cmd_histogram, GpuCollector.cmd_timeout)
        logger.debug("get gpu_info %s", smi_sample)

        self.gpu_info_ref.get_and_set(smi_sample)

        # Nothing to report when no sample came back.
        if smi_sample is None:
            return None

        core_gauge = gen_gpu_util_gauge()
        mem_gauge = gen_gpu_mem_util_gauge()

        # Each entry is labelled with its key from the sample mapping.
        for gpu_minor, stats in smi_sample.items():
            core_gauge.add_metric([gpu_minor], stats["gpu_util"])
            mem_gauge.add_metric([gpu_minor], stats["gpu_mem_util"])

        return [core_gauge, mem_gauge]