def get_running_processes():
    """Returns the process rows that `nvidia-smi` lists for the GPUs.

    Runs `nvidia-smi` with no arguments and scans its output starting at the
    first line that contains the word "Processes". A line is treated as a
    process row when the first run of digits on it starts at column 5, the
    position this parser expects the GPU index to occupy in the process table.

    Returns:
      List with one entry per process row. Each entry is the list of
      whitespace-separated fields of that row with the `|` border characters
      removed. The list is empty when no process rows are found or when
      `nvidia-smi` fails or produces fewer than two lines of output.
    """
    retcode, result = local_command.run_local_command('nvidia-smi')
    lines = result.splitlines()
    if retcode != 0 or len(lines) <= 1:
        print('nvidia-smi did not return as expected:{}'.format(result))
        # Keep the return type a list so callers can always iterate over it.
        return []

    # Match any decimal digit so rows for every single-digit GPU index are
    # recognised, not only indices made of the digits 0 and 1.
    gpu_index_pattern = re.compile('[0-9]+')
    gpu_index_column = 5

    processes = []
    in_process_table = False
    for line in lines:
        if 'Processes' in line:
            in_process_table = True
        if not in_process_table:
            continue
        match = gpu_index_pattern.search(line)
        if match and match.start() == gpu_index_column:
            processes.append(line.strip().replace('|', '').split())
    return processes
def _cpu_info():
    """Returns the raw contents of /proc/cpuinfo.

    Returns:
      Output of `cat /proc/cpuinfo`, or an empty string if the command exits
      with a non-zero status (the error output is printed in that case).
    """
    status, output = local_command.run_local_command('cat /proc/cpuinfo')
    if status != 0:
        print('Error getting cpuinfo: {}'.format(output))
        return ''
    return output
def _socket_count():
    """Returns the number of distinct `physical id` entries in /proc/cpuinfo.

    Returns:
      The count reported by `wc -l` as an int, or -1 if the command exits with
      a non-zero status or produces no output.
    """
    command = 'grep -i "physical id" /proc/cpuinfo | sort -u | wc -l'
    status, output = local_command.run_local_command(command)
    rows = output.splitlines()
    if status != 0 or not rows:
        print('Error getting cpuinfo scocket count: {}'.format(output))
        return -1
    # The first output line holds the line count produced by `wc -l`.
    return int(rows[0])
def get_gpu_count():
    """Returns the number of GPUs listed by `nvidia-smi`.

    Queries `nvidia-smi` in CSV format and counts every output line except
    the first, which is treated as the CSV header.

    Returns:
      Number of GPU rows as an int, or -1 if the command exits with a non-zero
      status or returns fewer than two lines.
    """
    command = 'nvidia-smi --query-gpu=driver_version,gpu_name --format=csv'
    status, output = local_command.run_local_command(command)
    rows = output.splitlines()
    if status != 0 or len(rows) <= 1:
        print('nvidia-smi did not return as expected:{}'.format(output))
        return -1
    # Drop the header row from the count.
    return len(rows) - 1
def _model_name():
    """Returns the CPU model name taken from /proc/cpuinfo.

    Uses the first unique `model name` line and returns the text found
    between the first and second `:` on that line, stripped of whitespace.

    Returns:
      Model name string, or an empty string if the command exits with a
      non-zero status or produces no output.
    """
    command = "cat /proc/cpuinfo | grep 'model name' | sort --unique"
    status, output = local_command.run_local_command(command)
    rows = output.splitlines()
    if status != 0 or not rows:
        print('Error getting cpuinfo model name: {}'.format(output))
        return ''
    fields = rows[0].split(':')
    return fields[1].strip()
def _core_count():
    """Returns the total core count for the system.

    Reads the first unique `cpu cores` value from /proc/cpuinfo and multiplies
    it by the socket count reported by `_socket_count()`.

    Returns:
      Cores * sockets as an int, or -1 if the `cpu cores` lookup fails or the
      socket count could not be determined.
    """
    cmd = "cat /proc/cpuinfo | grep 'cpu cores' | sort --unique"
    retcode, result = local_command.run_local_command(cmd)
    lines = result.splitlines()
    if retcode != 0 or not lines:
        print('Error getting cpuinfo core count: {}'.format(result))
        return -1

    cores_per_socket = int(lines[0].split(':')[1].strip())
    sockets = _socket_count()
    if sockets < 0:
        # _socket_count() signals failure with -1 (and prints its own error);
        # multiplying by it would yield a meaningless negative core total.
        return -1
    # Cores * sockets = total cores for the system.
    return cores_per_socket * sockets
def git_repo_describe(git_dir):
    """Runs `git describe --always` against a git directory.

    Args:
      git_dir: git directory passed to `git -C`.

    Returns:
      str with the stripped output of the describe command.

    Raises:
      Exception: If the command exits with a non-zero status.
    """
    command = 'git -C {} describe --always'.format(git_dir)
    exit_code, output = local_command.run_local_command(command)
    if exit_code == 0:
        return output.strip()
    raise Exception('Command ({}) failed to run:{}'.format(command, output))
def git_repo_last_commit_id(git_dir):
    """Returns the full hash of the most recent commit in a git directory.

    Args:
      git_dir: git directory passed to `git -C`.

    Returns:
      str with the stripped output of `git log --format="%H" -n 1`.

    Raises:
      Exception: If the command exits with a non-zero status.
    """
    command = 'git -C {} log --format="%H" -n 1'.format(git_dir)
    exit_code, output = local_command.run_local_command(command)
    if exit_code == 0:
        return output.strip()
    raise Exception('Command ({}) failed to run:{}'.format(command, output))
def get_gpu_info():
    """Returns the driver version and GPU name reported by `nvidia-smi`.

    Note: Assumes that when the system has multiple GPUs they are all the
    same, with one exception. If the first entry is a Quadro and a second
    entry exists, the heuristic assumes this may be a workstation and uses
    the second entry instead.

    Returns:
      Tuple of (driver version, gpu name). Both are empty strings if the
      command exits with a non-zero status or returns fewer than two lines.
    """
    command = 'nvidia-smi --query-gpu=driver_version,gpu_name --format=csv'
    status, output = local_command.run_local_command(command)
    rows = output.splitlines()
    if status != 0 or len(rows) <= 1:
        print('nvidia-smi did not return as expected:{}'.format(output))
        return '', ''

    # rows[0] is the CSV header; rows[1] is the first GPU entry.
    fields = rows[1].split(',')
    if 'Quadro' in fields[1] and len(rows) > 2:
        fields = rows[2].split(',')
    driver_version = fields[0].strip()
    gpu_name = fields[1].strip()
    return driver_version, gpu_name