Example #1
import time
from pathlib import Path

from gpuutils import GpuUtils
# Assumption: SummaryWriter comes from torch.utils.tensorboard; the original
# snippet does not show where it is imported from.
from torch.utils.tensorboard import SummaryWriter


def monitor(path: Path, period: int):
    """Monitors GPU utilization and logs it to TensorBoard.

    Args:
        path:
            Directory to write the monitoring logs to.
        period:
            Period, in seconds, between consecutive writes.
    """
    logger = SummaryWriter(str(path))
    k = 0
    while True:
        # With pandas_format=False the analysis is returned as a dict of lists.
        monitors = GpuUtils.analyzeSystem(pandas_format=False)
        if len(monitors['gpu_index']) == 0:
            print('No GPUs found')
            break
        for i, utilization, memory_available, memory_utilization \
                in zip(monitors['gpu_index'],
                       monitors['utilizations'],
                       monitors['available_memories_in_mb'],
                       monitors['memory_usage_percentage']):
            logger.add_scalar(f'Monitoring/GPU{i}/utilization', utilization, k)
            logger.add_scalar(f'Monitoring/GPU{i}/MB left', memory_available,
                              k)
            logger.add_scalar(f'Monitoring/GPU{i}/memory utilization',
                              memory_utilization, k)
        # Advance the step once per polling cycle so all GPUs share the same step.
        k += 1
        time.sleep(period)
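# A minimal usage sketch (not part of the original snippet): the log directory
# and the 30-second period below are placeholder values. Running the monitor in
# a daemon thread keeps it from blocking the training code.
import threading

threading.Thread(
    target=monitor,
    args=(Path('runs/gpu_monitor'), 30),
    daemon=True,
).start()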
Example #2
import numpy as np
import matplotlib.pyplot as plt
import itertools

import os
from gpuutils import GpuUtils
GpuUtils.allocate(gpu_count=1, framework='keras')

import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

from tensorflow import keras
from generator import list_of_file_ids_test, n_events_per_file, n_files_train, n_files_val, batch_size, TestDataset
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(cm,
                          classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    fig = plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
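# Quick illustrative usage (not part of the original script): render a toy
# confusion matrix. In the real script, cm would presumably be built from the
# Keras model's predictions on TestDataset.
example_cm = confusion_matrix([0, 1, 1, 0, 1, 2], [0, 1, 0, 0, 2, 2])
plot_confusion_matrix(example_cm, classes=['class 0', 'class 1', 'class 2'])
plt.show()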
Example #3
from gpuutils import GpuUtils
import pandas as pd

#------------------------------

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

#------------------------------

print("GPU analysis of a machine have 8 GPUs")

mock_response = 'Tue Apr 21 09:58:12 2020	   \n+-----------------------------------------------------------------------------+\n| NVIDIA-SMI 418.67	   Driver Version: 418.67	   CUDA Version: 10.1	 |\n|-------------------------------+----------------------+----------------------+\n| GPU  Name		Persistence-M| Bus-Id		Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp  Perf  Pwr:Usage/Cap|		 Memory-Usage | GPU-Util  Compute M. |\n|===============================+======================+======================|\n|   0  Tesla V100-SXM2...  Off  | 00000000:15:00.0 Off |				  N/A |\n| N/A   34C	P0	56W / 300W |   1280MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   1  Tesla V100-SXM2...  Off  | 00000000:16:00.0 Off |					0 |\n| N/A   34C	P0	43W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   2  Tesla V100-SXM2...  Off  | 00000000:3A:00.0 Off |					0 |\n| N/A   33C	P0	41W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   3  Tesla V100-SXM2...  Off  | 00000000:3B:00.0 Off |					0 |\n| N/A   35C	P0	42W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   4  Tesla V100-SXM2...  Off  | 00000000:89:00.0 Off |					0 |\n| N/A   31C	P0	42W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   5  Tesla V100-SXM2...  Off  | 00000000:8A:00.0 Off |					0 |\n| N/A   33C	P0	41W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   6  Tesla V100-SXM2...  Off  | 00000000:B2:00.0 Off |					0 |\n| N/A   33C	P0	43W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n|   7  Tesla V100-SXM2...  Off  | 00000000:B3:00.0 Off |					0 |\n| N/A   33C	P0	43W / 300W |	 11MiB / 32480MiB |	  0%	  Default |\n+-------------------------------+----------------------+----------------------+\n																			   \n+-----------------------------------------------------------------------------+\n| Processes:													   GPU Memory |\n|  GPU	   PID   Type   Process name							 Usage	  |\n|=============================================================================|\n+-----------------------------------------------------------------------------+\n'

df = GpuUtils.analyzeSystem(mock_response=mock_response)

print(type(df))
print(df)

required_memory = 10000
gpu_count = 1

df = df[(df.available_memories_in_mb > required_memory)]
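# Illustrative follow-up (not in the original example): keep the first
# `gpu_count` GPUs that pass the memory filter and expose them through
# CUDA_VISIBLE_DEVICES. The 'gpu_index' column name is assumed from the
# analyzeSystem() output shown above.
import os

chosen = df['gpu_index'].head(gpu_count).astype(str).tolist()
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(chosen)
print('GPUs passing the memory filter:', chosen)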

print("--------------------------------------------")

print("GPU analysis of this machine:")
print(GpuUtils.analyzeSystem())

print("--------------------------------------------")