def __init__(self, debug_mode=False, challenge_mode=False, track=None, timeout=20.0):
    """
    Set up the manager's parameters; the scenario itself is loaded later.
    """
    self.scenario = None
    self.scenario_tree = None
    self.scenario_class = None
    self.ego_vehicles = None
    self.other_actors = None

    self._debug_mode = debug_mode
    self._challenge_mode = challenge_mode
    self._track = track
    self._agent = None
    self._running = False
    self._timestamp_last_run = 0.0
    self._timeout = timeout
    self._watchdog = Watchdog(float(self._timeout))

    self.scenario_duration_system = 0.0
    self.scenario_duration_game = 0.0
    self.start_system_time = None
    self.end_system_time = None

    # Handle to GPU 1, used to monitor memory/utilization during the run
    nvidia_smi.nvmlInit()
    self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)

    # Register a signal handler so the scenario can be interrupted externally (e.g. Ctrl+C)
    signal.signal(signal.SIGINT, self._signal_handler)
def get_gpu_memory():
    import nvidia_smi

    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Used GPU memory: {}%".format((info.used * 100) // info.total))
    nvidia_smi.nvmlShutdown()
def __init__(self, gpus=None):
    r"""CO2 consumption tracker for deep learning models.
    See https://arxiv.org/abs/1906.02243 for details.
    """
    gpus = gpus if gpus is not None else []

    # time-tracking variables
    self._start = None
    self._step = None

    # power variables
    self._cpu_power = 0
    self._gpu_power = 0
    self._ram_power = 0
    self.total_energy = 0

    # GPU-specific constants
    self._cuda = torch.cuda.is_available()
    print(gpus)
    if self._cuda:
        nvidia_smi.nvmlInit()
        self._handles = [
            nvidia_smi.nvmlDeviceGetHandleByIndex(gpu) for gpu in gpus
        ]

    # energy consumption constants
    self._pue_coeff = 1.58
    self._co2_coeff = 0.477
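# The sketch below is NOT part of the original class; it only illustrates how
# constants like the ones above are typically combined, following the shape of
# the formula in https://arxiv.org/abs/1906.02243: energy is the measured
# average power draw scaled by the data-centre PUE, and CO2 is that energy
# times an emission factor (here presumably 0.477 kg CO2 per kWh). The helper
# name and its arguments are hypothetical.
def estimate_co2_kg(cpu_power_w, gpu_power_w, ram_power_w, hours,
                    pue_coeff=1.58, co2_coeff=0.477):
    """Hypothetical helper: estimate emissions from average power draw.

    energy (kWh) = PUE * (CPU + GPU + RAM power in W) * hours / 1000
    CO2 (kg)     = emission factor (kg CO2 / kWh) * energy (kWh)
    """
    energy_kwh = pue_coeff * (cpu_power_w + gpu_power_w + ram_power_w) * hours / 1000.0
    return co2_coeff * energy_kwh

# Example: a 300 W GPU + 100 W CPU + 50 W RAM running for 10 hours:
# estimate_co2_kg(100, 300, 50, 10) -> 1.58 * 450 * 10 / 1000 * 0.477 ≈ 3.39 kg CO2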
def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05):
    self.cuda = torch.cuda.is_available()
    self.gpu = gpu
    self.smooth = smooth
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

    self.batch_size = batch_size
    self.validation_split = validation_split

    kwargs = {'num_workers': 2, 'pin_memory': False} if self.cuda else {}

    self.model = model.Network(95*3+1, 100, 2).to(self.device)
    print('N. total parameters : {0}'.format(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    self.dataset = Dataset()

    # Compute the fraction of data for training/validation
    idx = np.arange(self.dataset.n_training)
    self.train_index = idx[0:int((1-validation_split)*self.dataset.n_training)]
    self.validation_index = idx[int((1-validation_split)*self.dataset.n_training):]

    # Define samplers for the training and validation sets
    self.train_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.train_index)
    self.validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.validation_index)

    # Data loaders that will inject data during training
    self.train_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size, sampler=self.train_sampler, shuffle=False, **kwargs)
    self.validation_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size, sampler=self.validation_sampler, shuffle=False, **kwargs)
def check_gpu_stat():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu{i}: {res.gpu}%, gpu-mem: {res.memory}%')
def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05, K=3, model_class='conv1d'):
    self.cuda = torch.cuda.is_available()
    self.gpu = gpu
    self.smooth = smooth
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")
    # self.device = 'cpu'

    self.batch_size = batch_size
    self.model_class = model_class
    self.K = K

    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(
            self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

    self.validation_split = validation_split

    kwargs = {'num_workers': 4, 'pin_memory': False} if self.cuda else {}

    if model_class == 'conv1d':
        self.model = model.Network(K=self.K, L=32, device=self.device, model_class=model_class).to(self.device)
    if model_class == 'conv2d':
        self.model = model.Network(K=self.K, L=32, NSIDE=16, device=self.device, model_class=model_class).to(self.device)

    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    self.train_dataset = Dataset(n_training=20000)
    self.validation_dataset = Dataset(n_training=2000)

    # Data loaders that will inject data during training
    self.train_loader = torch.utils.data.DataLoader(
        self.train_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, **kwargs)
    self.validation_loader = torch.utils.data.DataLoader(
        self.validation_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, **kwargs)
def __init__(self, device='cpu'):
    self.log = SummaryWriter()

    if nvidia_smi and device != 'cpu':
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    else:
        self.handle = None
def checkGPUsAvailability(n_gpus=1):
    '''
    Check that the first 'n_gpus' GPUs have free memory.
    OUT:
        True: if they do
        False: if not
    '''
    # For every GPU to check
    for i_gpu in range(n_gpus):
        # Access the memory used by the i-th GPU
        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i_gpu)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        except Exception:
            print('Warning: GPU could not be accessed')
            break

        # If more than 1 GB is taken, then stop
        if (mem_res.used / (1024.**3) > 1.0):  # greater than 1 GB of VRAM
            # Report it
            print('Memory used (gpu-%i): %.2f GB' % (i_gpu, mem_res.used / (1024**3)), end='')
            print(' - on total: %.2f GB' % (mem_res.total / (1024**3)))
            return False

    return True
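# Hypothetical usage sketch (not part of the original code): poll
# checkGPUsAvailability() until the requested GPUs are free before launching
# a job. The helper name and polling interval are illustrative only.
import time

def wait_for_free_gpus(n_gpus=1, poll_seconds=60):
    """Block until checkGPUsAvailability(n_gpus) reports free GPUs."""
    while not checkGPUsAvailability(n_gpus):
        time.sleep(poll_seconds)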
def __init__(self, basis_wavefront='zernike', npix_image=128, n_modes=44, n_frames=10, gpu=0, smooth=0.05,
             batch_size=16, arguments=None):
    self.pixel_size = 0.0303
    self.telescope_diameter = 256.0   # cm
    self.central_obscuration = 51.0   # cm
    self.wavelength = 8000.0
    self.n_frames = n_frames
    self.batch_size = batch_size
    self.arguments = arguments

    self.basis_for_wavefront = basis_wavefront
    self.npix_image = npix_image
    self.n_modes = n_modes
    self.gpu = gpu
    self.cuda = torch.cuda.is_available()
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    # Get handles to later check memory and usage of GPUs
    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(
            gpu, nvidia_smi.nvmlDeviceGetName(self.handle)))

    # Define the neural network model
    print("Defining the model...")
    self.model = model.Network(device=self.device, n_modes=self.n_modes, n_frames=self.n_frames,
                               pixel_size=self.pixel_size, telescope_diameter=self.telescope_diameter,
                               central_obscuration=self.central_obscuration, wavelength=self.wavelength,
                               basis_for_wavefront=self.basis_for_wavefront, npix_image=self.npix_image).to(self.device)

    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    kwargs = {'num_workers': 1, 'pin_memory': False} if self.cuda else {}

    # Data loaders that will inject data during training
    self.training_dataset = Dataset(
        filename='/scratch1/aasensio/fastcam/training_small.h5',
        n_training_per_star=1000, n_frames=self.n_frames)
    self.train_loader = torch.utils.data.DataLoader(
        self.training_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, **kwargs)

    self.validation_dataset = Dataset(
        filename='/scratch1/aasensio/fastcam/validation_small.h5',
        n_training_per_star=100, n_frames=self.n_frames, validation=True)
    self.validation_loader = torch.utils.data.DataLoader(
        self.validation_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, **kwargs)
def get_mem_info(device_id):
    gpu_list = [device_id]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res[0]
def show_memory_usage():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # GPU number
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # print('=' * 50)
    # print(f'mem: {mem_res.used / (1024 ** 3)} (GiB)')  # usage in GiB
    print(f'mem usage: {100 * (mem_res.used / mem_res.total):.3f}%')  # percentage
def get_gpu_temp():
    try:
        nvmlInit()
        gpu = nvmlDeviceGetHandleByIndex(0)
        gpu_temp = nvmlDeviceGetTemperature(gpu, NVML_TEMPERATURE_GPU)
        return gpu_temp
    except NVMLError:
        return None
def print_gpu_info(idx=0):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Total memory:", info.total)
    print("Free memory:", info.free)
    print("Used memory:", info.used)
    nvidia_smi.nvmlShutdown()
def gpu_memory_tracker():
    """Return the percentage of GPU memory currently in use on device 0."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    used = info.used
    total = info.total
    percent = used / total * 100
    return percent
def memory_check():
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here; there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    mbs = mem_res.used / (1024**2)
    percent = mem_res.used / mem_res.total  # note: a fraction in [0, 1], not a percentage
    return mbs, percent
def Watch_fin():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if res.used == 0:
        return 0
    else:
        return 1
def get_usage(gpu_list=None, **kwargs):
    """ Track GPU memory usage. """
    _ = kwargs
    gpu_list = gpu_list or [0]

    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res
def get_usage(gpu_list=None, **kwargs):
    """ Track GPU memory utilization. """
    _ = kwargs
    gpu_list = gpu_list or [0]

    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetUtilizationRates(item) for item in handle]
    return [item.memory for item in res]
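# Note: the two get_usage() variants above measure different quantities.
# nvmlDeviceGetMemoryInfo reports how much VRAM is currently allocated, while
# nvmlDeviceGetUtilizationRates().memory reports how busy the memory
# controller was over the last sampling period. Standalone sketch (not from
# the original sources) querying both for one device, assuming the same
# nvidia_smi bindings used throughout:
import nvidia_smi

def compare_memory_metrics(device_index=0):
    """Return (percent of VRAM allocated, memory-controller utilization %)."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_index)
    mem = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    util = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    nvidia_smi.nvmlShutdown()
    return 100 * mem.used / mem.total, util.memory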
def use_gpu():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    nvidia_smi.nvmlShutdown()
    # Consider the GPU "in use" if more than ~1 GB is allocated
    return info.used > 1000000000
def gpu_usage():
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here; there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    usage = info.used
    nvidia_smi.nvmlShutdown()
    return usage
def get_max_data_group_size():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    total_memory = info.total
    if total_memory >= 12 * (10 ** 9):
        return 2 ** 12
    elif total_memory >= 6 * (10 ** 9):
        return 2 ** 11
    else:
        # Not enough VRAM: hide the GPU and fall back to CPU
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        return 2 ** 12
def check_cuda_memory():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print("Device {}: {}, Memory : ({:.2f}% free): {} (total), {} (free), {} (used)".format(
            i, nvidia_smi.nvmlDeviceGetName(handle), 100 * info.free / info.total,
            info.total, info.free, info.used))
    nvidia_smi.nvmlShutdown()
    return
def get_device(gpuID=False):
    """Checks available GPUs and selects the one with the most available memory

    Parameters
    ----------
    gpuID: bool or int
        whether to use GPU, or the device ID of a specific GPU to use. If False,
        use only CPU. If True, attempt to find the GPU with the most available memory.

    Returns
    -------
    device : jax.device
        handle to gpu or cpu device selected
    """
    import jax

    if gpuID is False:
        return jax.devices('cpu')[0]

    try:
        gpus = jax.devices('gpu')
        # did the user request a specific GPU?
        if isinstance(gpuID, int) and gpuID < len(gpus):
            return gpus[gpuID]
        if isinstance(gpuID, int):
            from desc.backend import TextColors
            # ID was not valid
            warnings.warn(TextColors.WARNING +
                          'gpuID did not match any found devices, trying default gpu option' +
                          TextColors.ENDC)
        # find all available options and see which has the most space
        import nvidia_smi
        nvidia_smi.nvmlInit()
        maxmem = 0
        gpu = gpus[0]
        for i in range(len(gpus)):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            if info.free > maxmem:
                maxmem = info.free
                gpu = gpus[i]
        nvidia_smi.nvmlShutdown()
        return gpu

    except Exception:
        from desc.backend import TextColors
        warnings.warn(TextColors.WARNING +
                      'No GPU found, falling back to CPU' + TextColors.ENDC)
        return jax.devices('cpu')[0]
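# Hypothetical usage sketch (not from the original source): place an array on
# the device selected by get_device(). jax.device_put is standard JAX API; the
# array shape is arbitrary.
import jax
import jax.numpy as jnp

device = get_device(gpuID=True)   # pick the GPU with the most free memory
x = jax.device_put(jnp.ones((1024, 1024)), device)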
def on_train_batch_begin(self, batch, logs=None):
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here; there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    res1 = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)  # memory info (currently unused)
    # GPUs = GPU.getGPUs()
    # gpu = GPUs[0]
    print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
def __init__(self, exclude_gpu_ids: list = None):
    """
    Usage:
        g = GPUAllocator()
        gpu_id = g.get_gpu()
        ## do something with gpu_id
        g.set_as_free(gpu_id)
    """
    exclude_gpu_ids = exclude_gpu_ids or []

    nvidia_smi.nvmlInit()
    self.num_gpus = nvidia_smi.nvmlDeviceGetCount()

    self.gpu_names = []
    for i in range(self.num_gpus):
        if i not in exclude_gpu_ids:
            self.gpu_names.append('cuda:' + str(i))

    self.usage = {}
    for i in range(self.num_gpus):
        if i not in exclude_gpu_ids:
            self.usage[i] = False

    """
    On a good day, this is how the variables look:
        self.num_gpus = 2
        self.gpu_names = ['cuda:0', 'cuda:1']
        self.usage = {0: False, 1: False}
    """

    print("[" + Colors.CYAN + "EDEN" + Colors.END + "] " +
          'Initialized GPUAllocator with devices: ', self.gpu_names)
def predict_age(file_path="/media/original/data/vtps/sub-CC00050XX01_ses-7201_hemi-L_inflated_reduce50.vtp"):
    torch.manual_seed(0)

    if osp.isfile(file_path):
        # mesh = read(file_path)
        # reader = vtk.vtkPolyDataReader()
        reader = vtk.vtkXMLPolyDataReader()
        reader.SetFileName(file_path)
        reader.Update()
        # output = reader.GetOutput()

        points = torch.tensor(np.array(reader.GetOutput().GetPoints().GetData()))

        local_features = ['corrected_thickness', 'curvature', 'sulcal_depth']
        x = get_features(local_features, reader)

        transform = T.NormalizeScale()
        # transform_samp = T.FixedPoints(10000)

        data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        data = transform(data)
        # data = transform_samp(data)
        # data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        # data = Data(x=x, pos=points)

        # Use the GPU only if it has enough free memory
        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            free_mem = mem_res.free / 1024 ** 2
        except Exception:
            free_mem = 0

        device = torch.device('cuda' if torch.cuda.is_available() and free_mem >= GPU_MEM_LIMIT else 'cpu')

        numb_local_features = x.size(1)
        numb_global_features = 0

        model = Net(numb_local_features, numb_global_features).to(device)
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        model.eval()

        # data_loader = DataLoader([data], batch_size=1, shuffle=False)
        # print(len(data_loader))
        # pred = model(next(iter(data_loader)).to(device))
        pred = model(data.to(device))
        return pred.item()
    else:
        return 'Unable to predict..'
def reserve(h=24):
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    free_mem = []
    for i in range(deviceCount):
        total, used, free = check_mem(i)
        free_mem.append(free)
    block_mem = int(max(free_mem) * 0.9)
    # 256 * 1024 floats * 4 bytes = 1 MiB per unit of block_mem
    # (assuming check_mem() reports memory in MiB)
    x = torch.cuda.FloatTensor(256, 1024, block_mem)
    try:
        sleep(h * 3600)
    except KeyboardInterrupt:
        print('\nMemory Released')
        del x
def get_free_gpu_mem(gpu_index):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_index)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # logging.info("Total GPU memory: {}".format(info.total))
    # logging.info("Free GPU memory: {}".format(info.free))
    # logging.info("Used GPU memory: {}".format(info.used))
    nvidia_smi.nvmlShutdown()
    return info.free
def watch(memory_max):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if memory_max < res.used:
        memory_max = res.used
        with open(
                "/mnt/mqs02/data/ogawa/BERT/preprocess-for-BERT/conbination/NUMAS/vocab_cost/recipe/32000/memory_cost_3.txt",
                "a", encoding="utf-8") as f:
            result = str(memory_max) + "\n"
            f.write(result)
    print(memory_max)
    return memory_max
def cal_gpu_util(job):
    ct = 0
    gpu = 0
    nvidia_smi.nvmlInit()
    for key in job.gpus_loc.keys():
        for i in job.gpus_loc[key]:
            ct += 1
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            gpu += res.gpu
    if ct > 0:
        avg = gpu / ct
        return avg
    else:
        print('job no gpu')
        return 0
    # ... inside get_default_device(): 'fn' and the initial 'dev' are defined earlier
    if os.path.exists(fn):
        config = ConfigParser.ConfigParser()
        with open(fn) as f:
            config.readfp(f)
        cfg_dev = config.get("global", "device")
        if cfg_dev is not None:
            dev = cfg_dev
    if 'THEANO_FLAGS' in os.environ:
        res = re.match(r'device=(\w+)', os.environ['THEANO_FLAGS'])
        if res:
            dev = res.group(1)
    return dev


device = get_default_device()
if device == 'gpu':
    nvidia_smi.nvmlInit()
print "default is", device

if device == 'gpu':
    gpu = get_gpu()
    if 'THEANO_FLAGS' in os.environ:
        flags = os.environ['THEANO_FLAGS']
    else:
        flags = ""
    os.environ['THEANO_FLAGS'] = flags + ",device=gpu%d" % gpu
    print "Using device gpu", gpu

if __name__ == "__main__":
    import theano
    print theano.config.device