def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05):
    """Set up device, model, dataset and the train/validation data loaders.

    Parameters
    ----------
    batch_size : int
        Mini-batch size used by both data loaders.
    validation_split : float
        Fraction of the dataset reserved for validation.
    gpu : int
        Index of the CUDA device to use when one is available.
    smooth : float
        Smoothing factor kept on the instance for later use.
    """
    self.cuda = torch.cuda.is_available()
    self.gpu = gpu
    self.smooth = smooth
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    # Report the compute device through NVML when the bindings are present.
    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

    self.batch_size = batch_size
    self.validation_split = validation_split

    loader_kwargs = {'num_workers': 2, 'pin_memory': False} if self.cuda else {}

    self.model = model.Network(95*3+1, 100, 2).to(self.device)
    n_trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
    print('N. total parameters : {0}'.format(n_trainable))

    self.dataset = Dataset()

    # Deterministic head/tail split of the sample indices; the subset
    # samplers below still randomize the order within each subset.
    all_indices = np.arange(self.dataset.n_training)
    split_point = int((1 - validation_split) * self.dataset.n_training)
    self.train_index = all_indices[0:split_point]
    self.validation_index = all_indices[split_point:]

    # Define samplers for the training and validation sets
    self.train_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.train_index)
    self.validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.validation_index)

    # Data loaders that feed batches during training; shuffle must stay
    # False because an explicit sampler is supplied.
    self.train_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size,
        sampler=self.train_sampler, shuffle=False, **loader_kwargs)
    self.validation_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size,
        sampler=self.validation_sampler, shuffle=False, **loader_kwargs)
def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05, K=3, model_class='conv1d'):
    """Set up device, model and the train/validation data loaders.

    Parameters
    ----------
    batch_size : int
        Mini-batch size used by both data loaders.
    validation_split : float
        Kept on the instance; the split itself is realized through two
        independently sized Dataset instances below.
    gpu : int
        Index of the CUDA device to use when one is available.
    smooth : float
        Smoothing factor kept on the instance for later use.
    K : int
        Passed through to model.Network.
    model_class : str
        Either 'conv1d' or 'conv2d'; selects the network architecture.

    Raises
    ------
    ValueError
        If ``model_class`` is not one of the supported values.  (The
        original code silently skipped model construction, deferring the
        failure to the first attribute access.)
    """
    self.cuda = torch.cuda.is_available()
    self.gpu = gpu
    self.smooth = smooth
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    self.batch_size = batch_size
    self.model_class = model_class
    self.K = K

    # Report the compute device through NVML when the bindings are present.
    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(
            self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

    self.validation_split = validation_split

    kwargs = {'num_workers': 4, 'pin_memory': False} if self.cuda else {}

    # Build the requested architecture; fail fast on an unknown name so the
    # error surfaces here instead of as an AttributeError later on.
    if model_class == 'conv1d':
        self.model = model.Network(K=self.K, L=32, device=self.device,
                                   model_class=model_class).to(self.device)
    elif model_class == 'conv2d':
        self.model = model.Network(K=self.K, L=32, NSIDE=16, device=self.device,
                                   model_class=model_class).to(self.device)
    else:
        raise ValueError(f"Unknown model_class '{model_class}' (expected 'conv1d' or 'conv2d')")

    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    self.train_dataset = Dataset(n_training=20000)
    self.validation_dataset = Dataset(n_training=2000)

    # Data loaders that will inject data during training
    self.train_loader = torch.utils.data.DataLoader(
        self.train_dataset, batch_size=self.batch_size,
        shuffle=True, drop_last=True, **kwargs)
    self.validation_loader = torch.utils.data.DataLoader(
        self.validation_dataset, batch_size=self.batch_size,
        shuffle=True, drop_last=True, **kwargs)
def __init__(self, basis_wavefront='zernike', npix_image=128, n_modes=44, n_frames=10, gpu=0, smooth=0.05,
             batch_size=16, arguments=None):
    """Configure the telescope constants, the network and the data loaders.

    Parameters
    ----------
    basis_wavefront : str
        Basis used to expand the wavefront (e.g. 'zernike').
    npix_image : int
        Number of pixels per image side.
    n_modes : int
        Number of wavefront modes to infer.
    n_frames : int
        Number of frames per training sample.
    gpu : int
        Index of the CUDA device to use when one is available.
    smooth : float
        Unused here; accepted for interface compatibility.
    batch_size : int
        Mini-batch size used by both data loaders.
    arguments : object or None
        Extra arguments kept on the instance for bookkeeping.
    """
    # Fixed observational setup.
    self.pixel_size = 0.0303
    self.telescope_diameter = 256.0  # cm
    self.central_obscuration = 51.0  # cm
    self.wavelength = 8000.0

    self.n_frames = n_frames
    self.batch_size = batch_size
    self.arguments = arguments
    self.basis_for_wavefront = basis_wavefront
    self.npix_image = npix_image
    self.n_modes = n_modes
    self.gpu = gpu

    self.cuda = torch.cuda.is_available()
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    # Get handlers to later check memory and usage of GPUs.
    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(
            gpu, nvidia_smi.nvmlDeviceGetName(self.handle)))

    # Define the neural network model.
    print("Defining the model...")
    self.model = model.Network(
        device=self.device, n_modes=self.n_modes, n_frames=self.n_frames,
        pixel_size=self.pixel_size, telescope_diameter=self.telescope_diameter,
        central_obscuration=self.central_obscuration, wavelength=self.wavelength,
        basis_for_wavefront=self.basis_for_wavefront,
        npix_image=self.npix_image).to(self.device)

    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    kwargs = {'num_workers': 1, 'pin_memory': False} if self.cuda else {}

    # Data loaders that will inject data during training.
    self.training_dataset = Dataset(
        filename='/scratch1/aasensio/fastcam/training_small.h5',
        n_training_per_star=1000, n_frames=self.n_frames)
    self.train_loader = torch.utils.data.DataLoader(
        self.training_dataset, batch_size=self.batch_size,
        shuffle=True, drop_last=True, **kwargs)

    self.validation_dataset = Dataset(
        filename='/scratch1/aasensio/fastcam/validation_small.h5',
        n_training_per_star=100, n_frames=self.n_frames, validation=True)
    self.validation_loader = torch.utils.data.DataLoader(
        self.validation_dataset, batch_size=self.batch_size,
        shuffle=True, drop_last=True, **kwargs)
def check_cuda_memory():
    """Print total/free/used memory for every visible CUDA device via NVML."""
    nvidia_smi.nvmlInit()
    n_devices = nvidia_smi.nvmlDeviceGetCount()
    for dev in range(n_devices):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(dev)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        free_pct = 100 * info.free / info.total
        print("Device {}: {}, Memory : ({:.2f}% free): {}(total), {} (free), {} (used)".format(
            dev, nvidia_smi.nvmlDeviceGetName(handle), free_pct,
            info.total, info.free, info.used))
    nvidia_smi.nvmlShutdown()
    return
def init_gpu_writers(logdir):
    global gpus, loggers
    '''
    Set up one TensorBoard file writer per GPU.

    Handles are appended to the module-level `gpus` list and the matching
    writers to `loggers`; logs go under <logdir>/<hostname>/<gpu-name>:<i>.
    '''
    n_devices = nvidia_smi.nvmlDeviceGetCount()
    for idx in range(n_devices):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
        gpus.append(handle)
        # Writer label, e.g. "Tesla-V100:0" (spaces replaced for path safety).
        label = nvidia_smi.nvmlDeviceGetName(handle).decode().replace(' ', '-') + ':' + str(int(idx))
        writer = tf.summary.create_file_writer(
            os.path.join(logdir, os.uname().nodename, label))
        loggers.append(writer)
def __init__(self, batch_size=128, training_size=10000, test_size=1000, inversions_path=None):
    """Prepare datasets, data loaders and the model, on GPU when available.

    Parameters
    ----------
    batch_size : int
        Mini-batch size (only applied to the loaders on the CUDA path;
        the CPU path keeps the DataLoader default of 1, as in the
        original implementation).
    training_size, test_size : int
        Number of samples requested from `dataset_spot`.
    inversions_path : str or None
        Forwarded to `dataset_spot`.
    """
    self.cuda = torch.cuda.is_available()
    if self.cuda:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        self.batch_size = batch_size
        self.device = torch.device("cuda") if self.cuda else None
        print("Computing in {0}".format(
            nvidia_smi.nvmlDeviceGetName(self.handle)))
    else:
        print("Computing in CPU")

    self.dataset_train = dataset_spot(mode='train', size=training_size,
                                      inversions_path=inversions_path)
    self.dataset_test = dataset_spot(mode='test', size=test_size,
                                     inversions_path=inversions_path)

    # NOTE(review): on the CPU path self.batch_size/self.device are never
    # set and the loaders use the default batch size — preserved as-is.
    if self.cuda:
        self.train_loader = torch.utils.data.DataLoader(
            self.dataset_train, shuffle=True, batch_size=self.batch_size)
        self.test_loader = torch.utils.data.DataLoader(
            self.dataset_test, shuffle=True, batch_size=self.batch_size)
    else:
        self.train_loader = torch.utils.data.DataLoader(self.dataset_train, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(self.dataset_test, shuffle=True)

    self.in_planes = self.dataset_train.in_planes
    self.out_planes = self.dataset_train.out_planes

    if self.cuda:
        self.model = model.block(in_planes=self.in_planes,
                                 out_planes=self.out_planes).to(self.device)
    else:
        self.model = model.block(in_planes=self.in_planes,
                                 out_planes=self.out_planes)
def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05):
    """Set up device, dataset, loaders, the Neural model and loss weights.

    Parameters
    ----------
    batch_size : int
        Mini-batch size used by both data loaders.
    validation_split : float
        Fraction of the (randomly permuted) dataset used for validation.
    gpu : int
        Index of the CUDA device to use when one is available.
    smooth : float
        Smoothing factor kept on the instance for later use.
    """
    self.cuda = torch.cuda.is_available()
    self.gpu = gpu
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")
    self.smooth = smooth

    # NVML is initialized unconditionally here (no NVIDIA_SMI guard).
    nvidia_smi.nvmlInit()
    self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
    print("Computing in {0} : {1}".format(self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

    self.batch_size = batch_size
    self.validation_split = validation_split

    # torch.backends.cudnn.benchmark = True

    # Training/validation datasets
    loader_kwargs = {'num_workers': 2, 'pin_memory': False} if self.cuda else {}
    self.dataset = Dataset()

    # Random permutation of the sample indices, then head/tail split.
    shuffled = np.random.permutation(self.dataset.n_training)
    split_point = int((1 - validation_split) * self.dataset.n_training)
    self.train_index = shuffled[0:split_point]
    self.validation_index = shuffled[split_point:]

    print(f"Training sample size : {len(self.train_index)}")
    print(f"Validation sample size : {len(self.validation_index)}")

    # Define samplers for the training and validation sets
    self.train_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.train_index)
    self.validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.validation_index)

    # Data loaders that will inject data during training; shuffle must stay
    # False because an explicit sampler is supplied.
    self.train_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size,
        sampler=self.train_sampler, shuffle=False, **loader_kwargs)
    self.validation_loader = torch.utils.data.DataLoader(
        self.dataset, batch_size=self.batch_size,
        sampler=self.validation_sampler, shuffle=False, **loader_kwargs)

    # Neural model
    print("Defining neural network...")
    self.model = model.Neural(n_stokes=self.dataset.n_spectral,
                              n_latent=9, n_hidden=300).to(self.device)
    self.model.weights_init()
    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    # Uniform per-output weights, one per latent variable.
    self.weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
    self.weights = torch.tensor(self.weights.astype('float32')).to(self.device)
def __init__(self, basis_wavefront='zernike', npix_image=128, n_modes=44, n_frames=10, gpu=0, corner=(0,0),
             batch_size=16, checkpoint=None):
    """Build the network and restore its weights from a checkpoint.

    Parameters
    ----------
    basis_wavefront : str
        Basis used to expand the wavefront (e.g. 'zernike').
    npix_image : int
        Number of pixels per image side.
    n_modes : int
        Number of wavefront modes to infer.
    n_frames : int
        Number of frames per sample.
    gpu : int
        Index of the CUDA device to use when one is available.
    corner : tuple
        Accepted for interface compatibility; not stored here.
    batch_size : int
        Kept on the instance for later use.
    checkpoint : str or None
        Path to a .pth checkpoint; when None, the most recently created
        file under trained/ is used.
    """
    # Fixed observational setup.
    self.pixel_size = 0.0303
    self.telescope_diameter = 256.0  # cm
    self.central_obscuration = 51.0  # cm
    self.wavelength = 8000.0

    self.n_frames = n_frames
    self.batch_size = batch_size
    self.basis_for_wavefront = basis_wavefront
    self.npix_image = npix_image
    self.n_modes = n_modes
    self.gpu = gpu

    self.cuda = torch.cuda.is_available()
    self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

    # Get handlers to later check memory and usage of GPUs.
    if NVIDIA_SMI:
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
        print("Computing in {0} : {1}".format(gpu, nvidia_smi.nvmlDeviceGetName(self.handle)))

    # Define the neural network model.
    print("Defining the model...")
    self.model = model.Network(
        device=self.device, n_modes=self.n_modes, n_frames=self.n_frames,
        pixel_size=self.pixel_size, telescope_diameter=self.telescope_diameter,
        central_obscuration=self.central_obscuration, wavelength=self.wavelength,
        basis_for_wavefront=self.basis_for_wavefront,
        npix_image=self.npix_image).to(self.device)

    print('N. total parameters : {0}'.format(
        sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    # Resolve the checkpoint: newest file in trained/ unless one was given.
    if checkpoint is None:
        candidates = glob.glob('trained/*.pth')
        self.checkpoint = max(candidates, key=os.path.getctime)
    else:
        self.checkpoint = '{0}'.format(checkpoint)

    print("=> loading checkpoint '{}'".format(self.checkpoint))
    # map_location keeps the tensors on CPU regardless of where they were saved.
    tmp = torch.load(self.checkpoint, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(tmp['state_dict'])
    print("=> loaded checkpoint '{}'".format(self.checkpoint))
def __init__(self, batch_size, n_training=10000, n_validation=1000, n_pixels=32):
    """Set up device, the spot datasets/loaders and the model.

    Parameters
    ----------
    batch_size : int
        Mini-batch size used by both data loaders.
    n_training, n_validation : int
        Sample counts for the training and test datasets.
    n_pixels : int
        Spatial size forwarded to `dataset_spot`.
    """
    self.cuda = torch.cuda.is_available()
    self.batch_size = batch_size
    self.device = torch.device("cuda" if self.cuda else "cpu")

    # NVML is initialized unconditionally here (no NVIDIA_SMI guard).
    nvidia_smi.nvmlInit()
    self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    print("Computing in {0}".format(
        nvidia_smi.nvmlDeviceGetName(self.handle)))

    loader_kwargs = {'num_workers': 4, 'pin_memory': True} if self.cuda else {}

    self.dataset_train = dataset_spot(n_training=n_training, n_pixels=n_pixels)
    self.train_loader = torch.utils.data.DataLoader(
        self.dataset_train, batch_size=self.batch_size,
        shuffle=True, **loader_kwargs)

    self.dataset_test = dataset_spot(n_training=n_validation, n_pixels=n_pixels)
    self.test_loader = torch.utils.data.DataLoader(
        self.dataset_test, batch_size=self.batch_size,
        shuffle=True, **loader_kwargs)

    # Channel counts come from the dataset itself.
    self.in_planes = self.dataset_train.in_planes
    self.out_planes = self.dataset_train.out_planes
    self.model = model.block(in_planes=self.in_planes,
                             out_planes=self.out_planes).to(self.device)
def __get_gpu_info(self):
    """Collect a per-GPU summary and the processes running on each GPU.

    Returns
    -------
    sum_info : list of dict
        One dict per GPU: index, device name, total/used memory (human
        readable), GPU utilization and memory utilization percentages.
    process_ls : list of dict
        One dict per compute ('C') or graphics ('G') process: GPU index,
        device name, pid, type, process name, GPU memory used and the
        container name resolved via self.get_ctan_name_by_pid.
    """

    def parse_unit(val, scale=1000):
        # Render a raw byte count as a human readable string.
        unit_ls = ['B', 'KB', 'MB', 'GB']
        unit_lv = 0
        while val >= scale:
            val /= scale
            unit_lv += 1
            if unit_lv == len(unit_ls) - 1:
                break
        return '{:.2f} {}'.format(val, unit_ls[unit_lv])

    def describe_process(gpu_idx, dev_name, obj, process_type):
        # Build one per-process record.  NVML reports usedGpuMemory as None
        # when it cannot be read (e.g. insufficient permissions); the
        # original code crashed in parse_unit in that case.
        raw_mem = obj.usedGpuMemory
        return {
            'gpu_idx': str(gpu_idx),
            'dev_name': dev_name,
            'process_pid': str(obj.pid),
            'process_type': process_type,
            'process_name': nv.nvmlSystemGetProcessName(obj.pid).decode(),
            'process_gpu_mem': 'N/A' if raw_mem is None else parse_unit(raw_mem, 1024),
            'ctan_name': self.get_ctan_name_by_pid(obj.pid),
        }

    sum_info = []
    process_ls = []
    nv.nvmlInit()
    gpu_num = nv.nvmlDeviceGetCount()
    # Iterate over every GPU.
    for gpu_idx in range(gpu_num):
        h = nv.nvmlDeviceGetHandleByIndex(gpu_idx)
        dev_name = nv.nvmlDeviceGetName(h).decode()
        # Query the memory info once and reuse it (the original queried twice).
        mem_info = nv.nvmlDeviceGetMemoryInfo(h)
        raw_total_mem = mem_info.total
        raw_used_mem = mem_info.used
        sum_info.append({
            'gpu_idx': str(gpu_idx),
            'dev_name': dev_name,
            'total_mem': parse_unit(raw_total_mem, 1024),
            'used_mem': parse_unit(raw_used_mem, 1024),
            'gpu_util': '{:.2f}'.format(nv.nvmlDeviceGetUtilizationRates(h).gpu),
            'gpu_mem_util': '{:.2f}'.format(raw_used_mem * 100 / raw_total_mem),
        })
        for obj in nv.nvmlDeviceGetComputeRunningProcesses(h):
            process_ls.append(describe_process(gpu_idx, dev_name, obj, 'C'))
        for obj in nv.nvmlDeviceGetGraphicsRunningProcesses(h):
            process_ls.append(describe_process(gpu_idx, dev_name, obj, 'G'))
    return sum_info, process_ls
def print_device_info():
    """Print the NVML name of every device tracked by NvidiaSmi."""
    for idx in range(NvidiaSmi.total_devices):
        dev_handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
        print("Device {}, : {}".format(
            idx, nvidia_smi.nvmlDeviceGetName(dev_handle)))