def get_gpu_mem(self):
    """Return the cached GPU memory summed over all CUDA devices.

    The per-device figure is whatever ``cutorch.memory_cached`` reports.
    Returns 0 when CUDA is not available.
    """
    if not cutorch.is_available():
        return 0

    total = 0
    for device_index in range(cutorch.device_count()):
        total += cutorch.memory_cached(device_index)
    return total
def get_gpu_statistics(self):
    """Print peak and current allocated/cached memory of the current CUDA device."""
    device_index = cuda.current_device()

    # (message template, query function) pairs, printed in this order.
    report = (
        ("Max memory allocated on GPU %d: %d bytes", cuda.max_memory_allocated),
        ("Max memory cached on GPU %d: %d bytes", cuda.max_memory_cached),
        ("Current memory allocated on GPU %d: %d bytes", cuda.memory_allocated),
        ("Current memory cached on GPU %d: %d bytes", cuda.memory_cached),
    )
    for template, query in report:
        print(template % (device_index, query(device_index)))
def occupy_gpu_memory(gpu_id, maximum_usage=None, buffer_memory=2000):
    """Reserve most of the free memory of a GPU for this process.

    PyTorch allocates GPU memory dynamically, so on a shared server another
    job may grab memory mid-run and cause an OOM. This function allocates a
    dummy tensor of
    ``free_memory - (maximum_usage - current_usage) - buffer_memory`` MB on
    the current CUDA device and releases it right away; PyTorch's caching
    allocator is expected to keep the freed block reserved for this process.

    All arithmetic is done in MB: ``nvidia-smi`` reports free memory in MB
    (``nounits``), while ``max_memory_cached`` / ``memory_cached`` report
    bytes and are converted here.

    :param gpu_id: index of the GPU in the ``nvidia-smi`` listing (used only
        to look up free memory; the tensor is created on the current CUDA
        device, which matches when ``CUDA_VISIBLE_DEVICES`` selects that GPU).
    :param maximum_usage: float, expected peak usage in MB. When ``None`` the
        peak cached memory so far (``max_memory_cached()``) is used.
    :param buffer_memory: float, MB to leave untouched on the GPU.
    :return: None
    """
    bytes_per_mb = 1024. * 1024
    gpu_id = int(gpu_id)

    if maximum_usage is None:
        # max_memory_cached() reports bytes; the rest of the formula is in MB.
        maximum_usage = cutorch.max_memory_cached() / bytes_per_mb
    current_usage = cutorch.memory_cached() / bytes_per_mb

    result = subprocess.check_output([
        'nvidia-smi', '--query-gpu=memory.free',
        '--format=csv,nounits,noheader'
    ])
    # One line per GPU, each holding the free memory in MB.
    gpu_memory_map = dict(
        enumerate(int(x) for x in result.strip().split(b'\n')))
    available_memory = gpu_memory_map[gpu_id]

    if available_memory < buffer_memory + 1000:
        print(
            'Gpu memory has been mostly occupied (although maybe not by you)!')
        return

    memory_to_occupy = int(
        available_memory - (maximum_usage - current_usage) - buffer_memory)
    if memory_to_occupy <= 0:
        # Nothing left to reserve; a non-positive size would make
        # torch.zeros fail.
        return

    # int32 elements are 4 bytes each.
    dim = memory_to_occupy * 1024 * 1024 // 4
    # Allocate directly on the GPU instead of building a multi-GB host tensor
    # first (the original also pinned a copy whose result was discarded).
    x = torch.zeros(dim, dtype=torch.int, device='cuda')
    print('Occupied {}MB extra gpu memory.'.format(memory_to_occupy))
    del x
def detect(opt):
    """Run disparity inference over a file list and save predictions and ground truth.

    Reads from ``opt``: ``model`` (checkpoint path), ``rp`` (result
    directory, created if missing), ``filelist`` and ``filepath`` (dataset
    listing and root), ``devices`` (comma-separated GPU ids), ``net``
    (network name) and ``batchSize``.

    For every sample two images are written into the result directory via
    ``skimage.io.imsave``: the prediction and the ground truth (``_gt.png``
    suffix), both multiplied by 256 and stored as uint16.

    Inference runs under ``torch.no_grad()``; the former
    ``Variable(..., volatile=True)`` wrappers no longer disable autograd in
    PyTorch >= 0.4.
    """
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]

    # Build the network according to its name.
    if opt.net == "psmnet" or opt.net == "ganet":
        net = build_net(opt.net)(maxdisp=192)
    elif opt.net == "dispnetc":
        net = build_net(opt.net)(batchNorm=False, lastRelu=True,
                                 resBlock=False)
    else:
        net = build_net(opt.net)(batchNorm=False, lastRelu=True)
    net = torch.nn.DataParallel(net, device_ids=devices).cuda()

    # The checkpoint is either a bare state dict or wraps it in 'state_dict'.
    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (opt.net, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    test_dataset = DispDataset(txt_file=file_list, root_dir=filepath,
                               phase='detect')
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1,
                             pin_memory=True)

    s = time.time()

    avg_time = []
    display = 100
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        # Left and right images are stacked along the channel dimension.
        inputs = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        num_of_samples = inputs.size(0)
        target = sample_batched['gt_disp'].cuda()
        inputs = inputs.cuda()

        # Only batches after the warm-up phase are timed.
        if i > warmup:
            ss = time.time()

        with torch.no_grad():
            if opt.net == "psmnet" or opt.net == "ganet":
                # NOTE(review): no channel dimension is added here although
                # the per-sample code indexes output[j][0] -- confirm the
                # output shape of these networks.
                output = net(inputs)
            elif opt.net == "dispnetc":
                output = net(inputs)[0]
            else:
                output = net(inputs)[-1]

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, '
                      'max_memory_allocated: %d MBytes, '
                      'memory_cached: %d MBytes, '
                      'max_memory_cached: %d MBytes, '
                      'CPU memory usage: %d MBytes' %
                      (ct.memory_allocated() / mbytes,
                       ct.max_memory_allocated() / mbytes,
                       ct.memory_cached() / mbytes,
                       ct.max_memory_cached() / mbytes,
                       process.memory_info().rss / mbytes))
                avg_time = []

        # Predictions above 192 are zeroed before rescaling.
        output[output > 192] = 0
        output = scale_disp(output, (output.size()[0], 540, 960))

        for j in range(num_of_samples):
            np_depth = output[j][0].detach().cpu().numpy()
            gt_depth = target[j, 0, :, :].detach().cpu().numpy()
            print('Batch[{}]: {}, average disp: {}'.format(
                i, j, np.mean(np_depth)))

            # The output name is the input path with '/' replaced by '_'.
            name_items = sample_batched['img_names'][0][j].split('/')

            save_name = '_'.join(name_items)
            print('Name: {}'.format(save_name))
            print('')
            skimage.io.imsave(os.path.join(result_path, save_name),
                              (np_depth * 256).astype('uint16'))

            save_name = '_'.join(name_items).replace(".png", "_gt.png")
            print('Name: {}'.format(save_name))
            print('')
            skimage.io.imsave(os.path.join(result_path, save_name),
                              (gt_depth * 256).astype('uint16'))

    print('Evaluation time used: {}'.format(time.time() - s))
def detect(opt):
    """Run disparity inference over a file list and save the predictions.

    Reads from ``opt``: ``net`` (network name), ``model`` (checkpoint path),
    ``rp`` (result directory, created if missing), ``filelist`` and
    ``filepath`` (dataset listing and root), ``devices`` (comma-separated
    GPU ids) and ``batchSize``.

    For every sample the predicted disparity is multiplied by 256 and
    written as uint16 via ``skimage.io.imsave``; the file name is the input
    path with '/' replaced by '_' and ``.png`` replaced by ``_d.png``.

    :raises ValueError: if ``opt.net`` is not one of the supported networks.
    """
    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]

    # Build the network according to its name.
    if net_name == "psmnet" or net_name == "ganet":
        net = build_net(net_name)(192)
    elif net_name in ["fadnet", "dispnetc"]:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)
    else:
        # Previously this fell through and failed later with an unbound `net`.
        raise ValueError('Unsupported network: %s' % net_name)
    net = torch.nn.DataParallel(net, device_ids=devices).cuda()

    # The checkpoint is either a bare state dict or wraps it in 'state_dict'.
    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    test_dataset = StereoDataset(txt_file=file_list, root_dir=filepath,
                                 phase='detect')
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1,
                             pin_memory=True)

    s = time.time()

    avg_time = []
    display = 50
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        # Left and right images are stacked along the channel dimension.
        inputs = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        num_of_samples = inputs.size(0)
        inputs = inputs.cuda()

        # Only batches after the warm-up phase are timed.
        if i > warmup:
            ss = time.time()

        # torch.no_grad() is what disables autograd here; the old
        # Variable(..., volatile=True) wrapper had no effect and was dropped.
        with torch.no_grad():
            if net_name == "psmnet" or net_name == "ganet":
                output = net(inputs)
                # Add the channel dimension the code below indexes into.
                output = output.unsqueeze(1)
            elif net_name == "dispnetc":
                output = net(inputs)[0]
            else:
                output = net(inputs)[-1]

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, '
                      'max_memory_allocated: %d MBytes, '
                      'memory_cached: %d MBytes, '
                      'max_memory_cached: %d MBytes, '
                      'CPU memory usage: %d MBytes' %
                      (ct.memory_allocated() / mbytes,
                       ct.max_memory_allocated() / mbytes,
                       ct.memory_cached() / mbytes,
                       ct.max_memory_cached() / mbytes,
                       process.memory_info().rss / mbytes))
                avg_time = []

        output = scale_disp(output, (output.size()[0], 540, 960))
        disp = output[:, 0, :, :]

        for j in range(num_of_samples):
            name_items = sample_batched['img_names'][0][j].split('/')

            # Write the disparity to file.
            np_disp = disp[j].detach().cpu().numpy()
            print('Batch[{}]: {}, average disp: {}({}-{}).'.format(
                i, j, np.mean(np_disp), np.min(np_disp), np.max(np_disp)))

            save_name = '_'.join(name_items).replace(".png", "_d.png")
            print('Name: {}'.format(save_name))
            skimage.io.imsave(os.path.join(result_path, save_name),
                              (np_disp * 256).astype('uint16'))

    print('Evaluation time used: {}'.format(time.time() - s))
def get_memory_use():
    """Return a three-line text summary of the current CUDA device's memory.

    Line 1 is the device name followed by ':'; line 2 is
    'allocated:<current>/<max>'; line 3 is 'cached:<current>/<max>'.
    Every line, including the last, ends with a newline.
    """
    device_index = cuda.current_device()
    lines = [
        cuda.get_device_name(device_index) + ':',
        'allocated:{}/{}'.format(cuda.memory_allocated(device_index),
                                 cuda.max_memory_allocated()),
        'cached:{}/{}'.format(cuda.memory_cached(device_index),
                              cuda.max_memory_cached()),
    ]
    return '\n'.join(lines) + '\n'
def detect(opt):
    """Run disparity and/or surface-normal inference and save the results.

    Reads from ``opt``: ``net`` (network name), ``model`` (checkpoint path),
    ``rp`` (result directory, created if missing), ``filelist`` and
    ``filepath`` (dataset listing and root), ``devices`` (comma-separated
    GPU ids), ``batchSize``, and the flags ``disp_on`` / ``norm_on``.

    With ``disp_on`` the predicted disparity is compared against the ground
    truth (smooth L1 over pixels whose ground truth is below 192, printed as
    'EPE') and written as a uint16 image (value * 256, ``_d.png`` suffix).
    With ``norm_on`` the normals are mapped from [-1, 1] to [0, 1] via
    ``(n + 1) * 0.5`` and written with ``save_exr`` (``_n.exr`` suffix).

    Inference runs under ``torch.no_grad()``; the former
    ``Variable(..., volatile=True)`` wrapper no longer disables autograd in
    PyTorch >= 0.4.

    NOTE(review): ``norm_on`` without ``disp_on`` is not handled by the
    post-processing below (``disp`` / ``normal`` are never assigned).
    """
    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]

    # Every network is built with the same arguments; all except the
    # DispNet variants additionally receive the camera focal length.
    # NOTE(review): the original formatting was lost; set_focal_length is
    # assumed to belong to the non-DispNet branch only -- confirm.
    net = build_net(net_name)(batchNorm=False, lastRelu=True)
    if net_name not in ["dispnetcres", "dispnetc"]:
        net.set_focal_length(1050.0, 1050.0)
    net = torch.nn.DataParallel(net, device_ids=devices).cuda()

    # The checkpoint is either a bare state dict or wraps it in 'state_dict'.
    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    test_dataset = SceneFlowDataset(txt_file=file_list, root_dir=filepath,
                                    phase='detect')
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1,
                             pin_memory=True)

    s = time.time()

    avg_time = []
    display = 100
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        # Left and right images are stacked along the channel dimension.
        inputs = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        if opt.disp_on:
            target_disp = sample_batched['gt_disp'].cuda()
        if opt.norm_on:
            # Moved to the GPU as in the original, but not used afterwards.
            target_norm = sample_batched['gt_norm'].cuda()

        num_of_samples = inputs.size(0)
        inputs = inputs.cuda()

        # Only batches after the warm-up phase are timed.
        if i > warmup:
            ss = time.time()

        with torch.no_grad():
            if net_name == "psmnet" or net_name == "ganet":
                output = net(inputs)
            elif net_name == "dispnetc":
                output = net(inputs)[0]
            elif net_name in ["dispnormnet", "dtonnet", "dnfusionnet"]:
                # These networks return (disparity, normal); pack them as
                # [normal (3 channels), disparity] along the channel axis.
                output = net(inputs)
                disp = output[0]
                normal = output[1]
                output = torch.cat((normal, disp), 1)
            else:
                output = net(inputs)[-1]

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, '
                      'max_memory_allocated: %d MBytes, '
                      'memory_cached: %d MBytes, '
                      'max_memory_cached: %d MBytes, '
                      'CPU memory usage: %d MBytes' %
                      (ct.memory_allocated() / mbytes,
                       ct.max_memory_allocated() / mbytes,
                       ct.memory_cached() / mbytes,
                       ct.max_memory_cached() / mbytes,
                       process.memory_info().rss / mbytes))
                avg_time = []

        if opt.disp_on and not opt.norm_on:
            output = scale_disp(output, (output.size()[0], 540, 960))
            disp = output[:, 0, :, :]
        elif opt.disp_on and opt.norm_on:
            output = scale_norm(output, (output.size()[0], 4, 540, 960))
            disp = output[:, 3, :, :]
            normal = output[:, :3, :, :]
        # NOTE(review): the nesting of this debug print was lost with the
        # original formatting; it is assumed to run for every batch.
        print('disp shape:', disp.shape)

        for j in range(num_of_samples):
            name_items = sample_batched['img_names'][0][j].split('/')

            # Evaluate the disparity and write it to file.
            if opt.disp_on:
                output_disp = disp[j]
                _target_disp = target_disp[j, 0]
                # Ground-truth pixels of 192 or more are excluded.
                target_valid = _target_disp < 192
                print('target size', _target_disp.size())
                print('output size', output_disp.size())
                epe = F.smooth_l1_loss(output_disp[target_valid],
                                       _target_disp[target_valid],
                                       size_average=True)
                print('EPE: {}'.format(epe))

                np_disp = disp[j].detach().cpu().numpy()
                print('Batch[{}]: {}, average disp: {}({}-{}).'.format(
                    i, j, np.mean(np_disp), np.min(np_disp),
                    np.max(np_disp)))
                save_name = '_'.join(name_items).replace(".png", "_d.png")
                print('Name: {}'.format(save_name))
                skimage.io.imsave(os.path.join(result_path, save_name),
                                  (np_disp * 256).astype('uint16'))

            if opt.norm_on:
                # Map the normals from [-1, 1] to [0, 1] before saving.
                normal[j] = (normal[j] + 1.0) * 0.5
                np_normal = normal[j].detach().cpu().numpy()
                save_name = '_'.join(name_items).replace('.png', '_n.exr')
                print('Name: {}'.format(save_name))
                save_exr(np_normal, '{}/{}'.format(result_path, save_name))

            print('')

    print('Evaluation time used: {}'.format(time.time() - s))
def train(self, num_of_iters=1, data=None, hidden=None):
    """Run ``num_of_iters`` forward/backward passes and accumulate statistics.

    Gradients are accumulated through ``loss.backward()``; no optimizer
    step is taken in this method.

    At each epoch boundary (``train_iter`` is a positive multiple of
    ``num_batches_per_epoch``) the epoch statistics are logged, rank 0 runs
    ``self.test``, the per-epoch accumulators are reset and the epoch
    counter advances.

    :param num_of_iters: number of mini-batches to process.
    :param data: optional batch to train on. When ``None`` a fresh batch is
        fetched from ``self.data_iter()`` for every iteration; a supplied
        batch is used for all iterations.
    :param hidden: recurrent hidden state, only used when ``self.dnn`` is
        ``'lstm'``.
    :return: ``num_of_iters``, or ``(num_of_iters, hidden)`` for ``'lstm'``.
    """
    self.loss = 0.0
    s = time.time()
    for i in range(num_of_iters):
        self.adjust_learning_rate(self.train_epoch, self.optimizer)

        if (self.train_iter % self.num_batches_per_epoch == 0
                and self.train_iter > 0):
            logger.info('train iter: %d, num_batches_per_epoch: %d',
                        self.train_iter, self.num_batches_per_epoch)
            logger.info(
                'Epoch %d, avg train acc: %f, lr: %f, avg loss: %f' %
                (self.train_iter // self.num_batches_per_epoch,
                 np.mean(self.train_acc_top1), self.lr,
                 self.avg_loss_per_epoch / self.num_batches_per_epoch))
            mean_s = np.mean(self.sparsities)
            if self.train_iter > 0 and np.isnan(mean_s):
                # logger.warn is a deprecated alias of logger.warning.
                logger.warning('NaN detected! sparsities: %s' %
                               self.sparsities)
            logger.info(
                'Average Sparsity: %f, compression ratio: %f, communication size: %f',
                np.mean(self.sparsities), np.mean(self.compression_ratios),
                np.mean(self.communication_sizes))
            if self.rank == 0 and self.writer is not None:
                self.writer.add_scalar(
                    'cross_entropy',
                    self.avg_loss_per_epoch / self.num_batches_per_epoch,
                    self.train_epoch)
                self.writer.add_scalar('top-1 acc',
                                       np.mean(self.train_acc_top1),
                                       self.train_epoch)
            if self.rank == 0:
                self.test(self.train_epoch)

            # Reset the per-epoch accumulators.
            self.sparsities = []
            self.compression_ratios = []
            self.communication_sizes = []
            self.train_acc_top1 = []
            self.epochs_info.append(self.avg_loss_per_epoch /
                                    self.num_batches_per_epoch)
            self.avg_loss_per_epoch = 0.0

            if self.train_iter > 0 and self.rank == 0:
                # Checkpoint writing is currently disabled; only the target
                # directory is created. `state` and `fn` are prepared so the
                # save call can be re-enabled.
                state = {
                    'iter': self.train_iter,
                    'epoch': self.train_epoch,
                    'state': self.get_model_state()
                }
                if self.prefix:
                    relative_path = './weights/%s/%s-n%d-bs%d-lr%.4f' % (
                        self.prefix, self.dnn, self.nworkers,
                        self.batch_size, self.base_lr)
                else:
                    relative_path = './weights/%s-n%d-bs%d-lr%.4f' % (
                        self.dnn, self.nworkers, self.batch_size,
                        self.base_lr)
                if settings.SPARSE:
                    relative_path += '-s%.5f' % self.sparsity
                utils.create_path(relative_path)
                filename = '%s-rank%d-epoch%d.pth' % (self.dnn, self.rank,
                                                      self.train_epoch)
                fn = os.path.join(relative_path, filename)

            self.train_epoch += 1
            if self.train_sampler and (self.nworkers > 1):
                self.train_sampler.set_epoch(self.train_epoch)

        ss = time.time()
        # Fetch a fresh batch per iteration. Rebinding `data` here (as the
        # code used to) made every later iteration reuse the first batch.
        batch = data if data is not None else self.data_iter()

        if self.dataset == 'an4':
            inputs, labels_cpu, input_percentages, target_sizes = batch
            # NOTE: mul_ modifies input_percentages in place.
            input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
        else:
            inputs, labels_cpu = batch

        if self.is_cuda:
            if self.dnn == 'lstm':
                inputs = Variable(inputs.transpose(0, 1).contiguous()).cuda()
                labels = Variable(
                    labels_cpu.transpose(0, 1).contiguous()).cuda()
            else:
                inputs = inputs.cuda(non_blocking=True)
                labels = labels_cpu.cuda(non_blocking=True)
        else:
            labels = labels_cpu

        self.iotime += (time.time() - ss)

        if self.dnn == 'lstman4':
            out, output_sizes = self.net(inputs, input_sizes)
            out = out.transpose(0, 1)  # TxNxH
            loss = self.criterion(out, labels_cpu, output_sizes,
                                  target_sizes)
            loss = loss / inputs.size(0)  # average the loss by minibatch
            loss.backward()
        elif self.dnn == 'lstm':
            hidden = lstmpy.repackage_hidden(hidden)
            outputs, hidden = self.net(inputs, hidden)
            tt = torch.squeeze(
                labels.view(-1, self.net.batch_size * self.net.num_steps))
            loss = self.criterion(outputs.view(-1, self.net.vocab_size), tt)
            loss.backward()
        else:
            outputs = self.net(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()

        loss_value = loss.item()
        self.loss += loss_value
        self.avg_loss_per_epoch += loss_value

        # Top-1 accuracy is only tracked for the non-recurrent models.
        if self.dnn not in ['lstm', 'lstman4']:
            acc1, = self.cal_accuracy(outputs, labels, topk=(1, ))
            self.train_acc_top1.append(acc1)

        self.train_iter += 1

    self.num_of_updates_during_comm += 1
    self.loss /= num_of_iters
    self.timer += time.time() - s

    display = 100
    if self.train_iter % display == 0:
        logger.info(
            '[%3d][%5d/%5d][rank:%d] loss: %.3f, average forward and backward time: %f, iotime: %f '
            % (self.train_epoch, self.train_iter,
               self.num_batches_per_epoch, self.rank, self.loss,
               self.timer / display, self.iotime / display))
        mbytes = 1024. * 1024
        logger.info(
            'GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes',
            ct.memory_allocated() / mbytes,
            ct.max_memory_allocated() / mbytes,
            ct.memory_cached() / mbytes,
            ct.max_memory_cached() / mbytes,
            process.memory_info().rss / mbytes)
        self.timer = 0.0
        self.iotime = 0.0
        if self.is_cuda:
            torch.cuda.empty_cache()

    if self.dnn == 'lstm':
        return num_of_iters, hidden
    return num_of_iters
def detect(opt):
    """Convert the network with ``trt_transform`` and benchmark it on one batch.

    Reads from ``opt``: ``net`` (network name), ``model`` (checkpoint path),
    ``rp`` (result directory, created if missing), ``filelist`` and
    ``filepath`` (dataset listing and root), ``devices`` (comma-separated
    GPU ids) and ``batchSize``.

    The converted model's state dict is saved to
    'models/mobilefadnet_trt.pth'. The first batch of the dataset is then
    pushed through the converted model ``14 + warmup`` times; per-stage
    timings are printed for every iteration and the overall figures cover
    the iterations from index ``warmup`` onwards.

    :raises ValueError: if ``opt.net`` is not supported or the dataset
        yields no batch.
    """
    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]
    ngpu = len(devices)

    # Build the network according to its name.
    if net_name == "psmnet" or net_name == "ganet":
        net = build_net(net_name)(192)
    elif net_name in ["fadnet", "dispnetc", "mobilefadnet", "slightfadnet"]:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)
    else:
        # Previously this fell through and failed later with an unbound `net`.
        raise ValueError('Unsupported network: %s' % net_name)

    if ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=devices)

    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        load_model_trained_with_DP(net, model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    batch_size = int(opt.batchSize)
    test_dataset = StereoDataset(txt_file=file_list, root_dir=filepath,
                                 phase='detect')
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1,
                             pin_memory=True)

    net.eval()
    net = net.cuda()

    net_trt = trt_transform(net)
    # NOTE(review): the output file name is fixed regardless of net_name.
    torch.save(net_trt.state_dict(), 'models/mobilefadnet_trt.pth')

    # Only the first batch is used; it is replayed for every iteration.
    sample_batched = next(iter(test_loader), None)
    if sample_batched is None:
        raise ValueError('The dataset did not yield any batch.')

    avg_time = []
    display = 50
    warmup = 2
    iterations = 14 + warmup
    timed_iterations = iterations - warmup
    for i in range(iterations):
        stime = time.time()
        inputs = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        print('input Shape: {}'.format(inputs.size()))
        inputs = inputs.cuda()

        iotime = time.time()
        print('[{}] IO time:{}'.format(i, iotime - stime))

        # Overall timing starts once the warm-up iterations are done.
        if i == warmup:
            ss = time.time()

        with torch.no_grad():
            if net_name == "psmnet" or net_name == "ganet":
                output = net_trt(inputs)
                output = output.unsqueeze(1)
            elif net_name == "dispnetc":
                output = net_trt(inputs)[0]
            else:
                output = net_trt(inputs)[-1]
        # CUDA work is launched asynchronously; wait for it to finish so
        # the timestamp covers the actual inference.
        torch.cuda.synchronize()

        itime = time.time()
        print('[{}] Inference time:{}'.format(i, itime - iotime))

        if i > warmup:
            # Record this iteration's own inference time (not the time
            # accumulated since warm-up) so the mean is meaningful.
            avg_time.append(itime - iotime)
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, '
                      'max_memory_allocated: %d MBytes, '
                      'memory_cached: %d MBytes, '
                      'max_memory_cached: %d MBytes, '
                      'CPU memory usage: %d MBytes' %
                      (ct.memory_allocated() / mbytes,
                       ct.max_memory_allocated() / mbytes,
                       ct.memory_cached() / mbytes,
                       ct.max_memory_cached() / mbytes,
                       process.memory_info().rss / mbytes))
                avg_time = []

        print('[%d] output shape:' % i, output.size())

        ptime = time.time()
        print('[{}] Post-processing time:{}'.format(i, ptime - itime))
        print('Current batch time used:: {}'.format(time.time() - stime))

    # `ss` was taken at iteration `warmup`, so the elapsed time covers
    # iterations - warmup iterations (not all of them).
    elapsed = time.time() - ss
    print('Evaluation time used: {}, avg iter: {}'.format(
        elapsed, elapsed / timed_iterations))
# - torch.cuda.max_memory_cached(device=None):返回指定设备缓存分配器管理的最大GPU内存 #%% cuda.max_memory_cached(0) #%% [markdown] # - torch.cuda.memory_allocated(device=None):返回指定设备上张量使用的GPU内存 #%% cuda.memory_allocated(0) #%% [markdown] # - torch.cuda.memory_cached(device=None) #%% cuda.memory_cached() #%% [markdown] # - orch.cuda.set_device(device):设置当前的设备. 该函数不鼓励使用. 更好的方法为使用CUDA_VISIBLE_DEVICES. #%% [markdown] # - torch.cuda.stream(stream):用于选定流的上下文管理器. #%% [markdown] # - torch.cuda.synchronize():在当前设备上等待所有流中核操作的结束. #%% [markdown] # # 2.随机数生成器 #%% [markdown] # - torch.cuda.get_rng_state(device=-1):返回当前GPU的随机数生成器状态. #%% [markdown] # - torch.cuda.set_rng_state(new_state, device=-1):设置当前GPU的随机数生成器状态. #%% [markdown] # - torch.cuda.manual_seed(seed):为当前GPU设置生成随机数的种子