def __init__(self, argv=None):
    """Initialize the server wrapper: parse CLI arguments, read the config
    file, set up logging, optionally daemonize, then start the RPC server.

    argv: command-line argument list; defaults to ``sys.argv``.

    Exits the process (``sys.exit(0)``) if a server is already listening on
    the configured host:port.
    """
    # Fall back to the class name when no explicit server name was set.
    if self.server_name is None:
        self.server_name = self.__class__.__name__
    if argv is None:
        argv = sys.argv
    conf_path, options = self.parseArgs(argv)
    # Defaults derived from the server name; used when the config file
    # does not provide explicit paths.
    self.default_log_path = self.server_name + '.log'
    self.default_pid_path = self.server_name + '.pid'
    self.server = None
    self.quit = False
    # read conf
    conf = ConfigParser()
    conf.read(conf_path)
    self.conf_path = conf_path
    self.host = conf.get('server', 'host')
    self.port = conf.getint('server', 'port')
    # pid_path / log_path are optional config keys; fall back to defaults.
    try:
        self.pid_path = conf.get('server', 'pid_path')
    except NoOptionError:
        self.pid_path = self.default_pid_path
    try:
        log_path = conf.get('server', 'log_path')
    except NoOptionError:
        log_path = self.default_log_path
    # Refuse to start a second instance on the same address.
    if is_server_running(self.host, self.port):
        trace("Server already running on %s:%s." % (self.host, self.port))
        sys.exit(0)
    trace('Starting %s server at http://%s:%s/' % (self.server_name,
                                                   self.host, self.port))
    # init logger
    if options.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    if options.debug:
        # In debug mode also echo log records to the console.
        log_to = 'file console'
    else:
        log_to = 'file'
    self.logger = get_default_logger(log_to, log_path, level=level,
                                     name=self.server_name)
    # subclass init
    self._init_cb(conf, options)
    # daemon mode
    if not options.debug:
        trace(' as daemon.\n')
        # Close the logger before forking so file handles are not shared
        # with the parent; re-create it inside the daemon process below.
        close_logger(self.server_name)
        create_daemon()
        # re init the logger
        self.logger = get_default_logger(log_to, log_path, level=level,
                                         name=self.server_name)
    else:
        trace(' in debug mode.\n')
    # init rpc
    self.initServer()
def test_fastdvdnet(**args):
    """Denoises all sequences present in a given folder. Sequences must be
    stored as numbered image sequences. The different sequences must be
    stored in subfolders under the "test_path" folder.

    Inputs:
        args (dict) fields:
            "model_file": path to model
            "test_path": path to sequence to denoise
            "suffix": suffix to add to output name
            "max_num_fr_per_seq": max number of frames to load per sequence
            "noise_sigma": noise level used on test set
            "dont_save_results: if True, don't save output images
            "no_gpu": if True, run model on CPU
            "save_path": where to save outputs as png
            "gray": if True, perform denoising of grayscale images instead of RGB
    """
    # Start time
    start_time = time.time()

    # If save_path does not exist, create it.
    # FIX: the directory was never actually created here (unlike the sibling
    # variants of this function), so logging/saving could fail on a fresh run.
    save_dir = os.path.dirname(args['save_path'])
    if save_dir and not os.path.exists(save_dir):
        os.makedirs(save_dir)
    logger = init_logger_test(save_dir)

    # Sets data type according to CPU or GPU modes
    if args['cuda']:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Create models
    print('Loading models ...')
    model_temp = FastDVDnet(num_input_frames=NUM_IN_FR_EXT)

    # Load saved weights
    state_temp_dict = torch.load(args['model_file'])
    if args['cuda']:
        device_ids = [0]
        model_temp = nn.DataParallel(model_temp, device_ids=device_ids).cuda()
    else:
        # CPU mode: remove the DataParallel wrapper
        state_temp_dict = remove_dataparallel_wrapper(state_temp_dict)
    model_temp.load_state_dict(state_temp_dict)

    # Sets the model in evaluation mode (e.g. it removes BN)
    model_temp.eval()

    with torch.no_grad():
        # process data
        seq = open_sequence(args['test_path'], args['first'], args['last'],
                            args['already_norm'])
        seq = torch.from_numpy(seq).to(device)
        seq_time = time.time()

        # Add synthetic Gaussian noise, unless the input is already noisy.
        if not args['already_noisy']:
            noise = torch.empty_like(seq).normal_(
                mean=0, std=args['noise_sigma']).to(device)
            seqn = seq + noise
        else:
            seqn = seq
        noisestd = torch.FloatTensor([args['noise_sigma']]).to(device)
        denframes = denoise_seq_fastdvdnet(seq=seqn,
                                           noise_std=noisestd,
                                           temp_psz=NUM_IN_FR_EXT,
                                           model_temporal=model_temp)

    # Compute PSNR and log it
    stop_time = time.time()
    psnr = compute_psnr(denframes.cpu().numpy(), seq.cpu().numpy(), 1.)
    psnr_noisy = compute_psnr(seqn.cpu().numpy().squeeze(),
                              seq.cpu().numpy(), 1.)
    loadtime = (seq_time - start_time)
    runtime = (stop_time - seq_time)
    seq_length = seq.size()[0]
    logger.info("Finished denoising {}".format(args['test_path']))
    logger.info(
        "\tDenoised {} frames in {:.3f}s, loaded seq in {:.3f}s".format(
            seq_length, runtime, loadtime))
    logger.info("\tPSNR noisy {:.4f}dB, PSNR result {:.4f}dB".format(
        psnr_noisy, psnr))

    # Save outputs
    if not args['dont_save_results']:
        # Save sequence
        save_out_seq(denframes.cpu(), args['save_path'], args['first'])

    # close logger
    close_logger(logger)
def main(args):
    """Train a weight-sharing supernet, then run a predictor-guided search
    over NAS-Bench-201 and return the best architecture found.

    Returns (best_arch, valid_error, test_error, total_search_time).
    """
    # Seed every RNG and force deterministic cuDNN for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    logdir = args.log_dir
    writer = SummaryWriter(log_dir=logdir)
    logger = get_logger(os.path.join(logdir, 'log'))
    logger.info('Arguments : -------------------------------')
    for name, value in args._get_kwargs():
        logger.info('{:20} : {:}'.format(name, value))
    nas_bench = nb.NASBench201API(args.nas_bench_path)
    op_space = cell.SearchSpaceNames[args.search_space_name]
    # Accuracy predictor trained alongside the search.
    predictor = Predictor(len(op_space), 1, args.max_nodes, 64, 1).to('cuda')
    optim_p = torch.optim.Adam(predictor.parameters(), args.p_lr,
                               weight_decay=args.weight_decay)
    logger.info("predictor params size = %fM" %
                (count_parameters(predictor) / 1e6))
    logger.info("\n")
    train_data, _, _, classnum = get_datasets(args.dataset, args.data_path,
                                              -1)  # disable cutout for search
    train_data, valid_data = split_dataset(args.dataset, train_data)
    supernet = TinyNetwork(args.channel, args.num_cells, args.max_nodes,
                           classnum, op_space, False,
                           args.track_running_stat).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optim = torch.optim.SGD(supernet.parameters(), lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay,
                            nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optim, args.epochs, args.eta_min)
    train_loader = torch.utils.data.DataLoader(train_data, args.batch_size,
                                               True,
                                               num_workers=args.load_workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               args.batch_size_valid, True,
                                               drop_last=True,
                                               num_workers=args.load_workers,
                                               pin_memory=True)
    start_time, search_time, epoch_time = time.time(), AverageMeter(
    ), AverageMeter()
    if not args.load_checkpoint:
        # first, train supernet by uniform sampling
        for epoch in range(args.start_epoch, args.epochs):
            loss, top1, top5 = train_supernet(train_loader, supernet,
                                              criterion, optim, str(epoch),
                                              args.print_freq, logger)
            scheduler.step()
            writer.add_scalar('train/loss', loss, epoch)
            writer.add_scalar('train/top1', top1, epoch)
            writer.add_scalar('train/top5', top5, epoch)
            loss, top1, top5 = valid_func(valid_loader, supernet, criterion)
            logger.info("Valid: loss=%.2f, top1=%2.2f, top5=%2.2f" %
                        (loss, top1, top5))
            epoch_time.update(time.time() - start_time)
            start_time = time.time()
        # save trained supernet weights
        torch.save(supernet.state_dict(),
                   os.path.join(args.log_dir, 'supernet.pth'))
    else:
        # Skip supernet training and reuse previously trained weights.
        logger.info('load supernet from %s' % args.load_checkpoint)
        load_path = os.path.join(args.load_checkpoint, 'supernet.pth')
        supernet.load_state_dict(torch.load(load_path, map_location='cuda'))
        logger.info('supernet loaded')
    search_time.update(epoch_time.sum)
    logger.info('Pre-searching costs {:.1f} s'.format(search_time.sum))
    start_time = time.time()
    # perform search
    best_arch, best_valid_acc = search_find_best(valid_loader, train_loader,
                                                 supernet, predictor, optim_p,
                                                 logger, args)
    search_time.update(time.time() - start_time)
    arch_idx = nas_bench.query_index_by_arch(best_arch)
    logger.info('found best arch with valid error %f:' % best_valid_acc)
    nas_bench.show(arch_idx)
    logger.info('time cost: %fs' % search_time.sum)
    # Convert benchmark accuracies into error rates (100 - accuracy).
    vacc = 100 - nas_bench.get_more_info(
        arch_idx, 'cifar10-valid', None, is_random=False,
        hp='200')['valid-accuracy']
    tacc = 100 - nas_bench.get_more_info(
        arch_idx, 'cifar10', None, is_random=False,
        hp='200')['test-accuracy']
    close_logger(logger)
    return best_arch, vacc, tacc, search_time.sum
def test_dvdnet(**args):
    """Denoises all sequences present in a given folder. Sequences must be
    stored as numbered image sequences. The different sequences must be
    stored in subfolders under the "test_path" folder.

    Inputs:
        args (dict) fields:
            "model_spatial_file": path to model of the pretrained spatial denoiser
            "model_temp_file": path to model of the pretrained temporal denoiser
            "test_path": path to sequence to denoise
            "suffix": suffix to add to output name
            "max_num_fr_per_seq": max number of frames to load per sequence
            "noise_sigma": noise level used on test set
            "dont_save_results: if True, don't save output images
            "no_gpu": if True, run model on CPU
            "save_path": where to save outputs as png
    """
    start_time = time.time()
    # If save_path does not exist, create it
    if not os.path.exists(args['save_path']):
        os.makedirs(args['save_path'])
    logger = init_logger_test(args['save_path'])

    # Sets data type according to CPU or GPU modes
    if args['cuda']:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Create models (separate spatial and temporal denoisers)
    model_spa = DVDnet_spatial()
    model_temp = DVDnet_temporal(num_input_frames=NUM_IN_FRAMES)

    # Load saved weights
    state_spatial_dict = torch.load(args['model_spatial_file'])
    state_temp_dict = torch.load(args['model_temp_file'])
    if args['cuda']:
        device_ids = [0]
        model_spa = nn.DataParallel(model_spa, device_ids=device_ids).cuda()
        model_temp = nn.DataParallel(model_temp, device_ids=device_ids).cuda()
    else:
        # CPU mode: remove the DataParallel wrapper
        state_spatial_dict = remove_dataparallel_wrapper(state_spatial_dict)
        state_temp_dict = remove_dataparallel_wrapper(state_temp_dict)
    model_spa.load_state_dict(state_spatial_dict)
    model_temp.load_state_dict(state_temp_dict)

    # Sets the model in evaluation mode (e.g. it removes BN)
    model_spa.eval()
    model_temp.eval()

    with torch.no_grad():
        # process data
        seq, _, _ = open_sequence(args['test_path'],\
                                  False,\
                                  expand_if_needed=False,\
                                  max_num_fr=args['max_num_fr_per_seq'])
        # Insert a singleton dim: resulting tensor is [num_frames, 1, C, H, W]
        seq = torch.from_numpy(seq[:, np.newaxis, :, :, :]).to(device)
        seqload_time = time.time()

        # Add noise
        noise = torch.empty_like(seq).normal_(mean=0,
                                              std=args['noise_sigma']).to(device)
        seqn = seq + noise
        noisestd = torch.FloatTensor([args['noise_sigma']]).to(device)

        denframes = denoise_seq_dvdnet(seq=seqn,\
                                       noise_std=noisestd,\
                                       temp_psz=NUM_IN_FRAMES,\
                                       model_temporal=model_temp,\
                                       model_spatial=model_spa,\
                                       mc_algo=MC_ALGO)
    den_time = time.time()

    # Compute PSNR and log it
    psnr = batch_psnr(denframes, seq.squeeze(), 1.)
    psnr_noisy = batch_psnr(seqn.squeeze(), seq.squeeze(), 1.)
    print("\tPSNR on {} : {}\n".format(os.path.split(args['test_path'])[-1],
                                       psnr))
    print("\tDenoising time: {:.2f}s".format(den_time - seqload_time))
    print("\tSequence loaded in : {:.2f}s".format(seqload_time - start_time))
    print("\tTotal time: {:.2f}s\n".format(den_time - start_time))
    logger.info("%s, %s, PSNR noisy %fdB, PSNR %f dB" % \
                (args['test_path'], args['suffix'], psnr_noisy, psnr))

    # Save outputs
    if not args['dont_save_results']:
        # Save sequence
        save_out_seq(seqn, denframes, args['save_path'],
                     int(args['noise_sigma']*255), \
                     args['suffix'], args['save_noisy'])

    # close logger
    close_logger(logger)
)
# NOTE(review): the leading ')' closes a parser.add_argument(...) call whose
# beginning lies outside this chunk.
parser.add_argument(
    '--num_runs', type=int, default=10, help='Number of runs'
)
args = parser.parse_args()
DATA_SET = args.DATA_SET
num_runs = args.num_runs
# One log file per dataset.
LOG_FILE = 'log_results_{}.txt'.format(DATA_SET)
LOGGER = utils.get_logger(LOG_FILE)
utils.log_time(LOGGER)
LOGGER.info(DATA_SET)
results = []
# Repeat the experiment num_runs times and collect each run's mean AuPR.
for n in range(1,num_runs+1):
    mean_aupr, std = execute_run(DATA_SET)
    results.append(mean_aupr)
    LOGGER.info(' Run {}: Mean: {:4f} | Std {:4f}'.format(n,mean_aupr,std))
# Aggregate across runs: mean of per-run means, std across runs.
mean_all_runs = np.mean(results)
print('Mean AuPR over {} runs {:4f}'.format(num_runs, mean_all_runs))
print('Details: ', results)
LOGGER.info('Mean AuPR over {} runs {:4f} Std {:4f}'.format(num_runs,
    mean_all_runs, np.std(results)))
LOGGER.info(' Details ' + str(results))
utils.close_logger(LOGGER)
def main(**args):
    r"""Performs the main training loop for FastDVDnet: builds the DALI
    training loader and validation set, trains with synthetic Gaussian
    noise (random std per sequence), and checkpoints/validates per epoch.
    """
    gray_mode = args['gray']  # gray mode indicator
    C = 1 if gray_mode else 3  # number of color channels

    # Load dataset
    print('> Loading datasets ...')
    dataset_val = ValDataset(valsetdir=args['valset_dir'],
                             gray_mode=gray_mode)  # for grayscale/color video
    # dataset_val = ValDataset(valsetdir=args['valset_dir'], gray_mode=False) # for color videos only
    loader_train = train_dali_loader(batch_size=args['batch_size'],\
                                     file_root=args['trainset_dir'],\
                                     sequence_length=args['temp_patch_size'],\
                                     crop_size=args['patch_size'],\
                                     epoch_size=args['max_number_patches'],\
                                     random_shuffle=True,\
                                     temp_stride=3,\
                                     gray_mode=gray_mode)

    num_minibatches = int(args['max_number_patches'] // args['batch_size'])
    # Index of the central (ground-truth) frame inside a temporal patch.
    ctrl_fr_idx = (args['temp_patch_size'] - 1) // 2
    print("\t# of training samples: %d\n" % int(args['max_number_patches']))

    # Init loggers
    writer, logger = init_logging(args)

    # Define GPU devices
    device_ids = [0]
    torch.backends.cudnn.benchmark = True  # CUDNN optimization

    # Create model
    model = FastDVDnet(num_color_channels=C)
    model = model.cuda()

    # Define loss
    criterion = nn.MSELoss(reduction='sum')
    criterion.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args['lr'])

    # [AMP initialization] automated half-precision training
    if args['fp16']:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args['amp_opt_level'])

    # model = nn.DataParallel(model, device_ids=device_ids).cuda()
    model = nn.DataParallel(model)

    # Resume training or start anew
    start_epoch, training_params = resume_training(args, model, optimizer)

    # Training
    start_time = time.time()
    for epoch in range(start_epoch, args['epochs']):
        # Set learning rate
        current_lr, reset_orthog = lr_scheduler(epoch, args)
        if reset_orthog:
            training_params['no_orthog'] = True

        # set learning rate in optimizer
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_lr
        print('\nlearning rate %f' % current_lr)

        # train
        for i, data in enumerate(loader_train, 0):
            # Pre-training step
            model.train()

            # When optimizer = optim.Optimizer(net.parameters()) we only zero the optim's grads
            optimizer.zero_grad()

            # convert inp to [N, num_frames*C. H, W] in [0., 1.] from [N, num_frames, C. H, W] in [0., 255.]
            # extract ground truth (central frame)
            img_train, gt_train = normalize_augment(data[0]['data'],
                                                    ctrl_fr_idx, gray_mode)
            N, _, H, W = img_train.size()

            # std dev of each sequence, drawn uniformly from noise_ival
            stdn = torch.empty(
                (N, 1, 1, 1)).cuda().uniform_(args['noise_ival'][0],
                                              to=args['noise_ival'][1])
            # draw noise samples from std dev tensor
            noise = torch.zeros_like(img_train)
            noise = torch.normal(mean=noise, std=stdn.expand_as(noise))

            # define noisy input
            imgn_train = img_train + noise

            # Send tensors to GPU
            gt_train = gt_train.cuda(non_blocking=True)
            imgn_train = imgn_train.cuda(non_blocking=True)
            noise = noise.cuda(non_blocking=True)
            noise_map = stdn.expand(
                (N, 1, H, W)).cuda(non_blocking=True)  # one channel per image

            # Evaluate model and optimize it
            out_train = model(imgn_train, noise_map)

            # Compute loss
            loss = criterion(gt_train, out_train) / (N * 2)

            # [AMP scale loss to avoid overflow of float16] automated mixed precision training
            if args['fp16']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # Results
            if training_params['step'] % args['save_every'] == 0:
                # Apply regularization by orthogonalizing filters
                if not training_params['no_orthog']:
                    model.apply(svd_orthogonalization)

                # Compute training PSNR
                log_train_psnr(out_train, \
                               gt_train, \
                               loss, \
                               writer, \
                               epoch, \
                               i, \
                               num_minibatches, \
                               training_params)
            # update step counter
            training_params['step'] += 1

        # save model and checkpoint
        training_params['start_epoch'] = epoch + 1
        save_model_checkpoint(model, args, optimizer, training_params, epoch)

        # Call to model.eval() to correctly set the BN layers before inference
        model.eval()

        # Validation and log images
        validate_and_log(
            model_temp=model, \
            dataset_val=dataset_val, \
            valnoisestd=args['val_noiseL'], \
            temp_psz=args['temp_patch_size'], \
            writer=writer, \
            epoch=epoch, \
            lr=current_lr, \
            logger=logger, \
            trainimg=img_train, \
            gray_mode=gray_mode
        )

    # Print elapsed time
    elapsed_time = time.time() - start_time
    print('Elapsed time {}'.format(
        time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    # Close logger file
    close_logger(logger)
def search(args):
    """Predictor-guided evolutionary search on a NAS benchmark.

    Alternates between (1) fitting an accuracy predictor on all evaluated
    architectures and (2) taking gradient steps on the architecture pool to
    propose new candidates, until the simulated time budget is exhausted.

    Returns (best_arch, valid_acc, test_acc).
    """
    # Seed every RNG and force deterministic cuDNN for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    logdir = args.log_dir
    writer = SummaryWriter(log_dir=logdir)
    logger = get_logger(os.path.join(logdir, 'log'))
    logger.info('Arguments : -------------------------------')
    for name, value in args._get_kwargs():
        logger.info('{:16} : {:}'.format(name, value))
    predictor = Predictor(t_edge=1, t_node=len(OPS_FULL), n_node=N_NODES,
                          h_dim=64, n_out=1, n_layers=4).to('cuda')
    optim_p = torch.optim.Adam(predictor.parameters(), args.p_lr,
                               weight_decay=args.weight_decay)
    logger.info("params size = %fM" % (count_parameters(predictor) / 1e6))
    logger.info("\n")
    nas_bench = api.NASBench(args.nas_bench_path)
    cifar_bench = NASBench(nas_bench, average_all=args.average_all,
                           use_test=args.use_test)
    logger.info("initialize arch pool")
    arch_pool, seen_arch = initialize_pool(cifar_bench, args.pool_size)
    # logging initial samples
    best_arch_seen = cifar_bench.choose_best(seen_arch)
    logger.info("init pool: %d, seen arch: %d" %
                (len(arch_pool), len(seen_arch)))
    logger.info("simulated time cost: %f" % cifar_bench.total_cost)
    logger.info("best initial arch:")
    cifar_bench.log_arch(best_arch_seen, 0, 'acc_best', logger, writer)
    trace_history = []
    # History of (arch, looked-up accuracy) tuples, one list per step.
    select_history = [[(arch, cifar_bench.lookup(arch.struct))
                       for arch in arch_pool]]
    pid = [-1 for _ in arch_pool]
    if args.regression:
        # Normalize regression targets to zero mean / unit std of the
        # initial pool.
        _y = [cifar_bench.lookup(arch.struct) for arch in arch_pool]
        _mean = np.mean(_y)
        _std = np.std(_y)

        def reg_norm(y):
            return (y - _mean) / _std
    else:
        reg_norm = None
    # NOTE(review): reg_norm is defined but not referenced later in this
    # function — verify whether it should be passed to predictor.fit.
    global_train_step = 0
    for step in range(1, args.steps + 1):
        # train predictor
        logger.info('step on predictor')
        # use valid acc for predictor training
        train_loader = gd.DataListLoader(cifar_bench.history_data(),
                                         args.train_batch_size, shuffle=True)
        for _ in tqdm(range(args.epochs)):
            loss = predictor.fit(train_loader, optim_p, args.unlabeled_size,
                                 args.grad_clip)
            writer.add_scalar('loss_r', loss, global_train_step)
            global_train_step += 1

        # step on arch
        logger.info('step on arch')

        def checker(arch):
            # Accept only valid, not-yet-seen architectures.
            arch_str = cifar_bench.arch_str(arch.struct)
            return arch_str is not None and arch_str not in seen_arch

        step_size = args.step_size
        new_trace = []
        # Retry with exponentially larger step sizes (up to 8x) when the
        # gradient step yields no new architecture.
        while len(new_trace) == 0 and step_size < args.step_size * (2**3):
            new_trace = predictor.grad_step_on_archs(arch_pool,
                                                     args.step_batch_size,
                                                     step_size, checker,
                                                     log_conn=False)
            step_size *= 2
        # select new population according to predicted acc
        new_trace = sorted(new_trace, key=itemgetter(1))[:args.new_pop_limit]
        new_arch = [(t[0], t[-1]) for t in new_trace]
        old_arch = sorted(arch_pool,
                          key=lambda a: cifar_bench.lookup(a.struct))
        logger.info("produced %d new archs" % len(new_trace))
        trace_history.append(new_trace)
        arch_pool, selected, new_pid = merge_arch_pool(cifar_bench, old_arch,
                                                       new_arch,
                                                       args.time_budget,
                                                       args.pool_size)
        select_history.append(selected)
        seen_arch.update(cifar_bench.arch_str(a.struct) for a, _ in selected)
        pid += new_pid
        logger.info("step %d, new archs: %d, seen arch %d" %
                    (step, len(arch_pool), len(seen_arch)))
        logger.info("simulated time cost: %f" % cifar_bench.total_cost)
        if len(selected) > 0:
            # NOTE(review): 'min' / '<' imply the stored metric behaves as
            # an error (lower is better) — confirm against NASBench wrapper.
            best_arch_select, best_acc_select = min(selected,
                                                    key=itemgetter(1))
            best_arch_select = cifar_bench.arch_str(best_arch_select.struct)
            logger.info("best arch of current step:")
            cifar_bench.log_arch(best_arch_select, step, 'acc_step', logger,
                                 writer)
            if best_acc_select < cifar_bench.lookup(best_arch_seen):
                best_arch_seen = best_arch_select
                logger.info("best arch ever:")
                cifar_bench.log_arch(best_arch_seen, step, 'acc_best', logger,
                                     writer)
        if cifar_bench.total_cost >= args.time_budget:
            break
    # Persist the selection history, parent ids and trained predictor.
    with open(os.path.join(logdir, 'selections'), 'w') as f:
        for step in select_history:
            for a in step:
                f.write(cifar_bench.arch_str(a[0].struct) + ',' + str(a[1]))
                f.write('\n')
    with open(os.path.join(logdir, 'pid'), 'w') as f:
        for p in pid:
            f.write(str(p))
            f.write('\n')
    predictor.save(os.path.join(logdir, 'predictor.pth'))
    close_logger(logger)
    return best_arch_seen, cifar_bench.valid_acc(
        best_arch_seen), cifar_bench.test_acc(best_arch_seen)
def close(self):
    """Release resources held by this object: the metrics file handle
    (when one is open) and the attached logger."""
    fh = self.metrics_fh
    if fh is not None:
        fh.close()
    utils.close_logger(self.logger)
def test_fastdvdnet(**args):
    """Denoises all sequences present in a given folder. Sequences must be
    stored as numbered image sequences. The different sequences must be
    stored in subfolders under the "test_path" folder.

    Inputs:
        args (dict) fields:
            "model_file": path to model
            "test_path": path to sequence to denoise
            "suffix": suffix to add to output name
            "max_num_fr_per_seq": max number of frames to load per sequence
            "dont_save_results: if True, don't save output images
            "no_gpu": if True, run model on CPU
            "save_path": where to save outputs as png
            "gray": if True, perform denoising of grayscale images instead of RGB
    """
    # Start time
    start_time = time.time()

    # If save_path does not exist, create it
    if not os.path.exists(args['save_path']):
        os.makedirs(args['save_path'])
    logger = init_logger_test(args['save_path'])

    # Sets data type according to CPU or GPU modes
    if args['cuda']:
        # First entry of device_id selects the primary CUDA device.
        device = args['device_id'][0]
    else:
        device = torch.device('cpu')

    # Create models
    print('Loading models ...')
    model_temp = FastDVDnet(num_input_frames=NUM_IN_FR_EXT)

    # Load saved weights
    state_temp_dict = torch.load(args['model_file'])
    if args['cuda']:
        device_ids = args['device_id']
        model_temp = nn.DataParallel(model_temp,
                                     device_ids=device_ids).cuda(device)
    else:
        # CPU mode: remove the DataParallel wrapper
        state_temp_dict = remove_dataparallel_wrapper(state_temp_dict)
    model_temp.load_state_dict(state_temp_dict)

    # Sets the model in evaluation mode (e.g. it removes BN)
    model_temp.eval()

    gt = None
    with torch.no_grad():
        # process data
        seq, _, _ = open_sequence(args['test_path'], args['gray'],
                                  expand_if_needed=False,
                                  max_num_fr=args['max_num_fr_per_seq'])
        seq = torch.from_numpy(seq).to(device)
        seq_time = time.time()

        denframes = denoise_seq_fastdvdnet(seq=seq,
                                           temp_psz=NUM_IN_FR_EXT,
                                           model_temporal=model_temp)
        # Load the ground-truth sequence, if provided, for PSNR computation.
        if args['gt_path'] is not None:
            gt, _, _ = open_sequence(args['gt_path'], args['gray'],
                                     expand_if_needed=False,
                                     max_num_fr=args['max_num_fr_per_seq'])
            gt = torch.from_numpy(gt).to(device)

    # Compute PSNR and log it
    stop_time = time.time()
    if gt is None:
        # No ground truth: log 0 dB placeholders.
        psnr = 0
        psnr_noisy = 0
    else:
        psnr = batch_psnr(denframes, gt, 1.)
        psnr_noisy = batch_psnr(seq.squeeze(), gt, 1.)
    loadtime = (seq_time - start_time)
    runtime = (stop_time - seq_time)
    seq_length = seq.size()[0]
    logger.info("Finished denoising {}".format(args['test_path']))
    logger.info(
        "\tDenoised {} frames in {:.3f}s, loaded seq in {:.3f}s".format(
            seq_length, runtime, loadtime))
    logger.info("\tPSNR noisy {:.4f}dB, PSNR result {:.4f}dB".format(
        psnr_noisy, psnr))

    # Save outputs
    if not args['dont_save_results']:
        # Save sequence
        save_out_seq(seq, denframes, args['save_path'], 0, args['suffix'],
                     args['save_noisy'])

    # close logger
    close_logger(logger)
def main(**args): r"""Performs the main training loop """ # Load dataset print('> Loading datasets ...') dataset_val = ValDataset(valsetdir=args['valset_dir'], gtvalsetdir=args['gt_valset_dir'], gray_mode=False) loader_train = train_dali_loader(batch_size=args['batch_size'],\ file_root=args['trainset_dir'],\ gt_file_root=args['gt_trainset_dir'],\ sequence_length=args['temp_patch_size'],\ crop_size=args['patch_size'],\ epoch_size=args['max_number_patches'],\ device_id=args['device_id'][0],\ random_shuffle=True,\ temp_stride=3) num_minibatches = int(args['max_number_patches'] // args['batch_size']) ctrl_fr_idx = (args['temp_patch_size'] - 1) // 2 print("\t# of training samples: %d\n" % int(args['max_number_patches'])) # Init loggers writer, logger = init_logging(args) # Define GPU devices device_ids = args['device_id'] torch.backends.cudnn.benchmark = True # CUDNN optimization # Create model model = FastDVDnet() model = nn.DataParallel(model, device_ids=device_ids).cuda() # Define loss criterion = nn.MSELoss(reduction='sum') criterion.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=args['lr']) # Resume training or start anew start_epoch, training_params = resume_training(args, model, optimizer) # Training start_time = time.time() for epoch in range(start_epoch, args['epochs']): # Set learning rate current_lr, reset_orthog = lr_scheduler(epoch, args) if reset_orthog: training_params['no_orthog'] = True # set learning rate in optimizer for param_group in optimizer.param_groups: param_group["lr"] = current_lr print('\nlearning rate %f' % current_lr) # train for i, data in enumerate(loader_train, 0): # Pre-training step model.train() # When optimizer = optim.Optimizer(net.parameters()) we only zero the optim's grads optimizer.zero_grad() # convert inp to [N, num_frames*C. H, W] in [0., 1.] from [N, num_frames, C. H, W] in [0., 255.] 
# extract ground truth (central frame) img_train, gt_train = normalize_augment(data[0], ctrl_fr_idx) N, _, H, W = img_train.size() # Send tensors to GPU gt_train = gt_train.cuda(non_blocking=True) img_train = img_train.cuda(non_blocking=True) # Evaluate model and optimize it out_train = model(img_train) ''' while torch.isinf(out_train).any(): print("out_inf") return ''' loss = criterion(gt_train, out_train) / (N * 2) loss.backward() optimizer.step() # Results if training_params['step'] % args['save_every'] == 0: # Apply regularization by orthogonalizing filters if not training_params['no_orthog']: model.apply(svd_orthogonalization) # Compute training PSNR log_train_psnr(out_train, \ gt_train, \ loss, \ writer, \ epoch, \ i, \ num_minibatches, \ training_params) # update step counter training_params['step'] += 1 # Call to model.eval() to correctly set the BN layers before inference model.eval() # Validation and log images validate_and_log( model_temp=model, \ dataset_val=dataset_val, \ valnoisestd=0, \ temp_psz=args['temp_patch_size'], \ writer=writer, \ epoch=epoch, \ lr=current_lr, \ logger=logger, \ trainimg=img_train ) # save model and checkpoint training_params['start_epoch'] = epoch + 1 save_model_checkpoint(model, args, optimizer, training_params, epoch) # Print elapsed time elapsed_time = time.time() - start_time print('Elapsed time {}'.format( time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))) # Close logger file close_logger(logger)
def test_fastdvdnet(**args): """Denoises all sequences present in a given folder. Sequences must be stored as numbered image sequences. The different sequences must be stored in subfolders under the "test_path" folder. Inputs: args (dict) fields: "model_file": path to model "test_path": path to sequence to denoise "suffix": suffix to add to output name "max_num_fr_per_seq": max number of frames to load per sequence "noise_sigma": noise level used on test set "dont_save_results: if True, don't save output images "no_gpu": if True, run model on CPU "save_path": where to save outputs as png "gray": if True, perform denoising of grayscale images instead of RGB """ # If save_path does not exist, create it if not os.path.exists(args['save_path']): os.makedirs(args['save_path']) logger = init_logger_test(args['save_path']) # Sets data type according to CPU or GPU modes if args['cuda']: device = torch.device('cuda') else: device = torch.device('cpu') # Create models print('Loading models ...') model_temp = FastDVDnet(num_input_frames=NUM_IN_FR_EXT) # Load saved weights state_temp_dict = torch.load(args['model_file']) if args['cuda']: device_ids = [0] model_temp = nn.DataParallel(model_temp, device_ids=device_ids).cuda() else: # CPU mode: remove the DataParallel wrapper state_temp_dict = remove_dataparallel_wrapper(state_temp_dict) model_temp.load_state_dict(state_temp_dict) # Sets the model in evaluation mode (e.g. it removes BN) model_temp.eval() processed_count = 0 # To avoid out of memory issues, we only process one folder at a time. 
for tmp_folder in get_next_folder(args['test_path'], args['max_num_fr_per_seq']): folder = tmp_folder.name # Start time print("Processing {}".format(os.listdir(tmp_folder.name))) logger.info("Processing {}".format(os.listdir(folder))) start_time = time.time() with torch.no_grad(): # process data seq, _, _ = open_sequence(folder, args['gray'], expand_if_needed=False, max_num_fr=args['max_num_fr_per_seq']) seq = torch.from_numpy(seq).to(device) seq_time = time.time() # Add noise noise = torch.empty_like(seq).normal_( mean=0, std=args['noise_sigma']).to(device) seqn = seq + noise noisestd = torch.FloatTensor([args['noise_sigma']]).to(device) denframes = denoise_seq_fastdvdnet(seq=seqn, noise_std=noisestd, temp_psz=NUM_IN_FR_EXT, model_temporal=model_temp) # Compute PSNR and log it stop_time = time.time() psnr = batch_psnr(denframes, seq, 1.) psnr_noisy = batch_psnr(seqn.squeeze(), seq, 1.) loadtime = (seq_time - start_time) runtime = (stop_time - seq_time) seq_length = seq.size()[0] logger.info("Finished denoising {}".format(args['test_path'])) logger.info("\tDenoised {} frames in {:.3f}s, loaded seq in {:.3f}s". format(seq_length, runtime, loadtime)) logger.info( "\tPSNR noisy {:.4f}dB, PSNR result {:.4f}dB".format(psnr_noisy, psnr)) # Save outputs if not args['dont_save_results']: # Save sequence save_out_seq(seqn, denframes, args['save_path'], int(args['noise_sigma']*255), args['suffix'], args['save_noisy'], processed_count) # subtract half stride because of the half-steps get_next_folder takes. processed_count+=seqn.size()[0] # close logger close_logger(logger)
def search(args):
    """Predictor-guided evolutionary search on NAS-Bench-201.

    Alternates between (1) fitting an accuracy predictor on all evaluated
    architectures and (2) gradient steps on the architecture pool to
    propose new candidates, until the simulated time budget runs out.

    Returns (best_arch, valid_acc, test_acc).
    """
    # Seed every RNG and force deterministic cuDNN for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    logdir = args.log_dir
    writer = SummaryWriter(log_dir=logdir)
    logger = get_logger(os.path.join(logdir, 'log'))
    nas_bench = nb.NASBench201API(args.nas_bench_path)
    logger.info('Arguments : -------------------------------')
    for name, value in args._get_kwargs():
        logger.info('{:16} : {:}'.format(name, value))
    predictor = Predictor(len(OP_SPACE), 1, N_NODES, 64, 1,
                          n_layers=3).to('cuda')
    optim_p = torch.optim.Adam(predictor.parameters(), args.p_lr,
                               weight_decay=args.weight_decay)
    logger.info("params size = %fM" % (count_parameters(predictor) / 1e6))
    logger.info("\n")
    cifar_bench = CifarBench(nas_bench)

    def log_arch(arch, step, name):
        # Log 12-epoch valid acc, final valid acc and test acc for `arch`.
        valid_12_acc = cifar_bench.lookup(arch)
        valid_final_acc = cifar_bench.valid_acc(arch)
        test_acc = cifar_bench.test_acc(arch)
        logger.info('\n' + nas_bench.query_by_arch(arch))
        writer.add_scalar('%s/valid12' % name, valid_12_acc, step)
        writer.add_scalar('%s/valid' % name, valid_final_acc, step)
        writer.add_scalar('%s/test' % name, test_acc, step)

    logger.info("initialize arch pool")
    arch_pool, seen_arch = initialize_pool(cifar_bench, args.pool_size)
    # logging initial samples
    best_arch_seen = cifar_bench.choose_best(seen_arch)
    logger.info("init pool: %d, seen arch: %d" %
                (len(arch_pool), len(seen_arch)))
    logger.info("simulated time cost: %f" % cifar_bench.total_cost)
    logger.info("best initial arch:")
    log_arch(best_arch_seen, 0, 'acc_best')
    trace_history = []
    # select_history = []
    # History of (arch, looked-up accuracy) tuples, one list per step.
    select_history = [[(arch, cifar_bench.lookup(arch.struct.tostr()))
                       for arch in arch_pool]]
    pid = [-1 for _ in arch_pool]
    global_train_step = 0
    for step in range(1, args.steps + 1):
        # train predictor
        logger.info('step on predictor')
        # use valid acc for predictor training
        arch_data = cifar_bench.history_data()
        # NOTE(review): arch_data is shuffled each epoch below but never fed
        # to the loader (which was built from a separate history_data() call
        # with shuffle=True) — verify whether the shuffle is vestigial.
        train_loader = gd.DataListLoader(cifar_bench.history_data(),
                                         args.train_batch_size, shuffle=True)
        for _ in tqdm(range(args.epochs)):
            random.shuffle(arch_data)
            loss = predictor.fit(train_loader, optim_p, args.grad_clip,
                                 args.decoder_coe)
            writer.add_scalar('loss_r', loss, global_train_step)
            global_train_step += 1

        # step on arch
        def checker(arch):
            # Accept only architectures not seen before.
            return arch.struct.tostr() not in seen_arch

        logger.info('step on arch')
        step_size = args.step_size
        new_trace = []
        # Retry with exponentially larger step sizes (up to 8x) when the
        # gradient step yields no new architecture.
        while len(new_trace) == 0 and step_size < args.step_size * (2**3):
            new_trace = predictor.grad_step_on_archs(arch_pool,
                                                     args.step_batch_size,
                                                     step_size, checker)
            step_size *= 2
        # select new population according to predicted acc
        new_trace = sorted(new_trace, key=itemgetter(1))[:args.new_pop_limit]
        new_arch = [(t[0], t[-1]) for t in new_trace]
        old_arch = sorted(arch_pool,
                          key=lambda a: cifar_bench.lookup(a.struct.tostr()))
        logger.info("produced %d new archs" % len(new_trace))
        trace_history.append(new_trace)
        arch_pool, selected, new_pid = merge_arch_pool(cifar_bench, old_arch,
                                                       new_arch,
                                                       args.time_budget,
                                                       args.pool_size)
        select_history.append(selected)
        seen_arch.update(a.struct.tostr() for a, _ in selected)
        pid += new_pid
        logger.info("step %d, new archs: %d, seen arch %d" %
                    (step, len(arch_pool), len(seen_arch)))
        logger.info("simulated time cost: %f" % cifar_bench.total_cost)
        if len(selected) > 0:
            # NOTE(review): 'min' / '<' imply the stored metric behaves as
            # an error (lower is better) — confirm against CifarBench.
            best_arch_select, best_acc_select = min(selected,
                                                    key=itemgetter(1))
            best_arch_select = best_arch_select.struct.tostr()
            logger.info("best arch of current step:")
            log_arch(best_arch_select, step, 'acc_step')
            if best_acc_select < cifar_bench.lookup(best_arch_seen):
                best_arch_seen = best_arch_select
                logger.info("best arch ever:")
                log_arch(best_arch_seen, step, 'acc_best')
        if cifar_bench.total_cost >= args.time_budget:
            break
    # Persist the selection history, parent ids and trained predictor.
    with open(os.path.join(logdir, 'selections'), 'w') as f:
        for step in select_history:
            for a in step:
                f.write(a[0].struct.tostr() + ',' + str(a[1]))
                f.write('\n')
    with open(os.path.join(logdir, 'pid'), 'w') as f:
        for p in pid:
            f.write(str(p))
            f.write('\n')
    predictor.save(os.path.join(logdir, 'predictor.pth'))
    close_logger(logger)
    return best_arch_seen, cifar_bench.valid_acc(
        best_arch_seen), cifar_bench.test_acc(best_arch_seen)