import cv2
import torch
from tqdm import tqdm

# Recorder, CUDATimer, and tensor2im are project-local helpers (interfaces
# sketched after this function); this import path is an assumption.
from utils import Recorder, CUDATimer, tensor2im


def test(self, data_fetcher, num_samples, if_baseline=False,
         if_return_each=False, img_save_folder=None, if_train=True):
    """
    val (in training): idx_out = 0/1/2/3/4
    test: idx_out = -2; record time without IQA
    """
    if if_baseline or if_train:
        assert self.crit_lst is not None, 'NO METRICS!'

    if self.crit_lst is not None:
        if_tar_only = False
        msg = 'dst vs. src | ' if if_baseline else 'tar vs. src | '
    else:
        if_tar_only = True
        msg = 'only get dst | '

    report_dict = None

    recorder_dict = dict()
    if self.crit_lst is not None:  # guard: crit_lst may be None in tar-only mode
        for crit_name in self.crit_lst:
            recorder_dict[crit_name] = Recorder()

    write_dict_lst = []
    timer = CUDATimer()

    # validation baseline: no IQA, no name parsing
    # validation, not baseline: no IQA, parse name
    # test baseline: no IQA, no name parsing
    # test, not baseline: IQA, no name parsing
    if_iqa = (not if_train) and (not if_baseline)
    if if_iqa:
        timer_wo_iqam = Recorder()
        idx_out = -2  # testing; judged by IQAM
    if_parse_name = if_train and (not if_baseline)

    self.set_eval_mode()
    data_fetcher.reset()
    test_data = data_fetcher.next()
    assert len(test_data['name']) == 1, 'ONLY SUPPORT bs==1!'

    pbar = tqdm(total=num_samples, ncols=100)
    while test_data is not None:
        im_lq = test_data['lq'].cuda(non_blocking=True)  # assume bs == 1
        im_name = test_data['name'][0]  # assume bs == 1

        if if_parse_name:  # map the QF/QP suffix of the file name to an exit index
            im_type = im_name.split('_')[-1].split('.')[0]
            if im_type in ['qf50', 'qp22']:
                idx_out = 0
            elif im_type in ['qf40', 'qp27']:
                idx_out = 1
            elif im_type in ['qf30', 'qp32']:
                idx_out = 2
            elif im_type in ['qf20', 'qp37']:
                idx_out = 3
            elif im_type in ['qf10', 'qp42']:
                idx_out = 4
            else:
                raise ValueError(f'im_type IS {im_type}; NO MATCHING TYPE!')

        timer.start_record()
        if if_tar_only:
            if if_iqa:
                # the net returns (time_wo_iqa, im_out); unpack first, then
                # clamp the image only (clamping the tuple was a bug)
                time_wo_iqa, im_out = self.model.net[self.model.infer_subnet](
                    inp_t=im_lq, idx_out=idx_out)
                im_out = im_out.clamp_(0., 1.)
            else:
                im_out = self.model.net[self.model.infer_subnet](
                    inp_t=im_lq, idx_out=idx_out).clamp_(0., 1.)
            timer.record_inter()
        else:
            im_gt = test_data['gt'].cuda(non_blocking=True)  # assume bs == 1
            if if_baseline:
                im_out = im_lq
            else:
                if if_iqa:
                    time_wo_iqa, im_out = self.model.net[self.model.infer_subnet](
                        inp_t=im_lq, idx_out=idx_out)
                    im_out = im_out.clamp_(0., 1.)
                else:
                    im_out = self.model.net[self.model.infer_subnet](
                        inp_t=im_lq, idx_out=idx_out).clamp_(0., 1.)
            timer.record_inter()

            _msg = f'{im_name} | '
            for crit_name in self.crit_lst:
                crit_fn = self.crit_lst[crit_name]['fn']
                crit_unit = self.crit_lst[crit_name]['unit']
                perfm = crit_fn(torch.squeeze(im_out, 0), torch.squeeze(im_gt, 0))
                recorder_dict[crit_name].record(perfm)
                _msg += f'[{perfm:.3e}] {crit_unit:s} | '
            _msg = _msg[:-3]
            if if_return_each:
                msg += _msg + '\n'
            pbar.set_description(_msg)

        if if_iqa:
            timer_wo_iqam.record(time_wo_iqa)

        if img_save_folder is not None:  # save the output image
            im = tensor2im(torch.squeeze(im_out, 0))
            save_path = img_save_folder / (str(im_name) + '.png')
            cv2.imwrite(str(save_path), im)

        pbar.update()
        test_data = data_fetcher.next()
    pbar.close()

    if not if_tar_only:
        for crit_name in self.crit_lst:
            crit_unit = self.crit_lst[crit_name]['unit']
            crit_if_focus = self.crit_lst[crit_name]['if_focus']
            ave_perfm = recorder_dict[crit_name].get_ave()
            msg += f'{crit_name} | [{ave_perfm:.3e}] {crit_unit} | '
            write_dict_lst.append(dict(tag=f'{crit_name} (val)', scalar=ave_perfm))
            if crit_if_focus:
                report_dict = dict(ave_perfm=ave_perfm,
                                   lsb=self.crit_lst[crit_name]['fn'].lsb)

    ave_fps = 1. / timer.get_ave_inter()
    msg += f'ave. fps | [{ave_fps:.1f}]'

    if if_iqa:
        ave_time_wo_iqam = timer_wo_iqam.get_ave()
        fps_wo_iqam = 1. / ave_time_wo_iqam
        msg += f' | ave. fps wo. IQAM | [{fps_wo_iqam:.1f}]'

    if if_train:
        assert report_dict is not None
        return msg.rstrip(), write_dict_lst, report_dict
    else:
        return msg.rstrip()
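# ---------------------------------------------------------------------------
# Minimal sketches of the Recorder and CUDATimer helpers that test() relies
# on, written only from the interface used above (record/get_ave and
# start_record/record_inter/get_ave_inter). The project's real
# implementations may differ.
# ---------------------------------------------------------------------------
class Recorder:
    """Accumulate scalar values and report their average."""

    def __init__(self):
        self.total = 0.
        self.count = 0

    def record(self, value):
        self.total += float(value)
        self.count += 1

    def get_ave(self):
        return self.total / self.count


class CUDATimer:
    """Time GPU work with CUDA events and average the measured intervals."""

    def __init__(self):
        self.inter_times = []  # per-sample inference times in seconds
        self.start_event = None

    def start_record(self):
        self.start_event = torch.cuda.Event(enable_timing=True)
        self.start_event.record()

    def record_inter(self):
        end_event = torch.cuda.Event(enable_timing=True)
        end_event.record()
        torch.cuda.synchronize()  # events must complete before elapsed_time
        # elapsed_time returns milliseconds; store seconds
        self.inter_times.append(self.start_event.elapsed_time(end_event) / 1e3)

    def get_ave_inter(self):
        return sum(self.inter_times) / len(self.inter_times)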
        # (inside the per-batch loop)
        # base_hidden = repackage_hidden(base_hidden)
        # hidden = repackage_hidden(hidden)
        nbsz = i // cfg['max_len'] + 1
        if nbsz % cfg['report_interval'] == 0:
            print('batch ', nbsz, ': loss = ',
                  total_loss.cpu().numpy() / cfg['report_interval'])
            print('batch ', nbsz, ': LM loss = ',
                  total_LM_loss / cfg['report_interval'])
            total_loss = 0.0
            total_LM_loss = 0.0
            # print('elapsed time: ', time.time() - start_time)
            # loss = evaluate(val_data, reinforce_model.policy, cfg)
            # print('validation loss: ', loss)

    # End-of-epoch bookkeeping: validate, anneal the learning rate when the
    # validation loss stops improving, and snapshot the policy.
    print('Epoch: ', epoch, ' elapsed: ', time.time() - start_time)
    loss = evaluate(val_data, reinforce_model.policy, cfg)
    if loss > valid_loss[-1]:
        annealing(optimizer, decay_rate=2)
        cfg['lr'] /= 2
        print('learning rate anneals to ', cfg['lr'])
    valid_loss.append(loss)
    recorder.record(epoch, cfg['alpha'], loss)

    save_path = cfg['saveto'] + '_epoch' + str(epoch) + '_loss' + str(loss)
    save_model(save_path, reinforce_model.policy)
    print('Epoch: ', epoch, ' saved to ', save_path)

# After all epochs: evaluate on the test set once and close the recorder.
test_loss = evaluate(test_data, reinforce_model.policy, cfg)
recorder.record('', cfg['alpha'], test_loss)
recorder.close()
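# ---------------------------------------------------------------------------
# Sketches of two helpers referenced above, under stated assumptions:
# repackage_hidden is taken to be the standard truncated-BPTT trick (detach
# the hidden state from the old computation graph), and annealing is assumed
# to divide every parameter group's learning rate by decay_rate, mirroring
# the cfg['lr'] /= 2 bookkeeping. The project's own versions may differ.
# ---------------------------------------------------------------------------
import torch


def repackage_hidden(h):
    """Detach hidden states from their computation-graph history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)


def annealing(optimizer, decay_rate=2):
    """Divide each parameter group's learning rate by decay_rate."""
    for group in optimizer.param_groups:
        group['lr'] /= decay_rate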
        out = model.forward(x)
        loss = model.loss(out, target)
        loss.backward()
        optimizer.step()

    # End-of-epoch timing and evaluation.
    end_time = time.time()
    epoch_time.update(end_time - start_time)
    training_time = end_time - all_start_time

    model.eval()
    print('Epoch {0} finished in {et.val:.3f}s (avg.: {et.avg:.3f}s). Training for {1}'
          .format(epoch, format_time(training_time), et=epoch_time))
    # note: newer PyTorch versions prefer scheduler.get_last_lr()
    print('\tLR: {:.4}'.format(scheduler.get_lr()[0]))

    if (epoch + 1) % args.print_freq == 0:
        accuracy, ave_loss = compute_acc_loss(my_eval, train_loader)
        rec.record('train', [ave_loss, accuracy, training_time, epoch + 1])
        print('\ttrain loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))

        accuracy, ave_loss = compute_acc_loss(my_eval, test_loader)
        print('\ttest loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
        rec.record('test', [ave_loss, accuracy, training_time, epoch + 1])

    scheduler.step()

    if args.checkpoint and (epoch + 1) % args.checkpoint == 0:
        # Create and save a checkpoint.
        to_save = {'records': rec,
                   'epoch_time': epoch_time,
                   'training_time': training_time}
        to_save['model_state'] = model.state_dict()
        to_save['optimizer_state'] = optimizer.state_dict()
        to_save['lr'] = scheduler.get_lr()[0]
        to_save['epoch'] = epoch + 1
        to_save['args'] = args
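        # The fragment ends before the actual save call. A minimal, hedged
        # completion: torch.save to a hypothetical args.checkpoint_dir
        # (this argument name is not in the original; requires `import os`).
        ckpt_path = os.path.join(args.checkpoint_dir,
                                 'ckpt_epoch{}.pth'.format(epoch + 1))
        torch.save(to_save, ckpt_path)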