def cpu_sk(self):
    """ Sinkhorn Knopp optimization on CPU
        * stores activations to RAM
        * does matrix-vector multiplies on CPU
        * slower than GPU
    """
    # 1. aggregate inputs:
    N = len(self.pseudo_loader.dataset)
    if self.num_heads == 1:
        self.PS = np.zeros((N, self.num_clusters_per_head), dtype=self.dtype)
    else:
        self.PS_pre = np.zeros((N, self.presize), dtype=self.dtype)
    now = time.time()
    l_dl = len(self.pseudo_loader)
    agg_start = time.time()
    batch_time = MovingAverage(intertia=0.9)
    self.model.headcount = 1
    for batch_idx, (data, _, _selected) in enumerate(self.pseudo_loader):
        data = data.to(self.device)
        mass = data.size(0)
        if self.num_heads == 1:
            p = nn.functional.softmax(self.model(data), 1)
            self.PS[_selected, :] = p.detach().cpu().numpy().astype(self.dtype)
        else:
            p = self.model(data)
            self.PS_pre[_selected, :] = p.detach().cpu().numpy().astype(self.dtype)
        batch_time.update(time.time() - now)
        now = time.time()
        if batch_idx % 50 == 0:
            print(f"Aggregating batch {batch_idx:03}/{l_dl}, speed: {mass / batch_time.avg:04.1f}Hz",
                  end='\r', flush=True)
    self.model.headcount = self.num_heads
    print("Aggreg of outputs took {0:.2f} min".format((time.time() - agg_start) / 60.), flush=True)

    # 2. solve label assignment via sinkhorn-knopp:
    if self.num_heads == 1:
        optimize_L_sk(self, nh=0)
    else:
        for nh in range(self.num_heads):
            print(f"computing head {nh} ", end="\r", flush=True)
            tl = getattr(self.model, f"top_layer{nh:d}")
            time_mat = time.time()
            # clear memory (PS does not exist yet on the first head)
            try:
                del self.PS
            except AttributeError:
                pass
            # apply last FC layer (a matmul and adding of bias)
            self.PS = (self.PS_pre @ tl.weight.cpu().numpy().T.astype(self.dtype)
                       + tl.bias.cpu().numpy().astype(self.dtype))
            print(f"matmul took {(time.time() - time_mat) / 60:.2f}min", flush=True)
            self.PS = py_softmax(self.PS, 1)
            optimize_L_sk(self, nh=nh)
    return
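# --- Hedged illustration (not part of the original code) --------------------
# optimize_L_sk above is assumed to run a Sinkhorn-Knopp style balancing of
# self.PS so that pseudo-labels are spread evenly over clusters.  The sketch
# below shows that balancing step on a plain NumPy matrix; the function name,
# uniform marginals and iteration count are assumptions for illustration only.
import numpy as np

def sinkhorn_knopp_sketch(P, n_iters=50, eps=1e-12):
    """Rescale a non-negative (N x K) matrix so rows sum to 1/N and columns to 1/K."""
    N, K = P.shape
    P = P / P.sum()  # start from a joint distribution over (sample, cluster)
    for _ in range(n_iters):
        P = P / (P.sum(axis=1, keepdims=True) * N + eps)  # fix row marginals to 1/N
        P = P / (P.sum(axis=0, keepdims=True) * K + eps)  # fix column marginals to 1/K
    return P.argmax(axis=1)  # one balanced pseudo-label per sample

# Example: sinkhorn_knopp_sketch(np.random.rand(8, 4)) assigns roughly two samples per cluster.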
def aggreg_multi_gpu(model, dataloader, hc, dim, TYPE=torch.float64, model_gpus=1):
    """Accumulate activations and save them on multiple GPUs
        * this function assumes the model is on the first `model_gpus` GPUs
          so that it can write the activations on the remaining ones
        * it splits the activations evenly between the remaining GPUs
    """
    # number of gpus to store
    ngpu_store = torch.cuda.device_count() - model_gpus
    # number of batches in DL
    l_dl = len(dataloader)
    # number of batches each gpu gets
    batches_per_gpu = l_dl // ngpu_store
    # number of data each gpu gets
    points_per_gpu = batches_per_gpu * dataloader.batch_size
    # empty array of indices that we need to keep track of
    indices = torch.empty(len(dataloader.dataset), dtype=torch.long)
    # set up matrix PS: (N x K) when using one head, otherwise (N x D),
    # where D is the dim before the last FC layer.
    PS = [
        torch.empty(points_per_gpu, dim, device='cuda:' + str(i), dtype=TYPE)
        for i in range(model_gpus, model_gpus + ngpu_store - 1)
    ]
    # accommodate remainder
    PS.append(
        torch.empty(len(dataloader.dataset) - (ngpu_store - 1) * points_per_gpu,
                    dim,
                    device='cuda:' + str(model_gpus + ngpu_store - 1),
                    dtype=TYPE))
    # slice sizes, i.e. how many activations will be on the gpus
    slices = [qq.shape[0] for qq in PS]
    print("slice sizes: ", slices, flush=True)
    batch_time = MovingAverage(intertia=0.9)
    now = time.time()
    st = 0
    softmax = torch.nn.Softmax(dim=1).to('cuda:0')
    # switch the model so that it outputs the last-FC logits for one head and
    # the pre-last activations for multiple heads
    model.headcount = 1
    for batch_idx, (data, _, _selected) in enumerate(dataloader):
        data = data.to(torch.device('cuda:0'))
        mass = data.size(0)
        en = st + mass
        # j keeps track of which part of PS we're writing to
        j = min((batch_idx // batches_per_gpu), ngpu_store - 1)
        subs = j * points_per_gpu
        if hc == 1:
            p = softmax(model(data)).detach().to(TYPE)
            # when using one head: save softmax (N x K) matrix:
            PS[j][st - subs:en - subs, :].copy_(p)
        else:
            # when using multiple heads: save pre-head (N x D) activations
            PS[j][st - subs:en - subs, :].copy_(model(data).detach())
        indices[st:en].copy_(_selected)
        st = en
        batch_time.update(time.time() - now)
        now = time.time()
        if batch_idx % 50 == 0:
            print(f"Aggregating batch {batch_idx:03}/{l_dl}, speed: {mass / batch_time.avg:04.1f}Hz. To rGPU {j + 1}",
                  end='\r', flush=True)
    torch.cuda.synchronize()  # just in case
    return PS, indices
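# --- Hedged helper (an assumption, not part of the original code) -----------
# aggreg_multi_gpu returns PS as a list of per-GPU shards plus the dataset
# indices in write order.  The helper below shows one way to map a global row
# of the concatenated activation matrix back to (shard, local row) using the
# same slice sizes the function prints; the helper name is hypothetical.
from typing import List, Tuple

def locate_row(global_row: int, slices: List[int]) -> Tuple[int, int]:
    """Return (shard index, local row) for a row in the concatenation of PS."""
    offset = 0
    for shard, size in enumerate(slices):
        if global_row < offset + size:
            return shard, global_row - offset
        offset += size
    raise IndexError(f"row {global_row} outside the {sum(slices)} stored rows")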
def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder,
          conv5, model_name, stop_iter, pre_emb=False):
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    snapshot_file = os.path.join(tfmodel_folder, dataset + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    emb_name = 'referit' if dataset == 'referit' else 'Gref'

    if pre_emb:
        print("Use pretrained Embeddings.")
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5, emb_name=emb_name)
    else:
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5)

    weights = './data/weights/deeplab_resnet_init.ckpt'
    print("Loading pretrained weights from {}".format(weights))
    load_var = {var.op.name: var for var in tf.global_variables()
                if var.name.startswith('res') or var.name.startswith('bn')
                or var.name.startswith('conv1')}

    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=4)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, weights)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    valid_idx_batch = np.zeros((bs, 1), dtype=np.int32)

    reader = data_reader.DataReader(data_folder, data_prefix)

    # for time calculation
    last_time = time.time()
    time_avg = MovingAverage()
    for n_iter in range(max_iter):

        for n_batch in range(bs):
            batch = reader.read_batch(is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)

            im = im[:, :, ::-1]
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask

            for idx in range(text.shape[0]):
                if text[idx] != 0:
                    valid_idx_batch[n_batch, :] = idx
                    break

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate, model.pred, model.target],
            feed_dict={
                model.words: text_batch,        # np.expand_dims(text, axis=0),
                model.im: image_batch,          # np.expand_dims(im, axis=0),
                model.target_fine: mask_batch,  # np.expand_dims(mask, axis=0)
                model.valid_idx: valid_idx_batch
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg

        # timing
        cur_time = time.time()
        elapsed = cur_time - last_time
        last_time = cur_time

        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
            time_avg.add(elapsed)
            print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s'
                  % (n_iter, elapsed, time_avg.get_avg(), model_name))

        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))

        if (n_iter + 1) >= stop_iter:
            print('stop training at iter ' + str(stop_iter))
            break

    print('Optimization done.')
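# --- Hedged sketch (an assumption about compute_accuracy) -------------------
# The running accuracies above come from compute_accuracy(scores_val, label_val),
# which is not shown here.  A minimal NumPy version consistent with its three
# return values (overall, positive-pixel and negative-pixel accuracy) might look
# like this; the 0.0 decision threshold is an assumption for illustration.
import numpy as np

def compute_accuracy_sketch(scores, labels):
    pred = scores > 0.0          # predicted foreground mask
    gt = labels > 0.0            # ground-truth foreground mask
    acc_all = float(np.mean(pred == gt))
    acc_pos = float(np.mean(pred[gt])) if gt.any() else 0.0        # accuracy on foreground pixels
    acc_neg = float(np.mean(~pred[~gt])) if (~gt).any() else 0.0   # accuracy on background pixels
    return acc_all, acc_pos, acc_neg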
def optimize_epoch(self, model, criterion, optimizer, loader, epoch, is_validation=False):
    top1 = []
    top5 = []
    loss_value = []
    for i in range(len(model.probes)):
        top1.append(TotalAverage())
        top5.append(TotalAverage())
        loss_value.append(TotalAverage())
    batch_time = MovingAverage(intertia=0.9)
    now = time.time()
    if is_validation is False:
        model.train()
        lr = self.lr_schedule(epoch)
        for pg in optimizer.param_groups:
            pg['lr'] = lr
        print(f"Starting epoch {epoch} with learning rate {lr}")
    else:
        model.eval()

    for iter, (input, label) in enumerate(loader):
        input = input.to('cuda:0')
        label = label.to('cuda:0')
        mass = input.size(0)
        total_loss = None
        if args.data in ['Imagenet', 'Places'] and is_validation and args.tencrops:
            bs, ncrops, c, h, w = input.size()
            input_tensor = input.view(-1, c, h, w)
            input = torch.autograd.Variable(input_tensor.cuda())
        else:
            input = torch.autograd.Variable(input.cuda())
        predictions = model(input)
        if args.data in ['Imagenet', 'Places'] and is_validation and args.tencrops:
            predictions = [
                torch.squeeze(p.view(bs, ncrops, -1).mean(1))
                for p in predictions
            ]
        for i, prediction in enumerate(predictions):
            loss = criterion(prediction, label)
            if total_loss is None:
                total_loss = loss
            else:
                total_loss = total_loss + loss
            top1_, top5_ = accuracy(prediction, label, topk=(1, 5))
            top1[i].update(top1_.item(), mass)
            top5[i].update(top5_.item(), mass)
            loss_value[i].update(loss.item(), mass)
        if is_validation is False:
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
        batch_time.update(time.time() - now)
        now = time.time()

    top1_str = 'top1 val' if is_validation else 'top1 train'
    top5_str = 'top5 val' if is_validation else 'top5 train'
    writer.add_scalars(
        top1_str,
        {f"depth_{k+1}": top1[k].avg for k in range(len(model.probes))}, epoch)
    writer.add_scalars(
        top5_str,
        {f"depth_{k+1}": top5[k].avg for k in range(len(model.probes))}, epoch)
    writer.add_scalars('losses', {
        f"depth_{k+1}": loss_value[k].avg for k in range(len(model.probes))
    }, epoch)

    print('VAL:' if is_validation else 'TRAIN:')
    for i in range(len(model.probes)):
        print(f" [{i}] t1:{top1[i].avg:04.2f} loss:{loss_value[i].avg:.2f}", end='')
    print()

    return {
        "loss": [x.avg for x in loss_value],
        "top1": [x.avg for x in top1],
        "top5": [x.avg for x in top5]
    }
def train(method, environment, resume, episodes, lr, lr_episodes, min_lr, eval_only,
          replay_width, batch_size, gamma, update_rate, save_interval):
    history = History(method + '_' + environment, ['steps', 'avg_reward', 'loss'],
                      resume is not None)
    history.flush()
    memory = ReplayMemory(replay_width)
    game = Game(name=environments_to_names[environment], memory=memory, render=False)
    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n

    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape,
                      n_actions,
                      environment,
                      episodes,
                      update_rate,
                      step_size=lr_episodes,
                      lr=lr,
                      save_interval=save_interval)

    # resume from a ckpt
    if resume is not None:
        agent.load(resume)

    avg_reward = MovingAverage(100)
    avg_loss = MovingAverage(100)

    log.info(f'Training for {episodes} episodes, starting ...')

    # main training loop
    for i in range(episodes):
        state = game.reset()
        done = False
        loss = None

        while not done:
            state = game.state
            action = agent.select_action(state)
            transition, done = game.step(int(action.to('cpu').numpy()))
            if len(memory) > batch_size:
                batched = memory.sample(batch_size)
                loss = agent.train(batched, batch_size, gamma, i)
                avg_loss.add(loss)

        reward = game.rewards
        # agent.save_best(reward)
        agent.save()
        agent.scheduler.step()
        avg_reward.add(reward)

        # moving averages
        text = [
            f'steps: {agent.step_cnt}',
            f'game epochs: {i}/{episodes}',
            f'train loss: {float(avg_loss):.5}',
            f'avg reward: {float(avg_reward):.5}',
            # f'best reward: {float(agent.best_reward):.5}',
            f'reward: {float(reward):.5}',
            f'epsilon: {agent.epsilon:.3}',
        ]
        log.info(', '.join(text), update=True)

        if agent.step_cnt % save_interval == 0:
            history.record({
                'steps': agent.step_cnt,
                'avg_reward': float(avg_reward),
                'loss': float(avg_loss),
            })

    game.env.close()
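# --- Hedged sketch (an assumption, not part of the original code) -----------
# The training loop above treats MovingAverage(100) as a fixed-size window that
# supports .add(value) and float(); a minimal stand-in with that interface is
# sketched below in case the real utility class is not at hand.
from collections import deque

class MovingAverageSketch:
    """Average of the most recent `size` values; 0.0 while empty."""

    def __init__(self, size: int):
        self.values = deque(maxlen=size)

    def add(self, value: float) -> None:
        self.values.append(float(value))

    def __float__(self) -> float:
        return sum(self.values) / len(self.values) if self.values else 0.0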
def optimize_epoch(self, model, criterion, optimizer, loader, epoch, is_validation=False):
    top1 = []
    top5 = []
    loss_value = []
    top1.append(TotalAverage())
    top5.append(TotalAverage())
    loss_value.append(TotalAverage())
    batch_time = MovingAverage(intertia=0.9)
    now = time.time()
    if is_validation is False:
        model.train()
        lr = self.lr_schedule(epoch)
        for pg in optimizer.param_groups:
            pg['lr'] = lr
        print("Starting epoch %s" % epoch)
    else:
        model.eval()
    l_dl = len(loader)
    for iter, q in enumerate(loader):
        if len(q) == 3:
            input, label, _s = q
        else:
            input, label = q
        input = input.to(self.dev)
        label = label.to(self.dev)
        mass = input.size(0)
        if is_validation and args.tencrops:
            bs, ncrops, c, h, w = input.size()
            input_tensor = input.view(-1, c, h, w)
            input = input_tensor.to(self.dev)
            predictions = model(input)
            predictions = torch.squeeze(predictions.view(bs, ncrops, -1).mean(1))
        else:
            input = input.to(self.dev)
            predictions = model(input)
        loss = criterion(predictions, label)
        top1_, top5_ = accuracy(predictions, label, topk=(1, 5))
        top1[0].update(top1_.item(), mass)
        top5[0].update(top5_.item(), mass)
        loss_value[0].update(loss.item(), mass)
        if is_validation is False:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        batch_time.update(time.time() - now)
        now = time.time()
        if iter % 50 == 0:
            print(f"{'V' if is_validation else 'T'} Loss: {loss_value[0].avg:03.3f} "
                  f"Top1: {top1[0].avg:03.1f} Top5: {top5[0].avg:03.1f} "
                  f"{epoch: 3}/{iter:05}/{l_dl:05} Freq: {mass / batch_time.avg:04.1f}Hz:",
                  end='\r', flush=True)
    if is_validation:
        print("validation")
        print("val-top1: %s" % top1[0].avg)
        print("val-top5: %s" % top5[0].avg)
    if self.writer:
        str_ = 'LP/val' if is_validation else 'LP/train'
        self.writer.add_scalar(f'{str_}/top1', top1[0].avg, epoch)
        self.writer.add_scalar(f'{str_}/top5', top5[0].avg, epoch)
        self.writer.add_scalar(f'{str_}/Freq', mass / batch_time.avg, epoch)
    return {
        "loss": [x.avg for x in loss_value],
        "top1": [x.avg for x in top1],
        "top5": [x.avg for x in top5]
    }
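# --- Hedged illustration (not part of the original code) --------------------
# The ten-crop branch above flattens (bs, ncrops, C, H, W) into a single batch,
# runs the model once, then averages the class scores over the crops.  A tiny
# shape check of that reshape/average, with made-up sizes:
import torch

bs, ncrops, num_classes = 2, 10, 5
logits = torch.randn(bs * ncrops, num_classes)            # model output on all crops
avg = torch.squeeze(logits.view(bs, ncrops, -1).mean(1))  # one score vector per image
print(avg.shape)                                          # torch.Size([2, 5])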
def train(max_iter, snapshot, dataset, data_dir, setname, mu, lr, bs, tfmodel_folder,
          conv5, model_name, stop_iter, last_iter, pre_emb=False, finetune=False,
          pretrain_path='', emb_dir=''):
    global args
    iters_per_log = 100
    data_folder = os.path.join(data_dir, dataset + '/' + setname + '_batch/')
    data_prefix = dataset + '_' + setname
    snapshot_file = os.path.join(tfmodel_folder, dataset + '_finetune')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 1917498
    emb_name = dataset

    if pre_emb:
        print("Use pretrained Embeddings.")
        model = get_segmentation_model(model_name,
                                       mode='train',
                                       vocab_size=vocab_size,
                                       start_lr=lr,
                                       batch_size=bs,
                                       conv5=conv5,
                                       emb_name=emb_name,
                                       emb_dir=emb_dir,
                                       freeze_bn=args.freeze_bn,
                                       is_aug=args.is_aug)
    else:
        model = get_segmentation_model(model_name,
                                       mode='train',
                                       vocab_size=vocab_size,
                                       start_lr=lr,
                                       batch_size=bs,
                                       conv5=conv5)

    if finetune:
        weights = os.path.join(pretrain_path)
        snapshot_loader = tf.train.Saver()
    else:
        weights = './data/weights/deeplab_resnet_init.ckpt'
        print("Loading pretrained weights from {}".format(weights))
        load_var = {
            var.op.name: var
            for var in tf.global_variables()
            if var.name.startswith('res') or var.name.startswith('bn')
            or var.name.startswith('conv1') or var.name.startswith('Adam')
        }
        snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=4)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    snapshot_loader.restore(sess, weights)

    # Log tensorboard
    train_writer = tf.summary.FileWriter(args.log_dir + '/train', sess.graph)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    seq_len_batch = np.zeros(bs, dtype=np.int32)
    valid_idx_batch = np.zeros(bs, dtype=np.int32)

    # reader is only constructed for the 'refvos' dataset here
    if dataset == 'refvos':
        reader = data_reader_refvos.DataReader(im_dir=args.im_dir,
                                               mask_dir=args.mask_dir,
                                               train_metadata=args.meta)

    # for time calculation
    last_time = time.time()
    time_avg = MovingAverage()
    meanIoU = 0
    last_epoch = (last_iter * bs) // reader.num_batch
    for n_iter in range(last_iter + 1, max_iter):

        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            # mask = batch['mask_batch']
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)
            seq_len = batch['seq_length']

            im = im[:, :, ::-1]
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask
            seq_len_batch[n_batch] = seq_len

        _, train_step, summary = sess.run(
            [
                model.train,
                model.train_step,
                model.merged,
            ],
            feed_dict={
                model.words: text_batch,
                model.im: image_batch,
                model.target_fine: mask_batch,
                model.seq_len: seq_len_batch,
            })

        # cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        # accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        # avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        # avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        # avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        # IoU = compute_meanIoU(scores_val, mask_batch)
        # meanIoU += IoU

        # timing
        cur_time = time.time()
        elapsed = cur_time - last_time
        last_time = cur_time
        train_writer.add_summary(summary, train_step)

        # if n_iter % iters_per_log == 0:
        #     print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
        #           % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
        #     print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
        #           % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
        #     print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
        #           % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
        #     print('iter = %d, meanIoU = %f (neg)'
        #           % (n_iter, meanIoU / iters_per_log))
        #     meanIoU = 0
        #     time_avg.add(elapsed)
        #     print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s'
        #           % (n_iter, elapsed, time_avg.get_avg(), model_name))

        # Save snapshot
        if (n_iter * bs // reader.num_batch > last_epoch):
            last_epoch += 1
            snapshot_saver.save(sess, snapshot_file, global_step=train_step)
            print('snapshot saved at iteration {}'.format(n_iter))
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file, global_step=train_step)
            print('snapshot saved at iteration {}'.format(n_iter))

        if (n_iter + 1) >= stop_iter:
            print('stop training at iter ' + str(stop_iter))
            break

    print('Optimization done.')
def __init__(self):
    self.avg_reward = MovingAverage(100)
    self.avg_loss = MovingAverage(100)
def optimize_epoch(self, model, optimizer, loader, epoch, validation=False):
    print(f"Starting epoch {epoch}, validation: {validation} " + "=" * 30)
    loss_value = AverageMeter()
    rotacc_value = AverageMeter()

    # house keeping
    if not validation:
        model.train()
        lr = self.lr_schedule(epoch)
        for pg in optimizer.param_groups:
            pg['lr'] = lr
    else:
        model.eval()

    XE = torch.nn.CrossEntropyLoss().to(self.dev)
    l_dl = 0  # len(loader)
    now = time.time()
    batch_time = MovingAverage(intertia=0.9)
    for iter, (data, label, selected) in enumerate(loader):
        now = time.time()
        if not validation:
            niter = epoch * len(loader.dataset) + iter * args.batch_size
        data = data.to(self.dev)
        mass = data.size(0)
        where = np.arange(mass, dtype=int) * 4
        data = data.view(mass * 4, 3, data.size(3), data.size(4))
        rotlabel = torch.tensor(range(4)).view(-1, 1).repeat(mass, 1).view(-1).to(self.dev)
        #################### train CNN ###########################################
        if not validation:
            final = model(data)
            if args.onlyrot:
                loss = torch.Tensor([0]).to(self.dev)
            else:
                if args.hc == 1:
                    loss = XE(final[0][where], self.L[selected])
                else:
                    loss = torch.mean(
                        torch.stack([XE(final[k][where], self.L[k, selected])
                                     for k in range(args.hc)]))
            rotloss = XE(final[-1], rotlabel)
            pred = torch.argmax(final[-1], 1)
            total_loss = loss + rotloss
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            correct = (pred == rotlabel).to(torch.float)
            rotacc = correct.sum() / float(mass)
        else:
            final = model(data)
            pred = torch.argmax(final[-1], 1)
            correct = (pred == rotlabel.cuda()).to(torch.float)
            rotacc = correct.sum() / float(mass)
            total_loss = torch.Tensor([0])
            loss = torch.Tensor([0])
            rotloss = torch.Tensor([0])

        rotacc_value.update(rotacc.item(), mass)
        loss_value.update(total_loss.item(), mass)
        batch_time.update(time.time() - now)
        now = time.time()
        print(f"Loss: {loss_value.avg:03.3f}, RotAcc: {rotacc_value.avg:03.3f} | "
              f"{epoch: 3}/{iter:05}/{l_dl:05} Freq: {mass / batch_time.avg:04.1f}Hz:",
              end='\r', flush=True)

        # every few iter logging
        if iter % args.logiter == 0:
            if not validation:
                print(niter, f" Loss: {loss.item():.3f}", flush=True)
                with torch.no_grad():
                    if not args.onlyrot:
                        pred = torch.argmax(final[0][where], dim=1)
                        pseudoloss = XE(final[0][where], pred)
                if not args.onlyrot:
                    self.writer.add_scalar('Pseudoloss', pseudoloss.item(), niter)
                self.writer.add_scalar('lr', self.lr_schedule(epoch), niter)
                self.writer.add_scalar('Loss', loss.item(), niter)
                self.writer.add_scalar('RotLoss', rotloss.item(), niter)
                self.writer.add_scalar('RotAcc', rotacc.item(), niter)
                if iter > 0:
                    self.writer.add_scalar('Freq(Hz)', mass / (time.time() - now), niter)

    # end of epoch logging
    if self.writer and (epoch % self.log_interval == 0):
        write_conv(self.writer, model, epoch)
    if validation:
        print('val Rot-Acc: ', rotacc_value.avg)
        self.writer.add_scalar('val Rot-Acc', rotacc_value.avg, epoch)

    files.save_checkpoint_all(self.checkpoint_dir, model, args.arch,
                              optimizer, self.L, epoch, lowest=False)
    return {'loss': loss_value.avg}
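# --- Hedged illustration (not part of the original code) --------------------
# The indexing above assumes the loader delivers each image as 4 stacked
# rotations, so that `rotlabel` repeats [0, 1, 2, 3] per image and `where`
# selects the first (unrotated) copy for the pseudo-label loss.  A tiny check
# of those two expressions with mass = 3:
import numpy as np
import torch

mass = 3
rotlabel = torch.tensor(range(4)).view(-1, 1).repeat(mass, 1).view(-1)
where = np.arange(mass, dtype=int) * 4
print(rotlabel.tolist())  # [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
print(where.tolist())     # [0, 4, 8]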