def step(phase, epoch, opt, dataloader, model, criterion, optimizer=None):
    # Choose the phase (eval mode disables Dropout and uses running BatchNorm statistics)
    if phase == 'train':
        model.train()
    else:
        model.eval()
    # Initialize the meters
    Loss, Err, Acc = AverageMeter(), AverageMeter(), AverageMeter()
    Acc_tot = AverageMeter()
    seqlen = set_sequence_length(opt.MinSeqLenIndex, opt.MaxSeqLenIndex, epoch)
    # Show iteration progress with Bar
    nIters = len(dataloader)
    bar = Bar(f'{opt.expID}', max=nIters)
    # Loop over the dataloader
    for i, gt in enumerate(dataloader):
        # Unpack the batch and move the tensors to the GPU
        input, label = gt['input'], gt['label']
        gtpts, center, scale = gt['gtpts'], gt['center'], gt['scale']
        input_var = input[:, 0, ].float().cuda(device=opt.device, non_blocking=True)
        label_var = label.float().cuda(device=opt.device, non_blocking=True)
        Loss.reset()
        Err.reset()
        Acc.reset()
        ### if it is 3D, nOutput may be needed to get a different target, not just the heatmap
        ## Forward propagation
        output = model(input_var)
        ## Get model outputs and calculate the loss
        loss = criterion(output, label_var)
        ## Backward + optimize only in the training phase
        if phase == 'train':
            ## Zero the parameter gradients
            optimizer.zero_grad()
            loss.mean().backward()
            optimizer.step()
        Loss.update(loss.sum())
        ## Compute the accuracy
        # acc = Accuracy(opt, output.data.cpu().numpy(), label_var.data.cpu().numpy())
        ref = get_ref(opt.dataset, scale)
        for j in range(opt.preSeqLen):
            if j <= seqlen:
                pred_hm = get_preds(output[:, j, ].float())
                pred_pts = original_coordinate(pred_hm, center[:, ], scale, opt.outputRes)
                err, ne = error(pred_pts, gtpts[:, j, ], ref)
                acc, na = accuracy(pred_pts, gtpts[:, j, ], ref)
                # assert ne == na, "ne must be the same as na"
                Err.update(err)
                Acc.update(acc)
                Acc_tot.update(acc)
        Bar.suffix = f'{phase}[{epoch}][{i}/{nIters}]|Total:{bar.elapsed_td}' \
                     f'|ETA:{bar.eta_td}|Loss:{Loss.val:.6f}|Err:{Err.avg:.6f}|Acc:{Acc.avg:.6f}'
        bar.next()
    bar.finish()
    return Loss.val, Acc_tot.avg
def step(args, split, epoch, loader, model, optimizer = None, M = None, f = None, tag = None): losses, mpjpe, mpjpe_r = AverageMeter(), AverageMeter(), AverageMeter() viewLosses, shapeLosses, supLosses = AverageMeter(), AverageMeter(), AverageMeter() if split == 'train': model.train() else: model.eval() bar = Bar('{}'.format(ref.category), max=len(loader)) nViews = loader.dataset.nViews for i, (input, target, meta) in enumerate(loader): input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) output = model(input_var) loss = ShapeConsistencyCriterion(nViews, supWeight = 1, unSupWeight = args.shapeWeight, M = M)(output, target_var, torch.autograd.Variable(meta)) if split == 'test': for j in range(input.numpy().shape[0]): img = (input.numpy()[j] * 255).transpose(1, 2, 0).astype(np.uint8) cv2.imwrite('{}/img_{}/{}.png'.format(args.save_path, tag, i * input.numpy().shape[0] + j), img) gt = target.cpu().numpy()[j] pred = (output.data).cpu().numpy()[j] vis = meta.cpu().numpy()[j][5:] for t in range(ref.J): f.write('{} {} {} '.format(pred[t * 3], pred[t * 3 + 1], pred[t * 3 + 2])) f.write('\n') for t in range(ref.J): f.write('{} {} {} '.format(gt[t, 0], gt[t, 1], gt[t, 2])) f.write('\n') if args.saveVis: for t in range(ref.J): f.write('{} 0 0 '.format(vis[t])) f.write('\n') mpjpe_this = accuracy(output.data, target, meta) mpjpe_r_this = accuracy_dis(output.data, target, meta) shapeLoss = shapeConsistency(output.data, meta, nViews, M, split = split) losses.update(loss.data[0], input.size(0)) shapeLosses.update(shapeLoss, input.size(0)) mpjpe.update(mpjpe_this, input.size(0)) mpjpe_r.update(mpjpe_r_this, input.size(0)) if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() Bar.suffix = '{split:10}: [{0:2}][{1:3}/{2:3}] | Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | shapeLoss {shapeLoss.avg:.6f} | AE {mpjpe.avg:.6f} | ShapeDis {mpjpe_r.avg:.6f}'.format(epoch, i, len(loader), total=bar.elapsed_td, eta=bar.eta_td, loss=losses, mpjpe=mpjpe, split = split, shapeLoss = shapeLosses, mpjpe_r = mpjpe_r) bar.next() bar.finish() return mpjpe.avg, losses.avg, shapeLosses.avg
def train(train_loader, m, criterion, optimizer, writer): lossLogger = DataLogger() accLogger = DataLogger() m.train() train_loader_desc = tqdm(train_loader) for i, (inps, labels, setMask, img_info) in enumerate(train_loader_desc): if device != "cpu": inps = inps.cuda().requires_grad_() labels = labels.cuda() setMask = setMask.cuda() else: inps = inps.requires_grad_() out = m(inps) loss = criterion(out.mul(setMask), labels) acc = accuracy(out.data.mul(setMask), labels.data, train_loader.dataset) accLogger.update(acc[0], inps.size(0)) lossLogger.update(loss.item(), inps.size(0)) optimizer.zero_grad() if mix_precision: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() if config.sparse: for mod in m.modules(): if isinstance(mod, nn.BatchNorm2d): mod.weight.grad.data.add_(config.sparse_s * torch.sign(mod.weight.data)) optimizer.step() opt.trainIters += 1 # Tensorboard writer.add_scalar('Train/Loss', lossLogger.avg, opt.trainIters) writer.add_scalar('Train/Acc', accLogger.avg, opt.trainIters) # TQDM train_loader_desc.set_description( 'loss: {loss:.8f} | acc: {acc:.2f}'.format(loss=lossLogger.avg, acc=accLogger.avg * 100)) train_loader_desc.close() return lossLogger.avg, accLogger.avg
def train(train_dataloader, model, criterion, optimizer, epoch): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to train mode model.train() end = time.time() for i, data in enumerate(train_dataloader): # measure data loading time data_time.update(time.time() - end) # get the inputs; data is a list of [inputs, labels] inputs, targets = data inputs = inputs.to(device) targets = targets.to(device) # compute output output = model(inputs) loss = criterion(output, targets) # measure accuracy and record loss prec1, prec5 = accuracy(output, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(prec1, inputs.size(0)) top5.update(prec5, inputs.size(0)) # compute gradients in a backward pass optimizer.zero_grad() loss.backward() # Call step of optimizer to update model params optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % 5 == 0: print( f"Epoch [{epoch + 1}] [{i}/{len(train_dataloader)}]\t" f"Time {data_time.val:.3f} ({data_time.avg:.3f})\t" f"Loss {loss.item():.4f}\t" f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t" f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})", end="\r") torch.save(model.state_dict(), f"./checkpoints/{opt.datasets}_epoch_{epoch + 1}.pth")
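# Most of the loops in this section rely on two small utilities that are not
# shown here: an AverageMeter that tracks a running average and an accuracy()
# helper that computes top-k precision. The sketch below follows the
# conventions of the torchvision ImageNet reference example; the project-local
# versions (and the DataLogger used by the pose loops) may differ in detail.
import torch

class AverageMeter:
    """Tracks the latest value, sum, count, and running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes the top-k accuracy (in percent) for classification logits."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res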
def train(train_loader, m, criterion, optimizer, writer):
    # Loggers
    lossLogger = DataLogger()
    accLogger = DataLogger()
    m.train()

    train_loader_desc = tqdm(train_loader)

    for i, (inps, labels, setMask, imgset) in enumerate(train_loader_desc):
        # Move tensors to the GPU and enable autograd on the inputs
        inps = inps.cuda().requires_grad_()
        labels = labels.cuda()
        setMask = setMask.cuda()

        out = m(inps)

        # Compute the loss
        loss = criterion(out.mul(setMask), labels)

        # Compute the accuracy
        acc = accuracy(out.data.mul(setMask), labels.data, train_loader.dataset)

        accLogger.update(acc[0], inps.size(0))
        lossLogger.update(loss.item(), inps.size(0))

        # Zero the parameter gradients
        optimizer.zero_grad()
        # Backward pass
        loss.backward()
        optimizer.step()

        opt.trainIters += 1
        # Write the running metrics to TensorBoard
        writer.add_scalar('Train/Loss', lossLogger.avg, opt.trainIters)
        writer.add_scalar('Train/Acc', accLogger.avg, opt.trainIters)

        # TQDM
        train_loader_desc.set_description(
            'loss: {loss:.8f} | acc: {acc:.2f}'.format(
                loss=lossLogger.avg,
                acc=accLogger.avg * 100))

    train_loader_desc.close()

    return lossLogger.avg, accLogger.avg
def train(train_loader, m, criterion, optimizer, writer):
    lossLogger = DataLogger()
    accLogger = DataLogger()
    m.train()

    # Iterate manually so a broken batch can be skipped without ending the epoch
    total = len(train_loader)
    total_desc = tqdm(range(total))
    loader_iter = iter(train_loader)

    for ii in total_desc:
        try:
            inps, labels, setMask, imgset = next(loader_iter)
        except Exception as e:
            print('Error:', ii, e)
            continue
        inps = inps.cuda().requires_grad_()
        labels = labels.cuda()
        # setMask = setMask.cuda()

        out = m(inps)

        loss = criterion(out, labels)

        acc = accuracy(out.data, labels.data, train_loader.dataset)

        accLogger.update(acc[0], inps.size(0))
        lossLogger.update(loss.item(), inps.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        opt.trainIters += 1

        # Tensorboard
        writer.add_scalar('Train/Loss', lossLogger.avg, opt.trainIters)
        writer.add_scalar('Train/Acc', accLogger.avg, opt.trainIters)

        # TQDM
        total_desc.set_description(
            'loss: {loss:.8f} | acc: {acc:.2f}'.format(
                loss=lossLogger.avg,
                acc=accLogger.avg * 100))

    total_desc.close()
    return lossLogger.avg, accLogger.avg
def valid(val_loader, m, criterion, optimizer, writer): draw_kp = False lossLogger = DataLogger() accLogger = DataLogger() m.eval() val_loader_desc = tqdm(val_loader) for i, (inps, labels, setMask, img_info) in enumerate(val_loader_desc): if device != "cpu": inps = inps.cuda() labels = labels.cuda() setMask = setMask.cuda() with torch.no_grad(): out = m(inps) loss = criterion(out.mul(setMask), labels) flip_out = m(flip(inps)) flip_out = flip(shuffleLR(flip_out, val_loader.dataset)) out = (flip_out + out) / 2 acc = accuracy(out.mul(setMask), labels, val_loader.dataset) if not draw_kp: draw_kp = True kps_img = draw_kps(out) # writer.add lossLogger.update(loss.item(), inps.size(0)) accLogger.update(acc[0], inps.size(0)) opt.valIters += 1 # Tensorboard writer.add_scalar('Valid/Loss', lossLogger.avg, opt.valIters) writer.add_scalar('Valid/Acc', accLogger.avg, opt.valIters) val_loader_desc.set_description( 'loss: {loss:.8f} | acc: {acc:.2f}'.format(loss=lossLogger.avg, acc=accLogger.avg * 100)) val_loader_desc.close() return lossLogger.avg, accLogger.avg
def metrics(actual_output, pred_output): # variables to return acc = 0 acc_dict = {'avg': 0, 'score': 0, 'zero': 0} prec = 0 reca = 0 for i, j in zip(actual_output, pred_output): temp = accuracy(i, j) acc = acc + temp[0] acc_dict[temp[1]] += 1 prec = prec + precision(i, j) reca = reca + recall(i, j) return acc, prec, reca, acc_dict
def valid(val_loader, m, criterion, optimizer, writer): lossLogger = DataLogger() accLogger = DataLogger() m.eval() # val_loader_desc = tqdm(val_loader) for i, (inps, labels, setMask, imgset) in enumerate(val_loader): inps = inps.cuda() labels = labels.cuda() setMask = setMask.cuda() with torch.no_grad(): out = m(inps) loss = criterion(out.mul(setMask), labels) flip_out = m(flip_v(inps, cuda=True)) flip_out = flip_v(shuffleLR_v(flip_out, val_loader.dataset, cuda=True), cuda=True) out = (flip_out + out) / 2 acc = accuracy(out.mul(setMask), labels, val_loader.dataset) lossLogger.update(loss.item(), inps.size(0)) accLogger.update(acc[0], inps.size(0)) opt.valIters += 1 # Tensorboard writer.add_scalar('Valid/Loss', lossLogger.avg, opt.valIters) writer.add_scalar('Valid/Acc', accLogger.avg, opt.valIters) # val_loader_desc.set_description( # 'loss: {loss:.8f} | acc: {acc:.2f}'.format( # loss=lossLogger.avg, # acc=accLogger.avg * 100) # ) # val_loader_desc.set_postfix( # loss='%.2e' % lossLogger.avg, acc='%.2f%%' % (accLogger.avg * 100)) # val_loader_desc.close() return lossLogger.avg, accLogger.avg
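# The validation loops above average each prediction with a horizontally
# flipped pass (flip/shuffleLR and their _v variants). The sketch below shows
# the idea only: mirror the input, run the model, mirror the heatmaps back,
# swap left/right joint channels, and average with the un-flipped output.
# flip_pairs is a hypothetical list of (left, right) channel indices that the
# real code obtains from the dataset object.
import torch

def flip_w(t):
    # Mirror images or heatmaps along the width (last) dimension.
    return torch.flip(t, dims=[-1])

def swap_lr_channels(heatmaps, flip_pairs):
    # Swap left/right keypoint channels after mirroring.
    out = heatmaps.clone()
    for left, right in flip_pairs:
        out[:, left] = heatmaps[:, right]
        out[:, right] = heatmaps[:, left]
    return out

def flip_test(model, inps, flip_pairs):
    with torch.no_grad():
        out = model(inps)
        flipped = swap_lr_channels(flip_w(model(flip_w(inps))), flip_pairs)
        return (out + flipped) / 2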
def train(train_loader, m, criterion, optimizer, writer): lossLogger = DataLogger() accLogger = DataLogger() f = open("acc_loss.csv", "w+") f.write('epoch,acc,loss,eval_acc\n') f.close() m.train() train_loader_desc = tqdm(train_loader) for i, (inps, labels, setMask, imgset) in enumerate(train_loader_desc): inps = inps.cuda().requires_grad_() labels = labels.cuda() setMask = setMask.cuda() out = m(inps) # embed() loss = criterion(out.mul(setMask), labels) acc = accuracy(out.data.mul(setMask), labels.data, train_loader.dataset) accLogger.update(acc[0], inps.size(0)) lossLogger.update(loss.item(), inps.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() opt.trainIters += 1 # Tensorboard writer.add_scalar('Train/Loss', lossLogger.avg, opt.trainIters) writer.add_scalar('Train/Acc', accLogger.avg, opt.trainIters) # writer.export_scalars_to_json("../log/all_scalars.json") # TQDM # train_loader_desc.set_description( # 'loss: {loss:.5f} | acc: {acc:.2f}'.format( # loss=lossLogger.avg, # acc=accLogger.avg) # ) train_loader_desc.close() return lossLogger.avg, accLogger.avg
def batch_inference(self, images, targets=None, backward=True): if self.use_cuda: images = images.cuda() if targets is not None: targets = targets.cuda() if (not self.backbone.training and not self.head.training) or targets is None: features = self.backbone(images) return features features = self.backbone(images) outputs = self.head(features, targets.long()) total_loss = 0 losses = self.loss_forward(self.criterions, features, outputs, targets) accuracy_top_1, accuracy_top_5 = accuracy(outputs, targets, (1, 5)) total_loss = torch.stack(losses).mul(self.loss_weights).sum() if backward: self.optimizer.zero_grad() total_loss.backward() apply_weight_decay(self.backbone) apply_weight_decay(self.head) self.optimizer.step() losses_value = [] for index, criterion_name in enumerate(self.criterions.keys()): losses_value.append(losses[index].item()) total_loss_value = total_loss.item() accuracy_top_1_value = accuracy_top_1.item() accuracy_top_5_value = accuracy_top_5.item() for index, criterion_name in enumerate(self.criterions.keys()): self.loss_meters[index].update(losses_value[index], targets.size(0)) self.total_losses_meter.update(total_loss_value, targets.size(0)) self.accuracy_top_1.update(accuracy_top_1_value, targets.size(0)) self.accuracy_top_5.update(accuracy_top_5_value, targets.size(0)) return outputs
def train(train_loader, m, criterion, optimizer, writer): lossLogger = DataLogger() accLogger = DataLogger() m.train() # train_loader_desc = tqdm(train_loader) for i, (inps, labels, setMask, imgset) in enumerate(train_loader): inps = inps.cuda().requires_grad_() #[32,17,80,64] labels = labels.cuda() #[32,17,80,64] setMask = setMask.cuda() #[32,17,80,64] out = m(inps) #[32,17,80,64] loss = criterion(out.mul(setMask), labels) acc = accuracy(out.data.mul(setMask), labels.data, train_loader.dataset) accLogger.update(acc[0], inps.size(0)) lossLogger.update(loss.item(), inps.size(0)) # train_loader_desc.set_postfix( # loss='%.2e' % lossLogger.avg, acc='%.2f%%' % (accLogger.avg * 100)) optimizer.zero_grad() loss.backward() optimizer.step() opt.trainIters += 1 # Tensorboard writer.add_scalar('Train/Loss', lossLogger.avg, opt.trainIters) writer.add_scalar('Train/Acc', accLogger.avg, opt.trainIters) # train_loader_desc.close() return lossLogger.avg, accLogger.avg
def train(model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer): print( " ->->->->->->->->->-> One epoch with Adversarial training (TRADES) <-<-<-<-<-<-<-<-<-<-" ) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") top5 = AverageMeter("Acc_5", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch), ) model.train() end = time.time() dataloader = train_loader if sm_loader is None else zip( train_loader, sm_loader) for i, data in enumerate(dataloader): if sm_loader: images, target = ( torch.cat([d[0] for d in data], 0).to(device), torch.cat([d[1] for d in data], 0).to(device), ) else: images, target = data[0].to(device), data[1].to(device) # basic properties of training data if i == 0: print( images.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print( f"Training images range: {[torch.min(images), torch.max(images)]}" ) output = model(images) # calculate robust loss loss = pgd_loss( model=model, x_natural=images, y=target, device=device, optimizer=optimizer, step_size=args.step_size, epsilon=args.epsilon, perturb_steps=args.num_steps, beta=args.beta, clip_min=args.clip_min, clip_max=args.clip_max, distance=args.distance, ) # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), images.size(0)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard(writer, "train", epoch * len(train_loader) + i) # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(images[0:len(images) // 4]), )
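# pgd_loss above is project-specific. As a reference, this is a minimal sketch
# of the TRADES objective it is described as computing (Zhang et al., 2019):
# natural cross-entropy plus beta times the KL divergence between predictions
# on a PGD-perturbed input and on the clean input. The name, signature, and
# default hyperparameters here are illustrative, not the project's.
import torch
import torch.nn as nn
import torch.nn.functional as F

def trades_loss(model, x, y, optimizer, step_size=0.007, epsilon=0.031,
                perturb_steps=10, beta=6.0):
    criterion_kl = nn.KLDivLoss(reduction="sum")
    model.eval()
    # Inner maximization: find the perturbation that maximizes the KL term.
    x_adv = x.detach() + 0.001 * torch.randn_like(x)
    for _ in range(perturb_steps):
        x_adv.requires_grad_()
        with torch.enable_grad():
            loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                   F.softmax(model(x), dim=1))
        grad = torch.autograd.grad(loss_kl, [x_adv])[0]
        x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
        x_adv = torch.min(torch.max(x_adv, x - epsilon), x + epsilon)
        x_adv = torch.clamp(x_adv, 0.0, 1.0)
    model.train()
    optimizer.zero_grad()
    # Outer minimization: natural CE plus the beta-weighted robust KL term.
    logits = model(x)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / x.size(0)) * criterion_kl(
        F.log_softmax(model(x_adv), dim=1), F.softmax(logits, dim=1))
    return loss_natural + beta * loss_robust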
def train(model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer): epsilon = set_epsilon(args, epoch) k = args.mixtraink alpha = 0.8 iw = set_interval_weight(args, epoch) print(" ->->->->->->->->->-> One epoch with MixTrain{} (SYM {:.3f})" " <-<-<-<-<-<-<-<-<-<-".format(k, epsilon)) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") sym_losses = AverageMeter("Sym_Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") sym1 = AverageMeter("Sym1", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, sym_losses, top1, sym1], prefix="Epoch: [{}]".format(epoch), ) model.train() end = time.time() dataloader = train_loader if sm_loader is None else zip( train_loader, sm_loader) for i, data in enumerate(dataloader): if sm_loader: images, target = ( torch.cat([d[0] for d in data], 0).to(device), torch.cat([d[1] for d in data], 0).to(device), ) else: images, target = data[0].to(device), data[1].to(device) # basic properties of training data if i == 0: print( images.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print( f"Training images range: {[torch.min(images), torch.max(images)]}" ) output = model(images) ce = nn.CrossEntropyLoss()(output, target) if (np.random.uniform() <= alpha): r = np.random.randint(low=0, high=images.shape[0], size=k) rce, rerr = sym_interval_analyze( model, epsilon, images[r], target[r], use_cuda=torch.cuda.is_available(), parallel=False) #print("sym:", rce.item(), ce.item()) loss = iw * rce + ce sym_losses.update(rce.item(), k) sym1.update((1 - rerr) * 100., images.size(0)) else: loss = ce # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) top1.update(acc1[0], images.size(0)) losses.update(ce.item(), images.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard(writer, "train", epoch * len(train_loader) + i) # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(images[0:len(images) // 4]), )
def evaluate(val_loader, model, criterion, test=None):
    '''
    Model evaluation
    :param val_loader:
    :param model:
    :param criterion:
    :param test:
    :return:
    '''
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)

    #################
    # val the model
    #################
    model.eval()
    end = time.time()

    # Evaluate the model batch by batch
    ## define the progress bar
    bar = Bar('Processing', max=len(val_loader))
    for batch_index, (inputs, targets) in enumerate(val_loader):
        data_time.update(time.time() - end)

        # move tensors to GPU if cuda is available
        inputs, targets = inputs.to(device), targets.to(device)

        # model prediction
        outputs = model(inputs)

        # compute the loss
        loss = criterion(outputs, targets)

        # compute the accuracy and update the meters
        prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 1))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

        # collect data for the confusion matrix
        targets = targets.data.cpu().numpy()  # ground-truth labels
        predic = torch.max(outputs.data, 1)[1].cpu().numpy()  # predicted labels
        labels_all = np.append(labels_all, targets)  # accumulate
        predict_all = np.append(predict_all, predic)

        ## pack the main statistics into the progress bar
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
            batch=batch_index + 1,
            size=len(val_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg)
        bar.next()
    bar.finish()

    if test:
        return (losses.avg, top1.avg, predict_all, labels_all)
    else:
        return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer):
    '''
    Model training
    :param train_loader:
    :param model:
    :param criterion:
    :param optimizer:
    :return:
    '''
    # define the meters that track running statistics
    data_time = AverageMeter()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()

    #################
    # train the model
    #################
    model.train()
    # train the model batch by batch
    ## define the progress bar
    bar = Bar('Processing', max=len(train_loader))
    for batch_index, (inputs, targets) in enumerate(train_loader):
        data_time.update(time.time() - end)

        # move tensors to GPU if cuda is available
        inputs, targets = inputs.to(device), targets.to(device)

        # clear the gradients with zero_grad before the backward pass
        optimizer.zero_grad()

        # model prediction
        outputs = model(inputs)

        # compute the loss
        loss = criterion(outputs, targets)

        # backward pass:
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer.step()

        # compute the accuracy and update the meters
        prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 1))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        ## pack the main statistics into the progress bar
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
            batch=batch_index + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg)
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer): print( " ->->->->->->->->->-> One epoch with Natural training <-<-<-<-<-<-<-<-<-<-" ) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") top5 = AverageMeter("Acc_5", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch), ) model.train() end = time.time() dataloader = train_loader if sm_loader is None else zip( train_loader, sm_loader) for i, data in enumerate(dataloader): if sm_loader: images, target = ( torch.cat([d[0] for d in data], 0).to(device), torch.cat([d[1] for d in data], 0).to(device), ) else: images, target = data[0].to(device), data[1].to(device) # basic properties of training if i == 0: print( images.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print("Pixel range for training images : [{}, {}]".format( torch.min(images).data.cpu().numpy(), torch.max(images).data.cpu().numpy(), )) output = model(images) loss = criterion(output, target) # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), images.size(0)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard(writer, "train", epoch * len(train_loader) + i) # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(images[0:len(images) // 4]), )
def step(args, split, epoch, loader, model, optimizer=None, M=None, f=None, tag=None): losses, mpjpe, mpjpe_r = AverageMeter(), AverageMeter(), AverageMeter() viewLosses, shapeLosses, supLosses = AverageMeter(), AverageMeter( ), AverageMeter() if split == 'train': model.train() else: model.eval() bar = Bar('{}'.format(ref.category), max=len(loader)) nViews = loader.dataset.nViews for i, (input, target, meta) in enumerate(loader): input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) output = model(input_var) loss = ShapeConsistencyCriterion(nViews, supWeight=1, unSupWeight=args.shapeWeight, M=M)(output, target_var, torch.autograd.Variable(meta)) if split == 'test': for j in range(input.numpy().shape[0]): img = (input.numpy()[j] * 255).transpose(1, 2, 0).astype(np.uint8) cv2.imwrite( '{}/img_{}/{}.png'.format(args.save_path, tag, i * input.numpy().shape[0] + j), img) gt = target.cpu().numpy()[j] pred = (output.data).cpu().numpy()[j] vis = meta.cpu().numpy()[j][5:] for t in range(ref.J): f.write('{} {} {} '.format(pred[t * 3], pred[t * 3 + 1], pred[t * 3 + 2])) f.write('\n') for t in range(ref.J): f.write('{} {} {} '.format(gt[t, 0], gt[t, 1], gt[t, 2])) f.write('\n') if args.saveVis: for t in range(ref.J): f.write('{} 0 0 '.format(vis[t])) f.write('\n') mpjpe_this = accuracy(output.data, target, meta) mpjpe_r_this = accuracy_dis(output.data, target, meta) shapeLoss = shapeConsistency(output.data, meta, nViews, M, split=split) losses.update(loss.data[0], input.size(0)) shapeLosses.update(shapeLoss, input.size(0)) mpjpe.update(mpjpe_this, input.size(0)) mpjpe_r.update(mpjpe_r_this, input.size(0)) if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() Bar.suffix = '{split:10}: [{0:2}][{1:3}/{2:3}] | Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | shapeLoss {shapeLoss.avg:.6f} | AE {mpjpe.avg:.6f} | ShapeDis {mpjpe_r.avg:.6f}'.format( epoch, i, len(loader), total=bar.elapsed_td, eta=bar.eta_td, loss=losses, mpjpe=mpjpe, split=split, shapeLoss=shapeLosses, mpjpe_r=mpjpe_r) bar.next() bar.finish() return mpjpe.avg, losses.avg, shapeLosses.avg
def main():
    # Parse the options from parameters
    opts = Opts().parse()
    ## For PyTorch 0.4.1, cuda(device)
    opts.device = torch.device(f'cuda:{opts.gpu[0]}')
    print(opts.expID, opts.task, os.path.dirname(os.path.realpath(__file__)))
    # Load the trained model to test
    if opts.loadModel != 'none':
        model_path = os.path.join(opts.root_dir, opts.loadModel)
        model = torch.load(model_path).cuda(device=opts.device)
        model.eval()
    else:
        print('ERROR: No model is loaded!')
        return
    # Read the input image and pass the input to the GPU
    if opts.img == 'None':
        val_dataset = PENN_CROP(opts, 'val')
        val_loader = tud.DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=int(opts.num_workers))
        opts.nJoints = val_dataset.nJoints
        opts.skeleton = val_dataset.skeleton
        for i, gt in enumerate(val_loader):
            # Test Visualizer, Input and get_preds
            if i == 0:
                input, label = gt['input'], gt['label']
                gtpts, center, scale, proj = gt['gtpts'], gt['center'], gt['scale'], gt['proj']
                input_var = input[:, 0, ].float().cuda(device=opts.device, non_blocking=True)
                # output = label
                output = model(input_var)
                # Test Loss, Err and Acc (PCK)
                Loss, Err, Acc = AverageMeter(), AverageMeter(), AverageMeter()
                ref = get_ref(opts.dataset, scale)
                for j in range(opts.preSeqLen):
                    pred = get_preds(output[:, j, ].cpu().float())
                    pred = original_coordinate(pred, center[:, ], scale, opts.outputRes)
                    err, ne = error(pred, gtpts[:, j, ], ref)
                    acc, na = accuracy(pred, gtpts[:, j, ], ref)
                    # assert ne == na, "ne must be the same as na"
                    Err.update(err)
                    Acc.update(acc)
                    print(j, f"{Err.val:.6f}", Acc.val)
                print('all', f"{Err.avg:.6f}", Acc.avg)
                # Visualizer Object
                ## Initialize
                v = Visualizer(opts.nJoints, opts.skeleton, opts.outputRes)
                # ## Add input image
                # v.add_img(input[0,0,].transpose(2, 0).numpy().astype(np.uint8))
                # ## Get the predicted joints
                # predJoints = get_preds(output[:, 0, ])
                # ## Add joints and skeleton to the figure
                # v.add_2d_joints_skeleton(predJoints, (0, 0, 255))
                # Transform the heatmap for display
                hm_img = output[0, 0, ].cpu().detach().numpy()
                v.add_hm(hm_img)
                ## Show image
                v.show_img(pause=True)
                break
    else:
        print('NOT ready for the raw input outside the dataset')
        img = cv2.imread(opts.img)
        input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
        input = input.view(1, input.size(0), input.size(1), input.size(2))
        input_var = torch.autograd.Variable(input).float().cuda(device=opts.device)
        output = model(input_var)
        predJoints = get_preds(output[-2].data.cpu().numpy())[0] * 4
def train(model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer): print( " ->->->->->->->->->-> One epoch with Natural training <-<-<-<-<-<-<-<-<-<-" ) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") top5 = AverageMeter("Acc_5", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch), ) model.train() end = time.time() dataloader = train_loader if sm_loader is None else zip( train_loader, sm_loader) for i, data in enumerate(dataloader): if sm_loader: images, target = ( torch.cat([d[0] for d in data], 0).to(device), torch.cat([d[1] for d in data], 0).to(device), ) else: images, target = data[0].to(device), data[1].to(device) # basic properties of training if i == 0: print( images.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print("Pixel range for training images : [{}, {}]".format( torch.min(images).data.cpu().numpy(), torch.max(images).data.cpu().numpy(), )) # stability-loss if args.dataset == "imagenet": std = (torch.tensor( [0.229, 0.224, 0.225]).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)).to(device) noise = (torch.randn_like(images) / std).to(device) * args.noise_std output = model(images + noise) loss = nn.CrossEntropyLoss()(output, target) else: output = model(images) loss_natural = nn.CrossEntropyLoss()(output, target) loss_robust = (1.0 / len(images)) * nn.KLDivLoss( size_average=False)( F.log_softmax( model(images + torch.randn_like(images).to(device) * args.noise_std), dim=1, ), F.softmax(output, dim=1), ) loss = loss_natural + args.beta * loss_robust # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), images.size(0)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard(writer, "train", epoch * len(train_loader) + i) # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(images[0:len(images) // 4]), )
def step(split, epoch, opt, data_loader, model, optimizer=None): if split == 'train': model.train() else: model.eval() crit = torch.nn.MSELoss() crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var) acc_idxs = data_loader.dataset.acc_idxs edges = data_loader.dataset.edges edges_3d = data_loader.dataset.edges_3d shuffle_ref = data_loader.dataset.shuffle_ref mean = data_loader.dataset.mean std = data_loader.dataset.std convert_eval_format = data_loader.dataset.convert_eval_format Loss, Loss3D = AverageMeter(), AverageMeter() Acc, MPJPE = AverageMeter(), AverageMeter() data_time, batch_time = AverageMeter(), AverageMeter() preds = [] time_str = '' nIters = len(data_loader) bar = Bar('{}'.format(opt.exp_id), max=nIters) end = time.time() for i, batch in enumerate(data_loader): data_time.update(time.time() - end) for k in batch: if k != 'meta': batch[k] = batch[k].cuda(device=opt.device, non_blocking=True) gt_2d = batch['meta']['pts_crop'].cuda( device=opt.device, non_blocking=True).float() / opt.output_h output = model(batch['input']) loss = crit(output[-1]['hm'], batch['target']) loss_3d = crit_3d( output[-1]['depth'], batch['reg_mask'], batch['reg_ind'], batch['reg_target'],gt_2d) for k in range(opt.num_stacks - 1): loss += crit(output[k], batch['target']) loss_3d = crit_3d( output[-1]['depth'], batch['reg_mask'], batch['reg_ind'], batch['reg_target'], gt_2d) loss += loss_3d if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() else: input_ = batch['input'].cpu().numpy().copy() input_[0] = flip(input_[0]).copy()[np.newaxis, ...] input_flip_var = torch.from_numpy(input_).cuda( device=opt.device, non_blocking=True) output_flip_ = model(input_flip_var) output_flip = shuffle_lr( flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref) output_flip = output_flip.reshape( 1, opt.num_output, opt.output_h, opt.output_w) output_depth_flip = shuffle_lr( flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref) output_depth_flip = output_depth_flip.reshape( 1, opt.num_output, opt.output_h, opt.output_w) output_flip = torch.from_numpy(output_flip).cuda( device=opt.device, non_blocking=True) output_depth_flip = torch.from_numpy(output_depth_flip).cuda( device=opt.device, non_blocking=True) output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2 output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2 # pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) # preds.append(convert_eval_format(pred, conf, meta)[0]) Loss.update(loss.item(), batch['input'].size(0)) Loss3D.update(loss_3d.item(), batch['input'].size(0)) Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(), batch['target'].detach().cpu().numpy(), acc_idxs)) mpeje_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(), output[-1]['depth'].detach().cpu().numpy(), batch['meta']['gt_3d'].detach().numpy(), convert_func=convert_eval_format) MPJPE.update(mpeje_batch, mpjpe_cnt) batch_time.update(time.time() - end) end = time.time() if not opt.hide_data_time: time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \ ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\ '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\ '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\ '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split=split, time_str=time_str, MPJPE=MPJPE, loss_3d=Loss3D) if opt.print_iter > 0: if i % opt.print_iter == 0: print('{}| {}'.format(opt.exp_id, 
Bar.suffix)) else: bar.next() if opt.debug >= 2: gt = get_preds(batch['target'].cpu().numpy()) * 4 pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4 debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges) img = ( batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256 img = img.astype(np.uint8).copy() debugger.add_img(img) debugger.add_mask( cv2.resize(batch['target'][0].cpu().numpy().max(axis=0), (opt.input_w, opt.input_h)), img, 'target') debugger.add_mask( cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0), (opt.input_w, opt.input_h)), img, 'pred') debugger.add_point_2d(gt[0], (0, 0, 255)) debugger.add_point_2d(pred[0], (255, 0, 0)) debugger.add_point_3d( batch['meta']['gt_3d'].detach().numpy()[0], 'r', edges=edges_3d) pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(), output[-1]['depth'].detach().cpu().numpy()) debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b',edges=edges_3d) debugger.show_all_imgs(pause=False) debugger.show_3d() bar.finish() return {'loss': Loss.avg, 'acc': Acc.avg, 'mpjpe': MPJPE.avg, 'time': bar.elapsed_td.total_seconds() / 60.}, preds
def step(split, epoch, opt, data_loader, model, optimizer=None): if split == 'train': model.train() else: model.eval() crit = torch.nn.MSELoss() acc_idxs = data_loader.dataset.acc_idxs edges = data_loader.dataset.edges shuffle_ref = data_loader.dataset.shuffle_ref mean = data_loader.dataset.mean std = data_loader.dataset.std convert_eval_format = data_loader.dataset.convert_eval_format Loss, Acc = AverageMeter(), AverageMeter() data_time, batch_time = AverageMeter(), AverageMeter() preds = [] nIters = len(data_loader) bar = Bar('{}'.format(opt.exp_id), max=nIters) end = time.time() for i, batch in enumerate(data_loader): data_time.update(time.time() - end) input, target, meta = batch['input'], batch['target'], batch['meta'] input_var = input.cuda(device=opt.device, non_blocking=True) target_var = target.cuda(device=opt.device, non_blocking=True) output = model(input_var) loss = crit(output[-1]['hm'], target_var) for k in range(opt.num_stacks - 1): loss += crit(output[k], target_var) if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() else: input_ = input.cpu().numpy().copy() input_[0] = flip(input_[0]).copy()[np.newaxis, ...] input_flip_var = torch.from_numpy(input_).cuda( device=opt.device, non_blocking=True) output_flip = model(input_flip_var) output_flip = shuffle_lr( flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref) output_flip = output_flip.reshape( 1, opt.num_output, opt.output_h, opt.output_w) # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2 output_flip = torch.from_numpy(output_flip).cuda( device=opt.device, non_blocking=True) output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2 pred, conf = get_preds( output[-1]['hm'].detach().cpu().numpy(), True) preds.append(convert_eval_format(pred, conf, meta)[0]) Loss.update(loss.detach().item(), input.size(0)) Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(), target_var.detach().cpu().numpy(), acc_idxs)) batch_time.update(time.time() - end) end = time.time() if not opt.hide_data_time: time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \ ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) else: time_str = '' Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \ '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\ '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split=split, time_str=time_str) if opt.print_iter > 0: if i % opt.print_iter == 0: print('{}| {}'.format(opt.exp_id, Bar.suffix)) else: bar.next() if opt.debug >= 2: gt, amb_idx = get_preds(target.cpu().numpy()) gt *= 4 pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy()) pred *= 4 debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges) img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256 img = img.astype(np.uint8).copy() debugger.add_img(img) debugger.add_mask( cv2.resize(target[0].numpy().max(axis=0), (opt.input_w, opt.input_h)), img, 'target') debugger.add_mask( cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0), (opt.input_w, opt.input_h)), img, 'pred') debugger.add_point_2d(pred[0], (255, 0, 0)) debugger.add_point_2d(gt[0], (0, 0, 255)) debugger.show_all_imgs(pause=True) bar.finish() return {'loss': Loss.avg, 'acc': Acc.avg, 'time': bar.elapsed_td.total_seconds() / 60.}, preds
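# The pose snippets decode joint coordinates from heatmaps with a
# project-specific get_preds(); at its core this is a per-joint argmax over
# each heatmap, sketched below. get_preds_simple is a hypothetical name, and
# the real implementations additionally handle ambiguous joints, confidence
# thresholds, and sub-pixel refinement.
import numpy as np

def get_preds_simple(heatmaps, return_conf=False):
    # heatmaps: (batch, num_joints, height, width) array of predicted heatmaps
    n, j, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, j, -1)
    idx = flat.argmax(axis=2)                  # flat index of each peak
    conf = flat.max(axis=2)                    # peak value used as a confidence score
    xs = (idx % w).astype(np.float32)
    ys = (idx // w).astype(np.float32)
    preds = np.stack([xs, ys], axis=2)         # (batch, num_joints, 2) in (x, y) order
    return (preds, conf) if return_conf else preds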
def train( model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer=None, ): assert ( not args.normalize ), "Explicit normalization is done in the training loop, Dataset should have [0, 1] dynamic range." global_noise_data = torch.zeros( [args.batch_size, 3, args.image_dim, args.image_dim]).to(device) mean = torch.Tensor(np.array(args.mean)[:, np.newaxis, np.newaxis]) mean = mean.expand(3, args.image_dim, args.image_dim).to(device) std = torch.Tensor(np.array(args.std)[:, np.newaxis, np.newaxis]) std = std.expand(3, args.image_dim, args.image_dim).to(device) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") top5 = AverageMeter("Acc_5", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch), ) # switch to train mode model.train() for i, (input, target) in enumerate(train_loader): end = time.time() input = input.to(device, non_blocking=True) target = target.to(device, non_blocking=True) data_time.update(time.time() - end) for _ in range(args.n_repeats): # Ascend on the global noise noise_batch = Variable(global_noise_data[0:input.size(0)], requires_grad=True).to(device) in1 = input + noise_batch in1.clamp_(0, 1.0) in1.sub_(mean).div_(std) output = model(in1) loss = criterion(output, target) prec1, prec5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), input.size(0)) top1.update(prec1[0], input.size(0)) top5.update(prec5[0], input.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() # Update the noise for the next iteration pert = fgsm(noise_batch.grad, args.epsilon) global_noise_data[0:input.size(0)] += pert.data global_noise_data.clamp_(-args.epsilon, args.epsilon) optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard(writer, "train", epoch * len(train_loader) + i) if i == 0: print( in1.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print(f"Training images range: {[torch.min(in1), torch.max(in1)]}") # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(input[0:len(input) // 4]), )
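# The free adversarial-training loop above updates its shared noise with an
# fgsm() helper. A minimal sketch of the usual sign-gradient ascent step is
# given below; treat the signature as an assumption rather than the project's
# exact API, since the real helper may scale or clamp differently.
import torch

def fgsm(gradients, step_size):
    # One FGSM ascent step: move by step_size in the sign of the gradient.
    return step_size * torch.sign(gradients)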
def train(): try: os.makedirs(opt.checkpoints_dir) except OSError: pass if torch.cuda.device_count() > 1: model = torch.nn.parallel.DataParallel( AlexNet(num_classes=opt.num_classes)) else: model = AlexNet(num_classes=opt.num_classes) if os.path.exists(MODEL_PATH): model.load_state_dict( torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)) model.to(device) ################################################ # Set loss function and Adam optimizer ################################################ criterion = torch.nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=opt.lr) for epoch in range(opt.epochs): # train for one epoch print(f"\nBegin Training Epoch {epoch + 1}") # Calculate and return the top-k accuracy of the model # so that we can track the learning process. losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() for i, data in enumerate(train_dataloader): # get the inputs; data is a list of [inputs, labels] inputs, targets = data inputs = inputs.to(device) targets = targets.to(device) # compute output output = model(inputs) loss = criterion(output, targets) # measure accuracy and record loss prec1, prec5 = accuracy(output, targets, topk=(1, 2)) losses.update(loss.item(), inputs.size(0)) top1.update(prec1, inputs.size(0)) top5.update(prec5, inputs.size(0)) # compute gradients in a backward pass optimizer.zero_grad() loss.backward() # Call step of optimizer to update model params optimizer.step() print( f"Epoch [{epoch + 1}] [{i + 1}/{len(train_dataloader)}]\t" f"Loss {loss.item():.4f}\t" f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t" f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})", end="\r") # save model file torch.save(model.state_dict(), MODEL_PATH)
def train():
    try:
        os.makedirs(opt.checkpoints_dir)
    except OSError:
        pass

    CNN.to(device)
    CNN.train()
    torchsummary.summary(CNN, (1, 28, 28))

    ################################################
    # Set loss function and Adam optimizer
    ################################################
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(CNN.parameters(), lr=opt.lr)

    for epoch in range(opt.epochs):
        # train for one epoch
        print(f"\nBegin Training Epoch {epoch + 1}")
        # Calculate and return the top-k accuracy of the model
        # so that we can track the learning process.
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        end = time.time()
        for i, data in enumerate(train_dataloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute output
            output = CNN(inputs)
            loss = criterion(output, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1, inputs.size(0))
            top5.update(prec5, inputs.size(0))

            # compute gradients in a backward pass
            optimizer.zero_grad()
            loss.backward()

            # Call step of optimizer to update model params
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 15 == 0:
                print(
                    f"Epoch [{epoch + 1}] [{i}/{len(train_dataloader)}]\t"
                    f"Loss {loss.item():.4f}\t"
                    f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t"
                    f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})", end="\r")

        # save model file
        torch.save(CNN.state_dict(), MODEL_PATH)
top5 = AverageMeter() net.train() for i, data in enumerate(trainloader, 0): # get the inputs; data is a list of [inputs, labels] inputs, labels = data[0].to(device), data[1].to(device) # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize outputs = net(inputs) loss = criterion(outputs, labels) # measure accuracy and record loss prec1, prec5 = accuracy(outputs, labels, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(prec1, inputs.size(0)) top5.update(prec5, inputs.size(0)) loss.backward() optimizer.step() # print statistics if i % 5 == 0: print(f"Epoch [{epoch + 1}] [{i}/{len(trainloader)}]\t" f"Loss {loss.item():.4f}\t" f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t" f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})") net.eval()
def train( model, device, train_loader, sm_loader, criterion, optimizer, epoch, args, writer ): num_class = 10 sa = np.zeros((num_class, num_class - 1), dtype = np.int32) for i in range(sa.shape[0]): for j in range(sa.shape[1]): if j < i: sa[i][j] = j else: sa[i][j] = j + 1 sa = torch.LongTensor(sa) batch_size = args.batch_size*2 schedule_start = 0 num_steps_per_epoch = len(train_loader) eps_scheduler = EpsilonScheduler("linear", args.schedule_start, ((args.schedule_start + args.schedule_length) - 1) *\ num_steps_per_epoch, args.starting_epsilon, args.epsilon, num_steps_per_epoch) end_eps = eps_scheduler.get_eps(epoch+1, 0) start_eps = eps_scheduler.get_eps(epoch, 0) print( " ->->->->->->->->->-> One epoch with CROWN-IBP ({:.6f}-{:.6f})" " <-<-<-<-<-<-<-<-<-<-".format(start_eps, end_eps) ) batch_time = AverageMeter("Time", ":6.3f") data_time = AverageMeter("Data", ":6.3f") losses = AverageMeter("Loss", ":.4f") ibp_losses = AverageMeter("IBP_Loss", ":.4f") top1 = AverageMeter("Acc_1", ":6.2f") ibp_acc1 = AverageMeter("IBP1", ":6.2f") progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, ibp_losses, top1, ibp_acc1], prefix="Epoch: [{}]".format(epoch), ) model = BoundSequential.convert(model,\ {'same-slope': False, 'zero-lb': False,\ 'one-lb': False}).to(device) model.train() end = time.time() dataloader = train_loader if sm_loader is None else zip(train_loader, sm_loader) for i, data in enumerate(dataloader): if sm_loader: images, target = ( torch.cat([d[0] for d in data], 0).to(device), torch.cat([d[1] for d in data], 0).to(device), ) else: images, target = data[0].to(device), data[1].to(device) # basic properties of training data if i == 0: print( images.shape, target.shape, f"Batch_size from args: {args.batch_size}", "lr: {:.5f}".format(optimizer.param_groups[0]["lr"]), ) print(f"Training images range: {[torch.min(images), torch.max(images)]}") output = model(images, method_opt="forward") ce = nn.CrossEntropyLoss()(output, target) eps = eps_scheduler.get_eps(epoch, i) # generate specifications c = torch.eye(num_class).type_as(images)[target].unsqueeze(1) -\ torch.eye(num_class).type_as(images).unsqueeze(0) # remove specifications to self I = (~(target.unsqueeze(1) ==\ torch.arange(num_class).to(device).type_as(target).unsqueeze(0))) c = (c[I].view(images.size(0),num_class-1,num_class)).to(device) # scatter matrix to avoid compute margin to self sa_labels = sa[target].to(device) # storing computed lower bounds after scatter lb_s = torch.zeros(images.size(0), num_class).to(device) ub_s = torch.zeros(images.size(0), num_class).to(device) data_ub = torch.min(images + eps, images.max()).to(device) data_lb = torch.max(images - eps, images.min()).to(device) ub, ilb, relu_activity, unstable, dead, alive =\ model(norm=np.inf, x_U=data_ub, x_L=data_lb,\ eps=eps, C=c, method_opt="interval_range") crown_final_beta = 0. beta = (args.epsilon - eps * (1.0 - crown_final_beta)) / args.epsilon if beta < 1e-5: # print("pure naive") lb = ilb else: # print("crown-ibp") # get the CROWN bound using interval bounds _, _, clb, bias = model(norm=np.inf, x_U=data_ub,\ x_L=data_lb, eps=eps, C=c,\ method_opt="backward_range") # how much better is crown-ibp better than ibp? 
# diff = (clb - ilb).sum().item() lb = clb * beta + ilb * (1 - beta) lb = lb_s.scatter(1, sa_labels, lb) robust_ce = criterion(-lb, target) #print(ce, robust_ce) racc = accuracy(-lb, target, topk=(1,)) loss = robust_ce # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) top1.update(acc1[0].item(), images.size(0)) losses.update(ce.item(), images.size(0)) ibp_losses.update(robust_ce.item(), images.size(0)) ibp_acc1.update(racc[0].item(), images.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.write_to_tensorboard( writer, "train", epoch * len(train_loader) + i ) # write a sample of training images to tensorboard (helpful for debugging) if i == 0: writer.add_image( "training-images", torchvision.utils.make_grid(images[0 : len(images) // 4]), )
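# The CROWN-IBP loop builds an EpsilonScheduler that ramps the certified
# radius from starting_epsilon to the target epsilon over a window of training
# steps. A hedged sketch of the linear case it requests is below; the class
# name and constructor arguments are illustrative, and the real scheduler
# supports additional schedule types.
class LinearEpsilonScheduler:
    def __init__(self, start_step, end_step, start_eps, end_eps, steps_per_epoch):
        self.start_step = start_step            # global step at which the ramp begins
        self.end_step = end_step                # global step at which the ramp ends
        self.start_eps = start_eps
        self.end_eps = end_eps
        self.steps_per_epoch = steps_per_epoch

    def get_eps(self, epoch, step):
        # Convert (epoch, step-within-epoch) into a global step count.
        t = epoch * self.steps_per_epoch + step
        if t <= self.start_step:
            return self.start_eps
        if t >= self.end_step:
            return self.end_eps
        frac = (t - self.start_step) / float(self.end_step - self.start_step)
        return self.start_eps + frac * (self.end_eps - self.start_eps)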