def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Run one training epoch over `train_loader`.

    Returns:
        (average loss, average top-1 accuracy) across the epoch.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    bar = Bar('Processing', max=len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `.cuda(async=True)` is a SyntaxError since Python 3.7;
            # `non_blocking=True` is the supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        # torch.autograd.Variable wrapping dropped: a no-op since PyTorch 0.4

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # `loss.data[0]` / `prec[0]` indexing of 0-dim tensors was removed
        # in PyTorch 0.5; `.item()` is the replacement.
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader`; returns (loss, top-1, top-5) averages."""
    progress = Bar('Processing', max=len(val_loader))
    timing_batch = AverageMeter()
    timing_data = AverageMeter()
    loss_meter = AverageMeter()
    acc1_meter = AverageMeter()
    acc5_meter = AverageMeter()

    # evaluation mode: freezes dropout / batch-norm statistics
    model.eval()
    with torch.no_grad():
        tick = time.time()
        for step, (image, target) in enumerate(val_loader):
            # data-loading time for this batch
            timing_data.update(time.time() - tick)

            image = image.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            logits = model(image)
            batch_loss = criterion(logits, target)

            # top-1 / top-5 precision for this batch
            acc1, acc5 = accuracy(logits.data, target.data, topk=(1, 5))
            n = image.size(0)
            loss_meter.update(batch_loss.item(), n)
            acc1_meter.update(acc1.item(), n)
            acc5_meter.update(acc5.item(), n)

            # total time for this batch
            timing_batch.update(time.time() - tick)
            tick = time.time()

            # progress line
            progress.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=step + 1,
                size=len(val_loader),
                data=timing_data.avg,
                bt=timing_batch.avg,
                total=progress.elapsed_td,
                eta=progress.eta_td,
                loss=loss_meter.avg,
                top1=acc1_meter.avg,
                top5=acc5_meter.avg,
            )
            progress.next()
    progress.finish()
    return (loss_meter.avg, acc1_meter.avg, acc5_meter.avg)
def validate(valloader, model, criterion, epoch, use_cuda, mode):
    """One evaluation pass over `valloader`; returns (avg loss, avg top-1)."""
    timer_batch = AverageMeter()
    timer_data = AverageMeter()
    meter_loss = AverageMeter()
    meter_top1 = AverageMeter()
    meter_top5 = AverageMeter()

    # inference mode for dropout / batch-norm
    model.eval()

    tick = time.time()
    bar = Bar(f'{mode}', max=len(valloader))
    with torch.no_grad():
        for step, (inputs, targets) in enumerate(valloader):
            # time spent waiting on the loader
            timer_data.update(time.time() - tick)

            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda(non_blocking=True)

            preds = model(inputs)
            batch_loss = criterion(preds, targets)

            acc1, acc5 = accuracy(preds, targets, topk=(1, 5))
            count = inputs.size(0)
            meter_loss.update(batch_loss.item(), count)
            meter_top1.update(acc1.item(), count)
            meter_top5.update(acc5.item(), count)

            # wall time for the whole batch
            timer_batch.update(time.time() - tick)
            tick = time.time()

            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=step + 1,
                size=len(valloader),
                data=timer_data.avg,
                bt=timer_batch.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=meter_loss.avg,
                top1=meter_top1.avg,
                top5=meter_top5.avg,
            )
            bar.next()
    bar.finish()
    return (meter_loss.avg, meter_top1.avg)
def train(train_loader, num_classes, model, optimizer, criterion, epoch, use_cuda):
    """Train `model` for one pass over `train_loader`.

    Returns (last batch index, average loss). `num_classes` and `epoch` are
    accepted but not used inside this loop.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()
    # NOTE(review): the Bar is sized with args.val_iteration while the loop
    # runs over the full train_loader — confirm these are meant to match.
    bar = Bar('Training', max=args.val_iteration)
    # Both a tqdm wrapper and a progress Bar are driven at once; tqdm tracks
    # the real loader length.
    t = tqdm(enumerate(train_loader), total=len(train_loader), desc='training')
    model.train()
    for batch_idx, (input, target) in t:
        if use_cuda:
            input, target = input.cuda(), target.cuda(non_blocking=True)
        # measure data loading time
        data_time.update(time.time()-end)
        # batch size
        batch_size = input.size(0)
        output = model(input)
        # squeeze(1) drops a singleton label dimension — presumably targets are
        # shaped (N, 1); verify against the dataset.
        loss = criterion(output, target.squeeze(1))
        # record loss
        losses.update(loss.item(), input.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time()-end)
        end = time.time()
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} '.format(
            batch=batch_idx + 1,
            size=args.val_iteration,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg
        )
        bar.next()
    bar.finish()
    # returns the final batch index alongside the running average loss
    return (batch_idx, losses.avg,)
def test(testloader, model, epoch, use_cuda):
    """Evaluate on `testloader`, scoring the model's final output head.

    Returns (average loss, average top-1 accuracy).
    """
    t_batch = AverageMeter()
    t_data = AverageMeter()
    m_loss = AverageMeter()
    m_top1 = AverageMeter()
    m_top5 = AverageMeter()
    tick = time.time()

    # evaluation mode
    model.eval()
    with torch.no_grad():
        bar = Bar('Processing', max=len(testloader))
        for step, (inputs, targets) in enumerate(testloader):
            t_data.update(time.time() - tick)
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # the model yields several outputs; only the last head is scored
            outputs = model(inputs)[-1]
            loss = F.cross_entropy(outputs, targets)

            acc1, acc5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            n = inputs.shape[0]
            m_loss.update(loss.item(), n)
            m_top1.update(acc1.item(), n)
            m_top5.update(acc5.item(), n)

            t_batch.update(time.time() - tick)
            tick = time.time()

            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=step + 1,
                size=len(testloader),
                data=t_data.avg,
                bt=t_batch.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=m_loss.avg,
                top1=m_top1.avg,
                top5=m_top5.avg,
            )
            bar.next()
        bar.finish()
    return (m_loss.avg, m_top1.avg)
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `testloader`; returns (avg loss, avg top-1)."""
    global best_acc  # vestigial declaration kept for compatibility; never assigned here

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    # no_grad added: the original built autograd graphs during evaluation,
    # wasting memory for no benefit
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=batch_idx + 1,
                size=len(testloader),
                data=data_time.avg,
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
            )
            bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def fine_tuning(train_loader, model, criterion, optimizer):
    """Fine-tune `model` for one epoch.

    Returns (average loss, average micro-F1, average macro-F1).
    """
    t_batch = AverageMeter()
    t_data = AverageMeter()
    m_loss = AverageMeter()
    m_micro = AverageMeter()
    m_macro = AverageMeter()

    model.train()
    tick = time.time()
    bar = Bar('Training', max=len(train_loader))
    for step, (input, target) in enumerate(train_loader):
        # loader wait time
        t_data.update(time.time() - tick)

        input = input.cuda()
        target = target.cuda()

        output = model(input)
        loss = criterion(output, target.float())

        # F1 metrics are computed on CPU copies
        target = target.data.cpu().float()
        output = output.data.cpu()
        micro, macro = calc_f1(target, output)

        n = input.size(0)
        m_loss.update(loss.item(), n)
        m_micro.update(micro.item(), n)
        m_macro.update(macro.item(), n)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        t_batch.update(time.time() - tick)
        tick = time.time()

        # re-normalize classifier weights after every update
        model.weight_norm()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Micro-f1: {microF1: .4f} |Macro-f1: {macroF1: .4f}'.format(
            batch=step + 1,
            size=len(train_loader),
            data=t_data.val,
            bt=t_batch.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=m_loss.avg,
            microF1=m_micro.avg,
            macroF1=m_macro.avg,
        )
        bar.next()
    bar.finish()
    return (m_loss.avg, m_micro.avg, m_macro.avg)
def train_adv(trainloader, net, criterion, optimizer, epoch, adversary):
    """One epoch of adversarial training; returns (avg loss, avg top-1)."""
    print('\nEpoch: %d' % epoch)
    net.train()

    t_batch = AverageMeter()
    m_loss = AverageMeter()
    m_top1 = AverageMeter()

    tick = time.time()
    bar = Bar('Processing', max=len(trainloader))
    for step, sample in enumerate(trainloader):
        inputs = sample[0]
        targets = sample[1].long()
        inputs, targets = inputs.to(device), targets.to(device)

        # craft adversarial examples for this batch
        _, adv_inputs = adversary.perturb(inputs, targets)

        net.train()
        # take several optimizer steps on the same adversarial batch
        for _ in range(args.update):
            outputs = net(adv_inputs)
            loss = criterion(outputs, targets)

            # top-5 is computed but only top-1 is tracked
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            m_loss.update(loss.item(), inputs.size(0))
            m_top1.update(prec1.item(), inputs.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        t_batch.update(time.time() - tick)
        tick = time.time()

        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s| Total:{total:}| ETA:{eta:}| Loss:{loss:.4f}| top1:{top1:.2f}'.format(
            batch=step + 1,
            size=len(trainloader),
            bt=t_batch.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=m_loss.avg,
            top1=m_top1.avg)
        bar.next()
    bar.finish()
    return m_loss.avg, m_top1.avg
def test(testloader, model, criterion, device='cuda:0'):
    """Evaluate `model` on `testloader`; returns (avg loss, avg top-1)."""
    t_batch = AverageMeter()
    t_data = AverageMeter()
    m_loss = AverageMeter()
    m_top1 = AverageMeter()
    m_top5 = AverageMeter()

    # evaluation mode
    model.eval()

    tick = time.time()
    bar = Bar('Processing', max=len(testloader))
    for step, (inputs, targets) in enumerate(testloader):
        # loader wait time
        t_data.update(time.time() - tick)

        inputs, targets = inputs.to(device), targets.to(device)

        # forward pass only — no gradients needed for evaluation
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        acc1, acc5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        n = inputs.size(0)
        m_loss.update(loss.item(), n)
        m_top1.update(acc1.item(), n)
        m_top5.update(acc5.item(), n)

        t_batch.update(time.time() - tick)
        tick = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=step + 1,
            size=len(testloader),
            data=t_data.avg,
            bt=t_batch.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=m_loss.avg,
            top1=m_top1.avg,
            top5=m_top5.avg,
        )
        bar.next()
    bar.finish()
    return (m_loss.avg, m_top1.avg)
def get_novel_weights(novel_loader, model, weight_generator,
                      num_novel=100, feat_dim=256, label_offset=100):
    """Imprint classifier weights for novel classes.

    Extracts an embedding for every novel sample, averages (and L2-normalizes)
    them per class, then maps the imprinted matrix through `weight_generator`.

    Args:
        novel_loader: loader yielding (image, label) pairs for novel classes.
        model: backbone exposing `extract` for feature embeddings.
        weight_generator: module mapping imprinted weights to generated weights.
        num_novel: number of novel classes (previously hard-coded 100).
        feat_dim: embedding dimensionality (previously hard-coded 256).
        label_offset: label id of the first novel class (previously hard-coded 100).

    Returns:
        The tensor produced by `weight_generator`.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # switch to evaluate mode
    model.eval()
    weight_generator.eval()

    end = time.time()
    bar = Bar('Imprinting', max=len(novel_loader))
    with torch.no_grad():
        for batch_idx, (input, target) in enumerate(novel_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            input = input.cuda()

            # accumulate embeddings and labels for the whole novel set
            output = model.extract(input)
            if batch_idx == 0:
                output_stack = output
                target_stack = target
            else:
                output_stack = torch.cat((output_stack, output), 0)
                target_stack = torch.cat((target_stack, target), 0)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=batch_idx + 1,
                size=len(novel_loader),
                data=data_time.val,
                bt=batch_time.val,
                total=bar.elapsed_td,
                eta=bar.eta_td
            )
            bar.next()
        bar.finish()

    new_weight = torch.zeros(num_novel, feat_dim)
    for i in range(num_novel):
        # class-mean embedding, or a random vector when args.random is set;
        # L2-normalized before generation
        if args.random:
            tmp = torch.randn(feat_dim)
        else:
            tmp = output_stack[target_stack == (i + label_offset)].mean(0)
        new_weight[i] = tmp / tmp.norm(p=2)
    gen_weight = weight_generator(new_weight.cuda())
    return gen_weight
def test(testloader, model, criterion, use_cuda):
    """Evaluate landmark regression on `testloader`.

    Returns (average loss, 0) — the second slot mirrors the accuracy position
    used by sibling evaluation functions.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    for batch_idx, batch_data in enumerate(testloader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = batch_data['image']
        targets = batch_data['landmarks']
        if use_cuda:
            # `.cuda(async=True)` is a SyntaxError since Python 3.7;
            # `non_blocking=True` is the supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        # torch.autograd.Variable wrappers removed (no-ops since PyTorch 0.4)

        # compute output; no gradients needed during evaluation
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets.squeeze())

        # record loss
        losses.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(float(time.time() - end))
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} '.format(
            batch=batch_idx + 1,
            size=len(testloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, 0)
def train(trainloader, model, criterion, optimizer, epoch):
    """One training epoch; returns (avg loss, avg top-1 accuracy)."""
    # train mode enables dropout / batch-norm updates
    model.train()

    t_batch = AverageMeter()
    t_data = AverageMeter()
    m_loss = AverageMeter()
    m_top1 = AverageMeter()

    tick = time.time()
    bar = Bar('Processing', max=len(trainloader))
    for step, (inputs, targets) in enumerate(trainloader):
        # loader wait time
        t_data.update(time.time() - tick)

        inputs = inputs.to(device)
        targets = targets.to(device).squeeze()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # only top-1 accuracy is tracked here
        acc = accuracy(outputs.data, targets.data, topk=(1, ))
        m_loss.update(loss.item(), inputs.size(0))
        m_top1.update(acc[0].item(), inputs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        t_batch.update(time.time() - tick)
        tick = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
            batch=step + 1,
            size=len(trainloader),
            data=t_data.avg,
            bt=t_batch.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=m_loss.avg,
            top1=m_top1.avg,
        )
        bar.next()
    bar.finish()
    return (m_loss.avg, m_top1.avg)
def validate(val_loader, model):
    """Measure top-1/top-5 accuracy on `val_loader`; returns the top-1 average."""
    t_batch = AverageMeter()
    t_data = AverageMeter()
    m_top1 = AverageMeter()
    m_top5 = AverageMeter()

    # evaluation mode
    model.eval()
    bar = Bar('Testing ', max=len(val_loader))
    with torch.no_grad():
        tick = time.time()
        for step, (input, target) in enumerate(val_loader):
            # loader wait time
            t_data.update(time.time() - tick)

            input = input.cuda()
            target = target.cuda(non_blocking=True)

            output = model(input)

            # accuracy only — no loss is tracked here
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            m_top1.update(acc1.item(), input.size(0))
            m_top5.update(acc5.item(), input.size(0))

            t_batch.update(time.time() - tick)
            tick = time.time()

            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=step + 1,
                size=len(val_loader),
                data=t_data.avg,
                bt=t_batch.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                top1=m_top1.avg,
                top5=m_top5.avg,
            )
            bar.next()
    bar.finish()
    return m_top1.avg
def output_attention(val_loader, model, epoch, use_cuda, save_dir):
    """Dump per-sample spatial attention maps to `<save_dir>/attention.txt`.

    For each sample the channel-summed attention map is flattened and written
    as one line of w*h '%.3f' values; top-1/top-5 accuracy is tracked alongside.
    Returns nothing.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    # context manager guarantees the file is closed even if an iteration raises;
    # no_grad replaces the deprecated volatile=True Variable wrapping
    with open(os.path.join(save_dir, 'attention.txt'), 'w') as fw, torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            data_time.update(time.time() - end)
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            probs, attention = model(inputs)
            bs, c, w, h = attention.size()
            # collapse channels, then flatten to one row of w*h values per sample
            attention = attention.sum(1)
            attention = attention.cpu().data.numpy()
            attention = attention.reshape((bs, -1))
            for index in range(bs):
                hot = ''
                for j in range(w * h):
                    hot += '{:.3f} '.format(attention[index][j])
                hot += '\n'
                fw.write(hot)

            prec1, prec5 = accuracy(probs.data, targets.data, topk=(1, 5))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # `end` was never reset in the original, so timings grew unbounded
            batch_time.update(time.time() - end)
            end = time.time()

            # fixed `bar.shuffix` typo — the progress text never updated before
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=batch_idx + 1,
                size=len(val_loader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                top1=top1.avg,
                top5=top5.avg
            )
            bar.next()
    bar.finish()
def test(testloader, model, use_cuda, loader_len):
    """Extract pooled feature vectors for every sample in `testloader`.

    Features are average-pooled over a 4x4 window, flattened, and collected
    on the CPU. `loader_len` is accepted but unused in this body.

    Returns:
        List of per-sample numpy feature vectors.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    data_represent_list = []

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        bar = Bar('Processing', max=len(testloader))
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # torch.autograd.Variable wrappers removed: deprecated no-ops
            # since PyTorch 0.4, redundant inside torch.no_grad()

            # compute output
            outputs = model(inputs)
            # NOTE(review): assumes 4x4 spatial feature maps — confirm backbone
            outputs = torch.nn.functional.avg_pool2d(outputs, kernel_size=(4, 4), stride=(1, 1))
            outputs = outputs.view(outputs.size(0), -1)
            data_represent_list.extend(outputs.detach().cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=batch_idx + 1,
                size=len(testloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        bar.finish()
    return data_represent_list
def get_p(evalloader, model, epoch, use_cuda):
    """Collect softmax probability vectors over `evalloader`.

    Returns a list with one numpy probability vector per batch.
    `epoch` is accepted but unused in this body.
    """
    # switch to evaluate mode
    model.eval()
    p_results = []
    bar = Bar('Evaluating', max=len(evalloader))
    # no_grad replaces the deprecated volatile=True Variable wrapping
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(evalloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            outputs = nn.functional.softmax(outputs, dim=-1)  # normalization
            # NOTE(review): only row 0 of the batch is kept — this presumably
            # assumes batch_size == 1; confirm against the loader.
            p_results.append(outputs.cpu().data[0].numpy())

            bar.suffix = '({batch}/{size})'.format(batch=batch_idx + 1, size=len(evalloader))
            bar.next()
    bar.finish()
    return p_results
def test(testloader, model, use_cuda):
    """Run inference over `testloader` and collect predicted landmarks.

    Returns a list of per-batch numpy arrays of model outputs.
    """
    landmarks = []
    t_batch = AverageMeter()
    t_data = AverageMeter()
    # never updated — the progress line shows its initial 0.0 average
    m_loss = AverageMeter()

    # evaluation mode
    model.eval()

    tick = time.time()
    bar = Bar('Processing', max=len(testloader))
    for step, batch_data in enumerate(testloader):
        # loader wait time
        t_data.update(time.time() - tick)

        inputs = batch_data
        if use_cuda:
            inputs = inputs.cuda()
        inputs = torch.autograd.Variable(inputs)

        outputs = model(inputs)
        landmarks.append(outputs.cpu().data.numpy())

        t_batch.update(time.time() - tick)
        tick = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} '.format(
            batch=step + 1,
            size=len(testloader),
            data=t_data.avg,
            bt=t_batch.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=m_loss.avg,
        )
        bar.next()
    bar.finish()
    return landmarks
def attack_over_test(testloader, net, criterion, adversary):
    """Evaluate `net` on adversarially perturbed test batches.

    Each clean batch is attacked with `adversary.perturb` and the perturbed
    inputs are scored. Returns (average loss, average top-1 accuracy).

    NOTE(review): there is deliberately no torch.no_grad() here — the attack
    presumably needs gradients w.r.t. the inputs; confirm before adding one.
    """
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    for batch_idx, data_info in enumerate(testloader):
        # loader yields (images, labels, ...); labels cast to long for the loss
        inputs = data_info[0]
        targets = data_info[1].long()
        # adv_inputs = inputs
        inputs, targets = inputs.to(device), targets.to(device)
        # perturb returns a pair; only the adversarial batch is used
        _, adv_inputs = adversary.perturb(inputs, targets)
        outputs = net(adv_inputs)
        loss = criterion(outputs, targets)
        # top-5 is computed but only top-1 is tracked
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        # per-batch wall time
        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s| Total: {total:}| ETA: {eta:}| Loss:{loss:.4f}| top1: {top1:.2f}'.format(
            batch=batch_idx + 1,
            size=len(testloader),
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg)
        bar.next()
    bar.finish()
    return losses.avg, top1.avg
def train(loader, model, criterion, optimizer, epoch, use_cuda):
    """One training epoch supporting multi-head (tuple) model outputs.

    When the model returns a tuple, the loss is summed over all heads and
    accuracy is reported from the first head.

    Returns:
        (average loss, average top-1, average top-5).
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    bar = Bar('Processing', max=len(loader))
    for batch_idx, (inputs, targets) in enumerate(loader):
        if isinstance(targets, tuple):
            # NOTE(review): a plain tuple does not support [:, 0] indexing —
            # this branch appears to expect a 2-D label tensor and may be dead;
            # confirm against the dataset before relying on it.
            targets = targets[:, 0].contiguous()

        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `.cuda(async=True)` is a SyntaxError since Python 3.7;
            # `non_blocking=True` is the supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        # torch.autograd.Variable wrapping dropped: a no-op since PyTorch 0.4

        # compute output
        outputs = model(inputs)
        if isinstance(outputs, tuple):
            # deep-supervision style: sum losses over every head,
            # score accuracy from the primary (first) head
            loss = sum(criterion(o, targets) for o in outputs)
            prec1, prec5 = accuracy(outputs[0].data, targets.data, topk=(1, 5))
        else:
            loss = criterion(outputs, targets)
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))

        # measure accuracy and record loss;
        # `loss.data[0]` / `prec[0]` were removed in PyTorch 0.5 — use .item()
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg, top5.avg)
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    num = int(num / 2)
    oneWon = 0
    twoWon = 0
    draws = 0

    # first half: player1 starts
    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == 1:
            oneWon += 1
        elif gameResult == -1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        # fixed: was `eps=eps + 1`, displaying one game ahead of the true count
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    # second half: swap sides so player2 starts
    self.player1, self.player2 = self.player2, self.player1

    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        # sides are swapped, so a -1 result is a win for the original player1
        if gameResult == -1:
            oneWon += 1
        elif gameResult == 1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        # fixed: was `maxeps=num` (half the total) and `eps=eps + 1`
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    bar.finish()
    return oneWon, twoWon, draws
def __solve__(self, mode, epoch, dataloader):
    """Run one epoch over `dataloader` in the given mode ('train' or 'val').

    Trains (backward + optimizer step) when mode == 'train'; forward-only when
    mode == 'val'. Tracks loss and top-1/3/5 precision, appends a summary row
    to the mode's logger, and checkpoints when validation top-1 reaches a new
    best. `epoch` is accepted but `self.epoch` is what gets logged — confirm.
    """
    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    prec_losses = AverageMeter()
    prec_top1 = AverageMeter()
    prec_top3 = AverageMeter()
    prec_top5 = AverageMeter()
    if mode in ['val']:
        pass  # confusion_matrix = ConusionMeter()
    since = time.time()
    bar = Bar('[{}]{}'.format(mode.upper(), self.title), max=len(dataloader))
    for batch_idx, (inputs, labels) in enumerate(dataloader):
        # measure data loading time
        data_timer.update(time.time() - since)
        # wrap inputs in variable
        if mode in ['train']:
            if __is_cuda__():
                inputs = inputs.cuda()
                # `.cuda(async=True)` is a SyntaxError since Python 3.7;
                # `non_blocking=True` is the supported spelling.
                labels = labels.cuda(non_blocking=True)
            inputs = __to_var__(inputs)
            labels = __to_var__(labels)
        elif mode in ['val']:
            if __is_cuda__():
                inputs = inputs.cuda()
                labels = labels.cuda(non_blocking=True)
            # NOTE(review): `volatile` is ignored since PyTorch 0.4 — wrapping
            # the val forward in torch.no_grad() would be the modern equivalent.
            inputs = __to_var__(inputs, volatile=True)
            labels = __to_var__(labels, volatile=False)
        # forward
        outputs = self.model(inputs)
        loss = self.criterion(outputs, labels)
        # backward + optimize (training only)
        if mode in ['train']:
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        # statistics
        prec_1, prec_3, prec_5 = compute_precision_top_k(
            __to_tensor__(outputs), __to_tensor__(labels), top_k=(1, 3, 5))
        batch_size = inputs.size(0)
        prec_losses.update(loss.item(), batch_size)
        prec_top1.update(prec_1.item(), batch_size)
        prec_top3.update(prec_3.item(), batch_size)
        prec_top5.update(prec_5.item(), batch_size)
        # measure elapsed time
        batch_timer.update(time.time() - since)
        since = time.time()
        # progress
        log_msg = ('\n[{mode}][epoch:{epoch}][iter:({batch}/{size})]' +
                   '[lr:{lr}] loss: {loss:.4f} | top1: {top1:.4f} | ' +
                   'top3: {top3:.4f} | top5: {top5:.4f} | eta: ' +
                   '(data:{dt:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \
            .format(
                mode=mode,
                epoch=self.epoch + 1,
                batch=batch_idx + 1,
                size=len(dataloader),
                lr=self.lr_scheduler.get_lr()[0],
                loss=prec_losses.avg,
                top1=prec_top1.avg,
                top3=prec_top3.avg,
                top5=prec_top5.avg,
                dt=data_timer.val,
                bt=batch_timer.val,
                tt=bar.elapsed_td)
        print(log_msg)
        bar.next()
    bar.finish()
    # write to logger
    self.logger[mode].append([
        self.epoch + 1,
        self.lr_scheduler.get_lr()[0],
        prec_losses.avg,
        prec_top1.avg,
        prec_top3.avg,
        prec_top5.avg
    ])
    # save model when validation top-1 reaches a new best
    if mode == 'val' and prec_top1.avg >= self.best_acc:
        print('best_acc={}, new_best_acc={}'.format(self.best_acc, prec_top1.avg))
        self.best_acc = prec_top1.avg
        state = {
            'epoch': self.epoch,
            'acc': self.best_acc,
            'optimizer': self.optimizer.state_dict(),
        }
        self.model.write_to(state)
        filename = 'bestshot.pth.tar'
        self.__save_checkpoint__(state, ckpt=self.ckpt_path, filename=filename)
def train(train_loader, model, criterion, optimizer, epoch):
    """One training epoch with per-head loss/accuracy meters.

    The meters are built as length-1 lists (a single output head is asserted),
    but the bookkeeping loops generically over heads. Returns
    (mean loss across heads, mean top-1 across heads).
    """
    bar = Bar('Processing', max=len(train_loader))
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # one meter per output head; currently exactly one head is expected
    losses = [AverageMeter() for _ in range(1)]
    top1 = [AverageMeter() for _ in range(1)]
    # switch to train mode
    model.train()
    loss_avg = 0
    prec1_avg = 0
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.cuda(non_blocking=True)
        # compute output
        output = model(input)
        # the model must return a sequence of heads; only 1 supported for now
        assert len(output) == 1
        # measure accuracy and record loss, per head
        loss = []
        prec1 = []
        for j in range(len(output)):
            # head j is scored against label column j — presumably targets
            # are shaped (N, num_heads); confirm against the dataset.
            loss.append(criterion(output[j], target[:, j]))
            prec1.append(accuracy(output[j], target[:, j], topk=(1,)))
            losses[j].update(loss[j].item(), input.size(0))
            top1[j].update(prec1[j][0].item(), input.size(0))
        # averages across heads for reporting and the return value
        losses_avg = [losses[k].avg for k in range(len(losses))]
        top1_avg = [top1[k].avg for k in range(len(top1))]
        loss_avg = sum(losses_avg) / len(losses_avg)
        prec1_avg = sum(top1_avg) / len(top1_avg)
        # compute gradient and do SGD step (loss summed over heads)
        optimizer.zero_grad()
        loss_sum = sum(loss)
        loss_sum.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
            batch=i + 1,
            size=len(train_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=loss_avg,
            top1=prec1_avg,
        )
        bar.next()
    bar.finish()
    return (loss_avg, prec1_avg)
def test(val_loader, model, criterion, epoch, use_cuda):
    """Distributed evaluation over `val_loader` using a CUDA data prefetcher.

    Loss and precision tensors are all-reduced across ranks via
    `reduce_tensor`; the progress bar is driven only on rank 0.
    Returns (average loss, average top-1 accuracy).
    """
    global best_acc
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    # torch.set_grad_enabled(False)

    end = time.time()
    # only rank 0 renders progress output
    if args.local_rank == 0:
        bar = Bar('Processing', max=len(val_loader))
    # prefetcher overlaps host-to-device copies with compute;
    # it yields (None, None) when exhausted
    prefetcher = data_prefetcher(val_loader)
    inputs, targets = prefetcher.next()
    batch_idx = -1
    while inputs is not None:
        # for batch_idx, (inputs, targets) in enumerate(val_loader):
        batch_idx += 1
        #if use_cuda:
        #    inputs, targets = inputs.cuda(), targets.cuda()
        #inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)

        # compute output without building autograd graphs
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # average metrics across all distributed ranks
        reduced_loss = reduce_tensor(loss.data)
        prec1 = reduce_tensor(prec1)
        prec5 = reduce_tensor(prec5)

        # to_python_float incurs a host<->device sync
        losses.update(to_python_float(reduced_loss), inputs.size(0))
        top1.update(to_python_float(prec1), inputs.size(0))
        top5.update(to_python_float(prec5), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress (rank 0 only)
        if args.local_rank == 0:
            bar.suffix = 'Valid({batch}/{size}) | Batch: {bt:.3f}s | Total: {total:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=batch_idx + 1,
                size=len(val_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
            )
            bar.next()
        inputs, targets = prefetcher.next()
    if args.local_rank == 0:
        print(bar.suffix)
        bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """One distributed mixed-precision training epoch (NVIDIA apex amp).

    Optionally applies cutmix / mixup / cutout augmentation (mutually
    exclusive, selected via args flags).  Accuracy/loss metrics are only
    gathered every args.print_freq batches to limit host<->device syncs.

    NOTE(review): original indentation was lost; metric gathering and the
    progress-bar update are reconstructed as nested under the print_freq
    guard, matching the per-print_freq batch_time normalization — confirm
    against the upstream source.

    Returns:
        (losses.avg, top1.avg): averages of the (all-reduced) sampled metrics.
    """
    printflag = False  # ensures the augmentation banner prints only once
    # switch to train mode
    model.train()
    torch.set_grad_enabled(True)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    if args.local_rank == 0:
        bar = Bar('Processing', max=len(train_loader))
    # print a status line roughly 10 times per epoch
    show_step = len(train_loader) // 10

    # prefetcher overlaps host->device copies with compute
    prefetcher = data_prefetcher(train_loader)
    inputs, targets = prefetcher.next()

    batch_idx = -1
    while inputs is not None:
        # for batch_idx, (inputs, targets) in enumerate(train_loader):
        batch_idx += 1
        batch_size = inputs.size(0)
        # drop a trailing partial batch
        if batch_size < args.train_batch:
            break
        # measure data loading time
        #if use_cuda:
        #    inputs, targets = inputs.cuda(), targets.cuda(async=True)
        #inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # optimizer diagnostics flag, raised 10x per epoch on rank 0 only
        if (batch_idx) % show_step == 0 and args.local_rank == 0:
            print_flag = True
        else:
            print_flag = False

        if args.cutmix:
            if printflag==False:
                print('using cutmix !')
                printflag=True
            # cutmix: paste a patch from a shuffled batch; mix the two labels
            inputs, targets_a, targets_b, lam = cutmix_data(inputs, targets, args.cutmix_prob, use_cuda)
            outputs = model(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            old_loss = loss_func(criterion, outputs)
        elif args.mixup:
            if printflag==False:
                print('using mixup !')
                printflag=True
            # mixup: convex combination of two samples and their labels
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, args.alpha, use_cuda)
            outputs = model(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            old_loss = loss_func(criterion, outputs)
        elif args.cutout:
            if printflag==False:
                print('using cutout !')
                printflag=True
            # cutout: zero a random square patch; labels unchanged
            inputs = cutout_data(inputs, args.cutout_size, use_cuda)
            outputs = model(inputs)
            old_loss = criterion(outputs, targets)
        else:
            outputs = model(inputs)
            old_loss = criterion(outputs, targets)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        # loss.backward()
        # amp scales the loss to avoid fp16 gradient underflow
        with amp.scale_loss(old_loss, optimizer) as loss:
            loss.backward()
        if args.el2:
            optimizer.step(print_flag=print_flag)
        else:
            optimizer.step()

        if batch_idx % args.print_freq == 0:
            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))

            # average metrics across all distributed workers
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)

            # to_python_float incurs a host<->device sync
            losses.update(to_python_float(reduced_loss), inputs.size(0))
            top1.update(to_python_float(prec1), inputs.size(0))
            top5.update(to_python_float(prec5), inputs.size(0))

            torch.cuda.synchronize()
            # measure elapsed time (normalized per batch across the window)
            batch_time.update((time.time() - end) / args.print_freq)
            end = time.time()

            if args.local_rank == 0:
                # plot progress
                bar.suffix = '({batch}/{size}) | Batch: {bt:.3f}s | Total: {total:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                    batch=batch_idx + 1,
                    size=len(train_loader),
                    bt=batch_time.val,
                    total=bar.elapsed_td,
                    loss=losses.avg,
                    top1=top1.avg,
                    top5=top5.avg,
                )
                bar.next()
        if (batch_idx) % show_step == 0 and args.local_rank == 0:
            print('E%d' % (epoch) + bar.suffix)

        inputs, targets = prefetcher.next()
    if args.local_rank == 0:
        bar.finish()
    return (losses.avg, top1.avg)
def train(trainloader, model, criterion, optimizer, epoch, use_cuda, logger):
    """One training epoch with optional scheduled-restart optimizer support.

    For the scheduled-restart optimizers (srsgd / sradam / sradamw /
    srradam) the optimizer's restart counter is advanced each step and the
    final iteration count is returned as a third tuple element.

    Returns:
        (losses.avg, top1.avg[, iter_count]) — the third element only for
        restart optimizers.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # Compute the restart-optimizer flag once instead of re-lowercasing the
    # option string on every batch and again at return time.
    uses_restart = args.optimizer.lower() in ('srsgd', 'sradam', 'sradamw', 'srradam')
    iter_count = 0  # defined even if the loader yields no batches

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # advance the restart schedule (restart optimizers only)
        if uses_restart:
            iter_count, iter_total = optimizer.update_iter()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '(Epoch {epoch}, ({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            epoch=epoch,
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        logger.file.write(bar.suffix)
        bar.next()
    bar.finish()
    if uses_restart:
        return (losses.avg, top1.avg, iter_count)
    else:
        return (losses.avg, top1.avg)
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """One training epoch, optionally truncated for debugging.

    If args.debug_batch_size is set, only that many batches are processed
    (and the progress bar is sized accordingly).  Tensors are moved via the
    module-level `device`, so the use_cuda flag is unused here (kept for a
    uniform signature with sibling train functions).

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    if args.debug_batch_size:
        bar = Bar('Processing', max=args.debug_batch_size)
    else:
        bar = Bar('Processing', max=len(trainloader))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # short-circuit the epoch when debugging
        if args.debug_batch_size:
            if batch_idx >= args.debug_batch_size:
                break

        # measure data loading time
        data_time.update(time.time() - end)

        inputs, targets = inputs.to(device), targets.to(device)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # .item() is the supported scalar-extraction API (instead of .data.item())
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """One training epoch that keeps pruned (zero) conv weights frozen.

    After backward, every Conv2d gradient is masked by |w| > 0, so weights
    that have been pruned to exactly zero receive zero gradient and stay
    pruned through the optimizer step.

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    print(args)
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss.
        # loss.data[0]/prec[0] indexing was the pre-0.4 API and raises on
        # modern PyTorch; .item() is the supported scalar extraction.
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # zero the gradients of pruned conv weights so they remain zero
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                weight_copy = m.weight.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                m.weight.grad.data.mul_(mask)

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, weight_generator, criterion, optimizer, epoch):
    """One few-shot training epoch for the classifier-weight generator.

    Per batch: extract features for fake-novel support samples, build one
    L2-normalized mean prototype per novel class, feed the prototypes
    through weight_generator, then classify base samples plus novel query
    samples against base classifiers + generated novel classifiers
    (features detached, so only the classifier side trains here).

    NOTE(review): targets are built as np.repeat(range(10), 5), i.e. a
    10-way task with 5 examples per class in a fixed order — assumes the
    loader emits samples grouped that way; confirm against the sampler.

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    bar = Bar('Training ', max=len(train_loader))
    for batch_idx, (base_samples, base_labels, fake_novel_samples, fake_novel_query, fake_novel_labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # each field arrives as a list of tensors; flatten onto the GPU
        base_samples = torch.cat(base_samples).cuda()
        base_labels = torch.cat(base_labels).cuda()
        fake_novel_samples = torch.cat(fake_novel_samples).cuda()
        fake_novel_labels = torch.cat(fake_novel_labels).cuda()
        fake_novel_query = torch.cat(fake_novel_query).cuda()

        fake_train = model.extract(fake_novel_samples)
        unique_novel_labels = torch.unique(fake_novel_labels)
        # one L2-normalized mean-feature prototype per novel class
        # (256 assumed to be the feature dimension — TODO confirm)
        new_weight = torch.zeros(unique_novel_labels.shape[0], 256)
        for i, f_l in enumerate(unique_novel_labels):
            tmp = fake_train[fake_novel_labels == f_l].mean(0)
            new_weight[i] = tmp / tmp.norm(p=2)
        new_weight = new_weight.cuda()
        gen_weight = weight_generator(new_weight)

        # compute output of the sampled 10-way classification problem
        input = torch.cat((base_samples, fake_novel_query))
        unique_base_labels = torch.unique(base_labels)
        output = model(input, base_class_indexes = unique_base_labels, novel_class_classifiers = gen_weight, detach_feature=True)
        lst_lab = np.repeat(list(range(10)), 5)
        target = torch.LongTensor(lst_lab).cuda()
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # re-normalize classifier weights after the update
        model.weight_norm()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(trainloader, model, criterion, optimizer, look_up_table, epoch, use_cuda):
    """One training epoch with periodic low-rank approximation (TRP).

    Every `period` batches (module-level constant — TODO confirm where it
    is defined) the model's layers listed in look_up_table are replaced by
    their low-rank approximation, and on the same batches a nuclear-norm
    regularization term is added to those layers' gradients.

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # periodically project listed layers to low rank (energy threshold 0.9)
        if batch_idx % period == 0:
            model, sub = low_rank_approx(model, look_up_table, criterion=EnergyThreshold(0.9), use_trp=args.trp, type=args.type)

        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # async= was removed when `async` became a keyword (py3.7);
            # non_blocking= is the equivalent supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # apply nuclear norm regularization
        if args.nuclear_weight is not None and batch_idx % period == 0:
            for name, m in model.named_modules():
                if name in look_up_table:
                    m.weight.grad.data.add_(args.nuclear_weight * sub[name])

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda, logger):
    """One training epoch over a DALI-style loader.

    Batches arrive as data[0]["data"] / data[0]["label"] dicts.  Per-epoch
    timing is additionally folded into the global batch/data-time meters
    (skipping epoch 0 to exclude warm-up).

    Returns:
        (losses.avg, top1.avg, top5.avg).
    """
    global batch_time_global, data_time_global
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # DALI pipelines expose _size instead of __len__
    train_loader_len = int(train_loader._size / args.train_batch) + 1
    bar = Bar('Processing', max=train_loader_len)
    for batch_idx, data in enumerate(train_loader):
        # measure data loading time (epoch 0 excluded from global stats)
        data_time_lap = time.time() - end
        data_time.update(data_time_lap)
        if epoch > 0:
            data_time_global.update(data_time_lap)

        # unpack the DALI batch dict
        inputs = data[0]["data"]
        targets = data[0]["label"].squeeze().cuda().long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(to_python_float(loss.data), inputs.size(0))
        top1.update(to_python_float(prec1), inputs.size(0))
        top5.update(to_python_float(prec5), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time (epoch 0 excluded from global stats)
        batch_time_lap = time.time() - end
        batch_time.update(batch_time_lap)
        if epoch > 0:
            batch_time_global.update(batch_time_lap)
        end = time.time()

        # plot progress
        bar.suffix = '(Epoch {epoch}, {batch}/{size}) Data: {data:.3f}s/{data_global:.3f}s | Batch: {bt:.3f}s/{bt_global:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            epoch=epoch,
            batch=batch_idx + 1,
            size=train_loader_len,
            data=data_time.val,
            data_global=data_time_global.avg,
            bt=batch_time.val,
            bt_global=batch_time_global.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
        logger.file.write(bar.suffix)
    bar.finish()
    return (losses.avg, top1.avg, top5.avg)
def train(trainloader, model, criterion, criterion_kl, optimizer, epoch, use_cuda):
    """One training epoch for a 4-head self-distillation model.

    Heads 1-3 are auxiliary classifiers, head 4 is the teacher/final head.
    The loss is the sum of cross-entropy on all four heads plus an
    epoch-ramped KL consistency term pulling heads 1-3 toward head 4.

    Returns:
        (losses.avg, top1_c1.avg, top1_c2.avg, top1_c3.avg, top1_t.avg).
    """
    # switch to train mode
    model.train()

    losses = AverageMeter()
    losses_kl = AverageMeter()
    top1_c1 = AverageMeter()
    top5_c1 = AverageMeter()
    top1_c2 = AverageMeter()
    top5_c2 = AverageMeter()
    top1_c3 = AverageMeter()
    top5_c3 = AverageMeter()
    top1_t = AverageMeter()
    top5_t = AverageMeter()

    bar = Bar('Processing', max=len(trainloader))
    # KL weight ramps up over epochs
    consistency_weight = get_current_consistency_weight(epoch)
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            # cuda(async=True) is a SyntaxError on Python 3.7+ ('async' is
            # a keyword); non_blocking= is the supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        outputs1, outputs2, outputs3, outputs4 = model(inputs)

        # supervised loss on every head
        loss_cross = criterion(outputs1, targets) + criterion(
            outputs2, targets) + criterion(outputs3, targets) + criterion(
                outputs4, targets)
        # consistency loss: align auxiliary heads with the final head
        loss_kl = consistency_weight * (criterion_kl(outputs1, outputs4) +
                                        criterion_kl(outputs2, outputs4) +
                                        criterion_kl(outputs3, outputs4))

        # measure accuracy per head
        prec1_t, prec5_t = accuracy(outputs4.data, targets.data, topk=(1, 5))
        prec1_c1, prec5_c1 = accuracy(outputs1.data, targets.data, topk=(1, 5))
        prec1_c2, prec5_c2 = accuracy(outputs2.data, targets.data, topk=(1, 5))
        prec1_c3, prec5_c3 = accuracy(outputs3.data, targets.data, topk=(1, 5))

        loss = loss_cross + loss_kl

        # record losses/precisions; .item() replaces the pre-0.4
        # loss.data[0]/prec[0] API, which raises on modern PyTorch
        losses_kl.update(loss_kl.item(), inputs.size(0))
        losses.update(loss.item(), inputs.size(0))
        top1_c1.update(prec1_c1.item(), inputs.size(0))
        top5_c1.update(prec5_c1.item(), inputs.size(0))
        top1_c2.update(prec1_c2.item(), inputs.size(0))
        top5_c2.update(prec5_c2.item(), inputs.size(0))
        top1_c3.update(prec1_c3.item(), inputs.size(0))
        top5_c3.update(prec5_c3.item(), inputs.size(0))
        top1_t.update(prec1_t.item(), inputs.size(0))
        top5_t.update(prec5_t.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        bar.suffix = '({batch}/{size}) || Loss: {loss:.4f} |LossKL: {losses_kl:.4f} | top1_C1: {top1_C1: .4f} | top1_C2: {top1_C2: .4f}|top1_C3: {top1_C3: .4f}| top1_t: {top1_t: .4f} '.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            loss=losses.avg,
            losses_kl=losses_kl.avg,
            top1_C1=top1_c1.avg,
            top1_C2=top1_c2.avg,
            top1_C3=top1_c3.avg,
            top1_t=top1_t.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1_c1.avg, top1_c2.avg, top1_c3.avg, top1_t.avg)