def validate(val_loader, model, criterion, device, is_test):
    """Evaluate `model` over `val_loader`; returns (avg loss, avg top-1, avg top-5)."""
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    prefix = "Test: " if is_test else "Validation: "
    progress = ProgressMeter(len(val_loader), [losses, top1, top5], prefix=prefix)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # Fix: move to the requested device unconditionally. The original
            # gated this on torch.cuda.is_available(), which silently skips
            # the transfer for non-CUDA devices (e.g. "mps") and is a no-op
            # guard for CPU anyway.
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0].item(), images.size(0))
            top5.update(acc5[0].item(), images.size(0))

            if i % 100 == 0:
                progress.display(i)

    return losses.avg, top1.avg, top5.avg
def train(train_loader, model, criteria, optimizer, device, batch_size):
    """Train a segmentation model for one epoch; returns a dict of metrics.

    Uses a 21-class Evaluator (PASCAL VOC-style) for pixel accuracy and mIoU.
    """
    model.train()
    evaluator = Evaluator(21)
    evaluator.reset()
    train_loss = AverageMeter("Loss", ":.4")
    progress = ProgressMeter(len(train_loader), train_loss)

    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)

        output = model(image)
        loss = criteria(output, mask)

        # per-pixel argmax over class logits for the confusion-matrix metrics
        predict = output.data.cpu().numpy()
        predict = np.argmax(predict, axis=1)
        target = mask.cpu().numpy()
        evaluator.add_batch(target, predict)
        train_loss.update(loss.item(), batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            progress.print(i)

    # Fix: the original called evaluator.add_batch(target, predict) a second
    # time here, double-counting the final batch in every evaluator metric.
    info = {"loss": train_loss.val,  # NOTE(review): .val is the last batch's loss; .avg may be intended
            "pixel acc": evaluator.Pixel_Accuracy(),
            "mean acc": evaluator.Pixel_Accuracy_Class(),
            "miou": evaluator.Mean_Intersection_over_Union()}
    return info
def train(train_loader, epoch, model, optimizer, criterion, device):
    """Run one training epoch, reporting running top-1 accuracy."""
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), top1,
                             prefix="Epoch: [{}]".format(epoch))

    model.train()  # enable dropout / batch-norm training behaviour

    for step, (batch, labels) in enumerate(train_loader):
        batch = batch.to(device)
        labels = labels.to(device)

        # forward pass
        logits = model(batch)
        loss = criterion(logits, labels)

        # record top-1 accuracy for this batch
        acc1 = accuracy(logits, labels, topk=(1, ))
        top1.update(acc1[0].item(), batch.size(0))

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % PRINTFREQ == 0:
            progress.print(step)

    print('=> Acc@1 {top1.avg:.3f}'.format(top1=top1))
def train(train_loader, epoch, model, optimizer, criterion):
    """Train for one epoch with optional CutMix augmentation.

    Relies on module-level globals: `args` (beta, cutmix_prob, print_freq)
    and helpers `rand_bbox`/`accuracy`. Returns the epoch's average top-1.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, top5, prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda()
        target = target.cuda()

        r = np.random.rand(1)
        if args.beta > 0 and r < args.cutmix_prob:
            # generate mixed sample: paste a random patch from a shuffled
            # copy of the batch into every image (CutMix)
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(input.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
            # compute output: loss is the lambda-weighted mix of both label sets
            output = model(input)
            loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)
        else:
            # compute output
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss, accuracy
        # (accuracy is measured against the original labels even for mixed samples)
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0].item(), input.size(0))
        top5.update(acc5[0].item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)

    print('=> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, args, cluster_result=None):
    """One epoch of PCL-style contrastive training.

    Combines an InfoNCE loss on instance pairs with an optional ProtoNCE loss
    averaged over the prototype clusterings carried by `cluster_result`.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    acc_inst = AverageMeter('Acc@Inst', ':6.2f')
    acc_proto = AverageMeter('Acc@Proto', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, acc_inst, acc_proto],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, index) in tqdm(enumerate(train_loader)):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            # images holds two augmented views of the same batch
            images[0] = images[0].cuda(args.gpu, non_blocking=True)
            images[1] = images[1].cuda(args.gpu, non_blocking=True)

        # compute output
        output, target, output_proto, target_proto = model(im_q=images[0], im_k=images[1], cluster_result=cluster_result, index=index)

        # InfoNCE loss
        loss = criterion(output, target)

        # ProtoNCE loss
        if output_proto is not None:
            loss_proto = 0
            for proto_out, proto_target in zip(output_proto, target_proto):
                loss_proto += criterion(proto_out, proto_target)
                accp = accuracy(proto_out, proto_target)[0]
                # NOTE(review): accp[0] is stored without .item(), unlike the
                # loss meter below — confirm AverageMeter tolerates tensors
                acc_proto.update(accp[0], images[0].size(0))

            # average loss across all sets of prototypes
            loss_proto /= len(args.num_cluster)
            loss += loss_proto

        losses.update(loss.item(), images[0].size(0))
        acc = accuracy(output, target)[0]
        acc_inst.update(acc[0], images[0].size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
def train_kd(train_loader, teacher, model, criterion, optimizer, epoch, args):
    """Knowledge-distillation training epoch.

    The criterion receives the student output, the (no-grad) teacher output
    and the hard labels; accuracy is measured against the hard labels.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target, idx) in enumerate(train_loader):
        # grid_img = torchvision.utils.make_grid(images)
        # imshow(grid_img)
        # time.sleep(100)
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        # teacher only provides soft targets; no gradients needed
        with torch.no_grad():
            o_teacher = teacher(images)
        #o_teacher = gaussian_noise(o_teacher, mean=0, stddev=0.5, alpha=0.4)
        # 0.1, 0.4 76.640
        # 0.3, 0.4 76.630
        # 0.5, 0.4 76.632
        # o_teacher = torch.from_numpy(o_teacher_label_train[idx]).cuda()
        loss = criterion(output, o_teacher, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
def train(train_loader, model, criterion, optimizer, epoch, args, lr_scheduler):
    """Training epoch that steps `lr_scheduler` once per iteration (not per epoch)."""
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), [
        batch_time,
        data_time,
        losses,
        top1,
        top5,
    ], prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # per-iteration LR schedule
        lr_scheduler.step()
        if i % args.print_freq == 0:
            progress.display(i)
        if i % 1000 == 0:
            # NOTE(review): get_lr() is deprecated in newer torch in favour of
            # get_last_lr() — confirm the pinned torch version
            print('cur lr: ', lr_scheduler.get_lr()[0])
def train(args, epoch, loader, model, optimizer, writer):
    """One epoch of PixPro / PixContrast self-supervised training.

    Each view of the batch is fed once as the base branch and once as the
    momentum branch; the two directional losses are combined per args.loss.

    Raises:
        ValueError: if args.loss is neither 'pixpro' nor 'pixcontrast'.
    """
    model.train()

    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    lr = AverageMeter('Lr', ':.3f')
    progress = ProgressMeter(
        len(loader),
        [lr, batch_time, losses],
        prefix='Epoch: [{}]'.format(epoch))

    end = time.time()
    for _iter, (images, targets) in enumerate(loader):
        images[0], images[1] = images[0].cuda(args.gpu, non_blocking=True), images[1].cuda(args.gpu, non_blocking=True)

        # swap the image
        yi, xj_moment = model(images[0], images[1])
        yj, xi_moment = model(images[1], images[0])

        if args.loss == 'pixpro':
            base_A_matrix, moment_A_matrix = targets[0].cuda(args.gpu), targets[1].cuda(args.gpu)
            pixpro_loss = PixproLoss(args)
            overall_loss = pixpro_loss(yi, xj_moment, base_A_matrix) + pixpro_loss(yj, xi_moment, moment_A_matrix)
        elif args.loss == 'pixcontrast':
            base_A_matrix, moment_A_matrix = targets[0][0].cuda(args.gpu), targets[0][1].cuda(args.gpu)
            base_inter_mask, moment_inter_mask = targets[1][0].cuda(args.gpu), targets[1][1].cuda(args.gpu)
            pixcontrast_loss = PixContrastLoss(args)
            overall_loss = (pixcontrast_loss(yi, xj_moment, base_A_matrix, base_inter_mask) +
                            pixcontrast_loss(yj, xi_moment, moment_A_matrix, moment_inter_mask)) / 2
        else:
            # Fix: the original constructed this ValueError without raising it,
            # then fell through to a NameError on base_A_matrix below.
            raise ValueError('HAVE TO SELECT PROPER LOSS TYPE')

        # if there is no intersection, skip the update
        if torch.max(base_A_matrix) < 1 and torch.max(moment_A_matrix) < 1:
            continue

        losses.update(overall_loss.item(), images[0].size(0))

        # record the current learning rate (last param group wins, as before)
        for param_group in optimizer.param_groups:
            cur_lr = param_group['lr']
        lr.update(cur_lr)

        optimizer.zero_grad()
        overall_loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if (_iter % args.print_freq == 0) and (args.gpu == 0):
            progress.display(_iter)
            writer.add_scalar('Loss', overall_loss.item(), (epoch * len(loader)) + _iter)
            writer.add_scalar('lr', cur_lr, (epoch * len(loader)) + _iter)
def validate_identification(cfg, model, test_loader, criterion):
    """Identification evaluation; returns average top-1 accuracy.

    Fix: removed the leftover "DEBUG 1" print statements that dumped the full
    target/output tensors to stdout on every batch.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader), batch_time, losses, top1, top5,
                             prefix='Test: ', logger=logger)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(test_loader):
            input = input.cuda(non_blocking=True)  # .squeeze(0)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            # MODIFYING EVALUATION STAGE: WHY AVG THIS THING?
            # output = torch.mean(output, dim=0, keepdim=True)
            # output = model.forward_classifier(output)

            acc1 = accuracy(output, target, topk=(1, ))
            top1.update(acc1[0], input.size(0))
            # top5.update(acc5[0], input.size(0))
            loss = criterion(output, target)
            losses.update(loss.item(), 1)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % 2000 == 0:
                progress.print(i)

    # This gives formatting problems. Just printing the top1 object is fine
    # logger.info('Test Acc@1: {:.8f} Acc@5: {:.8f}'.format(top1.avg, top5.avg))
    print(top1)
    return top1.avg
def train_prune(train_loader, model, criterion, optimizer, epoch, zero_weight, zero_grad, args):
    """Train one epoch while keeping pruned weights at zero.

    `zero_weight` re-zeroes pruned weights before each forward pass and
    `zero_grad` zeroes their gradients before the optimizer step, so pruned
    connections never revive.

    NOTE(review): `scheduler` is constructed but `.step()` is never called,
    so the cosine schedule has no effect — confirm whether a per-iteration
    step was intended.
    """
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target, idx) in enumerate(train_loader):
        # re-zero pruned weights before the forward pass
        model.apply(zero_weight)
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        # zero the gradients of pruned weights so the step cannot revive them
        model.apply(zero_grad)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
def train_epoch(train_loader, model, criterion, optimizer, epoch, args):
    """Train the model on one epoch of data.

    :param train_loader: training set loader
    :param model: model to train
    :param criterion: loss function (also provides .accuracy())
    :param optimizer: optimizer
    :param epoch: current epoch index
    :param args: training hyper-parameters
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    model.train()

    end_time = time.time()
    for i, (images, scores, _) in enumerate(train_loader):
        # update the data-loading time meter
        data_time.update(time.time() - end_time)

        if args.cuda:
            images = images.cuda(args.gpu, non_blocking=True)
            scores = scores.cuda(args.gpu, non_blocking=True)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, scores)

        # compute gradients and update
        optimizer.zero_grad()
        if args.cuda:
            # NOTE(review): `amp` here looks like NVIDIA apex mixed precision —
            # confirm the module-level import
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # update metrics; the rank criterion consumes image pairs, so the
        # effective batch size is halved
        acc1, _, _ = criterion.accuracy(outputs, scores)
        batch_size = images.size(0)/2 if args.criterion == 'rank' else images.size(0)
        losses.update(loss.detach().cpu().item(), batch_size)
        top1.update(acc1.item(), batch_size)

        # update the per-batch training-time meter
        batch_time.update(time.time() - end_time)
        end_time = time.time()

        if i % args.print_freq == 0:
            progress.print(i)

    return losses.avg, top1.avg
class Replicator(Thread):
    """ Thread responsible for sending compressed batches to each replica in
    synchronized new threads.

    Jobs are queued with `add_job`; `join` enqueues a stop sentinel so the
    run loop drains before the thread is joined.
    """
    def __init__(self, replicas, payid, total_batches):
        super().__init__(name="Replicator")
        self.progress = ProgressMeter(total_batches, "Replicator")
        self.replicas = replicas
        self.payid = payid
        self.__queue = Queue() #maybe cache

    # Fix: was a bare function in the class body; without @staticmethod an
    # access through an instance would mis-bind the first argument as `self`.
    @staticmethod
    def send_handler(payid, msock, shape, compressed):
        """ Send to a replicas the current chunk
        Args:
            payid (Payload.Id) : data network id
            msock (network.Socket): socket to send data.
            shape (tuple): batch's number of lines and columns
            compressed (bytes): compressed batch data
        """
        msock.send(Payload(payid, (shape, compressed)))

    def run(self):
        i = 0
        threads = [None for _ in self.replicas]
        while True:
            should_stop, shape, compressed = self.__queue.get()
            if should_stop:
                break
            print(f"t={i} sending {shape[0]} points")
            #shuffles replicas to reduce same interface distribution problem
            for j, replica in enumerate(outplace_shuffle(self.replicas)):
                thread = Thread(name=f"Replicator.send_handler-{i,j}",
                                target=Replicator.send_handler,
                                args=(self.payid, replica, shape, compressed))
                threads[j] = thread
                thread.start()
            # wait for every per-replica send before taking the next job
            for thread in threads:
                thread.join()
            self.progress.update(1)
            i += 1

    def add_job(self, shape, compressed):
        """Queue one (shape, compressed) batch for replication."""
        self.__queue.put((False, shape, compressed))

    def join(self):
        """Signal the run loop to stop, then join the thread."""
        self.__queue.put((True, None, None))
        super().join()
def validate_verification(cfg, model, test_loader):
    """Dump averaged utterance embeddings to .npy files (no verification scoring).

    NOTE(review): saves into the relative directory "embs_"; keeps only the
    first 8 segments per utterance; the second argument to model(input1, 1)
    is opaque from here — confirm against the model's forward signature.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    progress = ProgressMeter(len(test_loader), batch_time, prefix='Test: ',
                             logger=logger)

    # switch to evaluate mode
    model.eval()

    # kept from the original pairwise version; unused in this variant
    labels, distances = [], []
    output_dir = "embs_"
    with torch.no_grad():
        end = time.time()
        for i, (input1, path1) in enumerate(test_loader):
            input1 = input1.cuda(non_blocking=True).squeeze(0)
            input1 = input1[:8]

            # compute output: average segment embeddings into one utterance vector
            outputs1 = model(input1, 1).mean(dim=0).unsqueeze(0)
            # outputs2 = model(input2).mean(dim=0).unsqueeze(0)
            fn = os.path.basename(path1[0])
            np.save(f"{output_dir}/{fn}", outputs1.detach().cpu().numpy())
            if i % 1000 == 0:
                print(i)
def set_log(self, epoch: int, num_batchs: int) -> Tuple[Dict[str, AverageMeter], ProgressMeter]:
    """Extend the parent meters with a KL-dropout loss meter and rebuild the progress meter."""
    meters, _ = super().set_log(epoch, num_batchs)
    meters['kl_dropout_losses'] = AverageMeter('KL_Dropout_Loss', ':.4f')
    return meters, ProgressMeter(num_batchs, meters=meters.values(),
                                 prefix=f'Epoch[{epoch}] Batch')
def validate(val_loader, model, criterion):
    """Validate; returns average top-1 accuracy.

    NOTE(review): reads `args.print_freq` from a module-level global rather
    than a parameter, unlike the sibling validate() functions — confirm that
    `args` exists at call time. `total_loss` is averaged but never returned.
    """
    batch_time = AverageMeter("Time", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix="Test: ")

    # switch to evaluate mode
    model.eval()

    total_loss = 0.0
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda()
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            total_loss += loss.item()
            if i % args.print_freq == 0:
                progress.print(i)
            end = time.time()

        print("====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(
            top1=top1, top5=top5))
    total_loss = total_loss / len(val_loader)
    return top1.avg
def set_log(self, epoch, num_batchs):
    """Extend the parent meters with KL / generator / discriminator loss meters."""
    meters, _ = super().set_log(epoch, num_batchs)
    for key, title in (('kl_losses', 'KL_Loss'),
                       ('G_losses', 'G_Loss'),
                       ('D_losses', 'D_Loss')):
        meters[key] = AverageMeter(title, ':.4f')
    progress = ProgressMeter(num_batchs, meters=meters.values(),
                             prefix=f'Epoch[{epoch}] Batch')
    return meters, progress
def train(train_loader, model, criterion, optimizer, epoch, device, print_freq):
    """Train for one epoch on `device`, printing progress every `print_freq` batches."""
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    # Fix: data_time was updated every batch but missing from the displayed meters.
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss (store plain floats, matching the
        # other meters in this file)
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0].item(), images.size(0))
        top5.update(acc5[0].item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            progress.display(i)
def validate_identification(cfg, model, test_loader, criterion):
    """Identification evaluation: average segment embeddings per utterance,
    classify the mean embedding, and report Acc@1 / Acc@5."""
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader), batch_time, losses, top1, top5,
                             prefix='Test: ', logger=logger)

    model.eval()  # evaluation mode: no dropout, frozen batch-norm stats

    with torch.no_grad():
        end = time.time()
        for step, (utterance, label) in enumerate(test_loader):
            utterance = utterance.cuda(non_blocking=True).squeeze(0)
            label = label.cuda(non_blocking=True)

            # embed each segment, then collapse to one utterance-level vector
            embedding = model(utterance)
            embedding = torch.mean(embedding, dim=0, keepdim=True)
            logits = model.forward_classifier(embedding)

            acc1, acc5 = accuracy(logits, label, topk=(1, 5))
            top1.update(acc1[0], utterance.size(0))
            top5.update(acc5[0], utterance.size(0))
            loss = criterion(logits, label)
            losses.update(loss.item(), 1)

            # timing
            batch_time.update(time.time() - end)
            end = time.time()
            if step % 2000 == 0:
                progress.print(step)

    logger.info('Test Acc@1: {:.8f} Acc@5: {:.8f}'.format(
        top1.avg, top5.avg))
    return top1.avg
def validate(val_loader, model, criterion, args):
    """ImageNet-style validation pass; returns the average top-1 accuracy."""
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    model.eval()  # evaluation mode

    with torch.no_grad():
        end = time.time()
        for step, (batch, labels) in enumerate(val_loader):
            if args.gpu is not None:
                batch = batch.cuda(args.gpu, non_blocking=True)
            if torch.cuda.is_available():
                labels = labels.cuda(args.gpu, non_blocking=True)

            # forward pass and loss
            logits = model(batch)
            loss = criterion(logits, labels)

            # record loss and top-1/top-5 accuracy
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
            losses.update(loss.item(), batch.size(0))
            top1.update(acc1[0], batch.size(0))
            top5.update(acc5[0], batch.size(0))

            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            if step % args.print_freq == 0:
                progress.display(step)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, cfg, logger):
    """Train one epoch, recording all metrics through the shared `logger` meters."""
    curr_lr = optimizer.param_groups[0]["lr"]
    progress = ProgressMeter(
        len(train_loader),
        [logger.time, logger.loss, logger.acc1, logger.acc5],
        prefix="Epoch: [{}/{}]\t"
               "LR: {}\t".format(epoch, cfg.epochs, curr_lr),
    )

    model.train()  # training mode

    end = time.time()
    for step, (batch, labels) in enumerate(train_loader):
        if cfg.gpu is not None:
            batch = batch.cuda(cfg.gpu, non_blocking=True)
            labels = labels.cuda(cfg.gpu, non_blocking=True)

        # forward pass
        logits = model(batch)
        loss = criterion(logits, labels)

        # accuracy for logging
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))

        # backward pass + SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # record timing and metrics, then persist via the logger
        logger.time.update(time.time() - end)
        logger.loss.update(loss.item(), batch.size(0))
        logger.acc1.update(acc1[0].item(), batch.size(0))
        logger.acc5.update(acc5[0].item(), batch.size(0))
        logger.save(batch=step, epoch=epoch)
        end = time.time()

        if step % cfg.print_freq == 0:
            progress.display(step)
def validate_verification(cfg, model, test_loader):
    """Verification evaluation: cosine-score utterance pairs and report EER."""
    batch_time = AverageMeter('Time', ':6.3f')
    progress = ProgressMeter(len(test_loader), batch_time, prefix='Test: ',
                             logger=logger)

    model.eval()  # evaluation mode

    labels, distances = [], []
    with torch.no_grad():
        end = time.time()
        for step, (utt_a, utt_b, label) in enumerate(test_loader):
            utt_a = utt_a.cuda(non_blocking=True).squeeze(0)
            utt_b = utt_b.cuda(non_blocking=True).squeeze(0)
            label = label.cuda(non_blocking=True)

            # average segment embeddings into one vector per utterance
            emb_a = model(utt_a).mean(dim=0).unsqueeze(0)
            emb_b = model(utt_b).mean(dim=0).unsqueeze(0)

            # cosine similarity is the verification score
            sims = F.cosine_similarity(emb_a, emb_b)
            distances.append(sims.data.cpu().numpy())
            labels.append(label.data.cpu().numpy())

            # timing
            batch_time.update(time.time() - end)
            end = time.time()
            if step % 2000 == 0:
                progress.print(step)

    # flatten per-batch arrays into single 1-D arrays
    labels = np.array([item for batch in labels for item in batch])
    distances = np.array([item for batch in distances for item in batch])

    eer = compute_eer(distances, labels)
    logger.info('Test EER: {:.8f}'.format(np.mean(eer)))
    return eer
def set_log(self, epoch: int, num_batchs: int) -> Tuple[Dict[str, AverageMeter], ProgressMeter]:
    """Create the base meters (batch time, data time, loss) and a progress meter."""
    meters = {
        'batch_time': AverageMeter('Time', ':.3f'),
        'data_time': AverageMeter('Data', ':.3f'),
        'losses': AverageMeter('Loss', ':.4f'),
    }
    progress = ProgressMeter(num_batchs, meters.values(),
                             prefix=f'Epoch[{epoch}] Batch')
    return meters, progress
def validate(val_loader, model, criterion, args):
    """Validate, optionally Gaussian-blurring inputs first.

    Returns (avg loss, avg top-1, avg top-5).
    """
    batch_time = AverageMeter("Time", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix="Test: ")

    model.eval()  # evaluation mode

    with torch.no_grad():
        end = time.time()
        for step, (batch, labels) in enumerate(val_loader):
            # optionally blur validation images
            if args.blur_val:
                batch = GaussianBlurAll(batch, args.sigma)
            if torch.cuda.is_available():
                batch = batch.cuda(args.gpu, non_blocking=True)
                labels = labels.cuda(args.gpu, non_blocking=True)

            # forward pass and loss
            logits = model(batch)
            loss = criterion(logits, labels)

            # record loss and accuracies
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
            losses.update(loss.item(), batch.size(0))
            top1.update(acc1[0], batch.size(0))
            top5.update(acc5[0], batch.size(0))

            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            if step % args.print_freq == 0:
                progress.display(step)

    print(" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1,
                                                                top5=top5))
    return losses.avg, top1.avg, top5.avg
def validate(val_loader, model, criterion, cfg, epoch, logger):
    """Validate, logging through the shared `logger` meters; returns avg top-1."""
    progress = ProgressMeter(
        len(val_loader),
        [logger.time, logger.loss, logger.acc1, logger.acc5],
        prefix="Test: ",
    )

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if cfg.gpu is not None:
                images = images.cuda(cfg.gpu, non_blocking=True)
                target = target.cuda(cfg.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # Fix: the original called logger.time.update(time.time() - end)
            # twice per batch, double-counting every batch's elapsed time.
            logger.time.update(time.time() - end)
            logger.loss.update(loss.item(), images.size(0))
            logger.acc1.update(acc1[0].item(), images.size(0))
            logger.acc5.update(acc5[0].item(), images.size(0))
            end = time.time()

            if i % cfg.print_freq == 0:
                progress.display(i)
                logger.save(batch=i, epoch=epoch)

    # TODO: this should also be done with the ProgressMeter
    print(" * Acc@1 {acc1:.3f} Acc@5 {acc5:.3f}".format(
        acc1=logger.acc1.avg, acc5=logger.acc5.avg))
    return logger.acc1.avg
def run_epoch(epoch_index, dataloader, image_encoder, image_decoder, embedding_block, criterion, optimizer, device, train):
    """Run one train or eval epoch, delegating the forward/backward work to
    the external train_step/val_step helpers (which update the meters in place).

    The image encoder stays frozen in eval mode in both cases; only the
    decoder and embedding block toggle between train and eval.
    Returns the ProgressMeter holding the accumulated metrics.
    """
    if train:
        image_encoder.eval()
        image_decoder.train()
        embedding_block.train()
    else:
        image_encoder.eval()
        image_decoder.eval()
        embedding_block.eval()

    # set up metrics
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(dataloader), [batch_time, data_time, losses, top1],
                             prefix="Epoch: [{}]".format(epoch_index))

    end = time.time()
    for i, batch in enumerate(dataloader):
        data_time.update(time.time() - end)
        image_batch, word_ids_batch = batch[0].to(device), batch[1].to(device)

        if train:
            # helpers mutate the meters (top1, losses) as a side effect
            train_step(image_batch, word_ids_batch, image_encoder, image_decoder,
                       embedding_block, criterion, top1, losses, optimizer, device)
        else:
            val_step(image_batch, word_ids_batch, image_encoder, image_decoder,
                     embedding_block, criterion, top1, losses, device)

        # logs: overwrite a single progress line in place
        batch_time.update(time.time() - end)
        end = time.time()
        sys.stdout.write('\r' + progress.to_string(i))
        sys.stdout.flush()

    return progress
def validate_kd(val_loader, teacher, model, criterion, args):
    """Validate a distilled student; the KD criterion sees the student output,
    the teacher output, and the hard labels."""
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    model.eval()

    with torch.no_grad():
        end = time.time()
        for step, (batch, labels, idx) in enumerate(val_loader):
            if args.gpu is not None:
                batch = batch.cuda(args.gpu, non_blocking=True)
                labels = labels.cuda(args.gpu, non_blocking=True)

            # student and teacher forward passes
            student_out = model(batch)
            teacher_out = teacher(batch)
            #o_teacher = torch.from_numpy(o_teacher_label_val[idx]).cuda()
            loss = criterion(student_out, teacher_out, labels)

            # record loss and accuracy against the hard labels
            acc1, acc5 = accuracy(student_out, labels, topk=(1, 5))
            losses.update(loss.item(), batch.size(0))
            top1.update(acc1[0], batch.size(0))
            top5.update(acc5[0], batch.size(0))

            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            if step % args.print_freq == 0:
                progress.display(step)

    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                top5=top5))
    return top1.avg
def train(train_queue, model, criterion, optimizer, epoch):
    """One DARTS-style training epoch; returns (avg top-1 accuracy, avg loss).

    Relies on module-level `args` (auxiliary, auxiliary_weight, grad_clip,
    report_freq, batch_size). CUDA synchronizations bracket the timers so
    the reported data/batch times reflect real device time.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    batch_time = AverageMeterTime('Time', ':6.3f')
    data_time = AverageMeterTime('Data', ':6.3f')
    progress = ProgressMeter(len(train_queue), batch_time, data_time,
                             prefix="Epoch: [{}]".format(epoch))
    model.train()

    torch.cuda.synchronize()
    end = time.time()
    for step, (input, target) in enumerate(train_queue):
        torch.cuda.synchronize()
        data_time.update(time.time() - end)
        end = time.time()

        target = target.cuda()
        #target = target.cuda(async=True)
        input = input.cuda()
        input = Variable(input)
        target = Variable(target)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            # auxiliary head regularizer, weighted into the main loss
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight*loss_aux
        loss.backward()
        # Fix: clip_grad_norm is deprecated (removed in recent torch);
        # clip_grad_norm_ is the in-place replacement with identical behavior.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        #objs.update(loss.data[0], n)
        #top1.update(prec1.data[0], n)
        #top5.update(prec5.data[0], n)

        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f data_time:%.3f/%.3f batch_time:%.3f/%.3f speed:%.3f/%.3f',
                         step, objs.avg, top1.avg, top5.avg,
                         data_time.val, data_time.avg, batch_time.val, batch_time.avg,
                         args.batch_size/(data_time.val+batch_time.val),
                         args.batch_size/(data_time.avg+batch_time.avg))

    return top1.avg, objs.avg
def validate_identification(cfg, model, test_loader, criterion):
    """Dump per-utterance averaged embeddings to .npy files.

    The original identification metrics are commented out, so this variant
    performs no evaluation.
    NOTE(review): returns top1.avg although top1 is never updated (the meter
    keeps its initial value); the output directory is a hard-coded absolute
    path — confirm both are intentional.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader), batch_time, losses, top1, top5,
                             prefix='Test: ', logger=logger)

    # switch to evaluate mode
    model.eval()

    output_dir = "/mnt/sda1/data/zalo/Train-Test-Data/public-test/embs"
    with torch.no_grad():
        end = time.time()
        for i, (input, target, feature_path) in enumerate(test_loader):
            input = input.cuda(non_blocking=True).squeeze(0)  # [5, 300, 257]
            target = target.cuda(non_blocking=True)

            # compute output: average segment embeddings into one vector
            output = model(input)  # [5, 2048]
            output = torch.mean(output, dim=0, keepdim=True)  # [1, 2048]
            np.save(f"{output_dir}/{feature_path[0]}", output.detach().cpu().numpy())
            # output = model.forward_classifier(output)
            # acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # top1.update(acc1[0], input.size(0))
            # top5.update(acc5[0], input.size(0))
            # loss = criterion(output, target)
            # losses.update(loss.item(), 1)

            # # measure elapsed time
            # batch_time.update(time.time() - end)
            # end = time.time()
            # if i % 2000 == 0:
            #     progress.print(i)

    # logger.info('Test Acc@1: {:.8f} Acc@5: {:.8f}'.format(top1.avg, top5.avg))
    return top1.avg
def train_epoch(self):
    """Train the classifier for a single epoch over self.train_loader."""
    self.model.train()
    self.epoch += 1

    # running statistics for this epoch
    avg_meters = {
        'loss': AverageMeter('Loss', ':.4e'),
        'acc': AverageMeter('Acc', ':6.2f'),
        'time': AverageMeter('Time', ':6.3f'),
    }
    progress_meter = ProgressMeter(
        len(self.train_loader),
        avg_meters.values(),
        prefix="Epoch: [{}]".format(self.epoch)
    )

    # iterate over minibatches
    for ix, data in enumerate(self.train_loader):
        tic = time.time()

        input_ids, attention_mask, labels = (t.to(args.device) for t in data)

        logits = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        loss = self.criterion(logits, labels)
        acc = (logits.argmax(axis=1) == labels).float().mean().item()

        # optimisation step with gradient clipping
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(
            self.model.parameters(), args.max_grad_norm
        )
        self.optimizer.step()
        self.scheduler.step()

        avg_meters['loss'].update(loss.item(), input_ids.size(0))
        avg_meters['acc'].update(acc * 100, input_ids.size(0))
        avg_meters['time'].update(time.time() - tic)

        # log progress
        if (ix + 1) % args.log_interval == 0:
            progress_meter.display(ix + 1)

    progress_meter.display(len(self.train_loader))
def train(
    train_loader,
    models,
    optimizers,
    criterion,
    epoch,
    device,
    method_name,
    **kwargs,
):
    """Co-train several models with a noisy-label-robust loss.

    Depending on `method_name`, per-model losses and the indices of the
    samples actually used come from one of the loss_* helpers (co-teaching,
    co-teaching+, JoCoR, decoupling, forward correction, or the default).

    Returns:
        (loss_avgs, top1_avgs, top5_avgs, inds_updates): per-model epoch
        averages plus the accumulated selected-sample indices per model.
    """
    loss_meters = []
    top1_meters = []
    top5_meters = []
    inds_updates = [[] for _ in range(len(models))]
    show_logs = []
    for i in range(len(models)):
        loss_meter = AverageMeter(f"Loss{i}", ":.4e")
        top1_meter = AverageMeter(f"Acc{i}@1", ":6.2f")
        top5_meter = AverageMeter(f"Acc{i}@5", ":6.2f")
        loss_meters.append(loss_meter)
        top1_meters.append(top1_meter)
        top5_meters.append(top5_meter)
        show_logs += [loss_meter, top1_meter, top5_meter]
    progress = ProgressMeter(
        len(train_loader),
        show_logs,
        prefix="Epoch: [{}]".format(epoch),
    )

    # switch to train mode
    for i in range(len(models)):
        models[i].train()

    for i, (images, target, indexes) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = images.to(device)
            target = target.to(device)

        outputs = []
        for m in range(len(models)):
            output = models[m](images)
            outputs.append(output)

        # calculate loss and selected index
        if method_name in ["ours", "ftl", "greedy", "precision"]:
            ind = indexes.cpu().numpy()
            losses, ind_updates = loss_general(outputs, target, criterion)
        elif method_name == "f-correction":
            losses, ind_updates = loss_forward(outputs, target, kwargs["P"])
        elif method_name == "decouple":
            losses, ind_updates = loss_decouple(outputs, target, criterion)
        elif method_name == "co-teaching":
            losses, ind_updates = loss_coteaching(
                outputs, target, kwargs["rate_schedule"][epoch]
            )
        elif method_name == "co-teaching+":
            ind = indexes.cpu().numpy().transpose()
            if epoch < kwargs["init_epoch"]:
                # warm-up phase uses plain co-teaching
                losses, ind_updates = loss_coteaching(
                    outputs, target, kwargs["rate_schedule"][epoch]
                )
            else:
                losses, ind_updates = loss_coteaching_plus(
                    outputs,
                    target,
                    kwargs["rate_schedule"][epoch],
                    ind,
                    epoch * i,
                )
        elif method_name == "jocor":
            losses, ind_updates = loss_jocor(
                outputs, target, kwargs["rate_schedule"][epoch], kwargs["co_lambda"]
            )
        else:
            losses, ind_updates = loss_general(outputs, target, criterion)

        # skip batches whose losses are missing or non-finite
        if None in losses or any(~torch.isfinite(torch.tensor(losses))):
            continue

        # compute gradient and do BP
        for loss, optimizer in zip(losses, optimizers):
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure accuracy and record loss
        for m in range(len(models)):
            acc1, acc5 = accuracy(outputs[m], target, topk=(1, 5))
            top1_meters[m].update(acc1[0].item(), images.size(0))
            top5_meters[m].update(acc5[0].item(), images.size(0))
            if len(ind_updates[m]) > 0:
                # weight the loss by the number of samples actually selected
                loss_meters[m].update(losses[m].item(), len(ind_updates[m]))
                inds_updates[m] += indexes[ind_updates[m]].numpy().tolist()
            else:
                loss_meters[m].update(losses[m].item(), images.size(0))

        if i % 100 == 0:
            progress.display(i)

    loss_avgs = [loss_meter.avg for loss_meter in loss_meters]
    top1_avgs = [top1_meter.avg for top1_meter in top1_meters]
    # Fix: the original comprehension iterated top1_meters here, so the
    # reported "top-5" averages were silently the top-1 averages.
    top5_avgs = [top5_meter.avg for top5_meter in top5_meters]
    return loss_avgs, top1_avgs, top5_avgs, inds_updates