def train_net(train_data_layer, net, epoch, args):
    net.train()
    losses = AverageMeter()
    time1 = time.time()
    epoch_num = train_data_layer._num_instance / train_data_layer._batch_size
    for step in tqdm(range(int(epoch_num))):
        # forward() needs to be called; unpacking the bound method is a TypeError
        image_blob, boxes, rel_boxes, SpatialFea, classes, ix1, ix2, \
            class_embed, rel_labels, rel_so_prior = train_data_layer.forward()
        target = torch.from_numpy(rel_labels).long().cuda()
        rel_so_prior = -0.5 * (rel_so_prior + 1.0 / args.num_relations)
        rel_so_prior = torch.from_numpy(rel_so_prior).float().cuda()

        # forward
        args.optimizer.zero_grad()
        obj_score, rel_score = net(image_blob, boxes, rel_boxes, SpatialFea,
                                   classes, ix1, ix2, class_embed, args)
        loss = args.criterion((rel_so_prior + rel_score).view(1, -1), target)
        losses.update(loss.item())
        loss.backward()
        args.optimizer.step()

        if step % args.print_freq == 0:
            time2 = time.time()
            print("TRAIN:%d, Total LOSS:%f, Time:%s" %
                  (step, losses.avg,
                   time.strftime('%H:%M:%S', time.gmtime(int(time2 - time1)))))
            time1 = time.time()
            losses.reset()

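# Every function in this file leans on an AverageMeter utility defined
# elsewhere in the repo. A minimal sketch mirroring the standard PyTorch
# ImageNet-example meter, covering both constructor styles used here (bare
# AverageMeter() and AverageMeter('Loss', ':.4e')); note that train_epoch
# further below calls a three-argument update() and reads a batch_avg
# attribute, i.e. it appears to use a different meter variant that this
# sketch does not model.
class AverageMeter(object):
    """Computes and stores the average and current value."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # `val` is the latest reading, `n` the number of samples it covers
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
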
def NN(epoch, net, lemniscate, trainloader, testloader, recompute_memory=0):
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    losses = AverageMeter()
    correct = 0.
    total = 0
    testsize = testloader.dataset.__len__()

    trainFeatures = lemniscate.memory.t()
    if hasattr(trainloader.dataset, 'imgs'):
        trainLabels = torch.LongTensor(
            [y for (p, y) in trainloader.dataset.imgs]).cuda()
    else:
        trainLabels = torch.LongTensor(trainloader.dataset.train_labels).cuda()

    if recompute_memory:
        transform_bak = trainloader.dataset.transform
        trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset,
                                                 batch_size=100,
                                                 shuffle=False, num_workers=1)
        with torch.no_grad():  # replaces the deprecated volatile=True Variables
            for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
                inputs, targets = inputs.cuda(), targets.cuda()
                batchSize = inputs.size(0)
                features = net(inputs)
                trainFeatures[:, batch_idx * batchSize:
                              batch_idx * batchSize + batchSize] = features.data.t()
        trainLabels = torch.LongTensor(temploader.dataset.train_labels).cuda()
        trainloader.dataset.transform = transform_bak

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
            inputs, targets = inputs.cuda(), targets.cuda()
            batchSize = inputs.size(0)
            features = net(inputs)
            net_time.update(time.time() - end)
            end = time.time()

            dist = torch.mm(features.data, trainFeatures)
            yd, yi = dist.topk(1, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            retrieval = retrieval.narrow(1, 0, 1).clone().view(-1)
            yd = yd.narrow(1, 0, 1)

            total += targets.size(0)
            correct += retrieval.eq(targets.data).cpu().sum().item()

            cls_time.update(time.time() - end)
            end = time.time()

            print('Test [{}/{}]\t'
                  'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
                  'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
                  'Top1: {:.2f}'.format(total, testsize,
                                        correct * 100. / total,
                                        net_time=net_time, cls_time=cls_time))

    return correct / total

def eval_loader(model, loader):
    # NOTE: relies on module-level `device`, `criterion`, and `l2loss`
    model.eval()
    loss_meter = AverageMeter()
    error_meter = AverageMeter()
    with torch.no_grad():
        for i, (images, labels) in enumerate(tqdm.tqdm(loader)):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            bs = labels.size(0)

            outputs = model(images)
            loss = criterion(outputs, labels) + l2loss.loss()
            loss_meter.update(loss.item(), bs)

            _, predicted = torch.max(outputs.data, 1)
            n_correct = (predicted == labels).sum().item()
            # track the error rate as a weighted average of 0s (correct) and
            # 1s (wrong), so error_meter.avg is the error fraction
            error_meter.update(0, n_correct)
            error_meter.update(1, bs - n_correct)
    model.train()
    return loss_meter.avg, error_meter.avg

def train(train_loader, model, lemniscate, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input, _, index) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        index = index.cuda(non_blocking=True)  # `async=True` no longer parses on Python 3.7+
        input_var = torch.autograd.Variable(input)
        index_var = torch.autograd.Variable(index)

        # compute output
        feature = model(input_var)
        output = lemniscate(feature, index_var)
        loss = criterion(output, index_var) / args.iter_size
        loss.backward()

        # measure accuracy and record loss
        losses.update(loss.item() * args.iter_size, input.size(0))  # loss.data[0] is deprecated

        if (i + 1) % args.iter_size == 0:
            # compute gradient and do SGD step
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # the .format() call must sit inside print(), not after it
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

def supervised_evaluation(model, val_loader):
    batch_time = AverageMeter()
    losses = AverageMeter()  # returned unchanged: no loss is computed here

    # switch to evaluate mode (the original called model.train() by mistake)
    model.eval()
    prediction_box = []
    target_box = []
    with torch.no_grad():
        end = time.time()
        for i, (images, target, index, name) in enumerate(val_loader):
            images = images.cuda()
            target = target.cuda()

            output = model(images)
            output = torch.softmax(output, dim=1)
            output = output.data.cpu().numpy()
            output = np.argmax(output, axis=1)
            prediction_box += list(output)
            target_box += list(target.data.cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        auc = roc_auc_score(target_box, prediction_box)
        corrects = np.equal(np.array(target_box), np.array(prediction_box))
        acc = float(sum(corrects)) / len(corrects)
        # macro averages give the mean over classes
        precision = precision_score(target_box, prediction_box, average='macro')
        recall = recall_score(target_box, prediction_box, average='macro')
        f1score = f1_score(target_box, prediction_box, average='macro')
    return losses.avg, round(auc, 4), round(acc, 4), round(precision, 4), \
        round(recall, 4), round(f1score, 4)

def test(features, targets, model, crit, epoch, text=''):
    losses = AverageMeter()
    model.eval()
    correct = 0
    wrong = 0

    with torch.no_grad():
        size = features.shape[0]
        for i, (input_tensor, target) in enumerate(zip(features, targets)):
            target = target.cuda()
            # Variable wrappers are unnecessary under torch.no_grad()
            input_var = input_tensor.cuda()
            target_var = target

            output = model(input_var)
            loss = crit(output, target_var)
            losses.update(loss.data.item(), input_tensor.shape[0])

            output_np = output.data.cpu().numpy()
            prediction = np.argmax(output_np, axis=1)
            target_np = target.cpu().numpy()
            correct += np.where(prediction == target_np)[0].shape[0]
            wrong += np.where(prediction != target_np)[0].shape[0]

        print('Epoch: [{0}][{1}/{2}]\t'
              'Loss: {loss.val:.4f} ({loss.avg:.4f})\t'
              '{3} Acc: {4}'.format(epoch, i, size, text,
                                    correct / (correct + wrong), loss=losses))
    return losses.avg

def train(args, model, criterion, optimizer, train_dataloader, epoch):
    torch.set_grad_enabled(True)
    model.train()

    losses = AverageMeter()
    accuracies = AverageMeter()
    for i, data in enumerate(train_dataloader, 1):
        # get inputs
        sampled_clips, u_clips, v_clips, targets, _ = data
        if args.modality == 'u':
            inputs = u_clips
        elif args.modality == 'v':
            inputs = v_clips
        else:  # rgb and res
            inputs = sampled_clips
        inputs = inputs.cuda()
        targets = targets.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward and backward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # compute loss and acc
        acc = calculate_accuracy(outputs, targets)
        losses.update(loss.data.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        # enumerate() already starts at 1, so print i rather than i + 1
        print('Train epoch: [{0:3d}/{1:3d}][{2:4d}/{3:4d}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'lr: {lr}'.format(epoch, args.epochs, i, len(train_dataloader),
                                loss=losses, acc=accuracies,
                                lr=optimizer.param_groups[0]['lr']),
              end='\r')
    print('')

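# The 'res' branch above (and the contrastive train() further below) calls a
# diff() helper that is not defined in this file. A minimal sketch of one
# plausible implementation, assuming clips are (batch, channels, time, height,
# width) tensors and that the residual modality is the frame-to-frame
# difference; the real helper may instead shift-and-subtract with padding to
# preserve the temporal length.
def diff(clips):
    # temporal residual: x[t+1] - x[t]; the output has one fewer frame
    return clips[:, :, 1:] - clips[:, :, :-1]
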
def train(epoch):
    print('\nEpoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    correct = 0
    total = 0

    # switch to train mode
    net.train()

    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(trainloader):
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets, indexes = inputs.cuda(), targets.cuda(), indexes.cuda()
        optimizer.zero_grad()

        inputs, targets, indexes = Variable(inputs), Variable(targets), Variable(indexes)

        features = net(inputs)
        outputs = lemniscate(features, indexes)
        loss = criterion(outputs, indexes)

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))  # loss.data[0] is deprecated

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print('Epoch: [{}][{}/{}]'
              'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
              'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
              'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                  epoch, batch_idx, len(trainloader), batch_time=batch_time,
                  data_time=data_time, train_loss=train_loss))

def validate(args, model, criterion, val_dataloader, epoch):
    torch.set_grad_enabled(False)
    model.eval()

    losses = AverageMeter()
    accuracies = AverageMeter()
    for i, data in enumerate(val_dataloader):
        # get inputs
        sampled_clips, u_clips, v_clips, targets, _ = data
        if args.modality == 'u':
            inputs = u_clips
        elif args.modality == 'v':
            inputs = v_clips
        else:  # rgb and res
            inputs = sampled_clips
        inputs = inputs.cuda()
        targets = targets.cuda()
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        loss = criterion(outputs, targets)
        # compute loss and acc
        acc = calculate_accuracy(outputs, targets)
        losses.update(loss.data.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        print('Val epoch: [{0:3d}/{1:3d}][{2:4d}/{3:4d}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'lr: {lr}'.format(epoch, args.epochs, i + 1, len(val_dataloader),
                                loss=losses, acc=accuracies,
                                # NOTE: relies on a module-level `optimizer`;
                                # it is not passed into this function
                                lr=optimizer.param_groups[0]['lr']),
              end='\r')
    print('')
    return losses.avg

def train(loader, model, crit, opt, epoch):
    """Training of the CNN.

    Args:
        loader (torch.utils.data.DataLoader): Data loader
        model (nn.Module): CNN
        crit (torch.nn): loss
        opt (torch.optim.SGD): optimizer for every parameter with True
            requires_grad in model except top layer
        epoch (int)
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    backward_time = AverageMeter()

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(
        model.top_layer.parameters(),
        lr=args.learning_rate,
        weight_decay=10**args.weight_decay,
    )

    end = time.time()
    for i, (input_tensor, target) in enumerate(loader):
        data_time.update(time.time() - end)

        # save checkpoint
        n = len(loader) * epoch + i
        if n % args.checkpoints == 0:
            path = os.path.join(
                args.experiment,
                'checkpoints',
                # integer division keeps the filename free of a float suffix
                'checkpoint_' + str(n // args.checkpoints) + '.pth.tar',
            )
            if args.verbose:
                print('Save checkpoint at: {0}'.format(path))
            torch.save({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': opt.state_dict()
            }, path)

        target = target.cuda(non_blocking=True)  # `async` is reserved on Python 3.7+
        input_var = torch.autograd.Variable(input_tensor.cuda())
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = crit(output, target_var)

        # record loss
        losses.update(loss.data.item(), input_tensor.shape[0])

        # compute gradient and do SGD step
        opt.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 200) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))
    return losses.avg

def train(train_loader, model, criterion, optimizer, scheduler, epoch,
          summary_writer, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        if args.mixup:
            images, target_a, target_b, lam = mixup_data(images, target, args.alpha)
            output = model(images)
            loss = mixup_criterion(criterion, output, target_a, target_b, lam)
        else:
            output = model(images)
            loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        step = epoch * len(train_loader) + i
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step(step)

        # log
        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], step)
        summary_writer.add_scalar('train_acc1', acc1, step)
        summary_writer.add_scalar('train_loss', loss, step)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

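# train() above assumes mixup_data/mixup_criterion helpers defined elsewhere.
# A minimal sketch following the reference mixup formulation of Zhang et al.
# (2018), x~ = lam * x_i + (1 - lam) * x_j with lam ~ Beta(alpha, alpha);
# treat it as an assumed implementation, not necessarily the one in this repo.
def mixup_data(x, y, alpha=1.0):
    # draw the mixing coefficient and a random pairing of the batch
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # the loss mixes with the same coefficient as the inputs
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
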
def test(model, data_loader, config, transform_data_fn=None, has_gt=True):
    device = get_torch_device(config.is_cuda)
    dataset = data_loader.dataset
    num_labels = dataset.NUM_LABELS
    global_timer, data_timer, iter_timer = Timer(), Timer(), Timer()
    criterion = nn.CrossEntropyLoss(ignore_index=config.ignore_label)
    losses, scores, ious = AverageMeter(), AverageMeter(), 0
    aps = np.zeros((0, num_labels))
    hist = np.zeros((num_labels, num_labels))

    logging.info('===> Start testing')

    global_timer.tic()
    data_iter = data_loader.__iter__()
    max_iter = len(data_loader)
    max_iter_unique = max_iter

    # Fix batch normalization running mean and std
    model.eval()

    # Clear cache (when run in val mode, cleanup training cache)
    torch.cuda.empty_cache()

    if config.save_prediction or config.test_original_pointcloud:
        if config.save_prediction:
            save_pred_dir = config.save_pred_dir
            os.makedirs(save_pred_dir, exist_ok=True)
        else:
            save_pred_dir = tempfile.mkdtemp()
        if os.listdir(save_pred_dir):
            raise ValueError(f'Directory {save_pred_dir} not empty. '
                             'Please remove the existing prediction.')

    with torch.no_grad():
        for iteration in range(max_iter):
            data_timer.tic()
            if config.return_transformation:
                # data_iter.next() is Python 2 only; use the next() builtin
                coords, input, target, transformation = next(data_iter)
            else:
                coords, input, target = next(data_iter)
                transformation = None
            data_time = data_timer.toc(False)

            # Preprocess input
            iter_timer.tic()
            if config.wrapper_type != 'None':
                color = input[:, :3].int()
            if config.normalize_color:
                input[:, :3] = input[:, :3] / 255. - 0.5
            sinput = SparseTensor(input, coords).to(device)

            # Feed forward
            inputs = (sinput,) if config.wrapper_type == 'None' else (sinput, coords, color)
            soutput = model(*inputs)
            output = soutput.F

            pred = get_prediction(dataset, output, target).int()
            iter_time = iter_timer.toc(False)

            if config.save_prediction or config.test_original_pointcloud:
                save_predictions(coords, pred, transformation, dataset, config,
                                 iteration, save_pred_dir)

            if has_gt:
                if config.evaluate_original_pointcloud:
                    raise NotImplementedError('pointcloud')
                    # unreachable until implemented; `pointcloud` is undefined here
                    output, pred, target = permute_pointcloud(
                        coords, pointcloud, transformation, dataset.label_map,
                        output, pred)

                target_np = target.numpy()
                num_sample = target_np.shape[0]
                target = target.to(device)

                cross_ent = criterion(output, target.long())
                losses.update(float(cross_ent), num_sample)
                scores.update(precision_at_one(pred, target), num_sample)
                hist += fast_hist(pred.cpu().numpy().flatten(),
                                  target_np.flatten(), num_labels)
                ious = per_class_iu(hist) * 100

                prob = torch.nn.functional.softmax(output, dim=1)
                ap = average_precision(prob.cpu().detach().numpy(), target_np)
                aps = np.vstack((aps, ap))
                # Due to heavy class imbalance, some classes may have no test
                # label at all; nanmean ignores them.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=RuntimeWarning)
                    ap_class = np.nanmean(aps, 0) * 100.

            if iteration % config.test_stat_freq == 0 and iteration > 0:
                reordered_ious = dataset.reorder_result(ious)
                reordered_ap_class = dataset.reorder_result(ap_class)
                class_names = dataset.get_classnames()
                print_info(iteration, max_iter_unique, data_time, iter_time,
                           has_gt, losses, scores, reordered_ious, hist,
                           reordered_ap_class, class_names=class_names)

            if iteration % config.empty_cache_freq == 0:
                # Clear cache
                torch.cuda.empty_cache()

    global_time = global_timer.toc(False)

    reordered_ious = dataset.reorder_result(ious)
    reordered_ap_class = dataset.reorder_result(ap_class)
    class_names = dataset.get_classnames()
    print_info(iteration, max_iter_unique, data_time, iter_time, has_gt,
               losses, scores, reordered_ious, hist, reordered_ap_class,
               class_names=class_names)

    if config.test_original_pointcloud:
        logging.info('===> Start testing on original pointcloud space.')
        dataset.test_pointcloud(save_pred_dir)

    logging.info("Finished test. Elapsed time: {:.4f}".format(global_time))

    return losses.avg, scores.avg, np.nanmean(ap_class), np.nanmean(
        per_class_iu(hist)) * 100

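# test() above accumulates a confusion matrix via fast_hist and converts it to
# per-class IoU via per_class_iu, both defined elsewhere in the repo. A sketch
# of the canonical implementations of these helpers as they commonly appear in
# semantic-segmentation codebases (assumed here, not verified against this
# repo's versions):
def fast_hist(pred, label, n):
    # n x n confusion matrix from flat prediction/label arrays, ignoring
    # labels outside [0, n)
    k = (label >= 0) & (label < n)
    return np.bincount(n * label[k].astype(int) + pred[k],
                       minlength=n ** 2).reshape(n, n)


def per_class_iu(hist):
    # IoU per class: diagonal / (row sum + column sum - diagonal)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
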
def train_epoch(self, epoch_num, timestamp_start):
    train_rec_loss = AverageMeter()
    train_kld_loss = AverageMeter()
    train_pos_loss = AverageMeter()
    batch_idx = 0
    epoch_end = False

    # annealing for the KL penalty
    kl_coeff = float(epoch_num) / float(self.configs.warmup_iter + 1)
    if kl_coeff >= self.configs.alpha_ub:
        kl_coeff = self.configs.alpha_ub
    print('kl penalty coefficient: ', kl_coeff,
          'alpha upperbound:', self.configs.alpha_ub)

    t_start = time.time()
    while epoch_end is False:
        data, trees, _, epoch_end, filenames = self.train_loader.next_batch()

        ifmask = False
        if self.configs.maskweight > 0:
            ifmask = True
        rec_loss, kld_loss, pos_loss, recon = self.train_step(
            data, trees, filenames, kl_coeff, ifmask)

        train_rec_loss.update(rec_loss, self._total(data), int(data.shape[0]))
        train_kld_loss.update(kld_loss, self._total(data), int(data.shape[0]))
        train_pos_loss.update(pos_loss, self._total(data), int(data.shape[0]))

        if batch_idx % 30 == 0:
            print(time.time() - t_start, "time taken")
            # NOTE: scipy.misc.imsave was removed in SciPy 1.2;
            # imageio.imwrite is the usual drop-in replacement
            scipy.misc.imsave(
                osp.join(self.configs.exp_dir, 'samples',
                         'generativenmn_data_{}.png'.format(batch_idx)),
                (data[0] + 1) / 2.0)
            scipy.misc.imsave(
                osp.join(self.configs.exp_dir, 'samples',
                         'generativenmn_reconstruction_{}.png'.format(batch_idx)),
                (recon[0] + 1) / 2.0)
            scipy.misc.imsave(
                osp.join(self.configs.exp_dir, 'samples',
                         'generativenmn_reconstruction_clip_{}.png'.format(batch_idx)),
                np.clip(recon[0], -1, 1))
            print('Epoch:{0}\tIter:{1}/{2}\tRecon {3:.6f}\t KL {4:.6f}'.format(
                epoch_num, batch_idx,
                len(self.train_loader) // self.configs.batch_size,
                train_rec_loss.batch_avg, train_kld_loss.batch_avg))
            t_start = time.time()

        context.context()._clear_caches()
        self.model.clean_tree(trees)
        batch_idx += 1

    elapsed_time = \
        datetime.datetime.now(pytz.timezone('America/New_York')) - \
        timestamp_start
    print('====> Epoch: {} Average rec loss: {:.6f} '
          'Average kld loss: {:.6f} Average pos loss: {:.6f}'.format(
              epoch_num, train_rec_loss.batch_avg, train_kld_loss.batch_avg,
              train_pos_loss.batch_avg))
    print('Elapsed time:', elapsed_time)

def train(net, optimizer, scheduler, trainloader, testloader, criterion,
          summary_writer, args):
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    best_acc = 0

    end = time.time()
    global global_step
    for inputs, targets in inf_generator(trainloader):
        if global_step >= args.max_iters:
            break
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(args.device), targets.to(args.device)

        # switch to train mode
        net.train()

        scheduler.step(global_step)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        loss.backward()
        optimizer.step()
        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step)
        summary_writer.add_scalar('top1', top1.val, global_step)
        summary_writer.add_scalar('top5', top5.val, global_step)
        summary_writer.add_scalar('batch_time', batch_time.val, global_step)
        summary_writer.add_scalar('data_time', data_time.val, global_step)
        summary_writer.add_scalar('train_loss', train_loss.val, global_step)

        if global_step % args.print_freq == 0:
            lr = optimizer.param_groups[0]['lr']
            print(f'Train: [{global_step}/{args.max_iters}] '
                  f'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  f'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  f'Lr: {lr:.5f} '
                  f'prec1: {top1.val:.3f} ({top1.avg:.3f}) '
                  f'prec5: {top5.val:.3f} ({top5.avg:.3f}) '
                  f'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})')

        if (global_step + 1) % args.eval_freq == 0 or global_step == args.max_iters - 1:
            acc = validate(testloader, net, criterion, device=args.device,
                           print_freq=args.print_freq)
            summary_writer.add_scalar('val_top1', acc, global_step)

            if acc > best_acc:
                best_acc = acc
                state = {
                    'step': global_step,
                    'best_acc': best_acc,
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                os.makedirs(args.model_dir, exist_ok=True)
                torch.save(state, os.path.join(args.model_dir, 'ckpt.pth.tar'))
            print('best accuracy: {:.2f}\n'.format(best_acc))

        global_step += 1

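# train() above iterates with inf_generator, defined elsewhere in the repo.
# A minimal sketch, assuming it simply re-cycles a finite DataLoader so the
# loop can run until args.max_iters regardless of epoch boundaries:
def inf_generator(iterable):
    # yield batches forever, restarting the loader after each pass
    while True:
        for batch in iterable:
            yield batch
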
def train(epoch):
    print('\nEpoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    correct = 0
    total = 0

    # switch to train mode
    net.train()

    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(trainloader):
        data_time.update(time.time() - end)
        inputs, targets, indexes = inputs.to(device), targets.to(device), indexes.to(device)

        outputs = net(inputs)
        # outputs = lemniscate(features, indexes)
        # reconstruction loss of the decoder output against the encoder input
        loss = criterion(outputs, inputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print('Epoch: [{}][{}/{}]'
              'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
              'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
              'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                  epoch, batch_idx, len(trainloader), batch_time=batch_time,
                  data_time=data_time, train_loss=train_loss))

    # on even epochs, save the reconstructed images to an output directory
    if epoch % 2 == 0:
        pic = to_img(outputs.cpu().data)
        save_image(pic, './out_images/image_{}.png'.format(epoch))

def train(train_loader, model, lemniscate, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input_imgs, action_probabilities, indices) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        indices = indices.to(get_device(args.gpu))

        # The images already arrive at the right size, so no resize is needed:
        # input_imgs = resize2d(input_imgs, (224, 224))
        input_imgs = input_imgs[:, 0:9, :, :]  # extract the first 3 images
        action_probabilities = action_probabilities[:, 0:3]  # extract steers, first 3 out of 6

        # Debug code to inspect the images:
        # for img in input_imgs.data.numpy():
        #     cv2.imshow("Test", img[0:3].transpose((1, 2, 0)) + 0.5)
        #     cv2.waitKey(400)

        feature = model(input_imgs, action_probabilities)
        output = lemniscate(feature, indices)
        loss = criterion(output, indices) / args.iter_size
        loss.backward()

        # measure accuracy and record loss
        losses.update(loss.item() * args.iter_size, input_imgs.size(0))

        if (i + 1) % args.iter_size == 0:
            # compute gradient and do SGD step
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # the .format() call must sit inside print(), not after it
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

def NN(epoch, net, lemniscate, trainloader, testloader, recompute_memory=0):
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    losses = AverageMeter()
    correct = 0.
    total = 0
    testsize = testloader.dataset.__len__()

    trainFeatures = lemniscate.memory.t()
    if hasattr(trainloader.dataset, 'imgs'):
        trainLabels = torch.LongTensor(
            [y for (p, y) in trainloader.dataset.imgs]).cuda()
    else:
        trainLabels = torch.LongTensor(trainloader.dataset.targets).cuda()

    if recompute_memory:
        transform_bak = trainloader.dataset.transform
        trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset,
                                                 batch_size=100,
                                                 shuffle=False, num_workers=1)
        with torch.no_grad():
            for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
                targets = targets.cuda()
                # multi-view datasets return a list of crops; keep the first
                inputs = inputs[0] if len(inputs) == 2 or len(inputs) == 3 else inputs
                inputs = inputs.cuda()  # the original never moved inputs to the GPU
                batchSize = inputs.size(0)
                features = net(inputs)
                features = features[0] if len(features) == 2 else features
                trainFeatures[:, batch_idx * batchSize:
                              batch_idx * batchSize + batchSize] = features.data.t()
        trainLabels = torch.LongTensor(temploader.dataset.targets).cuda()
        trainloader.dataset.transform = transform_bak

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
            targets = targets.cuda()
            inputs = inputs[0] if len(inputs) == 2 or len(inputs) == 3 else inputs
            inputs = inputs.cuda()
            batchSize = inputs.size(0)
            features = net(inputs)
            features = features[0] if len(features) == 2 else features
            net_time.update(time.time() - end)
            end = time.time()

            dist = torch.mm(features, trainFeatures)
            yd, yi = dist.topk(1, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            retrieval = retrieval.narrow(1, 0, 1).clone().view(-1)
            yd = yd.narrow(1, 0, 1)

            total += targets.size(0)
            correct += retrieval.eq(targets.data).sum().item()

            cls_time.update(time.time() - end)
            end = time.time()

            print('Test [{}/{}]\t'
                  'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
                  'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
                  'Top1: {:.2f}'.format(total, testsize,
                                        correct * 100. / total,
                                        net_time=net_time, cls_time=cls_time))

    return correct / total

def train(epoch, train_loader, model, contrast, criterion_1, criterion_2,
          optimizer, opt):
    """one epoch training"""
    model.train()
    contrast.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    view1_loss_meter = AverageMeter()
    view2_loss_meter = AverageMeter()
    view1_prob_meter = AverageMeter()
    view2_prob_meter = AverageMeter()

    end = time.time()
    for idx, (inputs, u_inputs, v_inputs, _, index) in enumerate(train_loader):
        data_time.update(time.time() - end)

        bsz = inputs.size(0)
        inputs = inputs.float().cuda()
        u_inputs = u_inputs.float().cuda()
        v_inputs = v_inputs.float().cuda()
        index = index.cuda()

        # ===================forward=====================
        feat_1 = model(inputs)  # view 1 is always RGB
        if opt.modality == 'res':
            feat_2 = model(diff(inputs))
        elif opt.modality == 'u':
            feat_2 = model(u_inputs)
        elif opt.modality == 'v':
            feat_2 = model(v_inputs)
        else:
            feat_2 = feat_1

        if not opt.intra_neg:
            out_1, out_2 = contrast(feat_1, feat_2, index)
        else:
            feat_neg = model(preprocess(inputs, opt.neg))
            out_1, out_2 = contrast(feat_1, feat_2, feat_neg, index)

        view1_loss = criterion_1(out_1)
        view2_loss = criterion_2(out_2)
        view1_prob = out_1[:, 0].mean()
        view2_prob = out_2[:, 0].mean()

        loss = view1_loss + view2_loss

        # ===================backward=====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ===================meters=====================
        losses.update(loss.item(), bsz)
        view1_loss_meter.update(view1_loss.item(), bsz)
        view1_prob_meter.update(view1_prob.item(), bsz)
        view2_loss_meter.update(view2_loss.item(), bsz)
        view2_prob_meter.update(view2_prob.item(), bsz)

        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}/{1}][{2}/{3}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  '1_p {probs1.val:.3f} ({probs1.avg:.3f})\t'
                  '2_p {probs2.val:.3f} ({probs2.avg:.3f})'.format(
                      epoch, opt.epochs, idx + 1, len(train_loader),
                      batch_time=batch_time, data_time=data_time, loss=losses,
                      probs1=view1_prob_meter, probs2=view2_prob_meter),
                  end='\r')

    return view1_loss_meter.avg, view1_prob_meter.avg, \
        view2_loss_meter.avg, view2_prob_meter.avg

def validate(val_loader, model, criterion, epoch, summary_writer, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # log
            step = epoch * len(val_loader) + i
            summary_writer.add_scalar('val_acc1', acc1, step)
            summary_writer.add_scalar('val_loss', loss, step)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

def train_head(epoch, net, hidx, head, otrainset, ptrainset, optimizer,
               criterion, writer):
    """trains one head for an epoch"""
    # declare dataloaders
    random_sampler = RandomSampler(otrainset)
    batch_sampler = RepeatSampler(random_sampler, cfg.batch_size,
                                  nrepeat=cfg.data_nrepeat)
    ploader = DataLoader(ptrainset, batch_sampler=batch_sampler,
                         num_workers=cfg.num_workers, pin_memory=True)
    oloader = DataLoader(otrainset, sampler=random_sampler,
                         batch_size=cfg.batch_size,
                         num_workers=cfg.num_workers, pin_memory=True)

    # set network mode
    net.train()

    # tracking variables
    end = time.time()
    train_loss = AverageMeter('Loss', ':.4f')
    data_time = AverageMeter('Data', ':.3f')
    batch_time = AverageMeter('Time', ':.3f')
    progress = TimeProgressMeter(batch_time, data_time, train_loss,
                                 Batch=len(oloader), Head=len(cfg.net_heads),
                                 Epoch=cfg.max_epochs)

    # itertools.izip is Python 2 only; the built-in zip is lazy on Python 3
    for batch_idx, (obatch, pbatch) in enumerate(zip(oloader, ploader)):
        # record data loading time
        data_time.update(time.time() - end)

        # move data to target device
        (oinputs, _), (pinputs, _) = (obatch, pbatch)
        oinputs, pinputs = (oinputs.to(cfg.device, non_blocking=True),
                            pinputs.to(cfg.device, non_blocking=True))

        # forward
        ologits, plogits = net(oinputs)[hidx], net(pinputs)[hidx]
        loss = criterion(ologits.repeat(cfg.data_nrepeat, 1), plogits)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), oinputs.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

        writer.add_scalar('Train/Loss/Head-%d' % head, train_loss.val,
                          epoch * len(oloader) + batch_idx)

        if batch_idx % cfg.display_freq != 0:
            continue
        logger.info(progress.show(Batch=batch_idx, Epoch=epoch, Head=hidx))

def train(model, train_loader, optimizer, criterion, summary_writer, epoch,
          scheduler=None):
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    top1 = AverageMeter()

    model.train()
    end = time.time()
    for i, (x, y) in enumerate(train_loader):
        x = x.cuda(non_blocking=True)
        y = y.cuda(non_blocking=True)
        data_time.update(time.time() - end)

        scores = model(x, y)
        loss = criterion(scores, y)
        acc = accuracy(scores, y) * 100

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        step = epoch * len(train_loader) + i
        if scheduler is not None:
            scheduler.step(step)

        train_loss.update(loss.item(), x.shape[0])
        top1.update(acc, x.shape[0])
        batch_time.update(time.time() - end)
        end = time.time()

        # log
        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], step)
        summary_writer.add_scalar('loss', loss.item(), step)
        summary_writer.add_scalar('train_acc', acc, step)

        if i % cfg.train.print_freq == 0:
            lr = optimizer.param_groups[0]["lr"]
            print(f'Train: [{epoch}][{i}/{len(train_loader)}] '
                  f'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  f'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  f'Lr: {lr:.5f} '
                  f'prec1: {top1.val:.3f} ({top1.avg:.3f}) '
                  f'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})')

def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input = input.cuda()
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # if i % print_freq == 0:
        #     print('Test: [{0}/{1}]\t'
        #           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
        #           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
        #           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
        #           'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
        #               i, len(val_loader), batch_time=batch_time,
        #               loss=losses, top1=top1, top5=top5))

    logging.info(
        " ---------------------------------------------------------------")
    logging.info(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))

    return top1.avg, top5.avg

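# Several functions in this file (train, validate, evaluate, _validate) rely
# on an accuracy(output, target, topk=...) helper defined elsewhere. A sketch
# mirroring the standard PyTorch ImageNet-example implementation, which
# returns one precision@k tensor per requested k; note that the train() a few
# functions above computes `accuracy(scores, y) * 100`, so that call site
# apparently uses a different, fraction-returning variant.
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # top-k predicted class indices; one row per rank after the transpose
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
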
def kNN(net, lemniscate, trainloader, testloader, K, sigma):
    # recompute_memory was moved to main; see the commented block below
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    testsize = testloader.dataset.__len__()

    trainFeatures = lemniscate.memory.t()
    if hasattr(trainloader.dataset, 'imgs'):
        trainLabels = torch.LongTensor(
            [y for (p, y) in trainloader.dataset.imgs]).cuda()
    else:
        trainLabels = torch.LongTensor(trainloader.dataset.train_labels).cuda()
    C = int(trainLabels.max() + 1)

    # Changed to recompute_memory in main
    # if recompute_memory:
    #     transform_bak = trainloader.dataset.transform
    #     trainloader.dataset.transform = testloader.dataset.transform
    #     temploader = torch.utils.data.DataLoader(trainloader.dataset,
    #                                              batch_size=100,
    #                                              shuffle=False, num_workers=1)
    #     for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
    #         targets = targets.cuda(non_blocking=True)
    #         batchSize = inputs.size(0)
    #         features = net(inputs)
    #         trainFeatures[:, batch_idx*batchSize:batch_idx*batchSize+batchSize] = features.data.t()
    #     trainloader.dataset.transform = transform_bak

    top1 = np.zeros(C)
    top5 = np.zeros(C)
    total = np.zeros(C)
    end = time.time()
    with torch.no_grad():
        retrieval_one_hot = torch.zeros(K, C).cuda()
        for batch_idx, (inputs, targets, indexes) in enumerate(tqdm.tqdm(testloader)):
            end = time.time()
            targets = targets.cuda(non_blocking=True)  # `async=True` no longer parses on Python 3.7+
            batchSize = inputs.size(0)
            features = net(inputs)
            net_time.update(time.time() - end)
            end = time.time()

            dist = torch.mm(features, trainFeatures)
            yd, yi = dist.topk(K, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            retrieval_one_hot.resize_(batchSize * K, C).zero_()
            retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
            yd_transform = yd.clone().div_(sigma).exp_()
            probs = torch.sum(
                torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                          yd_transform.view(batchSize, -1, 1)), 1)
            _, predictions = probs.sort(1, True)

            # Find which predictions match the target, per class
            correct = predictions.eq(targets.data.view(-1, 1))
            for i in targets.unique():
                idx = torch.nonzero(targets == i)
                top1[i] += torch.sum(correct[idx, 0]).cpu().numpy()
                top5[i] += torch.sum(correct[idx, :5]).cpu().numpy()
                total[i] += len(idx)

            cls_time.update(time.time() - end)

    table = pd.DataFrame({
        'classes': list(trainloader.dataset.class_to_idx.keys()),
        'top1': np.round((top1 * 100. / total), 2),
        'top5': np.round((top5 * 100. / total), 2)
    })
    print(tabulate.tabulate(table.values, table.columns, tablefmt='pipe'))
    print('Top1 Avg (total samples): ',
          np.round(sum(top1) / sum(total) * 100., 1),
          '\nTop5 Avg (total samples): ',
          np.round(sum(top5) / sum(total) * 100., 1))
    print('Top1 Avg (by class): ',
          np.round(sum(top1 / total * 100.) / len(top1), 1),
          '\nTop5 Avg (by class): ',
          np.round(sum(top5 / total * 100.) / len(top5), 1))
    # return top1/total
    return sum(top5 / total * 100.) / len(top5)

def train_moco(epoch, train_loader, model, model_ema, contrast, criterion,
               optimizer, scheduler, args):
    """one epoch training for moco"""
    model.train()
    set_bn_train(model_ema)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    prob_meter = AverageMeter()
    train_CLD_loss = AverageMeter()
    train_CLD_acc = AverageMeter()
    criterion_cld = nn.CrossEntropyLoss().cuda()

    end = time.time()
    torch.set_num_threads(1)
    for idx, ((x1, x2, x3), targets, index) in enumerate(train_loader):
        data_time.update(time.time() - end)

        bsz = x1.size(0)
        x1 = x1.cuda()
        x2 = x2.cuda()
        x3 = x3.cuda()

        feat_q1, features_groupDis1 = model(x1, two_branch=True)
        feat_q2, features_groupDis2 = model(x2, two_branch=True)
        with torch.no_grad():
            x3_shuffled, backward_inds = DistributedShufle.forward_shuffle(x3, epoch)
            feat_k3, features_groupDis3 = model_ema(x3_shuffled, two_branch=True)
            feat_k_all, feat_k3, features_groupDis3 = DistributedShufle.backward_shuffle(
                feat_k3, backward_inds, return_local=True,
                branch_two=features_groupDis3)

        # NCE loss
        out = contrast(feat_q1, feat_k3, feat_k_all, update=False)
        loss_1 = criterion(out)
        out = contrast(feat_q2, feat_k3, feat_k_all, update=True)
        loss_2 = criterion(out)
        loss = (loss_1 + loss_2) / 2
        prob = F.softmax(out, dim=1)[:, 0].mean()

        # K-way normalized cuts or k-Means. Default: k-Means
        if args.use_kmeans:
            cluster_label1, centroids1 = KMeans(features_groupDis1,
                                                K=args.clusters,
                                                Niters=args.num_iters)
            cluster_label2, centroids2 = KMeans(features_groupDis2,
                                                K=args.clusters,
                                                Niters=args.num_iters)
        else:
            cluster_label1, centroids1 = spectral_clustering(
                features_groupDis1, K=args.k_eigen,
                clusters=args.clusters, Niters=args.num_iters)
            cluster_label2, centroids2 = spectral_clustering(
                features_groupDis2, K=args.k_eigen,
                clusters=args.clusters, Niters=args.num_iters)

        # instance-group discriminative learning
        affnity1 = torch.mm(features_groupDis1, centroids2.t())
        CLD_loss = criterion_cld(affnity1.div_(args.cld_t), cluster_label2)
        affnity2 = torch.mm(features_groupDis2, centroids1.t())
        CLD_loss = (CLD_loss + criterion_cld(affnity2.div_(args.cld_t),
                                             cluster_label1)) / 2

        # get cluster label prediction accuracy
        _, cluster_pred = torch.topk(affnity1, 1)
        cluster_pred = cluster_pred.t()
        correct = cluster_pred.eq(cluster_label2.view(1, -1).expand_as(cluster_pred))
        correct_all = correct[0].view(-1).float().sum(0).mul_(100.0 / x1.size(0))
        train_CLD_acc.update(correct_all.item(), x1.size(0))

        # total loss
        loss = loss + args.Lambda * CLD_loss
        if torch.isnan(loss):
            print('INFO loss is nan! Backward skipped')
            continue

        # backward (the original called zero_grad twice in a row)
        optimizer.zero_grad()
        if args.amp_opt_level != "O0":
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()
        scheduler.step()
        moment_update(model, model_ema, args.alpha)

        train_CLD_loss.update(CLD_loss.item(), x1.size(0))

        # update meters
        loss_meter.update(loss.item(), bsz)
        prob_meter.update(prob.item(), bsz)
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        lr = optimizer.param_groups[0]['lr']
        if idx % args.print_freq == 0:
            logger.info(
                f'Train: [{epoch}][{idx}/{len(train_loader)}] lr: {lr:.5f}\t'
                f'T {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                f'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                f'loss {loss_meter.val:.3f} ({loss_meter.avg:.3f})\t'
                f'prob {prob_meter.val:.3f} ({prob_meter.avg:.3f})\t'
                f'CLD loss {train_CLD_loss.val:.3f} ({train_CLD_loss.avg:.3f})\t'
                f'Top-1 acc {train_CLD_acc.val:.3f} ({train_CLD_acc.avg:.3f})')

    return loss_meter.avg, prob_meter.avg

def train(train_loader, model, lemniscate, criterion, cls_criterion,
          optimizer, epoch, writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_ins = AverageMeter()
    losses_rot = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input, target, index, name) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.multitaskposrot:
            # instance discrimination and rotation prediction
            input = torch.cat(input, 0).cuda()
            index = torch.cat([index, index], 0).cuda()
            rotation_label = torch.cat([target[1], target[1]], 0).cuda()

            # construct the rotated inputs: 0, 90, 180, 270 degrees
            dataX_90 = torch.flip(torch.transpose(input, 2, 3), [2])
            dataX_180 = torch.flip(torch.flip(input, [2]), [3])
            dataX_270 = torch.transpose(torch.flip(input, [2]), 2, 3)
            dataX = torch.stack([input, dataX_90, dataX_180, dataX_270], dim=1)
            batch_size, rotations, channels, height, width = dataX.size()
            dataX = dataX.view([batch_size * rotations, channels, height, width])

            # construct the matching rotation labels and indices
            rotation_label = torch.stack([
                rotation_label,
                torch.ones_like(rotation_label),
                2 * torch.ones_like(rotation_label),
                3 * torch.ones_like(rotation_label)
            ], dim=1)
            rotation_label = rotation_label.view([batch_size * rotations])
            index = torch.stack([index, index, index, index], dim=1)
            index = index.view([batch_size * rotations])

            feature, pred_rot, feture_whole = model(dataX)
            loss_instance = criterion(feature, index) / args.iter_size
            loss_cls = cls_criterion(pred_rot, rotation_label)
            loss = loss_instance + 1.0 * loss_cls
            losses_ins.update(loss_instance.item() * args.iter_size, input.size(0))
            losses_rot.update(loss_cls.item() * args.iter_size, input.size(0))
        elif args.synthesis:
            # pair each original image with its synthesized counterpart
            dataX = torch.cat(input, 0).cuda()
            ori_data = dataX[:int(dataX.shape[0] / 2)]
            syn_data = dataX[int(dataX.shape[0] / 2):]
            dataX = torch.stack([ori_data, syn_data], dim=1).cuda()
            batch_size, types, channels, height, width = dataX.size()
            input = dataX.view([batch_size * types, channels, height, width])

            # instance discrimination
            feature = model(input)
            loss = criterion(feature, index) / args.iter_size
        elif args.multiaug:
            input = torch.cat(input, 0).cuda()
            feature = model(input)
            loss = criterion(feature, index) / args.iter_size
        else:
            # instance discrimination with a memory bank
            input = input.cuda()
            index = index.cuda()
            feature = model(input)
            output = lemniscate(feature, index)
            loss = criterion(output, index) / args.iter_size
        loss.backward()

        # measure accuracy and record loss
        losses.update(loss.item() * args.iter_size, input.size(0))

        if (i + 1) % args.iter_size == 0:
            # compute gradient and do SGD step
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

    return losses.avg

def train(train_loader, model, lemniscate, criterion, cls_criterion,
          optimizer, epoch, writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_ins = AverageMeter()
    losses_rot = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input, target, index, name) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.multitaskposrot:
            # instance discrimination and rotation prediction
            input = torch.cat(input, 0).cuda()
            index = torch.cat([index, index], 0).cuda()
            rotation_label = torch.cat(target, 0).cuda()
            feature, pred_rot, feture_whole = model(input)
            loss_instance = criterion(feature, index) / args.iter_size
            loss_cls = cls_criterion(pred_rot, rotation_label)
            loss = loss_instance + 1.0 * loss_cls
            losses_ins.update(loss_instance.item() * args.iter_size, input.size(0))
            losses_rot.update(loss_cls.item() * args.iter_size, input.size(0))
        elif args.synthesis:
            # pair each original image with its synthesized counterpart
            dataX = torch.cat(input, 0).cuda()
            ori_data = dataX[:int(dataX.shape[0] / 2)]
            syn_data = dataX[int(dataX.shape[0] / 2):]
            dataX = torch.stack([ori_data, syn_data], dim=1).cuda()
            batch_size, types, channels, height, width = dataX.size()
            input = dataX.view([batch_size * types, channels, height, width])

            # instance discrimination
            feature = model(input)
            loss = criterion(feature, index) / args.iter_size
        elif args.multiaug:
            input = torch.cat(input, 0).cuda()
            feature = model(input)
            loss = criterion(feature, index) / args.iter_size
        else:
            # instance discrimination with a memory bank
            input = input.cuda()
            index = index.cuda()
            feature = model(input)
            output = lemniscate(feature, index)
            loss = criterion(output, index) / args.iter_size
        loss.backward()

        # measure accuracy and record loss
        losses.update(loss.item() * args.iter_size, input.size(0))

        if (i + 1) % args.iter_size == 0:
            # compute gradient and do SGD step
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

    writer.add_scalar("losses_ins", losses_ins.avg, epoch)
    writer.add_scalar("losses_rot", losses_rot.avg, epoch)
    return losses.avg

def kNN(epoch, net, lemniscate, trainloader, testloader, K, sigma,
        recompute_memory=False, inverse=True, two_branch=False, fusion=None):
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    total = 0
    testsize = testloader.dataset.__len__()

    if recompute_memory:
        transform_bak = trainloader.dataset.transform
        trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset,
                                                 batch_size=100,
                                                 shuffle=False, num_workers=1)
        for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
            targets = targets.cuda()
            inputs = inputs[0] if len(inputs) == 2 or len(inputs) == 3 else inputs
            batchSize = inputs.size(0)
            # NOTE: both branches of the original called the net identically
            # with two_branch=False; the flag only controls the extra
            # unpacking below
            features = net(inputs, two_branch=False)
            if not two_branch:
                features = features[0] if len(features) == 2 else features
            if batch_idx == 0:
                trainFeatures = features.data.t()
                trainLabels = targets
            else:
                trainFeatures = torch.cat((trainFeatures, features.data.t()), 1)
                trainLabels = torch.cat((trainLabels, targets), 0)
        try:
            trainLabels = torch.LongTensor(temploader.dataset.targets).cuda()
        except AttributeError:
            trainLabels = torch.LongTensor(temploader.dataset.labels).cuda()
        trainloader.dataset.transform = transform_bak
    else:
        trainFeatures = lemniscate.memory.t()
        if hasattr(trainloader.dataset, 'imgs'):
            trainLabels = torch.LongTensor(
                [y for (p, y) in trainloader.dataset.imgs]).cuda()
        else:
            try:
                trainLabels = torch.LongTensor(trainloader.dataset.targets).cuda()
            except AttributeError:
                trainLabels = torch.LongTensor(trainloader.dataset.labels).cuda()

    C = trainLabels.max() + 1
    C = C.item()

    top1 = 0.
    top5 = 0.
    end = time.time()
    with torch.no_grad():
        retrieval_one_hot = torch.zeros(K, C).cuda()
        for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
            end = time.time()
            targets = targets.cuda()
            inputs = inputs[0] if len(inputs) == 2 or len(inputs) == 3 else inputs
            batchSize = inputs.size(0)
            features = net(inputs, two_branch=False)
            net_time.update(time.time() - end)
            end = time.time()

            dist = torch.mm(features, trainFeatures)
            yd, yi = dist.topk(K, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            retrieval_one_hot.resize_(batchSize * K, C).zero_()
            retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
            yd_transform = torch.exp(torch.div(yd.clone(), sigma))
            probs = torch.sum(
                torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                          yd_transform.view(batchSize, -1, 1)), 1)
            _, predictions = probs.sort(1, True)

            # Find which predictions match the target
            correct = predictions.eq(targets.data.view(-1, 1))
            cls_time.update(time.time() - end)

            top1 = top1 + correct.narrow(1, 0, 1).sum().item()
            top5 = top5 + correct.narrow(1, 0, 5).sum().item()
            total += targets.size(0)

            if inverse:
                print('Test [{}/{}]\t'
                      'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
                      'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
                      'Top1: {:.2f} Top5: {:.2f}'.format(
                          total, testsize, top1 * 100. / total,
                          top5 * 100. / total, net_time=net_time,
                          cls_time=cls_time))

    print(top1 * 100. / total)
    return top1 / total, top5 / total

def _validate(self, val_loader, model, verbose=False):
    '''
    Validate the performance on the validation set
    :param val_loader:
    :param model:
    :param verbose:
    :return:
    '''
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if torch.cuda.is_available():
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    t1 = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            # Variable wrappers are unnecessary in modern PyTorch; moving
            # the tensors to the right device is all that is needed
            if torch.cuda.is_available():
                target = target.cuda(non_blocking=True)
                input_var = input.cuda()
                target_var = target
            else:
                input_var = input
                target_var = target

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
    t2 = time.time()

    if verbose:
        print('* Test loss: %.3f top1: %.3f top5: %.3f time: %.3f' %
              (losses.avg, top1.avg, top5.avg, t2 - t1))

    if self.acc_metric == 'acc1':
        return top1.avg
    elif self.acc_metric == 'acc5':
        return top5.avg
    else:
        raise NotImplementedError

def evaluate():
    # build dataset
    val_loader, n_class = get_dataset()
    # build model
    net = get_model(n_class)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
        cudnn.benchmark = True

    # begin eval
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # Variable wrappers are redundant under torch.no_grad()

            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            progress_bar(batch_idx, len(val_loader),
                         'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
                             losses.avg, top1.avg, top5.avg))

def train(train_loader, model, criterion, optimizer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input, target, index, name) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input[0]
        target = target[0]
        input = input.cuda()
        target = target.cuda()

        output = model(input)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # record loss (note: unlike the other train loops in this file, the
        # loss here is never divided by args.iter_size before this scaling)
        losses.update(loss.item() * args.iter_size, input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return losses.avg

def train(train_loader, model, lemniscate, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    optimizer.zero_grad()
    for i, (input, _, index) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        index = index.cuda(non_blocking=True)  # `async=True` no longer parses on Python 3.7+

        # compute output
        feature = model(input)
        output = lemniscate(feature, index)
        loss = criterion(output, index) / args.iter_size
        loss.backward()

        # measure accuracy and record loss
        losses.update(loss.item() * args.iter_size, input.size(0))

        if (i + 1) % args.iter_size == 0:
            # compute gradient and do SGD step
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

def kNN(epoch, net, lemniscate, trainloader, testloader, K, sigma,
        recompute_memory=0):
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    total = 0
    testsize = testloader.dataset.__len__()

    trainFeatures = lemniscate.memory.t()
    if hasattr(trainloader.dataset, 'imgs'):
        trainLabels = torch.LongTensor(
            [y for (p, y) in trainloader.dataset.imgs]).cuda()
    else:
        trainLabels = torch.LongTensor(trainloader.dataset.train_labels).cuda()
    C = int(trainLabels.max() + 1)  # the tensor constructor below needs a plain int

    if recompute_memory:
        transform_bak = trainloader.dataset.transform
        trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset,
                                                 batch_size=100,
                                                 shuffle=False, num_workers=1)
        with torch.no_grad():  # replaces the deprecated volatile=True Variables
            for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
                inputs, targets = inputs.cuda(), targets.cuda()
                batchSize = inputs.size(0)
                features = net(inputs)
                trainFeatures[:, batch_idx * batchSize:
                              batch_idx * batchSize + batchSize] = features.data.t()
        trainLabels = torch.LongTensor(temploader.dataset.train_labels).cuda()
        trainloader.dataset.transform = transform_bak

    top1 = 0.
    top5 = 0.
    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
            inputs, targets = inputs.cuda(), targets.cuda()
            batchSize = inputs.size(0)
            features = net(inputs)
            net_time.update(time.time() - end)
            end = time.time()

            dist = torch.mm(features.data, trainFeatures)
            yd, yi = dist.topk(K, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            retrieval_one_hot = torch.FloatTensor(batchSize * K, C).zero_().cuda()
            retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
            yd_transform = yd.clone().div_(sigma).exp_()
            probs = torch.sum(
                torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                          yd_transform.view(batchSize, -1, 1)), 1)
            _, predictions = probs.sort(1, True)

            # Find which predictions match the target
            correct = predictions.eq(targets.data.view(-1, 1))

            top1 = top1 + correct.narrow(1, 0, 1).sum().item()
            top5 = top5 + correct.narrow(1, 0, 5).sum().item()

            total += targets.size(0)

            cls_time.update(time.time() - end)
            end = time.time()

            print('Test [{}/{}]\t'
                  'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
                  'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
                  'Top1: {:.2f} Top5: {:.2f}'.format(
                      total, testsize, top1 * 100. / total,
                      top5 * 100. / total, net_time=net_time,
                      cls_time=cls_time))

    print(top1 * 100. / total)
    return top1 / total

def train(epoch):
    print('\nEpoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    correct = 0
    total = 0

    # switch to train mode
    net.train()

    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(trainloader):
        data_time.update(time.time() - end)
        inputs, targets, indexes = inputs.to(device), targets.to(device), indexes.to(device)
        optimizer.zero_grad()

        features = net(inputs)
        outputs = lemniscate(features, indexes)
        loss = criterion(outputs, indexes)

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print('Epoch: [{}][{}/{}]'
              'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
              'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
              'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                  epoch, batch_idx, len(trainloader), batch_time=batch_time,
                  data_time=data_time, train_loss=train_loss))

def train(args, model, criterion, optimizer, device, train_dataloader, writer,
          epoch):
    torch.set_grad_enabled(True)
    model.train()

    running_loss = 0.0
    correct = 0
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_contrast_frame = AverageMeter()
    losses_contrast_clip = AverageMeter()
    losses_contrast = AverageMeter()
    losses_order = AverageMeter()
    end = time.time()

    for i, data in enumerate(train_dataloader, 1):
        data_time.update(time.time() - end)
        # get inputs
        tuple_clips, tuple_orders, index = data
        bsz = tuple_clips.size(0)
        inputs = tuple_clips.to(device)
        targets = [order_class_index(order) for order in tuple_orders]
        targets = torch.tensor(targets).to(device)
        index = index.to(device)

        # forward: the model returns per-frame and per-clip contrastive
        # losses together with the order-classification logits
        contrast_loss_1, contrast_loss_2, contrast_loss_3, contrast_loss_clip, outputs = model(
            inputs, tuple_orders)
        loss_contrast_frame = (contrast_loss_1.sum() + contrast_loss_2.sum() +
                               contrast_loss_3.sum()) / (3 * args.bs)
        loss_contrast_clip = contrast_loss_clip.sum() / args.bs
        loss_order = criterion(outputs, targets)
        loss = (args.weight_contrast_frame * loss_contrast_frame +
                args.weight_contrast_clip * loss_contrast_clip +
                args.weight_order * loss_order)

        # ===================backward=====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ===================meters=====================
        losses_contrast_frame.update(
            args.weight_contrast_frame * loss_contrast_frame.item(), bsz)
        losses_contrast_clip.update(
            args.weight_contrast_clip * loss_contrast_clip.item(), bsz)
        losses_order.update(args.weight_order * loss_order.item(), bsz)
        losses.update(loss.item(), bsz)
        batch_time.update(time.time() - end)
        end = time.time()

        # compute running loss and accuracy
        running_loss += loss.item()
        pts = torch.argmax(outputs, dim=1)
        correct += torch.sum(targets == pts).item()

        # print statistics and write a summary every N batches
        if i % 20 == 0:
            log_str = ('Train: [{0}/{1}][{2}/{3}] '
                       'loss_contrast_frame {loss_contrast_frame.val:.3f} '
                       '({loss_contrast_frame.avg:.3f}) '
                       'loss_contrast_clip {loss_contrast_clip.val:.3f} '
                       '({loss_contrast_clip.avg:.3f}) '
                       'loss_order {loss_order.val:.3f} ({loss_order.avg:.3f}) '
                       'loss {loss.val:.3f} ({loss.avg:.3f})'.format(
                           epoch, args.epochs, i, len(train_dataloader),
                           loss_contrast_frame=losses_contrast_frame,
                           loss_contrast_clip=losses_contrast_clip,
                           loss_order=losses_order, loss=losses))
            logging.info(log_str)
        if i % args.pf == 0:
            avg_loss = running_loss / args.pf
            avg_acc = correct / (args.pf * args.bs)
            logging.info('[TRAIN] epoch-{}, batch-{}, loss: {:.3f}, acc: {:.3f}'.format(
                epoch, i, avg_loss, avg_acc))
            step = (epoch - 1) * len(train_dataloader) + i
            writer.add_scalar('train/CrossEntropyLoss', avg_loss, step)
            writer.add_scalar('train/Accuracy', avg_acc, step)
            running_loss = 0.0
            correct = 0

    avg_loss = running_loss / len(train_dataloader)
    return avg_loss