def train(opt, train_loader, model, epoch, vocab, val_loader):
    # average meters to record the training statistics
    train_logger = LogCollector()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    nbatch = len(train_loader)

    end = time.time()
    # reset the per-epoch statistics tracked on the model
    model.n_word_img = 0
    model.n_word_txt = 0
    model.n_sent = 0
    model.s_time = end
    model.all_stats_img = [[0., 0., 0.]]
    model.all_stats_txt = [[0., 0., 0.]]

    for train_data in tqdm(train_loader):
        # Always reset to train mode
        model.train()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        info = model.forward(*train_data, epoch=epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.niter % opt.log_step == 0:
            logger.info(
                'Epoch: [{0}] {e_log} {info}'.format(
                    epoch, e_log=str(model.logger), info=info))

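# Every train() variant in this file assumes the same small logging helpers.
# Below is a minimal sketch of AverageMeter and LogCollector following the
# VSE++-style implementations these loops are written against; the real
# classes in each source repo may carry extra methods, and the tb_log()
# backend (log_value vs. add_scalar) varies between variants.
from collections import OrderedDict


class AverageMeter(object):
    """Tracks the most recent value and the running average."""

    def __init__(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        # the epsilon keeps n=0 updates (value-only logging) from dividing by zero
        self.avg = self.sum / (0.0001 + self.count)


class LogCollector(object):
    """A collection of named AverageMeters, printable as one log line."""

    def __init__(self):
        self.meters = OrderedDict()

    def update(self, k, v, n=0):
        if k not in self.meters:
            self.meters[k] = AverageMeter()
        self.meters[k].update(v, n)

    def __str__(self):
        return '  '.join('{} {:.4f} ({:.4f})'.format(k, m.val, m.avg)
                         for k, m in self.meters.items())

    def tb_log(self, tb_logger, prefix='', step=None):
        # assumes a tensorboard_logger-style log_value(name, value, step) API;
        # variants that pass a SummaryWriter would use add_scalar instead
        for k, m in self.meters.items():
            tb_logger.log_value(prefix + k, m.val, step=step)
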
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    progbar = Progbar(train_loader.dataset.length)
    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        b_size, loss = model.train_emb(*train_data)

        # print loss
        progbar.add(b_size, values=[("loss", loss)])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()
    model.epoch = epoch

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        if not opt.use_external_captions:
            model.train_emb(*train_data)
        else:
            model.train_emb_with_extended_captions(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            if not opt.use_external_captions:
                validate(opt, val_loader, model)
            else:
                validate_caption_only(opt, val_loader, model)

def train(opt, train_loader, model, epoch, val_loader, audio):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    if audio:
        model.train2_start()
    else:
        model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        if opt.reset_train:
            # Always reset to train mode, this is not the default behavior
            if audio:
                model.train2_start()
            else:
                model.train_start()
        # pdb.set_trace()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(audio, *train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    best_score = 0

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        loss = model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        tb_logger.log_value('train', float(loss.detach().cpu().numpy()),
                            step=model.Eiters)
        tb_logger.log_value('c2c', 0., step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

def train(opt, train_loader, model, epoch, val_loader, tb_logger):
    print("start to train")
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    print("start loading data...")
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

def train(train_loader, model, epoch, tb_logger, log_step=100, val_step=500):
    # Loggers for statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        model.logger = train_logger

        # Update model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print info
        if model.Eiters % log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.add_scalar('epoch', epoch, model.Eiters)
        tb_logger.add_scalar('step', i, model.Eiters)
        tb_logger.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_logger.add_scalar('data_time', data_time.val, model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step (disabled in this variant)
        # if model.Eiters % val_step == 0:

def train(opt, train_loader, model, epoch, val_loader, vocab):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # Always reset to train mode
        model.train_start()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data, epoch=epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model, vocab)

def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        model.train_start()
        data_time.update(time.time() - end)
        model.logger = train_logger

        model.train_emb(*train_data)

        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f}\t'
                # 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                .format(
                    epoch, i, len(train_loader),
                    batch_time=batch_time,
                    # data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model)

def train(opt, train_loader, model, epoch, val_loader, best_rsum):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # if opt.reset_train:
        # Always reset to train mode, this is not the default behavior
        model.train_start()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            # validate(opt, val_loader, model)
            # evaluate on validation set
            rsum = validate(opt, val_loader, model)

            # remember best R@ sum and save checkpoint
            is_best = rsum > best_rsum
            best_rsum = max(rsum, best_rsum)
            save_checkpoint({
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            }, is_best, prefix=opt.logger_name + '/')

    return best_rsum

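# save_checkpoint above is called with a state dict, an is_best flag, and a
# path prefix. A minimal sketch assuming the common VSE++-style helper
# (torch.save plus a copy of the best checkpoint); the exact filenames are
# an assumption, not confirmed by this file:
import shutil
import torch


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', prefix=''):
    torch.save(state, prefix + filename)
    if is_best:
        # keep a separate copy of the best-scoring checkpoint
        shutil.copyfile(prefix + filename, prefix + 'model_best.pth.tar')
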
def joint_train(opt, train_loader, model, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    best_score = 0
    stop = False
    iters = 0
    langs = opt.lang.split("-")

    # switch to train mode
    model.train_start()

    # Sentencepair is always the last data loader in the list
    if opt.sentencepair:
        sentencepair_loader = train_loader.pop()
        sentencepair_loader_val = val_loader.pop()

    # Call iterator on the DatasetLoader returning DatasetLoaderIterator
    train_loader_its = list(map(iter, train_loader))

    end = time.time()
    patience_count = 0
    if opt.primary:
        primary = opt.primary.split("-")

    while not stop:
        iters += 1
        # Pick a data set and batch
        ind = random.randint(0, len(train_loader) - 1)
        train_cap2cap = random.random() < opt.sentencepair_p and opt.sentencepair
        if opt.reset_train:
            # Always reset to train mode, this is not the default behavior
            model.train_start()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        loss = None
        loss_c2c = None

        # Train caption-caption ranking.
        if train_cap2cap and opt.sentencepair:
            capA, capB, lenA, lenB = next(sentencepair_loader)
            captionsA = Variable(capA)
            captionsB = Variable(capB)
            if torch.cuda.is_available():
                captionsA = captionsA.cuda()
                captionsB = captionsB.cuda()

            # Create permute and inverse-permute indices to sort on length
            indsA = np.argsort(np.array(lenA))
            indsB = np.argsort(np.array(lenB))
            revA = np.zeros(len(lenA), dtype='int')
            revB = np.zeros(len(lenA), dtype='int')
            for i in range(len(lenA)):
                revA[indsA[i]] = i
                revB[indsB[i]] = i
            indsA, indsB = torch.LongTensor(indsA), torch.LongTensor(indsB)
            revA, revB = torch.LongTensor(revA), torch.LongTensor(revB)
            if torch.cuda.is_available():
                indsA, indsB = indsA.cuda(), indsB.cuda()
                revA, revB = revA.cuda(), revB.cuda()

            model.Eiters += 1
            model.logger.update('Eit', model.Eiters)

            # Pass length-sorted captions for encoding
            capA_emb = model.txt_enc(captionsA[indsA], sorted(lenA, reverse=True))
            capB_emb = model.txt_enc(captionsB[indsB], sorted(lenB, reverse=True))
            model.optimizer.zero_grad()

            # Unsort captions for the loss computation
            loss_c2c = model.forward_loss(capA_emb[revA], capB_emb[revB])

            # compute gradient and do SGD step
            loss_c2c.backward()
            if model.grad_clip > 0:
                clip_grad_norm(model.params, model.grad_clip)
            model.optimizer.step()
            # Don't count this as an iter

        # Train image-sentence ranking.
        else:
            tloader = train_loader_its[ind]
            # Call the next element; if it's exhausted, re-init the
            # DatasetLoaderIterators
            try:
                train_data = next(tloader)
            except StopIteration:
                train_loader_its = list(map(iter, train_loader))
                tloader = train_loader_its[ind]
                train_data = next(tloader)
            # Train with image-sentence ranking batch.
            loss = model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    batch_time=batch_time, data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('step', iters, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        if loss is not None:
            tb_logger.log_value('train', float(loss.detach().cpu().numpy()),
                                step=model.Eiters)
        if loss_c2c is not None:
            tb_logger.log_value('c2c', float(loss_c2c.detach().cpu().numpy()),
                                step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            total_score = 0
            for l, vloader in zip(langs, val_loader):
                with torch.no_grad():
                    score = validate(opt, vloader, model, l)
                if opt.primary:
                    if l in primary:
                        total_score += score
                else:
                    total_score += score

            # Compute val loss on sentencepair task
            if opt.sentencepair:
                # val_loss = sentencepair_eval(model, sentencepair_loader_val)
                # tb_logger.log_value('valid_c2c', val_loss, step=model.Eiters)
                # print('Sentence Pair Val Loss {}'.format(val_loss))
                tb_logger.log_value('valid_c2c', 0., step=model.Eiters)
            else:
                tb_logger.log_value('valid_c2c', 0., step=model.Eiters)

            if total_score > best_score:
                is_best = True
                print("New best: {}".format(total_score))
                best_score = total_score
                patience_count = 0
            else:
                patience_count += 1
                is_best = False
                print("No improvement in {}".format(patience_count))
                if patience_count >= opt.patience:
                    print("No improvement in {} evaluations, stopping".format(
                        patience_count))
                    break

            save_checkpoint(
                {
                    'iter': iters,
                    'model': model.state_dict(),
                    'best_rsum': best_score,
                    'opt': opt,
                    'Eiters': model.Eiters,
                },
                is_best,
                prefix=opt.logger_name + '/')

    print("Finished training. Best score: {}".format(best_score))

def train(opt, train_loader, model, optimizer, epoch, tb_logger, val_loader,
          measure='cosine', grad_clip=-1, scheduler=None,
          warmup_scheduler=None, ndcg_val_scorer=None):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        model.train()
        if scheduler is not None:
            scheduler.step(epoch)
        if warmup_scheduler is not None:
            warmup_scheduler.dampen()

        optimizer.zero_grad()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        loss_dict = model(*train_data)
        loss = sum(loss for loss in loss_dict.values())

        # compute gradient and do SGD step
        loss.backward()
        if grad_clip > 0:
            torch.nn.utils.clip_grad.clip_grad_norm_(model.parameters(),
                                                     grad_clip)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.add_scalar('epoch', epoch, model.Eiters)
        tb_logger.add_scalar('step', i, model.Eiters)
        tb_logger.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_logger.add_scalar('data_time', data_time.val, model.Eiters)
        tb_logger.add_scalar('lr', optimizer.param_groups[0]['lr'],
                             model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(val_loader, model, tb_logger, measure=measure,
                     log_step=opt.log_step, ndcg_scorer=ndcg_val_scorer)

def train(train_loader, model, criterion, optimizer, epoch, print_freq,
          summary_writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # non_blocking replaces the old async kwarg, which is a reserved
        # word in Python 3.7+
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    summary_writer.add_scalar('data/losses_avg', losses.avg, epoch)
    summary_writer.add_scalar('data/top1_avg', top1.avg, epoch)
    summary_writer.add_scalar('data/top5_avg', top5.avg, epoch)

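# accuracy(output, target, topk) above returns precision@k percentages.
# A minimal sketch following the standard torchvision ImageNet-example
# helper (assumption: output is [batch, classes] logits and target holds
# class indices; the source file does not define it):
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)

    # top-k predicted class indices, one column per sample after transpose
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
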
def train(opt, train_loader, adapt_loader, model, model_ema, epoch,
          val_loader, tb_writer):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    adapt_iter = iter(adapt_loader)
    adapt_loss = torch.nn.MSELoss()

    if opt.ramp_lr:
        adjust_learning_rate_mean_teacher(
            model.optimizer, epoch, opt.num_epochs,
            opt.initial_lr_rampup, opt.initial_lr)
    else:
        adjust_learning_rate(opt, model.optimizer, epoch)

    consistency_weight = get_current_consistency_weight(
        opt.consistency_weight, epoch, opt.consistency_rampup)

    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        model.Eiters += 1

        # switch to train mode
        model.train_start()
        model_ema.train_start()

        # make sure train logger is used
        model.logger = train_logger

        try:
            adapt_data = next(adapt_iter)
        except StopIteration:
            adapt_iter = iter(adapt_loader)
            adapt_data = next(adapt_iter)

        # Get embeddings
        img_emb, cap_emb, cap_lens = model.run_emb(*train_data)

        # Data for Domain Adaptation or SS Learning
        # Adapt loader returns different features for the same images
        adapt_imgs_ema, adapt_imgs, _, _, _ = adapt_data
        adapt_imgs = adapt_imgs.float().cuda()
        adapt_imgs_ema = adapt_imgs_ema.float().cuda()

        # the teacher forward runs without gradients; the student forward
        # keeps gradients so the consistency loss can update the student
        with torch.no_grad():
            ema_adapt_imgs_emb = model_ema.img_enc(adapt_imgs_ema)
        adapt_imgs_emb = model.img_enc(adapt_imgs)

        consistency_loss_img = adapt_loss(ema_adapt_imgs_emb, adapt_imgs_emb)
        consistency_loss = consistency_loss_img * consistency_weight

        # measure accuracy and record loss
        model.optimizer.zero_grad()
        loss = model.forward_loss(img_emb, cap_emb, cap_lens)
        total_loss = loss + consistency_loss

        # compute gradient and do SGD step
        total_loss.backward()
        if model.grad_clip > 0:
            torch.nn.utils.clip_grad_norm_(
                parameters=model.params, max_norm=model.grad_clip)
        model.optimizer.step()

        if epoch <= opt.ema_late_epoch:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha,
                global_step=model.Eiters,
            )
        else:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha_late,
                global_step=model.Eiters,
            )

        # Update the model
        # model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        tb_writer.add_scalar('Iter', model.Eiters, model.Eiters)
        tb_writer.add_scalar('Lr', model.optimizer.param_groups[0]['lr'],
                             model.Eiters)
        tb_writer.add_scalar('Consistency weight', consistency_weight,
                             model.Eiters)

        model.logger.update('Contr Loss', loss.item())
        model.logger.update('Adapt Loss', consistency_loss.item())
        model.logger.update('Total Loss', total_loss.item())

        # Print log info
        if model.Eiters % opt.log_step == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  '{e_log}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_writer.add_scalar('epoch', epoch, model.Eiters)
        tb_writer.add_scalar('step', i, model.Eiters)
        tb_writer.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_writer.add_scalar('data_time', data_time.val, model.Eiters)
        model.logger.tb_log(tb_writer, prefix='train', step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0 and model.Eiters > 0:
            validate(opt, val_loader, model, tb_writer)

        if opt.log_images:
            plot_img = vutils.make_grid(train_data[0], normalize=True,
                                        scale_each=True)
            tb_writer.add_image('Labeled Images', plot_img, model.Eiters)
            plot_img = vutils.make_grid(adapt_imgs, normalize=True,
                                        scale_each=True)
            tb_writer.add_image('Adapt Images', plot_img, model.Eiters)

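# update_ema_variables above keeps model_ema as an exponential moving
# average of the student weights. A minimal sketch following the
# mean-teacher recipe of Tarvainen & Valpola (assumption: the decay is
# ramped up with the global step, as in their reference implementation):
def update_ema_variables(model, ema_model, alpha, global_step):
    # ramp the EMA decay up from 0 toward alpha early in training
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        # ema = alpha * ema + (1 - alpha) * student
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)
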
def test(test_loader, model, criterion, print_freq):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # preds = np.zeros((0, 7,))
    pred_labels = np.zeros([0, ])
    GT_labels = np.zeros([0, ])
    for i, (input, target) in enumerate(test_loader):
        target = target.cuda(non_blocking=True)

        # evaluate without tracking gradients (replaces the removed
        # volatile=True Variable flag)
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        '''
        cal_probs = torch.nn.Softmax(dim=0)
        probs = cal_probs(output)
        preds = np.concatenate([preds, probs.data.cpu().numpy()], axis=0)
        '''

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        _, pred = output.data.topk(1, 1, True, True)
        pred_labels = np.concatenate(
            [pred_labels, pred.cpu().numpy().flatten()], axis=0)
        GT_labels = np.concatenate(
            [GT_labels, target.cpu().numpy().flatten()], axis=0)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      i, len(test_loader), batch_time=batch_time,
                      loss=losses, top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    categories = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad',
                  'Surprise']
    build_confusion_mtx(GT_labels, pred_labels, categories)

    '''
    mean_score, std_score = get_inception_score(preds)
    print(' * IS: mean {mean_score:.3f} std {std_score:.3f}'.format(
        mean_score=mean_score, std_score=std_score))
    '''

    return top1.avg

def train(opt, model, epoch, train_loader, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    kmeans_features = None
    kmeans_emb = None

    end = time.time()
    if opt.cluster_loss:
        features = retrieve_features(train_loader)
        kmeans_features = get_centers(features, opt.n_clusters)
        # https://stats.stackexchange.com/questions/299013/cosine-distance-as-similarity-measure-in-kmeans
        # after normalizing, Euclidean distance is linearly correlated with
        # cosine distance

    for j, (images, targets, lengths, ids) in enumerate(train_loader):
        if opt.cluster_loss:
            img_embs, _, _ = encode_data(model, train_loader)
            kmeans_emb = get_centers(img_embs, opt.n_clusters)

        # switch to train mode
        model.train_start()

        # if j == i:
        #     same = True
        # else:
        #     same = False

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(epoch, images, targets, lengths, ids,
                        opt.cluster_loss, kmeans_features, kmeans_emb)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, j, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', j, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model)

def train(opt, train_loader, adapt_loader, model, model_ema, epoch,
          val_loader, tb_writer):
    from model import ContrastiveLoss

    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    adapt_iter = iter(adapt_loader)

    if opt.adapt_loss == 'mse':
        adapt_loss = torch.nn.MSELoss()
    if opt.adapt_loss == 'contrastive':
        adapt_loss = ContrastiveLoss(margin=opt.margin, measure=opt.measure)

    if opt.ramp_lr:
        adjust_learning_rate_mean_teacher(
            model.optimizer, epoch, opt.num_epochs,
            opt.initial_lr_rampup, opt.initial_lr)
    else:
        adjust_learning_rate(opt, model.optimizer, epoch)

    consistency_weight = get_current_consistency_weight(
        opt.consistency_weight, epoch, opt.consistency_rampup)

    if opt.max_violation:
        gamma = 1.
    else:
        gamma = adjust_gamma(init_gamma=0.0, epoch=epoch, increase=0.2)
    train_logger.update('hard_contr_gamma', gamma, n=0)

    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        model.Eiters += 1

        # switch to train mode
        model.train_start()
        model_ema.train_start()

        # make sure train logger is used
        model.logger = train_logger

        try:
            adapt_data = next(adapt_iter)
        except StopIteration:
            adapt_iter = iter(adapt_loader)
            adapt_data = next(adapt_iter)

        # Get embeddings
        img_emb, cap_emb = model.run_emb(*train_data)

        # Data for Domain Adaptation or SS Learning
        # Adapt loader returns different features for the same images
        adapt_imgs_ema, adapt_imgs, adapt_caption, adapt_lens, _ = adapt_data
        adapt_imgs = adapt_imgs.float().cuda()
        adapt_imgs_ema = adapt_imgs_ema.float().cuda()

        consistency_loss_cap = 0.
        if opt.adapt_split != 'unlabeled':
            adapt_caption = adapt_caption.cuda()
            # the teacher forward runs without gradients; the student forward
            # keeps gradients so the consistency loss can update the student
            with torch.no_grad():
                ema_adapt_cap_emb = model_ema.txt_enc(
                    adapt_caption, adapt_lens, dropout=opt.dropout_noise)
            adapt_cap_emb = model.txt_enc(adapt_caption, adapt_lens,
                                          dropout=opt.dropout_noise)
            consistency_loss_cap = adapt_loss(ema_adapt_cap_emb,
                                              adapt_cap_emb)

        with torch.no_grad():
            ema_adapt_imgs_emb = model_ema.img_enc(adapt_imgs_ema)
        adapt_imgs_emb = model.img_enc(adapt_imgs)
        consistency_loss_img = adapt_loss(ema_adapt_imgs_emb, adapt_imgs_emb)

        consistency_loss = (consistency_loss_img / 2. +
                            consistency_loss_cap / 2.) * consistency_weight

        # measure accuracy and record loss
        model.optimizer.zero_grad()
        loss = model.forward_loss(img_emb, cap_emb, gamma=gamma)
        total_loss = loss + consistency_loss

        # compute gradient and do SGD step
        total_loss.backward()
        if model.grad_clip > 0:
            clip_grad_norm(model.params, model.grad_clip)
        model.optimizer.step()

        if epoch <= opt.ema_late_epoch:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha,
                global_step=model.Eiters,
            )
        else:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha_late,
                global_step=model.Eiters,
            )

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        model.logger.update('Iter', model.Eiters, 0)
        model.logger.update('Lr', model.optimizer.param_groups[0]['lr'], 0)
        model.logger.update('Consistency weight', consistency_weight, 0)
        model.logger.update('Contr Loss', loss.item())
        model.logger.update('Adapt Loss', consistency_loss.item())
        model.logger.update('Total Loss', total_loss.item())

        # Print log info
        if model.Eiters % opt.log_step == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  '{e_log}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, e_log=str(model.logger)))
            # print(model.logger)

        # Record logs in tensorboard
        tb_writer.add_scalar('epoch', epoch, model.Eiters)
        tb_writer.add_scalar('step', i, model.Eiters)
        tb_writer.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_writer.add_scalar('data_time', data_time.val, model.Eiters)
        model.logger.tb_log(tb_writer, model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            # print('Validate normal')
            print('Validate EMA')
            validate(opt, val_loader, model_ema, tb_writer)
            # validate(opt, val_loader, model, tb_writer)

        if opt.log_images:
            plot_img = vutils.make_grid(train_data[0], normalize=True,
                                        scale_each=True)
            tb_writer.add_image('Labeled Images', plot_img, model.Eiters)
            plot_img = vutils.make_grid(adapt_imgs, normalize=True,
                                        scale_each=True)
            tb_writer.add_image('Adapt Images', plot_img, model.Eiters)

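# get_current_consistency_weight above ramps the consistency term in over
# the first consistency_rampup epochs. A minimal sketch, assuming the
# sigmoid-shaped ramp-up from the mean-teacher codebase; the exact ramp
# used by this repo is not shown in this file:
import numpy as np


def get_current_consistency_weight(weight, epoch, rampup_length):
    if rampup_length == 0:
        return weight
    # clamp to [0, rampup_length], then apply exp(-5 * (1 - t)^2)
    current = np.clip(epoch, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return weight * float(np.exp(-5.0 * phase * phase))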