def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    """Train `model` for one epoch on `train_loader`.

    Logs batch/data timing, loss, and top-1 / top-k precision every
    `args.print_freq` batches via `print_func`.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()

    # k for the secondary precision metric: 5 normally, shrunk (to at least 1)
    # for datasets with fewer than 10 classes.  Loop-invariant, so it is
    # computed once here (the original recomputed it on every batch).
    kout = 5
    if args.num_classes < kout * 2:
        kout = max(args.num_classes // 2, 1)

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = CNN_utils.accuracy(output, target, topk=(1, kout))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@{kout} {top5.val:.3f} ({top5.avg:.3f})'.format(
                           epoch, i, len(train_loader), batch_time=batch_time,
                           data_time=data_time, loss=losses, top1=top1,
                           kout=kout, top5=top5))
def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    """Train `model` for one epoch on multi-hot labelled data.

    Loss comes from `criterion`; precision is measured with
    `CNN_utils.accuracy_multihots` and only top-1 is tracked.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)
        losses.update(loss.item(), input.size(0))

        # accuracy_multihots returns per-k precisions; only top-1 is used here
        prec1 = CNN_utils.accuracy_multihots(output, target, topk=(1, 3))
        top1.update(prec1[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # NOTE: the old log line also printed a "Prec@5" column fed by a
            # meter that was never updated (always 0.000); it was removed as
            # it reported a misleading metric.
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch, i, len(train_loader), batch_time=batch_time,
                           data_time=data_time, loss=losses, top1=top1))
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    """One epoch of round-robin training over `args.num_datasets` loaders.

    Each outer iteration draws one batch from every dataset, accumulates the
    per-dataset gradients, and takes a single optimizer step.  Exhausted
    loaders are transparently re-wound.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []
        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds
            end = time.time()
            # BUG FIX: `iterator.next()` is Python 2 only; use the next()
            # builtin so this runs under Python 3.
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                # loader exhausted: restart it and fetch again
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            output = model(input)
            output_i = output[args.ind]
            # criterion is fed log-probabilities; assumes this head emits
            # probabilities — TODO confirm against the model definition.
            loss = criterion(torch.log(output_i).double(), target.double())
            l_loss.append(loss.item())
            loss.backward()

        losses.update(sum(l_loss), input.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)
        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                           epoch, i, batch_iters, batch_time=batch_time,
                           data_time=data_time, loss=losses))
def validate(val_loader, model, criterion, args, print_func, ind,
             phase='Validation'):
    """Evaluate the head for dataset `ind` with a KL-divergence score.

    Returns the average KL divergence (a single scalar).
    NOTE(review): `criterion` is accepted but never used in this variant.
    """
    if val_loader is None:
        # BUG FIX: the normal path returns a single scalar, but this early
        # exit returned a 2-tuple; keep the return arity consistent.
        return 0

    batch_time = CNN_utils.AverageMeter()
    kl_divs = CNN_utils.AverageMeter()
    # mAPs = mAPMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output; the division by 10 is a fixed softmax
            # temperature.  dim=1 made explicit (was an implicit default).
            output = model(input)
            output_i = F.log_softmax(output[ind] / 10., dim=1)
            kl_divs.update(
                F.kl_div(output_i.detach().double(), target.double(),
                         reduction='batchmean').item(),
                input.size(0))
            # mAPs.add(F.softmax(output_i.detach()), target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                # debug dump of predicted vs. true distributions
                print("pred: ", F.softmax(output[ind] / 10., dim=1))
                print("true: ", target)
                print_func('[{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Dataset no. {ind}\t'
                           'dists {topX.val:.3f} ({topX.avg:.3f})'.format(
                               i, len(val_loader), batch_time=batch_time,
                               ind=ind, topX=kl_divs))

    print_func('{phase} * dists {top1.avg:.3f}'.format(phase=phase,
                                                       top1=kl_divs))
    return kl_divs.avg
def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    """Run one training epoch, reporting loss plus Prec@1 and Prec@kout.

    kout is args.topX when set, otherwise half the class count.
    """
    t_batch = CNN_utils.AverageMeter()
    t_data = CNN_utils.AverageMeter()
    m_loss = CNN_utils.AverageMeter()
    m_top1 = CNN_utils.AverageMeter()
    m_topk = CNN_utils.AverageMeter()
    kout = args.topX or args.num_classes // 2

    # training mode, optionally with batch-norm layers frozen
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    tick = time.time()
    for step, (input, target) in enumerate(train_loader):
        # time spent waiting on the loader
        t_data.update(time.time() - tick)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # forward pass and loss
        output = model(input)
        loss = criterion(output, target)

        n = input.size(0)
        m_loss.update(loss.item(), n)
        prec1, preck = CNN_utils.accuracy(output, target, topk=(1, kout))
        m_top1.update(prec1[0], n)
        m_topk.update(preck[0], n)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # wall time for the whole step
        t_batch.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@{kout} {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch, step, len(train_loader), batch_time=t_batch,
                           data_time=t_data, loss=m_loss, top1=m_top1,
                           kout=kout, topX=m_topk))
def validate(val_loader, model, criterion, args, print_func):
    """Evaluate on multi-hot targets.

    Returns (average top-1 precision, average loss); (0, 0) when there is no
    validation loader.
    """
    if val_loader is None:
        return 0, 0

    t_batch = CNN_utils.AverageMeter()
    m_loss = CNN_utils.AverageMeter()
    m_top1 = CNN_utils.AverageMeter()

    # inference mode: no dropout / batch-norm updates, no gradients
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # forward pass, loss, and multi-hot precision
            output = model(input)
            loss = criterion(output, target)
            prec1 = CNN_utils.accuracy_multihots(output, target, topk=(1, 3))

            n = input.size(0)
            m_loss.update(loss.item(), n)
            m_top1.update(prec1[0], n)

            t_batch.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                               step, len(val_loader), batch_time=t_batch,
                               loss=m_loss, top1=m_top1))

    print_func(' * Prec@1 {top1.avg:.3f}'.format(top1=m_top1))
    return m_top1.avg, m_loss.avg
def validate(val_loader, model, criterion, args, print_func):
    """Evaluate with top-1 / top-kout precision.

    Returns (average top-1 precision, average loss); (0, 0) when there is no
    validation loader.  kout is args.topX, falling back to half the classes.
    """
    if val_loader is None:
        return 0, 0

    t_batch = CNN_utils.AverageMeter()
    m_loss = CNN_utils.AverageMeter()
    m_top1 = CNN_utils.AverageMeter()
    m_topk = CNN_utils.AverageMeter()
    kout = args.topX or args.num_classes // 2

    # inference mode: no dropout / batch-norm updates, no gradients
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # forward pass, loss, and precisions
            output = model(input)
            loss = criterion(output, target)
            n = input.size(0)
            m_loss.update(loss.item(), n)
            prec1, preck = CNN_utils.accuracy(output, target, topk=(1, kout))
            m_top1.update(prec1[0], n)
            m_topk.update(preck[0], n)

            t_batch.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                           'Prec@{kout} {topX.val:.3f} ({topX.avg:.3f})'.format(
                               step, len(val_loader), batch_time=t_batch,
                               loss=m_loss, top1=m_top1, kout=kout,
                               topX=m_topk))

    print_func(' * Prec@1 {top1.avg:.3f}'.format(top1=m_top1))
    return m_top1.avg, m_loss.avg
def validate(val_loader, model, criterion, args, print_func, ind,
             phase='Validation'):
    """Evaluate the head for dataset `ind`; returns the average loss.

    Returns a single scalar (the loss average).
    """
    if val_loader is None:
        # BUG FIX: the normal path returns one scalar, but this early exit
        # returned a 2-tuple; keep the return arity consistent for callers.
        return 0

    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output of the head belonging to dataset `ind`
            output = model(input)
            output = output[ind]
            # criterion is fed log-probabilities; assumes this head emits
            # probabilities — TODO confirm against the model definition.
            loss = criterion(torch.log(output).double(), target.double())
            losses.update(loss.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print_func('[{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Dataset no. {ind}\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                               i, len(val_loader), batch_time=batch_time,
                               ind=ind, loss=losses))

    print_func('{phase} * Score {top1.avg:.3f}'.format(phase=phase,
                                                       top1=losses))
    return losses.avg
def train(train_loader, model, text_model, criterion, optimizer, epoch, args,
          print_func):
    """Train the visual model with soft targets refined by a frozen text model.

    The text model's (thresholded) class posterior is mixed with the
    ground-truth multi-hot target to form a normalized soft target; a cosine
    loss additionally ties the visual projection to the average text
    embedding.
    NOTE(review): `criterion` is accepted but never used in this variant.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()

    # visual model learns; text model only supplies targets
    model.train()
    text_model.eval()

    end = time.time()
    mCEL = MclassCrossEntropyLoss()
    cos = torch.nn.CosineSimilarity()
    for i, (input, target, text_info) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        sentCode = text_info[0]
        textAvg = text_info[1]
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
            sentCode = sentCode.cuda(args.gpu, non_blocking=True)
            textAvg = textAvg.cuda(args.gpu, non_blocking=True)

        # frozen text model: class posterior with tiny scores zeroed out
        with torch.no_grad():
            sentence_output = text_model(sentCode)
            text_cls = F.softmax(sentence_output[0], dim=1)
            text_cls[text_cls < 0.01] = 0

        # compute output
        output_cls, output_proj = model(input)

        # soft target: mix the text posterior with the ground truth and
        # renormalize each row to sum to 1
        new_target = (text_cls + target) / torch.sum(
            text_cls + target, dim=1, keepdim=True).expand_as(text_cls)
        loss_cls = mCEL(output_cls, new_target)
        loss_ebd = torch.sum(
            1 - cos(output_proj, textAvg)) / output_proj.shape[0]
        losses_cls.update(loss_cls.item(), input.size(0))
        losses_ebd.update(loss_ebd.item(), input.size(0))
        loss = loss_cls + args.alpha * loss_ebd

        # precision is measured against the original (unmixed) target
        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))
        top1.update(prec1[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # NOTE: the old log also printed "Loss_cls_aux" from a meter that
            # was never updated (always 0.0000); that dead meter and its
            # column were removed.
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch, i, len(train_loader), batch_time=batch_time,
                           data_time=data_time, loss_cls=losses_cls,
                           loss_ebd=losses_ebd, top1=top1))
def validate(val_loader, model, criterion, args, print_func):
    """Evaluate and report mean average precision over softmax scores.

    Returns (mAP * 100, average loss); (0, 0) when there is no loader.
    Targets arriving as class-index tensors are expanded to multi-hot rows
    before scoring.
    """
    if val_loader is None:
        return 0, 0

    # Function-local imports kept (matches this file's style), but
    # deduplicated — numpy was imported twice — and hoisted to the top.
    import numpy as np
    from sklearn.metrics import average_precision_score

    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    # switch to evaluate mode
    model.eval()

    labels_mAP = []
    predicts_mAP = []
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)
            output_category = F.softmax(output, dim=1)
            losses.update(loss.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            predicts_mAP.append(output_category.cpu().data.numpy())
            # expand index targets to multi-hot rows for AP computation
            target = target.cpu().data.numpy()
            target_multiple_hot = np.zeros([target.shape[0], args.num_classes])
            for s_idx, s_target in enumerate(target):
                target_multiple_hot[s_idx, s_target] = 1
            labels_mAP.append(target_multiple_hot)

            if i % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                               i, len(val_loader), batch_time=batch_time,
                               loss=losses))

    labels_mAP = np.concatenate(labels_mAP, axis=0)
    predicts_mAP = np.concatenate(predicts_mAP, axis=0)
    labels_mAP[labels_mAP > 0] = 1
    mAP = average_precision_score(labels_mAP, predicts_mAP)
    print_func(' * mAP@1 {:.3f}'.format(mAP * 100))
    return mAP * 100, losses.avg
def train(train_loader, visual_model, criterion, optimizer, epoch, args,
          print_func):
    """One epoch aligning visual projections with precomputed text embeddings.

    Only the embedding (cosine) loss is backpropagated; the soft
    cross-entropy classification loss is computed for logging only.
    Returns the epoch-average embedding loss.
    """
    t_batch = CNN_utils.AverageMeter()
    t_data = CNN_utils.AverageMeter()
    m_cls = CNN_utils.AverageMeter()
    m_ebd = CNN_utils.AverageMeter()
    m_top1 = CNN_utils.AverageMeter()

    # training mode for the visual branch
    visual_model.train()

    cos = torch.nn.CosineSimilarity()
    tick = time.time()
    for step, (input, target, text_embedding) in enumerate(train_loader):
        t_data.update(time.time() - tick)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
            text_embedding = text_embedding.cuda(args.gpu, non_blocking=True)

        output_cls, output_proj = visual_model(input)

        # soft cross-entropy against the (possibly multi-hot) target
        logp = F.log_softmax(output_cls, dim=1)
        loss_cls = -torch.sum(logp * target) / output_cls.shape[0]
        # mean cosine distance to the provided text embedding
        loss_ebd = torch.sum(
            1 - cos(output_proj, text_embedding)) / output_proj.shape[0]

        n = input.size(0)
        m_cls.update(loss_cls.item(), n)
        m_ebd.update(loss_ebd.item(), n)

        # only the embedding loss drives the parameter update
        loss = loss_ebd

        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))
        m_top1.update(prec1[0], n)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        t_batch.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch, step, len(train_loader), batch_time=t_batch,
                           data_time=t_data, loss_cls=m_cls, loss_ebd=m_ebd,
                           top1=m_top1))

    return m_ebd.avg
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    """Round-robin multi-dataset epoch with an adversarial domain loss.

    For every outer iteration one batch is drawn per dataset; the task loss
    plus a domain-classification loss (on the model's last head) are
    backpropagated, and a single optimizer step is taken.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)
    # args.lam = 2 / (1 + math.exp(-epoch / 100)) - 1

    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []
        l_top1 = []
        l_topX = []
        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds
            kout = args.topX or args.class_len[args.ind] // 2
            end = time.time()
            # BUG FIX: `iterator.next()` is Python 2 only; use the next()
            # builtin so this runs under Python 3.
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            # NOTE(review): this head index looks odd — the sibling trainer
            # uses output[args.ind]; confirm the expression is intentional.
            output_i = output[(args.class_len[args.ind] // 2) % 3]
            output_dom = output[-1].squeeze().cuda(args.gpu, non_blocking=True)
            loss = criterion(output_i, target)

            # domain target: the dataset index, one entry per sample
            dom_target = torch.tensor(
                np.array([args.ind for _ in range(list(target.size())[0])]),
                dtype=torch.float).cuda(args.gpu, non_blocking=True)
            # BUG FIX: the domain loss was computed between the target and a
            # copy of itself, so the domain head's output was never used and
            # produced no useful gradient; score the head's logits against
            # the domain target instead.
            domain_loss = F.binary_cross_entropy_with_logits(
                output_dom, dom_target)
            # NOTE(review): the reported loss subtracts lam * domain_loss
            # while the backward pass adds it — presumably a gradient
            # reversal layer sits before the domain head; confirm.
            total_loss = loss + domain_loss
            total_loss.backward()
            l_loss.append(loss.item() - args.lam * domain_loss.item())

            prec1, precX = CNN_utils.accuracy(output_i, target,
                                              topk=(1, kout))
            l_top1.append(prec1.item())
            l_topX.append(precX.item())

        # meters record the last dataset's loss and the per-step mean precisions
        losses.update(l_loss[-1], input.size(0))
        top1.update(sum(l_top1) / len(l_top1), input.size(0))
        topX.update(sum(l_topX) / len(l_topX), input.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)
        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@X {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch, i, batch_iters, batch_time=batch_time,
                           data_time=data_time, loss=losses, top1=top1,
                           topX=topX))
def train(train_loader, model, text_model, criterion, optimizer, epoch, args, print_func):
    """Train `model` so its projection matches an attention-weighted text embedding.

    `text_model` supplies per-word features; an attention over words (weighted
    by similarity to the visual projection) builds the text-side target for
    the cosine embedding loss, alongside a soft cross-entropy classification
    loss.
    NOTE(review): `criterion` is accepted but never used in this variant.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()  # never updated in this variant
    # switch to train mode; the text model stays frozen in eval mode
    model.train()
    text_model.eval()
    end = time.time()
    cos = torch.nn.CosineSimilarity()
    for i, (input, target, text) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
            text = text.cuda(args.gpu, non_blocking=True)
        text_embedding, word_features = text_model(text)
        # compute output
        output_cls, output_proj = model(input)
        # attention weights: sigmoid similarity between each word feature and
        # the visual projection.  Assumes word_features is (batch, words, dim)
        # and output_proj is (batch, dim) — TODO confirm against the models.
        x_weights = torch.sigmoid(
            torch.bmm(word_features, output_proj.unsqueeze(2)))
        # x_weighted_global = torch.sum((x_anchors.permute([0, 2, 1]) * x_weights, (1,), keepdim=False)
        # weighted sum of word features -> one global text vector per sample
        x_weighted_global = torch.bmm(word_features.permute([0, 2, 1]),
                                      x_weights).squeeze(-1)
        # soft cross-entropy of the classifier against the (multi-hot) target
        log_softmax_output = F.log_softmax(output_cls, dim=1)
        loss_cls = -torch.sum(
            log_softmax_output * target) / output_cls.shape[0]
        # mean cosine distance between the projection and the attended text
        loss_ebd = torch.sum(
            1 - cos(output_proj, x_weighted_global)) / output_proj.shape[0]
        losses_cls.update(loss_cls.item(), input.size(0))
        losses_ebd.update(loss_ebd.item(), input.size(0))
        # total loss: classification plus alpha-weighted embedding term
        loss = loss_cls + args.alpha * loss_ebd
        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))
        top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch, i, len(train_loader), batch_time=batch_time,
                           data_time=data_time, loss_cls=losses_cls,
                           loss_ebd=losses_ebd, top1=top1))
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    """Round-robin multi-dataset epoch: one batch per dataset per step.

    Gradients from every dataset head are accumulated across the inner loop,
    then a single optimizer step is taken.  Exhausted loaders are
    transparently re-wound.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []
        l_top1 = []
        l_topX = []
        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds
            kout = args.topX or args.class_len[args.ind] // 2
            end = time.time()
            # BUG FIX: `iterator.next()` is Python 2 only; use the next()
            # builtin so this runs under Python 3.
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute this dataset head's output and accumulate its gradient
            output = model(input)
            output_i = output[args.ind]
            loss = criterion(output_i, target)
            l_loss.append(loss.item())
            loss.backward()

            prec1, precX = CNN_utils.accuracy(output_i.detach(), target,
                                              topk=(1, kout))
            l_top1.append(prec1.item())
            l_topX.append(precX.item())

        # total loss across datasets; mean precision over datasets
        losses.update(sum(l_loss), input.size(0))
        top1.update(sum(l_top1) / len(l_top1), input.size(0))
        topX.update(sum(l_topX) / len(l_topX), input.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)
        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@X {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch, i, batch_iters, batch_time=batch_time,
                           data_time=data_time, loss=losses, top1=top1,
                           topX=topX))
def train(train_loader, visual_model, text_model, text_generator, criterion, optimizer, epoch, args, print_func):
    """Jointly train the visual and text models using a frozen text generator.

    The generator turns raw text into a (soft target, text-model input) pair;
    both branches are trained with soft cross-entropy losses plus a cosine
    loss aligning their projection vectors.
    NOTE(review): `criterion` is accepted but never used in this variant.
    """
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_visual_cls = CNN_utils.AverageMeter()
    losses_text_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()  # never updated in this variant
    # switch to train mode; the generator stays frozen in eval mode
    visual_model.train()
    text_model.train()
    text_generator.eval()
    end = time.time()
    cos = torch.nn.CosineSimilarity()
    for i, (visual_input, visual_target, text) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpu is not None:
            visual_input = visual_input.cuda(args.gpu, non_blocking=True)
            visual_target = visual_target.cuda(args.gpu, non_blocking=True)
            text = text.cuda(args.gpu, non_blocking=True)
        # generator yields a soft class target and the text-model input
        text_target, text_input = text_generator(text)
        text_target = F.softmax(text_target, dim=1)
        # compute output
        visual_cls, visual_proj = visual_model(visual_input)
        text_cls, text_proj = text_model(text_input)
        # soft cross-entropy for the text branch against the generated target
        log_softmax_text = F.log_softmax(text_cls, dim=1)
        loss_text_cls = -torch.sum(
            log_softmax_text * text_target) / text_cls.shape[0]
        # soft cross-entropy for the visual branch against its (multi-hot) target
        log_softmax_visual = F.log_softmax(visual_cls, dim=1)
        loss_visual_cls = -torch.sum(
            log_softmax_visual * visual_target) / visual_cls.shape[0]
        # mean cosine distance between the two projection vectors
        loss_ebd = torch.sum(
            1 - cos(visual_proj, text_proj)) / visual_proj.shape[0]
        losses_visual_cls.update(loss_visual_cls.item(), visual_input.size(0))
        losses_text_cls.update(loss_text_cls.item(), text_input.size(0))
        losses_ebd.update(loss_ebd.item(), visual_input.size(0))
        # total loss: both classification losses plus the alignment term
        loss = loss_visual_cls + loss_ebd + loss_text_cls
        losses.update(loss.item(), visual_input.size(0))
        prec1 = CNN_utils.accuracy_multihots(visual_cls, visual_target,
                                             topk=(1, 3))
        top1.update(prec1[0], visual_input.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print_func(
                'Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss_visual cls {losses_visual_cls.val:.4f} ({losses_visual_cls.avg:.4f})\t'
                'Loss_text cls {losses_text_cls.val:.4f} ({losses_text_cls.avg:.4f})\t'
                'Loss_ebd {losses_ebd.val:.4f} ({losses_ebd.avg:.4f})\t'
                'total loss {losses.val:.4f} ({losses.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, losses_visual_cls=losses_visual_cls,
                    losses_text_cls=losses_text_cls, losses_ebd=losses_ebd,
                    losses=losses, top1=top1))