def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        kout = 5
        if args.num_classes < kout * 2:
            kout = args.num_classes // 2
            if kout < 1:
                kout = 1

        prec1, prec5 = CNN_utils.accuracy(output, target, topk=(1, kout))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@{kout} {top5.val:.3f} ({top5.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses,
                           top1=top1,
                           kout=kout,
                           top5=top5))
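All of these examples lean on helpers from CNN_utils that are not shown on this page. A minimal sketch of the two used everywhere, assuming they follow the stock PyTorch ImageNet reference code (an assumption, not the verified source of CNN_utils):

import torch

class AverageMeter:
    """Tracks the latest value and a running average (val/sum/count/avg)."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Precision@k for each k in topk, returned as shape-(1,) tensors."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)   # top-maxk class indices
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res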
Example #2
def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        # target_idx = target.nonzero() [:,1]

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # log_softmax_output = F.log_softmax(output, dim=1)
        #
        # loss = - torch.sum(log_softmax_output * target) / output.shape[0]

        losses.update(loss.item(), input.size(0))

        prec1 = CNN_utils.accuracy_multihots(output, target, topk=(1, 3))

        top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@{kout} {top5.val:.3f} ({top5.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses,
                           top1=top1,
                            kout=5,  # note: top5 is never updated in this variant
                           top5=top5))
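CNN_utils.accuracy_multihots is also undefined on this page. A plausible sketch, assuming a top-k prediction counts as a hit when it lands on any positive class of the multi-hot target (hypothetical, inferred from how prec1[0] is consumed above):

import torch

def accuracy_multihots(output, target, topk=(1,)):
    # target: multi-hot float tensor of shape (batch, num_classes)
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)  # (batch, maxk) class ids
        hits = target.gather(1, pred)               # 1.0 where a prediction is a positive class
        res = []
        for k in topk:
            correct_k = (hits[:, :k].sum(1) > 0).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res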
Example #3
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []

        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds

            end = time.time()
            # Python 3: use next(iterator), not iterator.next()
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)
            output = model(input)
            output_i = output[args.ind]
            loss = criterion(torch.log(output_i).double(), target.double())
            l_loss.append(loss.item())
            loss.backward()

        losses.update(sum(l_loss), input.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                           epoch,
                           i,
                           batch_iters,
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses))
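The page never shows how train_loads_iter is prepared. A hypothetical call site (the DataLoader settings and the `datasets` list are assumptions; only the iterator-reset pattern comes from the loop above):

import torch

train_loaders = [torch.utils.data.DataLoader(ds, batch_size=args.batch_size,
                                             shuffle=True, num_workers=args.workers)
                 for ds in datasets]            # `datasets`: one Dataset per source (assumed)
train_loads_iter = [iter(dl) for dl in train_loaders]

for epoch in range(args.epochs):
    train(train_loads_iter, train_loaders, model, criterion, optimizer,
          epoch, args, print)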
Example #4
def validate(val_loader,
             model,
             criterion,
             args,
             print_func,
             ind,
             phase='Validation'):
    if val_loader is None:
        return 0  # match the single-value return at the end of this function
    batch_time = CNN_utils.AverageMeter()
    kl_divs = CNN_utils.AverageMeter()
    #mAPs = mAPMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            output_i = F.log_softmax(output[ind] / 10., dim=1)  # temperature-scaled (T=10) log-probabilities

            kl_divs.update(
                F.kl_div(output_i.detach().double(),
                         target.double(),
                         reduction='batchmean').item(), input.size(0))
            #mAPs.add(F.softmax(output_i.detach()), target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print("pred: ", F.softmax(output[ind] / 10.))
                print("true: ", target)
                print_func('[{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Dataset no. {ind}\t'
                           'dists {topX.val:.3f} ({topX.avg:.3f})'.format(
                               i,
                               len(val_loader),
                               batch_time=batch_time,
                               ind=ind,
                               topX=kl_divs))

        print_func('{phase} * dists {top1.avg:.3f}'.format(phase=phase,
                                                           top1=kl_divs))

    return kl_divs.avg
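The `/ 10.` inside log_softmax is a fixed softening temperature applied before the KL comparison. A hypothetical driver that evaluates each dataset head in turn (`val_loaders` is an assumed list with one loader per head):

for ind, val_loader in enumerate(val_loaders):
    kl = validate(val_loader, model, criterion, args, print, ind)
    print('head {}: mean KL divergence {:.4f}'.format(ind, kl))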
Example #5
def train(train_loader, model, criterion, optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    kout = args.topX or args.num_classes // 2

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)
    end = time.time()

    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        # target_idx = target.nonzero()[:, 1]

        # compute output
        output = model(input)
        loss = criterion(output, target)

        losses.update(loss.item(), input.size(0))

        prec1, precX = CNN_utils.accuracy(output, target, topk=(1, kout))

        top1.update(prec1[0], input.size(0))
        topX.update(precX[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@{kout} {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses,
                           top1=top1,
                           kout=kout,
                           topX=topX))
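CNN_utils.fix_BN is called whenever args.fix_BN is set but is not defined on this page. A common sketch, assuming it freezes BatchNorm running statistics during fine-tuning:

import torch.nn as nn

def fix_BN(model):
    # Keep BatchNorm layers in eval mode so their running mean/var
    # stay frozen while the rest of the network trains.
    for m in model.modules():
        if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
            m.eval()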
Example #6
def validate(val_loader, model, criterion, args, print_func):
    if val_loader is None:
        return 0, 0
    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    # top5 = CNN_utils.AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            # loss = criterion(output, target)
            # log_softmax_output = F.log_softmax(output, dim=1)
            #
            # loss = - torch.sum(log_softmax_output * target)/ output.shape[0]
            # measure accuracy and record loss
            loss = criterion(output, target)

            prec1 = CNN_utils.accuracy_multihots(output, target, topk=(1, 3))
            losses.update(loss.item(), input.size(0))

            top1.update(prec1[0], input.size(0))
            # top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                               i,
                               len(val_loader),
                               batch_time=batch_time,
                               loss=losses,
                               top1=top1))

        print_func(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

    return top1.avg, losses.avg
Example #7
def validate(val_loader, model, criterion, args, print_func):
    if val_loader is None:
        return 0, 0
    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    kout = args.topX or args.num_classes // 2

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            losses.update(loss.item(), input.size(0))

            prec1, precX = CNN_utils.accuracy(output, target, topk=(1, kout))

            top1.update(prec1[0], input.size(0))
            topX.update(precX[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                           'Prec@{kout} {topX.val:.3f} ({topX.avg:.3f})'.format(
                               i,
                               len(val_loader),
                               batch_time=batch_time,
                               loss=losses,
                               top1=top1,
                               kout=kout,
                               topX=topX))

        print_func(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

    return top1.avg, losses.avg
Example #8
def validate(val_loader,
             model,
             criterion,
             args,
             print_func,
             ind,
             phase='Validation'):
    if val_loader is None:
        return 0  # this variant returns only losses.avg
    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            output = output[ind]

            loss = criterion(torch.log(output).double(), target.double())
            losses.update(loss.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print_func('[{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Dataset no. {ind}\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                               i,
                               len(val_loader),
                               batch_time=batch_time,
                               ind=ind,
                               loss=losses))

        print_func('{phase} * Score {top1.avg:.3f}'.format(phase=phase,
                                                           top1=losses))

    return losses.avg
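Because this variant feeds torch.log(output) to the criterion, the per-dataset heads must already emit probabilities, and the criterion must expect log-probabilities. A matching choice (an assumption, consistent with the batchmean KL used in the earlier validate):

# nn.KLDivLoss takes log-probabilities as input and probabilities as target.
criterion = torch.nn.KLDivLoss(reduction='batchmean')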
Example #9
def train(train_loader, model, text_model, criterion, optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    losses_cls_aux = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    text_model.eval()

    end = time.time()

    mCEL = MclassCrossEntropyLoss()

    cos = torch.nn.CosineSimilarity()
    for i, (input, target, text_info) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        textAvg = text_info[1]
        sentCode = text_info[0]
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        sentCode = sentCode.cuda(args.gpu, non_blocking=True)
        textAvg = textAvg.cuda(args.gpu, non_blocking=True)

        with torch.no_grad():
            sentence_output = text_model(sentCode)
            text_cls = F.softmax(sentence_output[0], dim=1)
            text_cls[text_cls<0.01] = 0
        # text_class_values, text_class_ids = torch.max(text_output[0], dim=1)
        # text_class_ids[text_class_values<0.5] = 4
        # compute output
        output_cls, output_proj = model(input)
        new_target = (text_cls + target) / torch.sum(
            text_cls + target, dim=1, keepdim=True).expand_as(text_cls)
        loss_cls = mCEL(output_cls, new_target)

        loss_ebd = torch.sum(1 - cos(output_proj, textAvg)) / output_proj.shape[0]

        # loss_cls_aux = mCEL(output_cls_basic, text_cls)

        losses_cls.update(loss_cls.item(), input.size(0))
        losses_ebd.update(loss_ebd.item(), input.size(0))
        # losses_cls_aux.update(loss_cls_aux.item(), input.size(0))
        loss = loss_cls + args.alpha * loss_ebd

        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))

        top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Loss_cls_aux {loss_cls_aux.val:.4f} ({loss_cls_aux.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss_cls=losses_cls,
                           loss_ebd=losses_ebd,
                           loss_cls_aux=losses_cls_aux,
                           top1=top1))
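MclassCrossEntropyLoss is instantiated above but defined elsewhere. A sketch consistent with the soft-target cross-entropy that Example #11 writes out inline (loss = -sum(log_softmax(output) * target) / batch_size):

import torch
import torch.nn.functional as F

class MclassCrossEntropyLoss(torch.nn.Module):
    """Cross-entropy against a soft (non-one-hot) target distribution."""
    def forward(self, output, target):
        log_probs = F.log_softmax(output, dim=1)
        return -torch.sum(log_probs * target) / output.shape[0]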
Example #10
def validate(val_loader, model, criterion, args, print_func):
    if val_loader is None:
        return 0, 0
    import numpy as np

    batch_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    # top1 = CNN_utils.AverageMeter()
    # top5 = CNN_utils.AverageMeter()

    # switch to evaluate mode
    model.eval()
    labels_mAP = []
    predicts_mAP = []
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)
            output_category = F.softmax(output, dim=1)
            # log_softmax_output = F.log_softmax(output, dim=1)
            #
            # loss = - torch.sum(log_softmax_output * target)/ output.shape[0]
            # measure accuracy and record loss
            # prec1 = CNN_utils.accuracy_multihots(output, target, topk=(1, 1))
            losses.update(loss.item(), input.size(0))

            # top1.update(prec1[0].item(), input.size(0))
            # top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            predicts_mAP.append(output_category.cpu().data.numpy())
            target = target.cpu().data.numpy()
            target_multiple_hot = np.zeros([target.shape[0], args.num_classes])
            for s_idx, s_target in enumerate(target):
                target_multiple_hot[s_idx, s_target] = 1
            labels_mAP.append(target_multiple_hot)

            if i % args.print_freq == 0:
                print_func('Test: [{0}/{1}]\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                               i,
                               len(val_loader),
                               batch_time=batch_time,
                               loss=losses))

    from sklearn.metrics import average_precision_score
    labels_mAP = np.concatenate(labels_mAP, axis=0)
    predicts_mAP = np.concatenate(predicts_mAP, axis=0)
    labels_mAP[labels_mAP > 0] = 1
    mAP = average_precision_score(labels_mAP, predicts_mAP)

    print_func(' * mAP@1 {:.3f}'.format(mAP * 100))

    return mAP * 100, losses.avg
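The per-sample loop that fills target_multiple_hot can be vectorized; a sketch assuming each target entry is a single integer class index:

import numpy as np
# one row of the identity matrix per label == a one-hot row per sample
target_multiple_hot = np.eye(args.num_classes)[target]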
Example #11
def train(train_loader, visual_model, criterion, optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()

    # switch to train mode
    visual_model.train()

    end = time.time()

    cos = torch.nn.CosineSimilarity()
    for i, (input, target, text_embedding) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        text_embedding = text_embedding.cuda(args.gpu, non_blocking=True)

        # text_embedding = text_model(text)

        # compute output
        output_cls, output_proj = visual_model(input)

        log_softmax_output = F.log_softmax(output_cls, dim=1)
        loss_cls = - torch.sum(log_softmax_output * target) / output_cls.shape[0]

        loss_ebd = torch.sum(1 - cos(output_proj, text_embedding)) / output_proj.shape[0]
        # loss_ebd = (output_proj - text_embedding)**2 / output_proj.shape[0]
        losses_cls.update(loss_cls.item(), input.size(0))
        losses_ebd.update(loss_ebd.item(), input.size(0))
        loss = loss_ebd  # only the embedding loss is optimized; loss_cls is logged for monitoring

        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))

        top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss_cls=losses_cls,
                           loss_ebd=losses_ebd,
                           top1=top1))
    return losses_ebd.avg
Example #12
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    #args.lam = 2 / (1 + math.exp(-epoch / 100)) - 1
    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []
        l_top1 = []
        l_topX = []

        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds

            kout = args.topX or args.class_len[args.ind] // 2

            end = time.time()
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            output_i = output[(args.class_len[args.ind] // 2) % 3]
            output_dom = output[-1].squeeze().cuda(args.gpu, non_blocking=True)

            loss = criterion(output_i, target)

            dom_target = torch.tensor(
                np.array([args.ind for _ in range(list(target.size())[0])]),
                dtype=torch.float).cuda(args.gpu, non_blocking=True)
            # score the domain head's logits against the dataset label
            domain_loss = F.binary_cross_entropy_with_logits(
                output_dom, dom_target)

            total_loss = loss + domain_loss
            total_loss.backward()

            l_loss.append(loss.item() - args.lam * domain_loss.item())

            prec1, precX = CNN_utils.accuracy(output_i, target, topk=(1, kout))
            l_top1.append(prec1.item())
            l_topX.append(precX.item())

        losses.update(l_loss[-1], input.size(0))
        top1.update(sum(l_top1) / len(l_top1), input.size(0))
        topX.update(sum(l_topX) / len(l_topX), input.size(0))

        #optimizer.zero_grad()
        #allloss_var.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@X {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch,
                           i,
                           batch_iters,
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses,
                           top1=top1,
                           topX=topX))
Example #13
def train(train_loader, model, text_model, criterion, optimizer, epoch, args,
          print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    text_model.eval()

    end = time.time()

    cos = torch.nn.CosineSimilarity()
    for i, (input, target, text) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        text = text.cuda(args.gpu, non_blocking=True)

        text_embedding, word_features = text_model(text)

        # compute output
        output_cls, output_proj = model(input)

        x_weights = torch.sigmoid(
            torch.bmm(word_features, output_proj.unsqueeze(2)))
        # x_weighted_global = torch.sum((x_anchors.permute([0, 2, 1]) * x_weights, (1,), keepdim=False)

        x_weighted_global = torch.bmm(word_features.permute([0, 2, 1]),
                                      x_weights).squeeze(-1)

        log_softmax_output = F.log_softmax(output_cls, dim=1)
        loss_cls = -torch.sum(
            log_softmax_output * target) / output_cls.shape[0]

        loss_ebd = torch.sum(
            1 - cos(output_proj, x_weighted_global)) / output_proj.shape[0]

        losses_cls.update(loss_cls.item(), input.size(0))
        losses_ebd.update(loss_ebd.item(), input.size(0))
        loss = loss_cls + args.alpha * loss_ebd

        prec1 = CNN_utils.accuracy_multihots(output_cls, target, topk=(1, 3))

        top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss_cls {loss_cls.val:.4f} ({loss_cls.avg:.4f})\t'
                       'Loss_ebd {loss_ebd.val:.4f} ({loss_ebd.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           epoch,
                           i,
                           len(train_loader),
                           batch_time=batch_time,
                           data_time=data_time,
                           loss_cls=losses_cls,
                           loss_ebd=losses_ebd,
                           top1=top1))
Example #14
def train(train_loads_iter, train_loaders, model, criterion, optimizer, epoch,
          args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()
    top1 = CNN_utils.AverageMeter()
    topX = CNN_utils.AverageMeter()

    # switch to train mode
    model.train()
    if args.fix_BN:
        CNN_utils.fix_BN(model)

    batch_iters = math.ceil(args.num_iter / args.batch_size)
    for i in range(batch_iters):
        start = time.time()
        l_loss = []
        l_top1 = []
        l_topX = []
        #allloss_var = 0

        optimizer.zero_grad()
        for ds in range(args.num_datasets):
            args.ind = ds

            kout = args.topX or args.class_len[args.ind] // 2

            end = time.time()
            try:
                (input, target) = next(train_loads_iter[ds])
            except StopIteration:
                train_loads_iter[ds] = iter(train_loaders[ds])
                (input, target) = next(train_loads_iter[ds])

            # measure data loading time
            data_time.update(time.time() - end)

            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)
            # target_idx = target.nonzero() [:,1]
            # if torch.max(target) >= 0 and torch.max(target) < args.class_len[args.ind]:
            # compute output
            #print("Input shape {}".format(input.shape))
            output = model(input)
            output_i = output[args.ind]
            #print("Output_i device {}".format(output_i.device))
            loss = criterion(output_i, target)
            l_loss.append(loss.item())
            #allloss_var += loss
            loss.backward()

            prec1, precX = CNN_utils.accuracy(output_i.detach(),
                                              target,
                                              topk=(1, kout))
            l_top1.append(prec1.item())
            l_topX.append(precX.item())

        losses.update(sum(l_loss), input.size(0))
        top1.update(sum(l_top1) / len(l_top1), input.size(0))
        topX.update(sum(l_topX) / len(l_topX), input.size(0))

        #optimizer.zero_grad()
        #allloss_var.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start)

        if i % args.print_freq == 0:
            print_func('Epoch: [{0}][{1}/{2}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@X {topX.val:.3f} ({topX.avg:.3f})'.format(
                           epoch,
                           i,
                           batch_iters,
                           batch_time=batch_time,
                           data_time=data_time,
                           loss=losses,
                           top1=top1,
                           topX=topX))
Example #15
def train(train_loader, visual_model, text_model, text_generator, criterion,
          optimizer, epoch, args, print_func):
    batch_time = CNN_utils.AverageMeter()
    data_time = CNN_utils.AverageMeter()
    losses_visual_cls = CNN_utils.AverageMeter()
    losses_text_cls = CNN_utils.AverageMeter()
    losses_ebd = CNN_utils.AverageMeter()
    losses = CNN_utils.AverageMeter()

    top1 = CNN_utils.AverageMeter()
    top5 = CNN_utils.AverageMeter()

    # switch to train mode
    visual_model.train()
    text_model.train()
    text_generator.eval()

    end = time.time()

    cos = torch.nn.CosineSimilarity()
    for i, (visual_input, visual_target, text) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            visual_input = visual_input.cuda(args.gpu, non_blocking=True)
        visual_target = visual_target.cuda(args.gpu, non_blocking=True)
        text = text.cuda(args.gpu, non_blocking=True)

        text_target, text_input = text_generator(text)
        text_target = F.softmax(text_target, dim=1)
        # compute output
        visual_cls, visual_proj = visual_model(visual_input)
        text_cls, text_proj = text_model(text_input)

        log_softmax_text = F.log_softmax(text_cls, dim=1)
        loss_text_cls = -torch.sum(
            log_softmax_text * text_target) / text_cls.shape[0]

        log_softmax_visual = F.log_softmax(visual_cls, dim=1)
        loss_visual_cls = -torch.sum(
            log_softmax_visual * visual_target) / visual_cls.shape[0]

        loss_ebd = torch.sum(
            1 - cos(visual_proj, text_proj)) / visual_proj.shape[0]

        losses_visual_cls.update(loss_visual_cls.item(), visual_input.size(0))
        losses_text_cls.update(loss_text_cls.item(), text_input.size(0))
        losses_ebd.update(loss_ebd.item(), visual_input.size(0))
        loss = loss_visual_cls + loss_ebd + loss_text_cls
        losses.update(loss.item(), visual_input.size(0))

        prec1 = CNN_utils.accuracy_multihots(visual_cls,
                                             visual_target,
                                             topk=(1, 3))

        top1.update(prec1[0], visual_input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_func(
                'Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss_visual cls {losses_visual_cls.val:.4f} ({losses_visual_cls.avg:.4f})\t'
                'Loss_text cls {losses_text_cls.val:.4f} ({losses_text_cls.avg:.4f})\t'
                'Loss_ebd {losses_ebd.val:.4f} ({losses_ebd.avg:.4f})\t'
                'total loss {losses.val:.4f} ({losses.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    losses_visual_cls=losses_visual_cls,
                    losses_text_cls=losses_text_cls,
                    losses_ebd=losses_ebd,
                    losses=losses,
                    top1=top1))