Exemplo n.º 1
0
def validate(loader, model, criterion, optimizer, device, log):
    """Run one validation pass and return the mean tolerance-20 accuracy.

    Labels are shifted by 976 and clamped into [0, 999] before comparison
    (presumably mapping raw steering values onto class bins — TODO confirm).
    Accuracy is measured against the un-noised `steer` label at two
    tolerances (20 and 50 bins); per-100-iteration averages are printed and
    written to `log`.
    """
    model.eval()
    size_batch, size_data = loader.batch_size, len(loader)
    running_acc_20, iteration_acc_20, iteration_acc_50 = 0, 0, 0
    for index, data in enumerate(loader):
        inputs = data['frame'].to(device)
        labels = misc.limit_value_tensor(data['noise_label'] - 976, 0,
                                         999).to(device)
        real_label = misc.limit_value_tensor(data['steer'] - 976, 0,
                                             999).to(device)
        optimizer.zero_grad()
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            # loss is computed for parity with the training loop but unused here
            loss = criterion(outputs, labels)
            # FIX: the tolerance arguments were swapped — acc_50 was computed
            # with tolerance 20 and acc_20 with tolerance 50.
            acc_20 = misc.accuracy(predicted, real_label, size_batch, 20)
            acc_50 = misc.accuracy(predicted, real_label, size_batch, 50)
        running_acc_20 += acc_20
        iteration_acc_20 += acc_20
        iteration_acc_50 += acc_50
        if index % 100 == 99:
            out = 'Iteration: {:>5}/{:<5}  {:5}  || Acc_20: {:.4f}   Acc_50: {:.4f}'.format(
                index, size_data, 'val', iteration_acc_20 / 100,
                iteration_acc_50 / 100)
            print(out)
            log.write(out)
            iteration_acc_20, iteration_acc_50 = 0, 0
    return running_acc_20 / size_data
Exemplo n.º 2
0
def test():
    """Evaluate the global `model` on the global `test_loader`.

    Accumulates cross-entropy loss and top-1/top-5 accuracy over the
    prefetched test set, then measures gate sparsity as the fraction of
    non-zero entries in the concatenated `gates_params`.

    Returns:
        tuple: (average top-1 accuracy, gate sparsity).
    """
    meters = {name: misc.AverageMeter() for name in ('loss', 'top1', 'top5')}

    model.eval()
    prefetcher = datasets.DataPrefetcher(test_loader)
    with torch.no_grad():
        data, target = prefetcher.next()
        while data is not None:
            # drop tensors cached by the global computation graph each batch
            default_graph.clear_all_tensors()

            data, target = data.to(args.device), target.to(args.device)
            output = model(data)

            loss = criterion(output, target)
            prec1, prec5 = misc.accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            meters['loss'].update(loss.item(), n)
            meters['top1'].update(prec1.item(), n)
            meters['top5'].update(prec5.item(), n)

            data, target = prefetcher.next()

    test_sparsity = (torch.cat(gates_params) != 0).float().mean().item()
    print(' * Test set: Loss_CE: %.4f, '
          'Sparsity: %.4f, Top1 acc: %.4f, Top5 acc: %.4f\n' %
          (meters['loss'].avg, test_sparsity, meters['top1'].avg,
           meters['top5'].avg))
    return meters['top1'].avg, test_sparsity
Exemplo n.º 3
0
    def loss_labels(self, outputs, targets, indices, num_segments, log=True):
        """Classification loss (NLL).

        targets dicts must contain the key "labels" containing a tensor of
        dim [nb_target_segments]. Unmatched queries are assigned the
        background class (`self.num_classes`).
        """
        assert 'pred_logits' in outputs
        logits = outputs['pred_logits']

        # (batch_idx, query_idx) pairs of queries matched to ground truth
        matched_idx = self._get_src_permutation_idx(indices)
        matched_labels = torch.cat(
            [t['labels'][j] for t, (_, j) in zip(targets, indices)]).long()

        # default every query to the no-object class, then scatter matches in
        target_classes = torch.full(logits.shape[:2],
                                    self.num_classes,
                                    dtype=torch.int64,
                                    device=logits.device)
        target_classes[matched_idx] = matched_labels

        loss_ce = F.cross_entropy(logits.transpose(1, 2), target_classes,
                                  self.empty_weight)
        losses = {'loss_ce': loss_ce}

        if log:
            # TODO this should probably be a separate loss, not hacked in this one here
            losses['class_error'] = 100 - accuracy(logits[matched_idx],
                                                   matched_labels)[0]

        return losses
def validate(val_loader, model, criterion, epoch):
    """Distributed validation pass; metrics are all-reduced across workers.

    Returns the average top-1 precision over the validation set.
    """
    loss_meter = misc.AverageMeter()
    acc1_meter = misc.AverageMeter()
    acc5_meter = misc.AverageMeter()

    # switch to evaluate mode
    prefetcher = datasets.DataPrefetcher(val_loader)
    model.eval()

    input, target = prefetcher.next()
    step = -1
    while input is not None:
        step += 1
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = misc.accuracy(output.data, target, topk=(1, 5))

        # average across all distributed processes
        reduced_loss = reduce_tensor(loss.data)
        prec1 = reduce_tensor(prec1)
        prec5 = reduce_tensor(prec5)

        n = input.size(0)
        loss_meter.update(to_python_float(reduced_loss), n)
        acc1_meter.update(to_python_float(prec1), n)
        acc5_meter.update(to_python_float(prec5), n)

        input, target = prefetcher.next()

    print(' * Test Epoch {0}, Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}\n'.
          format(epoch, top1=acc1_meter, top5=acc5_meter))

    return acc1_meter.avg
Exemplo n.º 5
0
def train(train_loader, model, criterion, optimizer, use_cuda):
    """Run one training epoch with a progress bar.

    Returns:
        tuple: (average loss, average top-1 accuracy).
    """
    model.train()

    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    tick = time.time()

    bar = Bar('Processing', max=len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        data_timer.update(time.time() - tick)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # drop the singleton label dim (pytorch 0.4.0 merged Variable and Tensor)
        targets = targets.squeeze(1)

        # forward
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # record loss and top-1 accuracy
        prec1 = accuracy(outputs.data, targets.data, topk=(1, ))
        n = inputs.size(0)
        loss_meter.update(loss.item(), n)
        acc_meter.update(prec1[0], n)

        # backward + SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_timer.update(time.time() - tick)
        tick = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} |  Top1: {top1:.4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_timer.val,
            bt=batch_timer.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=loss_meter.avg,
            top1=acc_meter.avg)
        bar.next()

    bar.finish()
    return (loss_meter.avg, acc_meter.avg)
Exemplo n.º 6
0
def validation(val_loader, model, criterion, use_cuda):
    """Run one evaluation epoch with a progress bar.

    Returns:
        tuple: (average loss, average top-1 accuracy).
    """
    model.eval()

    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    tick = time.time()

    bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        data_timer.update(time.time() - tick)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # `volatile` Variables were removed; torch.no_grad() is the replacement
        with torch.no_grad():
            targets = targets.squeeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # record loss and top-1 accuracy
        prec1 = accuracy(outputs.data, targets.data, topk=(1,))
        n = inputs.size(0)
        loss_meter.update(loss.item(), n)
        acc_meter.update(prec1[0], n)

        batch_timer.update(time.time() - tick)
        tick = time.time()

        bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
                    batch=batch_idx + 1,
                    size=len(val_loader),
                    data=data_timer.avg,
                    bt=batch_timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=loss_meter.avg,
                    top1=acc_meter.avg,
                    )
        bar.next()

    bar.finish()
    return (loss_meter.avg, acc_meter.avg)
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch over a prefetched loader, logging periodically.

    Metrics are all-reduced across distributed processes before logging.
    """
    loss_meter = misc.AverageMeter()
    acc1_meter = misc.AverageMeter()
    acc5_meter = misc.AverageMeter()

    # switch to train mode
    prefetcher = datasets.DataPrefetcher(train_loader)
    model.train()

    input, target = prefetcher.next()
    step = -1
    while input is not None:
        step += 1

        output = model(input)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % args.log_interval == 0:
            prec1, prec5 = misc.accuracy(output.data, target, topk=(1, 5))

            # Average loss and accuracy across processes for logging
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)

            # to_python_float incurs a host<->device sync
            n = input.size(0)
            loss_meter.update(to_python_float(reduced_loss), n)
            acc1_meter.update(to_python_float(prec1), n)
            acc5_meter.update(to_python_float(prec5), n)

            torch.cuda.synchronize()

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.10f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      step,
                      len(train_loader),
                      loss=loss_meter,
                      top1=acc1_meter,
                      top5=acc5_meter))

        input, target = prefetcher.next()
Exemplo n.º 8
0
def train_step(model, optimizer, images, label, train = True):
    """Single supervised step: forward pass, report loss/accuracy, and
    apply gradients when `train` is True."""
    with tf.GradientTape() as tape:
        logits = model(images, train)
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label, logits=logits)

    current_loss = tf.reduce_mean(per_example_loss)
    accu = accuracy(logits, label)

    print("Current Training Loss : %f, Accuracy : %f" %(current_loss, accu))

    if train:
        grads = tape.gradient(per_example_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
Exemplo n.º 9
0
def test_wrapper(data_loader, data_size, model, use_gpu=True):
    """Evaluate `model` on data_loader['test'], logging top-1/top-5 accuracy.

    Args:
        data_loader: dict of loaders; only the 'test' split is used here.
        data_size: unused here — kept for signature compatibility.
        model: network to evaluate.
        use_gpu: move batches to CUDA when True.
    """
    model.eval()
    accumulator = inst_meter_dict(
        ['top_1_acc', 'top_5_acc', 'data_time', 'batch_time'])
    tic = time.time()
    with no_grad():  # close all grads, operations inside don't track history
        toc = time.time()
        for batch_index, (inputs, labels) in enumerate(data_loader['test']):
            batch_size = inputs.size(0)
            accumulator['data_time'].update(time.time() - toc)
            if use_gpu:
                try:
                    # FIX: `cuda(async=True)` is a SyntaxError on Python 3.7+
                    # (`async` became a keyword); torch's kwarg is
                    # `non_blocking` (since torch 0.4).
                    inputs = Variable(inputs.float().cuda())
                    labels = Variable(labels.long().cuda(non_blocking=True))
                except Exception:
                    # FIX: narrowed bare `except:` and corrected the logging
                    # call — logging.error(msg, *args) %-formats `msg`, so
                    # passing the tensors positionally was an API misuse.
                    logging.error('inputs: %s, labels: %s', inputs, labels)
            else:
                inputs, labels = Variable(inputs), Variable(labels)
            outputs = model(inputs)
            acc_1, acc_5 = accuracy(outputs.data, labels.data, topk=(1, 5))
            accumulator['top_1_acc'].update(acc_1.item(), batch_size)
            accumulator['top_5_acc'].update(acc_5.item(), batch_size)
            accumulator['batch_time'].update(time.time() - toc)
            toc = time.time()

            if (batch_index + 1) % 10 == 0:
                logging.info('[{}/{}] data: {:.4f}s | batch: {:.4f}s'.format(
                    batch_index + 1, len(data_loader["test"]),
                    accumulator['data_time'].val,
                    accumulator['batch_time'].val))
                accumulator['data_time'].reset()
                accumulator['batch_time'].reset()

        logging.info('top-1: {:.4f} | top-5: {:.4f} | time: {:.4f}'.format(
            accumulator['top_1_acc'].avg, accumulator['top_5_acc'].avg,
            time.time() - tic))
Exemplo n.º 10
0
    # NOTE(review): this is a fragment of a larger training loop — `output`,
    # `target`, `i`, `data`, `top1`, `top5`, `best_acc`, `args`,
    # `gates_params`, `criterion`, `optimizer`, `model`, `train_loader` and
    # `test()` are defined in the enclosing scope, which is not shown here.
    loss_ce = criterion(output, target)
    # quadratic penalty pulling the mean gate magnitude toward
    # args.sparsity_level, weighted by args.lambd
    loss_reg = args.lambd * (torch.cat(gates_params).abs().mean() -
                             args.sparsity_level)**2
    loss = loss_ce + loss_reg

    loss.backward()
    optimizer.step()

    # project gate parameters back into [0, 1] after the update
    for p in gates_params:
        p.data.clamp_(0, 1)

    if i % args.log_interval == 0:
        concat_channels = torch.cat(gates_params)
        # fraction of gates that are still non-zero (active channels)
        sparsity = (concat_channels != 0).float().mean()
        mean_gate = concat_channels.mean()
        prec1, prec5 = misc.accuracy(output, target, topk=(1, 5))
        top1.update(prec1.item(), data.size(0))
        top5.update(prec5.item(), data.size(0))

        print(
            'Train Iter [%d/%d]\tLoss: %.4f, Loss_CE: %.4f, Loss_REG: %.4f, '
            'Sparsity: %.4f, Mean gate: %.4f, Top1 acc: %.4f, Top5 acc: %.4f' %
            (i, len(train_loader),
             loss.item(), loss_ce.item(), loss_reg.item(), sparsity.item(),
             mean_gate.item(), top1.avg, top5.avg))

    if i % args.eval_interval == 0 and i > 0:
        acc, test_sparsity = test()
        if test_sparsity <= args.sparsity_level and acc > best_acc:
            best_acc = acc
            # NOTE(review): the snippet is truncated mid-call below.
            torch.save(model.state_dict(),
Exemplo n.º 11
0
def train(train_loader, model, criterion, optimizer, epoch, ows_state, args):
    """One training epoch with the sandwich rule (max / min / random
    configurations) plus OWS path search.

    The max configuration trains on the hard labels; min and random
    configurations are trained by self-distillation against the max
    configuration's softmax output. Per-image loss deltas vs. the max
    configuration feed `ows_state.histories`.

    Returns:
        dict: per-iteration history of losses, correctness masks,
        configurations and OWS search state.
    """
    meters = defaultdict(misc.AverageMeter)

    model.train()

    filters = model.filters if hasattr(model, 'filters') else model.module.filters
    history = defaultdict(list)

    end = time.time()
    for iteration, (input, target) in enumerate(train_loader):
        if "mini" in args.debug and iteration > 20: break

        best_path, temperature, gamma_max, best_perf, timing = solve_ows(
            model, epoch, len(train_loader), iteration, ows_state, args)

        # measure data loading time
        meters["data_time"].update(time.time() - end)

        if not args.no_cuda:
            target = target.cuda(non_blocking=True)

        compute_results = misc.SWDefaultDict(misc.SWDict)

        minconf = [F.configurations[0] for F in filters]
        maxconf = [F.configurations[-1] for F in filters]

        optimizer.zero_grad()

        # sandwich rule: train maximum configuration
        outp = model(input, configuration=maxconf)
        loss = criterion(outp['x'], target)
        loss.mean().backward()
        compute_results['max']['x'] = outp['x'].detach()
        compute_results['max']['loss_numpy'] = loss.detach().cpu().numpy()
        compute_results['max']['prob'] = torch.nn.functional.softmax(compute_results['max']['x'], dim=1)

        # sandwich rule: train minimum and random configuration with self-distillation
        for kind in ('min', 'rand'):
            conf = None if kind == 'rand' else minconf
            outp = model(input, configuration=conf)

            loss = misc.soft_cross_entropy(outp['x'], compute_results['max']['prob'].detach())
            compute_results[kind]['soft_loss_numpy'] = loss.detach().cpu().numpy()
            with torch.no_grad():
                hard_loss_numpy = criterion(outp['x'], target).detach().cpu().numpy()
                compute_results[kind]['loss_numpy'] = hard_loss_numpy

            compute_results[kind]['x'] = outp['x'].detach()
            if kind == 'rand':
                compute_results['rand']['decision'] = outp['decision'].cpu().numpy()
            loss.mean().backward()

        # credit each sub-path decision with its loss improvement vs. the max config
        for path, image_loss, image_refloss in zip(compute_results['rand']['decision'],
                                                    compute_results['rand']['loss_numpy'],
                                                    compute_results['max']['loss_numpy']):
            for i, pi in enumerate(path):
                ows_state.histories[i][pi].update(-(image_loss - image_refloss) / len(path), epoch, iteration)

        for refname in ('min', 'max', 'rand'):
            # FIX: these lines used the stale loop variable `kind` (left at
            # 'rand' after the loop above), so only the 'rand' meters were
            # ever updated; use `refname` throughout this loop.
            meters['loss_' + refname].update(compute_results[refname]['loss_numpy'].mean(), input.size(0))
            refloss = compute_results[refname]['loss_numpy']
            (prec1, prec5), refcorrect_ks = misc.accuracy(compute_results[refname]['x'].data,
                                                          target, topk=(1, 5), return_correct_k=True)
            refcorrect1, refcorrect5 = [a.cpu().numpy().astype(bool) for a in refcorrect_ks]
            history['loss_' + refname].append(refloss)
            history['top1_' + refname].append(refcorrect1)
            history['top5_' + refname].append(refcorrect5)
            meters['top1_' + refname].update(prec1.item(), input.size(0))
            meters['top5_' + refname].update(prec5.item(), input.size(0))
            if 'soft_loss_numpy' in compute_results[refname]:
                meters['loss_soft_' + refname].update(compute_results[refname]['soft_loss_numpy'].mean(), input.size(0))
                history['loss_soft_' + refname].append(compute_results[refname]['soft_loss_numpy'])

        # NOTE(review): both the decision AND the loss array are appended to
        # 'configuration' — possibly the second append was meant for another
        # key; kept as-is pending confirmation.
        history['configuration'].append(compute_results['rand']['decision'])
        history['configuration'].append(compute_results['rand']['loss_numpy'])

        optimizer.step()

        # measure elapsed time
        meters["batch_time"].update(time.time() - end)
        end = time.time()

        if iteration % args.print_freq == 0:
            toprint = f"Epoch: [{epoch}][{iteration}/{len(train_loader)}]\t"
            toprint += ('Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'Prec@1 {top1_rand.val:.3f} ({top1_rand.avg:.3f})\t'
                        'Prec@5 {top5_rand.val:.3f} ({top5_rand.avg:.3f})\t'.format(**meters))

            for key, meter in meters.items():
                if key.startswith('loss'):
                    toprint += f'{key} {meter.val:.4f} ({meter.avg:.4f})\t'
            logger.info(toprint)

            # prints a string summarizing the sampling probabilities for each filter
            probas_str = ""
            for i, F in enumerate(filters):
                if F.probability is not None:
                    probas_str += '|{} '.format(i)
                    for p in F.probability:
                        probas_str += str(int(100 * p)) + ' '
            probas_log = None
            if any(F.probability is not None for F in filters):
                # FIX: removed a trailing comma that wrapped the tuple in an
                # extra single-element tuple.
                probas_log = tuple(F.probability for F in filters)
            history['OWS'].append(dict(best_path=best_path, temperature=temperature, gamma_max=gamma_max,
                                        best_pref=best_perf, pred_latency=timing, probas_log=probas_log))
            if probas_str:
                probas_str = '\n' + probas_str
            ows_str = f"predicted latency: {timing}, perf: {best_perf}, T: {temperature}, gamma: {gamma_max}"
            logger.info('best_path: ' + ','.join(map(str, best_path)) + ows_str + probas_str)


    return history
Exemplo n.º 12
0
def generic_train(data_loader,
                  data_size,
                  model,
                  criterion,
                  optimizer,
                  lr_scheduler,
                  max_epoch=100,
                  use_gpu=True,
                  pre_eval=False):
    """Train `model` with per-epoch validation and periodic checkpointing.

    Args:
        data_loader: dict with 'train' and 'dev' loaders.
        data_size: unused here — kept for signature compatibility.
        model, criterion, optimizer: the usual training triple.
        lr_scheduler: schedule consumed by `update_lr` each epoch.
        max_epoch: number of epochs to run.
        use_gpu: move batches to CUDA when True.
        pre_eval: run one validation pass first to check CUDA memory.

    Returns:
        0 on completion (also 0 on a data-loading failure).
    """
    tic = time.time()

    best_model = model
    best_acc = 0.0

    temporary = inst_meter_dict(
        ['batch_time', 'data_time', 'losses', 'top_1_acc', 'top_5_acc'])
    accumulator = inst_meter_dict(['losses', 'top_1_acc', 'top_5_acc'])

    # pre-evaluation phase to check cuda memory
    if pre_eval:
        logging.info('Validation [0/{}]:'.format(max_epoch))
        model.eval()

        toc = time.time()
        with no_grad(
        ):  # close all grads, operations inside don't track history
            batch_size = 0
            for batch_index, (inputs, labels) in enumerate(data_loader['dev']):
                if batch_size == 0:
                    batch_size = inputs.size(0)
                temporary['data_time'].update(time.time() - toc)
                # wrap in Variable
                if use_gpu:
                    try:
                        # FIX: `cuda(async=True)` is a SyntaxError on
                        # Python 3.7+; torch's kwarg is `non_blocking`.
                        inputs, labels = Variable(
                            inputs.float().cuda()), Variable(
                                labels.long().cuda(non_blocking=True))
                    except Exception:
                        # FIX: narrowed bare `except:`; logging.error(msg,
                        # *args) %-formats `msg`, so passing tensors
                        # positionally was an API misuse.
                        logging.error('inputs: %s, labels: %s', inputs,
                                      labels)
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                acc_1, acc_5 = accuracy(outputs.data,
                                        labels.data,
                                        topk=(1, cfg.TRAIN.METRICS_TOP_K_ACC))

                accumulator['losses'].update(loss.item(), batch_size)
                accumulator['top_1_acc'].update(acc_1.item(), batch_size)
                accumulator['top_5_acc'].update(acc_5.item(), batch_size)

            logging.info(
                '[{}/{}] loss: {:.4f} | top-1: {:.4f} | top-5: {:.4f}'.format(
                    0, max_epoch, accumulator['losses'].avg,
                    accumulator['top_1_acc'].avg,
                    accumulator['top_5_acc'].avg))
            logging.info(
                'Pre-evaluation done, validation batch-size:{}, everything is ok'
                .format(batch_size))

    # training and validation
    for epoch in range(max_epoch):
        is_best = False
        use_mixup = cfg.TRAIN.MIXUP
        if use_mixup:
            logging.info('Mix-up used during training')
            # FIX: `xrange` is Python 2 only; `range` membership is O(1) in
            # Python 3.
            if epoch not in range(cfg.TRAIN.MU.ACTIVE_EPOCH_RANGE[0],
                                  cfg.TRAIN.MU.ACTIVE_EPOCH_RANGE[1]):
                use_mixup = False
                logging.info('Mix-up switch OFF')
            else:
                logging.info('Mix-up switch ON')

        # Each epoch has a training and validation phase
        # ---- training phase ----
        optimizer = update_lr(optimizer, epoch, lr_scheduler)
        logging.info('Training epoch [{}/{}]: learning rate {}'.format(
            epoch + 1, max_epoch, optimizer.param_groups[0]['lr']))
        model.train()  # Set model to training mode

        # Iterate over data.
        toc = time.time()
        for batch_index, (inputs, labels) in enumerate(data_loader["train"]):
            batch_size = inputs.size(0)
            temporary['data_time'].update(time.time() - toc)
            # wrap in Variable
            if use_gpu:
                try:
                    # FIX: `async=True` -> `non_blocking=True` (see above)
                    inputs, labels = Variable(inputs.float().cuda()), Variable(
                        labels.long().cuda(non_blocking=True))
                    if use_mixup:
                        inputs, targets_a, targets_b, lam = mixup_data(
                            inputs, labels, cfg.TRAIN.MU.ALPHA)
                        inputs, targets_a, targets_b = map(
                            Variable, (inputs, targets_a, targets_b))
                except Exception:
                    logging.error('\n==> inputs:\n{}\n==> labels:\n{}'.format(
                        inputs, labels))
                    return 0
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically.
            optimizer.zero_grad()
            outputs = model(inputs)
            if use_mixup:
                loss = mixup_criterion(criterion, outputs, targets_a,
                                       targets_b, lam)
                acc_1, acc_5 = mixup_accuracy(
                    outputs.data,
                    targets_a,
                    targets_b,
                    lam,
                    topk=(1, cfg.TRAIN.METRICS_TOP_K_ACC))
            else:
                loss = criterion(outputs, labels)
                acc_1, acc_5 = accuracy(outputs.data,
                                        labels.data,
                                        topk=(1, cfg.TRAIN.METRICS_TOP_K_ACC))

            temporary['losses'].update(loss.item(), batch_size)
            temporary['top_1_acc'].update(acc_1.item(), batch_size)
            temporary['top_5_acc'].update(acc_5.item(), batch_size)
            accumulator['losses'].update(loss.item(), batch_size)
            accumulator['top_1_acc'].update(acc_1.item(), batch_size)
            accumulator['top_5_acc'].update(acc_5.item(), batch_size)

            # backward + optimize only if in training phase
            # (zero_grad was already called above; not repeated here)
            loss.backward()
            optimizer.step()

            # print evaluation statistics
            temporary['batch_time'].update(time.time() - toc)
            toc = time.time()

            if (batch_index + 1) % cfg.TRAIN.LOG_INTERVAL == 0:
                logging.info(
                    '[{}/{}] [{}/{}] data: {:.4f}s | batch: {:.4f}s | loss: {:.4f} | top-1: {:.4f} | top-5: {:.4f}'
                    .format(
                        epoch + 1,
                        max_epoch,
                        batch_index + 1,
                        len(data_loader["train"]),
                        temporary['data_time'].val,
                        temporary['batch_time'].val,
                        temporary['losses'].avg,
                        temporary['top_1_acc'].avg,
                        temporary['top_5_acc'].avg,
                    ))
                temporary['data_time'].reset()
                temporary['batch_time'].reset()
                temporary['losses'].reset()
                temporary['top_1_acc'].reset()
                temporary['top_5_acc'].reset()

        logging.info(
            '[{}/{}] loss: {:.4f} | top-1: {:.4f} | top-5: {:.4f}'.format(
                epoch + 1, max_epoch, accumulator['losses'].avg,
                accumulator['top_1_acc'].avg, accumulator['top_5_acc'].avg))
        accumulator['losses'].reset()
        accumulator['top_1_acc'].reset()
        accumulator['top_5_acc'].reset()
        # -------------------------

        # ---- validation phase ----
        logging.info('Validation [{}/{}]:'.format(epoch + 1, max_epoch))
        model.eval()

        # Iterate over data.
        toc = time.time()
        with no_grad(
        ):  # close all grads, operations inside don't track history
            for batch_index, (inputs, labels) in enumerate(data_loader["dev"]):
                batch_size = inputs.size(0)
                temporary['data_time'].update(time.time() - toc)
                # wrap in Variable
                if use_gpu:
                    try:
                        # FIX: `async=True` -> `non_blocking=True` (see above)
                        inputs, labels = Variable(
                            inputs.float().cuda()), Variable(
                                labels.long().cuda(non_blocking=True))
                    except Exception:
                        logging.error(
                            '\n==> inputs:\n{}\n==> labels:\n{}'.format(
                                inputs, labels))
                        return 0
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # no gradient bookkeeping needed under no_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                acc_1, acc_5 = accuracy(outputs.data,
                                        labels.data,
                                        topk=(1, cfg.TRAIN.METRICS_TOP_K_ACC))

                temporary['losses'].update(loss.item(), batch_size)
                temporary['top_1_acc'].update(acc_1.item(), batch_size)
                temporary['top_5_acc'].update(acc_5.item(), batch_size)
                accumulator['losses'].update(loss.item(), batch_size)
                accumulator['top_1_acc'].update(acc_1.item(), batch_size)
                accumulator['top_5_acc'].update(acc_5.item(), batch_size)

            # check if current model is best
            logging.info('Current validation accuracy: {:.4f}'.format(
                accumulator['top_1_acc'].avg))
            if accumulator['top_1_acc'].avg > best_acc:
                is_best = True
                best_acc = accumulator['top_1_acc'].avg
                logging.info('New best accuracy: {:.4f}'.format(best_acc))
            accumulator['losses'].reset()
            accumulator['top_1_acc'].reset()
            accumulator['top_5_acc'].reset()
        # --------------------------

        # ---- save checkpoint ----
        if (epoch + 1) % cfg.TRAIN.SAVE_INTERVAL == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': accumulator['top_1_acc'].avg,
                    'optimizer': optimizer.state_dict()
                },
                cfg.TRAIN.OUTPUT_MODEL_PREFIX,
                is_best=is_best)
            logging.info('Checkpoint saved to {}-{:0>4}.pth.tar'.format(
                cfg.TRAIN.OUTPUT_MODEL_PREFIX, epoch + 1))
        # ------------------------

    time_elapsed = int(time.time() - tic)
    logging.info('Training job complete in {:d}:{:0>2d}:{:d}'.format(
        time_elapsed // 3600,
        (time_elapsed - 3600 * (time_elapsed // 3600)) // 60,
        (time_elapsed - 60 * (time_elapsed // 60))))
    logging.info('Best val Acc: {:4f}'.format(best_acc))
    return 0
Exemplo n.º 13
0
 def classify(self, x, y, train=True, mode='base'):
     """Forward `x` through the network and return the pair
     (softmax cross-entropy vs. `y`, accuracy).

     NOTE(review): relies on module-level `softmax_cross_entropy` and
     `accuracy` helpers not shown in this snippet; the unusual one-space
     indentation is preserved from the original source.
     """
     x = self.__call__(x, train=train, mode=mode)
     cent = softmax_cross_entropy(x, y)
     acc = accuracy(x, y)
     return cent, acc