Example #1
def val(model, dataset):
    '''
    Compute the model's accuracy on the validation set.
    Returns the top-3 and top-1 accuracy.
    '''
    model.eval()
    dataset.val()
    acc_meter = meter.AverageValueMeter()
    top1_meter = meter.AverageValueMeter()
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         opt.shuffle,
                                         num_workers=opt.workers,
                                         pin_memory=True)
    for ii, data in enumerate(tqdm.tqdm(dataloader)):
        input, label, _ = data
        # volatile=True marks inference-only Variables (pre-0.4 PyTorch API).
        val_input = Variable(input, volatile=True).cuda()
        score = model(val_input)
        acc = topk_acc(score.data, label.cuda())
        top1 = topk_acc(score.data, label.cuda(), k=1)
        acc_meter.add(acc)
        top1_meter.add(top1)
    model.train()
    dataset.train()
    print(acc_meter.value()[0], top1_meter.value()[0])
    return acc_meter.value()[0], top1_meter.value()[0]
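
This example (and #2 and #5 below) calls a topk_acc helper that is not shown. Given that its return value is fed straight into an AverageValueMeter, it must return a single per-batch accuracy number. A minimal sketch of what it might look like (the default k=3 and the percentage scale are assumptions inferred from the docstring, not code from the source):

def topk_acc(scores, labels, k=3):
    # Hypothetical reconstruction: percentage of samples whose true label
    # is among the k highest-scoring classes.
    _, pred = scores.topk(k, dim=1)                        # (batch, k) indices
    correct = pred.eq(labels.view(-1, 1).expand_as(pred))  # (batch, k) matches
    return correct.float().sum().item() * 100.0 / labels.size(0)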
Example #2
def val(model, dataset):
    '''
    Compute the model's accuracy on the validation set.
    Returns the top-3 and top-1 accuracy.
    '''
    model.eval()
    dataset.val()
    acc_meter = meter.AverageValueMeter()
    top1_meter = meter.AverageValueMeter()
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size // 2,  # integer division; DataLoader needs an int batch size
                                         opt.shuffle,
                                         num_workers=8,
                                         pin_memory=True)
    for ii, data in enumerate(tqdm.tqdm(dataloader)):
        input, label, _ = data
        # Ten-crop TTA variant (disabled): fold the crop dimension into the
        # batch, run the model once, then average the softmax over the crops.
        #bs, ncrops, c, h, w = input.size()
        #val_input = Variable(input.view(-1, c, h, w), volatile=True).cuda()
        #scores = model(val_input)  # (bs * ncrops, num_classes)
        #prob = t.nn.functional.softmax(scores)
        #prob_avg = prob.view(bs, ncrops, -1).mean(1)
        val_input = Variable(input, volatile=True).cuda()
        prob_avg = model(val_input)
        acc = topk_acc(prob_avg.data, label.cuda())
        top1 = topk_acc(prob_avg.data, label.cuda(), k=1)
        acc_meter.add(acc)
        top1_meter.add(top1)
    model.train()
    dataset.train()
    print(acc_meter.value()[0], top1_meter.value()[0])
    return acc_meter.value()[0], top1_meter.value()[0]
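
The commented-out lines in Example #2 sketch ten-crop test-time augmentation: the crop dimension is folded into the batch, the model runs once, and the per-crop softmax outputs are averaged back per sample. A standalone version of that pattern in the modern PyTorch API (the (bs, ncrops, c, h, w) input shape, as produced by torchvision's transforms.TenCrop, is assumed):

import torch
import torch.nn.functional as F

def tencrop_predict(model, input):
    # input: (bs, ncrops, c, h, w), e.g. from transforms.TenCrop
    bs, ncrops, c, h, w = input.size()
    with torch.no_grad():
        scores = model(input.view(-1, c, h, w))       # (bs * ncrops, classes)
        prob = F.softmax(scores, dim=1)
        prob_avg = prob.view(bs, ncrops, -1).mean(1)  # average over the crops
    return prob_avg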
Example #3
def train(epoch, warm_up=True):
    model.train()
    train_loss = 0
    total, top1_correct, top5_correct = 0, 0, 0
    top1_acc, top5_acc = 0, 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # learning rate warm up
        if warm_up and epoch <= WARM_UP_EPOCHS:
            warm_up_scheduler.step()
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        if OBJ_FUNC == 'FL':
            outputs = F.softmax(outputs, dim=1)

        optimizer.zero_grad()
        # loss_param = {'y_hat': outputs, 'y': targets} ; loss = primary_criterion(**loss_param)
        loss = primary_criterion(outputs, targets)

        if torch.isnan(loss) or torch.isinf(loss):
            print('[ERROR] non-finite loss (%s), stopping training.' % loss)
            exit(1)

        # Retain the graph only when a second backward pass (COT) follows.
        loss.backward(retain_graph=(OBJ_FUNC == 'COT'))
        if OBJ_FUNC == 'COT':
            entropy = complement_criterion(outputs, targets)
            entropy.backward()

        optimizer.step()

        train_loss += loss.item()

        # _, predicted = outputs.max(1)
        total += targets.size()[0]

        top_acc_list = utils.topk_acc(outputs, targets, topk=(1, 5))

        # top1_correct += predicted.eq(targets).sum().item()
        top1_correct += top_acc_list[0]
        top5_correct += top_acc_list[1]

        top1_acc = 100.0 * (top1_correct / total)
        top5_acc = 100.0 * (top5_correct / total)

        utils.progress_bar(
            'Train',
            epoch + 1,
            batch_idx,
            len(train_loader),
            msg=
            'Loss: %.3f | Acc: [top-1] %.3f%% (%d/%d), [top-5] %.3f%% (%d/%d)'
            % (train_loss / (batch_idx + 1), top1_acc, top1_correct, total,
               top5_acc, top5_correct, total))

    train_loss /= len(train_loader)

    tensor_board_writer.add_scalars('train/acc', {'acc': top1_acc}, epoch)
    tensor_board_writer.add_scalars('train/loss', {'loss': train_loss}, epoch)
    train_csv_writer.writerow({
        'epoch': epoch,
        'loss': train_loss,
        'acc': top1_acc.item(),
        'top5acc': top5_acc.item(),
        'lr': optimizer.state_dict()['param_groups'][0]['lr']
    })
    train_csv_file.flush()
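
Note that utils.topk_acc in Examples #3, #4, and #6 must return per-batch counts of correct predictions (they are accumulated into top1_correct/top5_correct and later divided by total), unlike the percentage-style topk_acc of Examples #1 and #5. A plausible sketch under that assumption:

def topk_acc(outputs, targets, topk=(1,)):
    # Hypothetical reconstruction: number of correctly classified samples
    # for each k in topk, returned in the same order as topk.
    maxk = max(topk)
    _, pred = outputs.topk(maxk, dim=1)                     # (batch, maxk)
    correct = pred.eq(targets.view(-1, 1).expand_as(pred))  # (batch, maxk)
    return [correct[:, :k].sum() for k in topk]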
Example #4
def valid(epoch):
    global best_top1_valid_acc
    model.eval()

    valid_loss = 0
    total, top1_correct, top5_correct = 0, 0, 0
    top1_acc, top5_acc = 0, 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valid_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            loss = cross_entropy(outputs, targets)
            valid_loss += loss.item()

            total += targets.size()[0]
            top_acc_list = utils.topk_acc(outputs, targets, topk=(1, 5))
            top1_correct += top_acc_list[0]
            top5_correct += top_acc_list[1]

            top1_acc = 100.0 * (top1_correct / total)
            top5_acc = 100.0 * (top5_correct / total)

            utils.progress_bar(
                'Valid',
                epoch + 1,
                batch_idx,
                len(valid_loader),
                msg=
                'Loss: %.3f | Acc: [top-1] %.3f%% (%d/%d), [top-5] %.3f%% (%d/%d)'
                % (valid_loss / (batch_idx + 1), top1_acc, top1_correct, total,
                   top5_acc, top5_correct, total))

    if top1_acc > best_top1_valid_acc:
        print('Saving current parameters of the model (checkpoint).')
        state = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'acc': top1_acc,
            'top5acc': top5_acc,
            'loss': valid_loss,
        }

        # torch.save(state, log_dir + '/' + str(top1_acc.item()) + '.pth')
        torch.save(state, log_dir + '/' + 'model' + '.pth')
        best_top1_valid_acc = top1_acc

    valid_loss /= len(valid_loader)

    tensor_board_writer.add_scalars('valid/acc', {'acc': top1_acc}, epoch)
    tensor_board_writer.add_scalars('valid/loss', {'loss': valid_loss}, epoch)
    valid_csv_writer.writerow({
        'epoch': epoch,
        'loss': valid_loss,
        'acc': top1_acc.item(),
        'top5acc': top5_acc.item(),
        'lr': optimizer.state_dict()['param_groups'][0]['lr']
    })
    valid_csv_file.flush()
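
The checkpoint saved in Example #4 can be restored symmetrically. A minimal resume sketch (the file name and key names follow the state dict above; map_location='cpu' is an assumption so the file also loads on CPU-only machines):

checkpoint = torch.load(log_dir + '/model.pth', map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1
best_top1_valid_acc = checkpoint['acc']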
Example #5
def train(**kwargs):
    '''
    Train the model.
    '''
    opt.parse(kwargs)

    lr1, lr2 = opt.lr1, opt.lr2
    vis.vis.env = opt.env
    
    # Model
    model = getattr(models, opt.model)(opt)
    if opt.load_path:
        model.load(opt.load_path)
    print(model)
    model.cuda()
    optimizer = model.get_optimizer(lr1, lr2)
    criterion = getattr(models, opt.loss)()

    # Metrics: running averages
    loss_meter = meter.AverageValueMeter()
    acc_meter = meter.AverageValueMeter()
    top1_meter = meter.AverageValueMeter()
    
    step = 0
    max_acc = 0
    vis.vis.texts = ''

    # Data
    dataset = ClsDataset(opt)
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         opt.shuffle,
                                         num_workers=opt.workers,
                                         pin_memory=True)
    
    # Training
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        acc_meter.reset()
        top1_meter.reset()

        for ii, data in enumerate(tqdm.tqdm(dataloader)):
            # Training step
            optimizer.zero_grad()
            input, label, _ = data
            input = Variable(input.cuda())
            label = Variable(label.cuda())
            output = model(input).squeeze()
            error = criterion(output, label)
            error.backward()
            optimizer.step()

            # Track the running mean of the loss and the training accuracy.
            loss_meter.add(error.data[0])  # .data[0]: pre-0.4 scalar access
            acc = topk_acc(output.data,label.data)
            acc_meter.add(acc)
            top1_acc = topk_acc(output.data,label.data,k=1)
            top1_meter.add(top1_acc)

            # Visualization
            if (ii+1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                log_values = dict(loss=loss_meter.value()[0],
                                  train_acc=acc_meter.value()[0],
                                  epoch=epoch,
                                  ii=ii,
                                  train_top1_acc=top1_meter.value()[0])
                vis.plot_many(log_values)
        
        # After one pass over the data, evaluate on the validation set
        accuracy,top1_accuracy = val(model,dataset)
        vis.plot('val_acc', accuracy)
        vis.plot('val_top1',top1_accuracy)
        info = time.strftime('[%m%d_%H%M%S]') + 'epoch:{epoch},val_acc:{val_acc},lr:{lr},val_top1:{val_top1},train_acc:{train_acc}<br>'.format(
            epoch=epoch,
            lr=lr1,
            val_acc=accuracy,
            val_top1=top1_accuracy,
            train_acc=acc_meter.value()[0]
        )
        vis.vis.texts += info
        vis.vis.text(vis.vis.texts, win=u'log')

        # Adjust the learning rate:
        # if validation accuracy dropped, decay the rates and reload the best model so far;
        # otherwise save the model and record the checkpoint path.
        if accuracy > max_acc:
            max_acc = accuracy
            best_path = model.save(accuracy)
        else:
            if lr1 == 0:
                lr1 = lr2
            model.load(best_path)
            lr1, lr2 = lr1 * opt.lr_decay, lr2 * opt.lr_decay
            optimizer = model.get_optimizer(lr1, lr2)

        vis.vis.save([opt.env])
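
model.get_optimizer(lr1, lr2) is not shown in Examples #5 and #7; the two rates (and the lr1 == 0 case above) suggest per-parameter-group learning rates, e.g. a frozen-or-slow pretrained backbone and a faster new classifier head. One way such a method could look (the features/classifier attribute names and the choice of Adam are assumptions):

def get_optimizer(self, lr1, lr2):
    # Hypothetical reconstruction: one learning rate for the pretrained
    # backbone, another for the freshly initialized classification head.
    return t.optim.Adam([
        {'params': self.features.parameters(), 'lr': lr1},
        {'params': self.classifier.parameters(), 'lr': lr2},
    ])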
Example #6
    checkpoint = torch.load(MODEL_CHECKPOINT_DIR + '/' + 'model.pth')
    checkpoint = modify_checkpoint_keys(checkpoint)
    model.load_state_dict(checkpoint['model_state_dict'])

    # 5. Load the model to CPU or GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # 6. (If GPU,) Turn on 'Data Parallel' mode.
    if device == 'cuda':
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    # 7. Turn on the test (evaluation) mode.
    model.eval()

    # 8. Test (evaluate)
    y_hats, ys = get_all_y_hats_and_ys(model, test_loader, device)

    # 9. Result (1) (acc)
    top_acc_list = utils.topk_acc(y_hats, ys, topk=(1, 5))
    total_sample_sizes = utils.get_total_sample_sizes(TEST_DIR)
    print('[RESULT] Acc: [top-1]',
          100 * top_acc_list[0].item() / total_sample_sizes,
          end=' ')
    print('[top-5]', 100 * top_acc_list[1].item() / total_sample_sizes)

    # 10. Result (2) (confusion matrix)
    conf_matrix = confusion_matrix(ys, y_hats.argmax(dim=1))
    plot_confusion_matrix(conf_matrix, CLASSES, file_format='svg')
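
modify_checkpoint_keys is not defined in this snippet. A common reason for rewriting checkpoint keys is that weights saved from a torch.nn.DataParallel model carry a 'module.' prefix on every parameter name; a sketch under that assumption:

def modify_checkpoint_keys(checkpoint):
    # Hypothetical reconstruction: strip the 'module.' prefix that
    # torch.nn.DataParallel prepends to every parameter name.
    state = checkpoint['model_state_dict']
    checkpoint['model_state_dict'] = {
        key.replace('module.', '', 1): value for key, value in state.items()
    }
    return checkpoint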
Example #7
def train(**kwargs):
    opt.parse(kwargs)
    lr1, lr2 = opt.lr1, opt.lr2
    lr3 = opt.lr3
    vis.vis.env = opt.env
    max_acc = 0
    # Model
    model = getattr(models, opt.model)(opt)
    optimizer = model.get_optimizer(opt.model, lr1, lr2, lr3)
    if opt.load_path:  # load model + optimizer state from a checkpoint
        #checkpoint = t.load(opt.load_path,lambda storage, loc: storage)
        checkpoint = t.load(opt.load_path)
        model.load_state_dict(checkpoint['d'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        max_acc = checkpoint['acc']
        print('using checkpoint:{}'.format(opt.load_path))
        print('old config:')
        print(checkpoint['opt'])
    print(model)
    model.cuda()
    criterion = getattr(models, opt.loss)()
    # Metrics: running averages
    loss_meter = meter.AverageValueMeter()
    acc_meter = meter.AverageValueMeter()
    top1_meter = meter.AverageValueMeter()
    vis.vis.texts = ''
    # Data
    dataset = ClsDataset()
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         opt.shuffle,
                                         num_workers=opt.workers,
                                         pin_memory=True)
    time_begin = time.time()
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        acc_meter.reset()
        top1_meter.reset()
        for ii, data in enumerate(tqdm.tqdm(dataloader)):
            # Training step
            optimizer.zero_grad()
            input, label, _ = data
            input = Variable(input.cuda())
            label = Variable(label.cuda())
            output = model(input).squeeze()
            error = criterion(output, label)
            error.backward()
            optimizer.step()
            # Track the running mean of the loss and the training accuracy
            loss_meter.add(error.data[0])
            acc = topk_acc(output.data, label.data)
            acc_meter.add(acc)
            top1_acc = topk_acc(output.data, label.data, k=1)
            top1_meter.add(top1_acc)
            # Visualization
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                log_values = dict(loss=loss_meter.value()[0],
                                  train_acc=acc_meter.value()[0],
                                  epoch=epoch,
                                  ii=ii,
                                  train_top1_acc=top1_meter.value()[0])
                vis.plot_many(log_values)

        # After one pass over the data, evaluate on the validation set
        accuracy, top1_accuracy = val(model, dataset)
        vis.plot('val_acc', accuracy)
        vis.plot('val_top1', top1_accuracy)
        info = time.strftime(
            '[%m%d_%H%M%S]'
        ) + 'epoch:{epoch},train_acc:{train_acc},max_acc:{max_acc},val_acc:{val_acc},lr:{lr}<br>'.format(
            epoch=epoch,
            lr=lr1,
            train_acc=acc_meter.value()[0],
            val_acc=accuracy,
            max_acc=max_acc
            #val_top1=top1_accuracy
        )
        vis.vis.texts += info
        # Adjust the learning rate:
        # if validation accuracy dropped, decay the rates and reload the best model so far;
        # otherwise save the model and record the checkpoint path.
        if accuracy > max_acc:
            max_acc = accuracy
            best_path = model.save(accuracy)
        else:
            if lr1 == 0:
                lr1 = lr2
            if lr3:
                lr3 = lr1 * opt.lr_decay
            model.load(best_path)
            lr1, lr2 = lr1 * opt.lr_decay, lr2 * opt.lr_decay
            optimizer = model.get_optimizer(opt.model, lr1, lr2, lr3)
            vis.vis.texts += 'changed learning rate<br>'

        #for param_group in optimizer.param_groups:
        #    lr = init_lr * (0.5 ** (epoch // lr_decay_epoch))
        #    param_group['lr'] = lr
        #    param_group['weight_decay'] = weight_decay

        vis.vis.text(vis.vis.texts, win=u'log')
        vis.vis.save([opt.env])
        time_all = time.time() - time_begin
        print(time_all)  # elapsed seconds so far
    print('Training complete in {:.0f}hour {:.0f}min'.format(
        time_all // 3600, (time_all % 3600) // 60))
    print('Best val Acc: {:4f}'.format(max_acc))
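
The manual decay-and-rollback in Examples #5 and #7 (decay the learning rates and reload the best weights whenever validation accuracy stops improving) can largely be reproduced with torch.optim.lr_scheduler.ReduceLROnPlateau, minus the weight rollback. A minimal sketch against the same loop:

scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=opt.lr_decay, patience=0)

for epoch in range(opt.max_epoch):
    # ... one training pass over the dataloader ...
    accuracy, top1_accuracy = val(model, dataset)
    scheduler.step(accuracy)  # decays every group's lr when accuracy plateaus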