Example #1
# Assumed imports for this snippet; `opt`, `AverageMeter`, `ProgressBar`,
# `accuracy` and `check_date` are project-local helpers from the surrounding repo.
import torch as t
from torchvision.utils import make_grid


def val(model, criterion, dataloader, epoch=None, val_writer=None, lr=None, msglogger=None):
    """Compute loss and accuracy of the model on the validation set."""
    with t.no_grad():
        model.eval()
        val_losses = AverageMeter()
        val_top1 = AverageMeter()
        val_top5 = AverageMeter()
        val_progressor = None
        if not msglogger:
            val_progressor = ProgressBar(mode="Val  ", epoch=epoch, total_epoch=opt.max_epoch, model_name=opt.model,
                                         lr=lr,
                                         total=len(dataloader))
        for ii, (data, labels, img_path, tag) in enumerate(dataloader):
            if not check_date(img_path, tag, msglogger):
                return
            input = data.to(opt.device)
            labels = labels.to(opt.device)
            score = model(input)
            loss = criterion(score, labels)
            precision1, precision5 = accuracy(score, labels, topk=(1, 5))  # top-1 and top-5 accuracy
            val_losses.update(loss.item(), input.size(0))
            val_top1.update(precision1[0].item(), input.size(0))
            val_top5.update(precision5[0].item(), input.size(0))
            if val_progressor:
                val_progressor.current = ii + 1
                val_progressor.current_loss = val_losses.avg
                val_progressor.current_top1 = val_top1.avg
                val_progressor.current_top5 = val_top5.avg
                val_progressor()
                if ii % opt.print_freq == 0:
                    if val_writer:
                        grid = make_grid((input.data.cpu() * 0.225 + 0.45).clamp(min=0, max=1))
                        val_writer.add_image('val_images', grid, ii * (epoch + 1))  # validation images
                        val_writer.add_scalar('loss', val_losses.avg, ii * (epoch + 1))  # validation loss
                        val_writer.add_text('top1', 'val accuracy top1 %.2f%%' % val_top1.avg,
                                            ii * (epoch + 1))  # top-1 accuracy as text
                        val_writer.add_scalars('accuracy', {'top1': val_top1.avg,
                                                            'top5': val_top5.avg,
                                                            'loss': val_losses.avg}, ii * (epoch + 1))

        if msglogger:
            msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                           val_top1.avg, val_top5.avg, val_losses.avg)
        return [val_losses.avg, val_top1.avg, val_top5.avg]
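
The snippets on this page lean on a few project-local helpers that are not shown. Below is a minimal sketch of what AverageMeter and accuracy plausibly look like, inferred only from how they are called above; the repo's real implementations may differ.

import torch

class AverageMeter:
    """Tracks the latest value and a running average weighted by batch size."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Top-k accuracy in percent; returns one 1-element tensor per requested k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()  # shape (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res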
Example #2
# Assumed imports; `opt`, `models`, `DatasetFromFilename`, `check_date`,
# `msglogger`, `get_scheduler` and `val` come from the surrounding repo.
# SummaryWriter may come from torch.utils.tensorboard instead of tensorboardX.
import math
import operator
from datetime import datetime

import distiller
import torch as t
from distiller.data_loggers import PythonLogger
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader
from torchvision.utils import make_grid


def train(**kwargs):
    opt._parse(kwargs)
    train_writer = None
    value_writer = None
    if opt.vis:
        train_writer = SummaryWriter(
            log_dir='./runs/train_' +
            datetime.now().strftime('%y%m%d-%H-%M-%S'))
        value_writer = SummaryWriter(
            log_dir='./runs/val_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
    previous_loss = 1e10  # loss from the previous epoch (only used by the commented-out decay logic below)
    best_precision = 0  # best accuracy so far
    start_epoch = 0
    lr = opt.lr
    perf_scores_history = []  # history of performance scores
    # step1: criterion and optimizer
    # Common loss functions:
    # 1. Hinge loss: mainly used in support vector machines (SVMs);
    # 2. Cross-entropy loss (softmax loss): used in logistic regression and softmax classification;
    # 3. Square loss: used in ordinary least squares (OLS);
    # 4. Exponential loss: mainly used in the AdaBoost ensemble algorithm;
    # 5. Others (e.g. 0-1 loss, absolute loss)
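    # A tiny illustration of CrossEntropyLoss semantics (hypothetical tensors,
    # kept as comments so nothing extra runs inside this function):
    #   logits = t.randn(4, 10)          # (batch, num_classes) raw scores
    #   target = t.tensor([1, 0, 3, 9])  # class indices, shape (batch,)
    #   t.nn.CrossEntropyLoss()(logits, target)  # log_softmax + NLLLoss in one call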
    criterion = t.nn.CrossEntropyLoss().to(opt.device)  # loss function
    # step2: meters
    train_losses = AverageMeter()  # loss meter
    train_top1 = AverageMeter()  # top-1 accuracy meter
    train_top5 = AverageMeter()  # top-5 accuracy meter
    pylogger = PythonLogger(msglogger)
    # step3: configure model
    model = getattr(models, opt.model)()  # instantiate the network architecture
    compression_scheduler = distiller.CompressionScheduler(model)
    optimizer = model.get_optimizer(lr, opt.weight_decay)  # optimizer
    if opt.load_model_path:
        # # Load all tensors onto the CPU:
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage)
        # t.load(opt.load_model_path, map_location='cpu')
        # # Load all tensors onto GPU 1:
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage.cuda(1))
        # # Map tensors saved on GPU 1 to GPU 0:
        # t.load(opt.load_model_path, map_location={'cuda:1': 'cuda:0'})
        checkpoint = t.load(opt.load_model_path)
        start_epoch = checkpoint["epoch"]
        # compression_scheduler.load_state_dict(checkpoint['compression_scheduler'], False)
        best_precision = checkpoint["best_precision"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer = checkpoint['optimizer']  # the checkpoint stores the whole optimizer object
    model.to(opt.device)  # move the model to the configured device

    if opt.compress:
        compression_scheduler = distiller.file_config(
            model, optimizer, opt.compress, compression_scheduler)  # load the pruning schedule from file
        model.to(opt.device)
    # learning-rate scheduler
    lr_scheduler = get_scheduler(optimizer, opt)
    # step4: data_image
    train_data = DatasetFromFilename(opt.data_root, flag='train')  # training set
    val_data = DatasetFromFilename(opt.data_root, flag='test')  # validation set
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)  # training loader
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)  # validation loader
    # train
    for epoch in range(start_epoch, opt.max_epoch):
        model.train()
        if opt.pruning:
            compression_scheduler.on_epoch_begin(epoch)  # start-of-epoch pruning hook
        train_losses.reset()  # reset the meters
        train_top1.reset()
        # print('training set size', len(train_dataloader))
        total_samples = len(train_dataloader.sampler)
        steps_per_epoch = math.ceil(total_samples / opt.batch_size)
        train_progressor = ProgressBar(mode="Train  ",
                                       epoch=epoch,
                                       total_epoch=opt.max_epoch,
                                       model_name=opt.model,
                                       lr=lr,
                                       total=len(train_dataloader))
        lr = lr_scheduler.get_lr()[0]
        for ii, (data, labels, img_path, tag) in enumerate(train_dataloader):
            if not check_date(img_path, tag, msglogger):
                return
            if opt.pruning:
                compression_scheduler.on_minibatch_begin(
                    epoch, ii, steps_per_epoch, optimizer)  # start-of-batch pruning hook
            train_progressor.current = ii + 1  # current position in the training set
            # train model
            input = data.to(opt.device)
            target = labels.to(opt.device)
            if train_writer:
                grid = make_grid(
                    (input.data.cpu() * 0.225 + 0.45).clamp(min=0, max=1))
                train_writer.add_image('train_images', grid,
                                       ii * (epoch + 1))  # training images
            score = model(input)  # network output (logits)
            # compute the loss
            loss = criterion(score, target)
            if opt.pruning:
                # Before running the backward phase, we allow the scheduler to modify the loss
                # (e.g. add regularization loss)
                agg_loss = compression_scheduler.before_backward_pass(
                    epoch,
                    ii,
                    steps_per_epoch,
                    loss,
                    optimizer=optimizer,
                    return_loss_components=True)  # loss as adjusted by the pruning scheduler
                loss = agg_loss.overall_loss
            # loss = criterion(score[0], target)  # loss for an Inception3-style network
            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()  # backpropagate
            optimizer.step()  # update parameters

            if opt.pruning:
                compression_scheduler.on_minibatch_end(epoch, ii,
                                                       steps_per_epoch,
                                                       optimizer)  # end-of-batch pruning hook

            precision1_train, precision5_train = accuracy(
                score, target, topk=(1, 5))  # top-1 and top-5 accuracy

            # writer.add_graph(model, input)
            # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3-style network
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(precision1_train[0].item(), input.size(0))
            train_top5.update(precision5_train[0].item(), input.size(0))
            train_progressor.current_loss = train_losses.avg
            train_progressor.current_top1 = train_top1.avg
            train_progressor.current_top5 = train_top5.avg
            train_progressor()  # print progress
            if ii % opt.print_freq == 0:
                if train_writer:
                    train_writer.add_scalar('loss', train_losses.avg,
                                            ii * (epoch + 1))  # training loss
                    train_writer.add_text(
                        'top1', 'train accuracy top1 %.2f%%' % train_top1.avg,
                        ii * (epoch + 1))  # top-1 accuracy as text
                    train_writer.add_scalars(
                        'accuracy', {
                            'top1': train_top1.avg,
                            'top5': train_top5.avg,
                            'loss': train_losses.avg
                        }, ii * (epoch + 1))
        # train_progressor.done()  # save training results to a txt file
        # validate and visualize
        if opt.pruning:
            distiller.log_weights_sparsity(model, epoch,
                                           loggers=[pylogger])  # log weight sparsity after pruning
            compression_scheduler.on_epoch_end(epoch, optimizer)  # end-of-epoch pruning hook
        val_loss, val_top1, val_top5 = val(model, criterion, val_dataloader,
                                           epoch, value_writer, lr)  # validate the model
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple(
                {
                    'sparsity': sparsity,
                    'top1': val_top1,
                    'top5': val_top5,
                    'epoch': epoch + 1,
                    'lr': lr,
                    'loss': val_loss
                }, ))
        # Keep the performance-score history sorted from best to worst:
        # sparsity is the primary sort key, then top1, top5 and epoch
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
        for score in perf_scores_history[:1]:
            msglogger.info(
                '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d   Lr: %f   Loss: %f]',
                score.top1, score.top5, score.sparsity, score.epoch, lr,
                score.loss)

        best_precision = max(perf_scores_history[0].top1,
                             best_precision)  # best top-1 accuracy so far
        is_best = epoch + 1 == perf_scores_history[0].epoch  # current epoch matches the best epoch
        if is_best:
            model.save({
                "epoch": epoch + 1,
                "model_name": opt.model,
                "state_dict": model.state_dict(),
                "best_precision": best_precision,
                "optimizer": optimizer,
                "valid_loss": [val_loss, val_top1, val_top5],
                "compression_scheduler": compression_scheduler.state_dict(),
            })  # save a checkpoint
        # update learning rate
        lr_scheduler.step(epoch)  # step the learning-rate scheduler
        # # If the training loss grew since the last epoch, decay the learning rate:
        # if train_losses.val > previous_loss:
        #     lr = lr * opt.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        #
        # previous_loss = train_losses.val
        t.cuda.empty_cache()  # release cached GPU memory
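
get_scheduler is another project-local helper shared by these examples. A plausible minimal version is sketched below; opt.lr_step is a hypothetical attribute invented here for illustration, while opt.lr_decay appears in the commented-out decay logic above.

# Minimal sketch only; the repo's real get_scheduler may support more policies.
from torch.optim import lr_scheduler


def get_scheduler(optimizer, opt):
    # Hypothetical policy: multiply the LR by opt.lr_decay every opt.lr_step epochs.
    return lr_scheduler.StepLR(optimizer, step_size=opt.lr_step, gamma=opt.lr_decay)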
Example #3
    def train(self):
        previous_loss = 1e10  # loss from the previous epoch (only used by the commented-out decay logic below)
        lr = self.opt.lr
        perf_scores_history = []
        pylogger = PythonLogger(msglogger)
        self.train_load_model()
        self.load_compress()
        self.create_write()
        lr_scheduler = get_scheduler(self.optimizer, opt)
        for epoch in range(self.start_epoch, self.opt.max_epoch):
            self.model.train()
            self.load_data()
            if self.opt.pruning:
                self.compression_scheduler.on_epoch_begin(epoch)  # start-of-epoch pruning hook
            self.train_losses.reset()  # reset the meters
            self.train_top1.reset()
            # print('training set size', len(self.train_dataloader))
            total_samples = len(self.train_dataloader.sampler)
            steps_per_epoch = math.ceil(total_samples / self.opt.batch_size)
            train_progressor = ProgressBar(mode="Train  ",
                                           epoch=epoch,
                                           total_epoch=self.opt.max_epoch,
                                           model_name=self.opt.model,
                                           total=len(self.train_dataloader))
            lr = lr_scheduler.get_lr()[0]  # get_lr() returns a list; take the scalar LR
            for ii, (data, labels,
                     img_path) in enumerate(self.train_dataloader):
                if self.opt.pruning:
                    self.compression_scheduler.on_minibatch_begin(
                        epoch, ii, steps_per_epoch,
                        self.optimizer)  # start-of-batch pruning hook
                train_progressor.current = ii + 1  # current position in the training set
                # train model
                input = data.to(self.opt.device)
                target = labels.to(self.opt.device)
                score = self.model(input)  # network output (logits)
                loss = self.criterion(score, target)  # compute the loss
                if self.opt.pruning:
                    # Before running the backward phase, we allow the scheduler to modify the loss
                    # (e.g. add regularization loss)
                    agg_loss = self.compression_scheduler.before_backward_pass(
                        epoch,
                        ii,
                        steps_per_epoch,
                        loss,
                        optimizer=self.optimizer,
                        return_loss_components=True)  # loss as adjusted by the pruning scheduler
                    loss = agg_loss.overall_loss
                # loss = criterion(score[0], target)  # loss for an Inception3-style network
                self.optimizer.zero_grad()  # zero the parameter gradients
                loss.backward()  # backpropagate
                self.optimizer.step()  # update parameters

                if self.opt.pruning:
                    self.compression_scheduler.on_minibatch_end(
                        epoch, ii, steps_per_epoch,
                        self.optimizer)  # end-of-batch pruning hook

                precision1_train, precision5_train = accuracy(
                    score, target, topk=(1, 5))  # top-1 and top-5 accuracy

                # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3-style network
                self.train_losses.update(loss.item(), input.size(0))
                self.train_top1.update(precision1_train[0].item(),
                                       input.size(0))
                self.train_top5.update(precision5_train[0].item(),
                                       input.size(0))
                train_progressor.current_loss = self.train_losses.avg
                train_progressor.current_top1 = self.train_top1.avg
                train_progressor.current_top5 = self.train_top5.avg
                train_progressor()  # print progress
                if (ii + 1) % self.opt.print_freq == 0:
                    self.visualization_train(input, ii, epoch)
            if self.opt.pruning:
                distiller.log_weights_sparsity(self.model,
                                               epoch,
                                               loggers=[pylogger])  # log weight sparsity after pruning
                self.compression_scheduler.on_epoch_end(
                    epoch, self.optimizer)  # end-of-epoch pruning hook
            val_loss, val_top1, val_top5 = val(self.model, self.criterion,
                                               self.val_dataloader, epoch,
                                               self.value_writer)  # validate the model
            sparsity = distiller.model_sparsity(self.model)
            perf_scores_history.append(
                distiller.MutableNamedTuple(
                    {
                        'sparsity': sparsity,
                        'top1': val_top1,
                        'top5': val_top5,
                        'epoch': epoch + 1,
                        'lr': lr,
                        'loss': val_loss
                    }, ))
            # Keep the performance-score history sorted from best to worst:
            # sparsity is the primary sort key, then top1, top5 and epoch
            perf_scores_history.sort(key=operator.attrgetter(
                'sparsity', 'top1', 'top5', 'epoch'),
                                     reverse=True)
            for score in perf_scores_history[:1]:
                msglogger.info(
                    '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d   Lr: %f   Loss: %f]',
                    score.top1, score.top5, score.sparsity, score.epoch, lr,
                    score.loss)

            is_best = epoch + 1 == perf_scores_history[0].epoch  # current epoch matches the best epoch
            self.best_precision = max(perf_scores_history[0].top1,
                                      self.best_precision)  # best top-1 accuracy so far
            if is_best:
                self.train_save_model(epoch, val_loss, val_top1, val_top5)
            # update learning rate
            lr_scheduler.step(epoch)
            lr = lr_scheduler.get_lr()[0]
            # # If the training loss grew since the last epoch, decay the learning rate:
            # if self.train_losses.val > previous_loss:
            #     lr = lr * self.opt.lr_decay
            #     for param_group in self.optimizer.param_groups:
            #         param_group['lr'] = lr
            #
            # previous_loss = self.train_losses.val
            t.cuda.empty_cache()
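
All three examples follow the same distiller CompressionScheduler callback protocol. Stripped of logging, checkpointing and validation, the per-epoch skeleton they implement looks like this (names reuse the variables from the examples above):

# Skeleton of the distiller pruning hooks used in the examples.
for epoch in range(start_epoch, max_epoch):
    compression_scheduler.on_epoch_begin(epoch)  # prune at epoch start
    for ii, (data, labels) in enumerate(train_dataloader):
        compression_scheduler.on_minibatch_begin(epoch, ii, steps_per_epoch, optimizer)
        loss = criterion(model(data), labels)
        # The scheduler may add regularization terms to the loss before backward.
        agg_loss = compression_scheduler.before_backward_pass(
            epoch, ii, steps_per_epoch, loss,
            optimizer=optimizer, return_loss_components=True)
        optimizer.zero_grad()
        agg_loss.overall_loss.backward()
        optimizer.step()
        compression_scheduler.on_minibatch_end(epoch, ii, steps_per_epoch, optimizer)
    compression_scheduler.on_epoch_end(epoch, optimizer)  # prune at epoch end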