Example #1
def train_epoch(model,
                optim,
                loss_fcn,
                trainloader,
                valloader,
                epoch,
                interval_validate=4000,
                max_iter=40000):
    """
    训练一个epoch
    :param model: 用于训练的模型
            optim:训练时所采用的优化器
            loss_fcn:训练时采用的损失函数
           trainloader:用于训练的数据集
           valloader:用于验证的数据集
           epoch:表示这是第几个epoch
    :return:
    """
    model = model.cuda()

    model.train()
    n_class = len(valloader.dataset.class_names)
    for batch_idx, (data, target) in enumerate(trainloader):
        data = data.cuda()
        target = target.cuda()

        print('train epoch:' + str(epoch) + ' batch_idx:' + str(batch_idx))
        iteration = batch_idx + epoch * len(
            trainloader)  # treat each batch as one iteration; this is the global iteration index
        # if iteration % interval_validate == 400:  # after every interval_validate iterations, evaluate on the validation set; its data has the same form as the training data and is used to measure generalization and tune hyperparameters
        #     validate(model=model, valloader=valloader, loss_fcn=loss_fcn)

        assert model.training  # make sure the model is in training mode

        optim.zero_grad()
        score = model(data)
        loss = loss_fcn(score, target, weight=None, size_average=False)
        loss /= len(data)
        loss_data = loss.data.item()
        loss.backward()
        optim.step()

        # update the running statistics and visualize them (here on every batch; this could also be done every few batches)
        metrics = []
        lbl_pred = score.data.max(
            1)[1].cpu().numpy()[:, :, :]  # take the highest-scoring class for each pixel; together the pixels form the predicted label map
        lbl_true = target.data.cpu().numpy()  # the human-annotated label map
        acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
            lbl_true, lbl_pred, n_class=n_class)  # all four values can serve as training-set metrics

        metrics.append((acc, acc_cls, mean_iu, fwavacc))
        metrics = np.mean(metrics, axis=0)

        # plot the scalars above
        utils.Vis.plot_scalar('loss2', loss_data, iteration)
        if iteration > max_iter:  # stop once the maximum number of iterations is exceeded
            break
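
The models.label_accuracy_score used above is not defined in any of these examples. Below is a minimal sketch of what it presumably computes, modelled on the widely used pytorch-fcn implementation; treat the module placement and exact behavior as assumptions:

import numpy as np

def _fast_hist(label_true, label_pred, n_class):
    # per-image confusion matrix; pixels with labels outside [0, n_class) are ignored
    mask = (label_true >= 0) & (label_true < n_class)
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask],
        minlength=n_class ** 2).reshape(n_class, n_class)

def label_accuracy_score(label_trues, label_preds, n_class):
    """Return (overall acc, mean class acc, mean IU, frequency-weighted IU)."""
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
    acc = np.diag(hist).sum() / hist.sum()
    acc_cls = np.nanmean(np.diag(hist) / hist.sum(axis=1))
    iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    mean_iu = np.nanmean(iu)
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc
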
Example #2
def validate(model, valloader, loss_fcn):
    """
    用来在验证集上评估该模型,并且根据测试结果调整超参数
    :param model: 用来验证的模型
    val_loader:用来验证模型的数据集
    loss_fcn:model的损失函数
    :return:
    """
    model.eval()
    n_class = len(valloader.dataset.class_names)

    val_loss = 0
    label_trues, label_preds = [], []  # accumulated across all batches so the final metrics cover the whole set
    visualizations = []
    for batch_idx, (data, target) in enumerate(valloader):
        data = data.cuda()
        target = target.cuda()

        print('validate' + str(batch_idx))
        with torch.no_grad():
            score = model(data)  # forward pass: run the model on the input batch

        loss = loss_fcn(score, target, weight=None, size_average=False)
        loss_data = loss.data.item()
        val_loss += loss_data / len(data)  # accumulate the scalar, not the tensor

        imgs = data.data.cpu()
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu()

        # visualize the model's semantic-segmentation output
        for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
            img, lt = valloader.dataset.untransform(img, lt)
            label_trues.append(lt)
            label_preds.append(lp)
            if len(visualizations) < 9:
                viz = fcn.utils.visualize_segmentation(lbl_pred=lp,
                                                       lbl_true=lt,
                                                       img=img,
                                                       n_class=n_class)
                visualizations.append(viz)

    # compute the model's metrics over the whole validation set
    acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
        label_trues, label_preds, n_class)
    val_loss /= len(valloader)

    utils.Vis.plot_scalar('ValLos', val_loss, batch_idx)  # plot the mean validation loss
    utils.Vis.plot_scalar('ValMeanIu', mean_iu, None)
    # utils.ModelSave(model,optim=)
    model.train()
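
loss_fcn is called as loss_fcn(score, target, weight=None, size_average=False) but never shown. A minimal sketch of a compatible pixel-wise cross-entropy loss, in the style of the cross_entropy2d commonly paired with FCNs (an assumption, not necessarily the author's exact function):

import torch.nn.functional as F

def cross_entropy2d(score, target, weight=None, size_average=False):
    # score: (N, C, H, W) raw class scores; target: (N, H, W) integer labels
    n, c, h, w = score.size()
    log_p = F.log_softmax(score, dim=1)
    # flatten to (N*H*W, C) and (N*H*W,), dropping pixels labelled < 0 (ignore label)
    log_p = log_p.permute(0, 2, 3, 1).reshape(-1, c)
    target = target.reshape(-1)
    mask = target >= 0
    loss = F.nll_loss(log_p[mask], target[mask], weight=weight, reduction='sum')
    if size_average:
        loss = loss / mask.sum()
    return loss

With size_average=False the loss is a plain sum over pixels, which is why the callers divide by the batch size themselves (loss /= len(data)).
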
Example #3
    def validate(self):
        """
        用来在验证集上评估该模型,并且根据测试结果调整超参数
        :param model: 用来验证的模型
        val_loader:用来验证模型的数据集
        loss_fcn:model的损失函数
        :return:
        """
        self.model.eval()
        n_class = len(self.val_loader.dataset.class_names)
        label_trues, label_preds = [], []
        visualizations = []
        val_loss = 0
        for batch_idx, (data, target) in enumerate(self.val_loader):
            if batch_idx > 1000:
                break
            if self.cuda:
                data = data.cuda()
                target = target.cuda()

            print('validate' + str(batch_idx))
            with torch.no_grad():
                score = self.model(data)  # forward pass: run the model on the input batch

            loss = self.loss_fcn(score,
                                 target,
                                 weight=None,
                                 size_average=False)
            loss_data = loss.data.item()
            val_loss += loss_data / len(data)

            imgs = data.data.cpu()
            lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu()

            # visualize the model's semantic-segmentation output

            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                img, lt = self.val_loader.dataset.untransform(img, lt)
                label_trues.append(lt)
                label_preds.append(lp)
                if len(visualizations) < 15 * 5:
                    viz = fcn.utils.visualize_segmentation(lbl_pred=lp,
                                                           lbl_true=lt,
                                                           img=img,
                                                           n_class=n_class)
                    visualizations.append(viz)

        # compute the model's metrics over the whole validation set
        acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
            label_trues, label_preds, n_class)
        val_loss /= len(self.val_loader)
        self.scheduler.step(val_loss)

        # plot the model's metrics
        self.valid_loss = val_loss
        self.valid_acc = acc
        self.valMeanIu = mean_iu
        self.plotModelScalars()

        # save the visualization tiles and a checkpoint
        for i in range(5):
            out = osp.join(self.out, 'visualization_viz')
            if not osp.exists(out):
                os.makedirs(out)
            out_file = osp.join(out, str(i) + 'iter%012d.jpg' % self.iteration)
            # scipy.misc.imsave was removed in SciPy >= 1.2; imageio.imwrite is the usual replacement
            scipy.misc.imsave(
                out_file,
                fcn.utils.get_tile_image(visualizations[15 * i:15 * i + 15]))

        now = datetime.datetime.now()
        utils.ModelSave(model=self.model,
                        optim=self.optim,
                        saveRoot=osp.join(
                            self.out,
                            now.strftime('%Y%m%d_%H%M%S.%f') +
                            'checkpoint.pth.tar'),
                        epoch=self.epoch,
                        iteration=self.iteration)
        if mean_iu > self.best_mean_iu:
            self.best_mean_iu = mean_iu
            shutil.copy(
                osp.join(
                    self.out,
                    now.strftime('%Y%m%d_%H%M%S.%f') + 'checkpoint.pth.tar'),
                osp.join(
                    self.out,
                    now.strftime('%Y%m%d_%H%M%S.%f') + 'model_best.pth.tar'))

        # append the metrics of interest to the CSV log
        log = [0, 0, 0, val_loss, mean_iu, self.optim.param_groups[0]['lr']]
        self.logFile.write(','.join(map(str, log)) + '\n')

        self.model.train()
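
utils.ModelSave is also undefined here. Judging from the call site it wraps torch.save with the state needed to resume training; a minimal sketch under that assumption:

import torch

def ModelSave(model, optim, saveRoot, epoch, iteration):
    # bundle model and optimizer state into a single resumable checkpoint
    torch.save({
        'epoch': epoch,
        'iteration': iteration,
        'model_state_dict': model.state_dict(),
        'optim_state_dict': optim.state_dict(),
    }, saveRoot)
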
Example #4
    def train_epoch(self):

        if self.cuda:
            self.model = self.model.cuda()
        self.model.train()
        n_class = len(self.train_loader.dataset.class_names)

        for batch_idx, (data, target) in enumerate(self.train_loader):
            if self.cuda:
                data = data.cuda()
                target = target.cuda()

            print('train' + ' epoch:' + str(self.epoch) + '   batch_idx:' +
                  str(batch_idx))
            iteration = batch_idx + self.epoch * len(
                self.train_loader)  # treat each batch as one iteration; this is the global iteration index
            if iteration % self.interval_validate == 799:  # after every interval_validate iterations, evaluate on the validation set; its data has the same form as the training data and is used to measure generalization and tune hyperparameters
                self.validate()

            assert self.model.training  # make sure the model is in training mode

            self.optim.zero_grad()
            score = self.model(data)
            loss = self.loss_fcn(score,
                                 target,
                                 weight=None,
                                 size_average=False)
            loss /= len(data)
            loss_data = loss.data.item()
            loss.backward()
            self.optim.step()

            # update the statistics and visualize them periodically; here, every 100 iterations
            if iteration % 100 == 0:
                metrics = []
                lbl_pred = score.data.max(1)[1].cpu().numpy(
                )[:, :, :]  # take the highest-scoring class for each pixel; together the pixels form the predicted label map
                lbl_true = target.data.cpu().numpy()  # the human-annotated label map
                acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
                    lbl_true, lbl_pred,
                    n_class=n_class)  # all four values can serve as training-set metrics

                metrics.append((acc, acc_cls, mean_iu, fwavacc))
                metrics = np.mean(metrics, axis=0)

                # plot the scalars above
                self.train_loss = loss_data
                self.iteration = iteration
                self.train_acc = metrics.tolist()[0]
                self.trainMeanIu = metrics.tolist()[2]
                self.plotModelScalars()

                # append the metrics of interest to the CSV log
                log = [
                    iteration, self.train_loss, self.trainMeanIu, 0, 0,
                    self.optim.param_groups[0]['lr']
                ]
                self.logFile.write(','.join(map(str, log)) + '\n')

                self.model.train()
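
None of the examples show the outer loop that drives train_epoch. A hypothetical driver for the class-based version (Examples #3 and #4), assuming a trainer object that exposes the attributes used above:

def train(trainer, max_epoch):
    for epoch in range(max_epoch):
        trainer.epoch = epoch
        trainer.train_epoch()  # validates every interval_validate iterations internally
    trainer.validate()  # one final pass over the validation set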