def train_epoch(model, optim, loss_fcn, trainloader, valloader, epoch,
                interval_validate=4000, max_iter=40000):
    """Run one epoch of training.

    :param model: model to train; it is moved to the GPU here
    :param optim: optimizer used for the parameter updates
    :param loss_fcn: loss function; called as
        ``loss_fcn(score, target, weight=None, size_average=False)``
    :param trainloader: DataLoader yielding ``(data, target)`` training batches
    :param valloader: validation DataLoader; only its ``class_names`` list is
        read here
    :param epoch: index of the current epoch, used to derive the global
        iteration counter
    :param interval_validate: iteration interval at which validation would run
        (the call is currently disabled, see the commented block below)
    :param max_iter: stop training once the global iteration count exceeds this
    :return: None
    """
    model = model.cuda()
    model.train()
    n_class = len(valloader.dataset.class_names)
    for batch_idx, (data, target) in enumerate(trainloader):
        data = data.cuda()
        target = target.cuda()
        print('train' + str(epoch) + str(batch_idx))
        # Treat every batch as one iteration; this is the global iteration
        # index across epochs.
        iteration = batch_idx + epoch * len(trainloader)
        # Periodic validation to assess generalization / tune hyperparameters.
        # Currently disabled:
        # if iteration % interval_validate == 400:
        #     validate(model=model, valloader=valloader, loss_fcn=loss_fcn)
        assert model.training  # we must still be in training mode here
        optim.zero_grad()
        score = model(data)
        loss = loss_fcn(score, target, weight=None, size_average=False)
        loss /= len(data)  # normalize the loss by batch size
        # FIX: use loss.item() instead of loss.data.item() -- .data bypasses
        # autograd's version tracking and is discouraged.
        loss_data = loss.item()
        loss.backward()
        optim.step()
        # Per-batch training metrics: per-pixel argmax over the class scores
        # gives the predicted segmentation map.
        lbl_pred = score.data.max(1)[1].cpu().numpy()
        lbl_true = target.data.cpu().numpy()  # human-annotated segmentation map
        # FIX: the original wrapped this single tuple in a list, np.mean'ed it,
        # and then discarded the result; compute the metrics once directly.
        acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
            lbl_true, lbl_pred, n_class=n_class)
        metrics = (acc, acc_cls, mean_iu, fwavacc)  # training-set quality indicators
        # Visualize the scalar loss.
        utils.Vis.plot_scalar('loss2', loss_data, iteration)
        if iteration > max_iter:  # stop once the iteration budget is spent
            break
def validate(model, valloader, loss_fcn):
    """Evaluate the model on the validation set.

    Runs one full pass over ``valloader``, accumulates the average
    per-sample loss, collects up to 9 segmentation visualizations, and
    plots the resulting scalars.

    :param model: model to evaluate; inputs are moved to the GPU, so the
        model is expected to already be on the GPU
    :param valloader: DataLoader yielding ``(data, target)`` validation batches
    :param loss_fcn: loss function; called as
        ``loss_fcn(score, target, weight=None, size_average=False)``
    :return: None; the model is put back into training mode before returning
    """
    model.eval()
    n_class = len(valloader.dataset.class_names)
    val_loss = 0.0
    # FIX: accumulate predictions across ALL batches. The original re-created
    # these lists inside the batch loop, so the final metrics only covered the
    # last batch; the method version of validate() already hoists them.
    label_trues, label_preds = [], []
    visualizations = []
    for batch_idx, (data, target) in enumerate(valloader):
        data = data.cuda()
        target = target.cuda()
        print('validate' + str(batch_idx))
        with torch.no_grad():
            score = model(data)  # forward pass only; no autograd graph needed
        loss = loss_fcn(score, target, weight=None, size_average=False)
        loss_data = loss.data.item()
        # FIX: accumulate the Python float; the original added the tensor
        # `loss` itself, which silently turned val_loss into a tensor.
        val_loss += loss_data / len(data)
        imgs = data.data.cpu()
        # Per-pixel argmax over classes -> predicted segmentation map.
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu()
        # Visualize segmentation quality on a few samples.
        for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
            # NOTE(review): the method version calls `untransform` (no "s") --
            # confirm which name the dataset actually defines.
            img, lt = valloader.dataset.untransforms(img, lt)
            label_trues.append(lt)
            label_preds.append(lp)
            if len(visualizations) < 9:
                viz = fcn.utils.visualize_segmentation(
                    lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
                visualizations.append(viz)
    # Metrics over the whole validation set.
    acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
        label_trues, label_preds, n_class)
    val_loss /= len(valloader)
    # FIX: plot the averaged validation loss; the original plotted only the
    # last batch's loss_data under the 'ValLos' label.
    utils.Vis.plot_scalar('ValLos', val_loss, batch_idx)
    utils.Vis.plot_scalar('ValMeanIu', mean_iu, None)
    # utils.ModelSave(model, optim=...)  # checkpointing intentionally disabled here
    model.train()
def validate(self):
    """Evaluate the model on the validation set and checkpoint the results.

    One full pass over ``self.val_loader`` (capped at 1001 batches):
    accumulates the average per-sample loss, collects ground-truth and
    predicted segmentation maps for metric computation, saves tiled
    visualization images, steps the LR scheduler on the validation loss,
    saves a checkpoint (and a best-model copy when mean IoU improves),
    and writes a log row. The model is restored to training mode at the end.

    :return: None; side effects on self: valid_loss, valid_acc, valMeanIu,
        best_mean_iu, plus files written under self.out.
    """
    self.model.eval()
    n_class = len(self.val_loader.dataset.class_names)
    # Accumulated across ALL validation batches (unlike the free-function
    # version, which resets these per batch).
    label_trues, label_preds = [], []
    visualizations = []
    val_loss = 0
    for batch_idx, (data, target) in enumerate(self.val_loader):
        if batch_idx > 1000:
            # Hard cap on validation cost: at most 1001 batches are evaluated.
            break
        if self.cuda:
            data = data.cuda()
            target = target.cuda()
        print('validate' + str(batch_idx))
        with torch.no_grad():
            score = self.model(data)  # forward pass; no autograd graph
        loss = self.loss_fcn(score, target, weight=None, size_average=False)
        loss_data = loss.data.item()
        # Average per-sample loss for this batch, accumulated as a float.
        val_loss += loss_data / len(data)
        imgs = data.data.cpu()
        # Per-pixel argmax over the class dimension -> predicted label map.
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu()
        # Collect labels and build up to 75 (= 15 * 5) visualization tiles.
        for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
            # Undo the dataset's input normalization for display purposes.
            img, lt = self.val_loader.dataset.untransform(img, lt)
            label_trues.append(lt)
            label_preds.append(lp)
            if len(visualizations) < 15 * 5:
                viz = fcn.utils.visualize_segmentation(
                    lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
                visualizations.append(viz)
    # Metrics over the whole validation pass.
    acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
        label_trues, label_preds, n_class)
    val_loss /= len(self.val_loader)
    # Reduce-on-plateau style scheduling driven by validation loss.
    self.scheduler.step(val_loss)
    # Publish the scalars for visualization.
    self.valid_loss = val_loss
    self.valid_acc = acc
    self.valMeanIu = mean_iu
    self.plotModelScalars()
    # Save 5 tiled images of 15 visualizations each.
    # NOTE(review): assumes at least 75 visualizations were collected;
    # a short validation run would produce partial/empty tiles -- confirm.
    for i in range(5):
        out = osp.join(self.out, 'visualization_viz')
        if not osp.exists(out):
            os.makedirs(out)
        out_file = osp.join(out, str(i) + 'iter%012d.jpg' % self.iteration)
        # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2;
        # this pins the project to an old SciPy (imageio.imwrite is the
        # usual replacement).
        scipy.misc.imsave(
            out_file,
            fcn.utils.get_tile_image(visualizations[15 * i:15 * i + 15]))
    now = datetime.datetime.now()
    # Timestamped checkpoint of model + optimizer state.
    utils.ModelSave(model=self.model,
                    optim=self.optim,
                    saveRoot=osp.join(
                        self.out,
                        now.strftime('%Y%m%d_%H%M%S.%f') +
                        'checkpoint.pth.tar'),
                    epoch=self.epoch,
                    iteration=self.iteration)
    if mean_iu > self.best_mean_iu:
        # New best mean IoU: keep a separate copy of this checkpoint.
        self.best_mean_iu = mean_iu
        shutil.copy(
            osp.join(
                self.out,
                now.strftime('%Y%m%d_%H%M%S.%f') + 'checkpoint.pth.tar'),
            osp.join(
                self.out,
                now.strftime('%Y%m%d_%H%M%S.%f') + 'model_best.pth.tar'))
    # Record the run's key numbers (CSV-style row).
    # Columns: iteration, train_loss, train_mean_iu, val_loss, mean_iu, lr
    # -- the first three are zeroed here because this is a validation row.
    log = [0, 0, 0, val_loss, mean_iu, self.optim.param_groups[0]['lr']]
    # NOTE(review): `map(str, log)` is passed directly to logFile.write;
    # this only works if logFile is a project wrapper that accepts an
    # iterable -- a plain file object's write() would reject it. Confirm.
    log = map(str, log)
    self.logFile.write(log)
    self.model.train()
def train_epoch(self):
    """Train the model for one epoch over ``self.train_loader``.

    Moves the model to the GPU when configured, triggers a validation pass
    whenever the global iteration hits the configured offset within
    ``self.interval_validate``, and every 100 iterations publishes training
    scalars and appends a log row.

    :return: None; side effects on self: train_loss, iteration, train_acc,
        trainMeanIu, plus whatever validate()/plotModelScalars() touch.
    """
    if self.cuda:
        self.model = self.model.cuda()
    self.model.train()
    n_class = len(self.train_loader.dataset.class_names)
    batches_per_epoch = len(self.train_loader)
    for step, (data, target) in enumerate(self.train_loader):
        if self.cuda:
            data, target = data.cuda(), target.cuda()
        print(f'train epoch:{self.epoch} batch_idx:{step}')
        # Each batch counts as one iteration; derive the global index.
        iteration = self.epoch * batches_per_epoch + step
        # Validate periodically (at a fixed offset within the interval); the
        # validation set mirrors the training data and is used to judge
        # generalization and tune hyperparameters.
        if iteration % self.interval_validate == 799:
            self.validate()
        assert self.model.training  # validate() must have restored train mode
        self.optim.zero_grad()
        score = self.model(data)
        loss = self.loss_fcn(score, target, weight=None, size_average=False)
        loss /= len(data)  # normalize by batch size
        loss_data = loss.data.item()
        loss.backward()
        self.optim.step()
        # Every 100 iterations: compute metrics, publish scalars, log a row.
        if iteration % 100 == 0:
            # Per-pixel argmax over class scores -> predicted label map;
            # compare against the annotated ground-truth map.
            pred_map = score.data.max(1)[1].cpu().numpy()[:, :, :]
            true_map = target.data.cpu().numpy()
            acc, acc_cls, mean_iu, fwavacc = models.label_accuracy_score(
                true_map, pred_map, n_class=n_class)
            # Fold the single metric tuple through np.mean, matching the
            # original aggregation (a no-op average over one sample).
            stats = np.mean([(acc, acc_cls, mean_iu, fwavacc)],
                            axis=0).tolist()
            self.train_loss = loss_data
            self.iteration = iteration
            self.train_acc = stats[0]
            self.trainMeanIu = stats[2]
            self.plotModelScalars()
            # CSV-style row: iteration, train loss/meanIu, (val slots
            # zeroed), current learning rate.
            row = [
                iteration, self.train_loss, self.trainMeanIu, 0, 0,
                self.optim.param_groups[0]['lr']
            ]
            self.logFile.write(map(str, row))
        self.model.train()