Example #1
def interpretation_predict(model, images):
    images = images.astype('float32')
    arrange_transforms(
        model.model_type,
        model.__class__.__name__,
        transforms=model.test_transforms,
        mode='test')
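    # Temporarily keep only the last two transforms; the images passed in
    # are assumed to be already decoded and resized arrays.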
    tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
    model.test_transforms.transforms = model.test_transforms.transforms[-2:]

    new_imgs = []
    for i in range(images.shape[0]):
        images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
        new_imgs.append(model.test_transforms(images[i])[0])

    new_imgs = np.array(new_imgs)
    with fluid.scope_guard(model.scope):
        out = model.exe.run(
            model.test_prog,
            feed={'image': new_imgs},
            fetch_list=list(model.interpretation_feats.values()))

    model.test_transforms.transforms = tmp_transforms

    return out
Example #2
    def export_quant_model(self,
                           dataset,
                           save_dir,
                           batch_size=1,
                           batch_num=10,
                           cache_dir="./temp"):
        input_channel = getattr(self, 'input_channel', 3)
        arrange_transforms(
            model_type=self.model_type,
            class_name=self.__class__.__name__,
            transforms=dataset.transforms,
            mode='quant',
            input_channel=input_channel)
        dataset.num_samples = batch_size * batch_num
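        # Pick the post-training quantization implementation that matches the
        # installed Paddle version (2.x, or develop builds that already have
        # paddle.enable_static, use the V2 implementation).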
        import paddle
        version = paddle.__version__.strip().split('.')
        if version[0] == '2' or (version[0] == '0' and
                                 hasattr(paddle, 'enable_static')):
            from .slim.post_quantization import PaddleXPostTrainingQuantizationV2 as PaddleXPostTrainingQuantization
        else:
            from .slim.post_quantization import PaddleXPostTrainingQuantization
        is_use_cache_file = True
        if cache_dir is None:
            is_use_cache_file = False
        quant_prog = self.test_prog.clone(for_test=True)
        post_training_quantization = PaddleXPostTrainingQuantization(
            executor=self.exe,
            dataset=dataset,
            program=quant_prog,
            inputs=self.test_inputs,
            outputs=self.test_outputs,
            batch_size=batch_size,
            batch_nums=batch_num,
            scope=self.scope,
            algo='KL',
            quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
            is_full_quantize=False,
            is_use_cache_file=is_use_cache_file,
            cache_dir=cache_dir)
        post_training_quantization.quantize()
        post_training_quantization.save_quantized_model(save_dir)
        model_info = self.get_model_info()
        model_info['status'] = 'Quant'

        # Save the descriptions of the model's input and output variables
        model_info['_ModelInputsOutputs'] = dict()
        model_info['_ModelInputsOutputs']['test_inputs'] = [
            [k, v.name] for k, v in self.test_inputs.items()
        ]
        model_info['_ModelInputsOutputs']['test_outputs'] = [
            [k, v.name] for k, v in self.test_outputs.items()
        ]

        with open(
                osp.join(save_dir, 'model.yml'), encoding='utf-8',
                mode='w') as f:
            yaml.dump(model_info, f)
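A minimal usage sketch for this method. The checkpoint path, dataset files, and save directory below are hypothetical, and PaddleX 1.x-style APIs (pdx.load_model, pdx.datasets.ImageNet, paddlex.cls.transforms) are assumed to be available:

import paddlex as pdx
from paddlex.cls import transforms

# Hypothetical trained classifier checkpoint.
model = pdx.load_model('output/mobilenetv2/best_model')
# A small calibration dataset; only batch_size * batch_num samples are used.
eval_transforms = transforms.Compose([
    transforms.ResizeByShort(short_size=256),
    transforms.CenterCrop(crop_size=224), transforms.Normalize()
])
dataset = pdx.datasets.ImageNet(
    data_dir='dataset',
    file_list='dataset/val_list.txt',
    label_list='dataset/labels.txt',
    transforms=eval_transforms)
model.export_quant_model(
    dataset=dataset, save_dir='./quant_model', batch_size=1, batch_num=10)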
Example #3
def normlime(img_file,
             model,
             dataset=None,
             num_samples=3000,
             batch_size=50,
             save_dir='./',
             normlime_weights_file=None):
    """使用NormLIME算法将模型预测结果的可解释性可视化。

    NormLIME是利用一定数量的样本来出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:
    使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;
    然后以此特征做为输入,以模型输出做为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。
    之后,对一测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定。

    注意1:dataset读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
    注意2:NormLIME可解释性结果可视化目前只支持分类模型。

    Args:
        img_file (str): 预测图像路径。
        model (paddlex.cv.models): paddlex中的模型。
        dataset (paddlex.datasets): 数据集读取器,默认为None。
        num_samples (int): LIME用于学习线性模型的采样数,默认为3000。
        batch_size (int): 预测数据batch大小,默认为50。
        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
        normlime_weights_file (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
    """
    assert model.model_type == 'classifier', \
        'Interpretation visualization is currently only supported for classifiers!'
    if model.status != 'Normal':
        raise Exception(
            'Interpretation can only be applied to a model whose status is Normal')
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    arrange_transforms(model.model_type,
                       model.__class__.__name__,
                       transforms=model.test_transforms,
                       mode='test')
    tmp_transforms = copy.deepcopy(model.test_transforms)
    tmp_transforms.transforms = tmp_transforms.transforms[:-2]
    img = tmp_transforms(img_file)[0]
    img = np.around(img).astype('uint8')
    img = np.expand_dims(img, axis=0)
    if dataset is None:
        raise Exception(
            'The dataset is None. NormLIME interpretation cannot be computed without a dataset.'
        )
    interpreter = get_normlime_interpreter(
        img,
        model,
        dataset,
        num_samples=num_samples,
        batch_size=batch_size,
        save_dir=save_dir,
        normlime_weights_file=normlime_weights_file)
    img_name = osp.splitext(osp.split(img_file)[-1])[0]
    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
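A hedged usage sketch of normlime; the paths below are hypothetical and a trained PaddleX 1.x classifier is assumed:

import paddlex as pdx

model = pdx.load_model('output/mobilenetv2/best_model')  # hypothetical path
test_dataset = pdx.datasets.ImageNet(
    data_dir='dataset',
    file_list='dataset/test_list.txt',
    label_list='dataset/labels.txt',
    transforms=model.test_transforms)
# Computes (or loads) the global NormLIME weights, then filters the LIME
# result for the given image and saves the visualization as a png.
pdx.interpret.normlime(
    'dataset/test/img_0001.jpg',
    model,
    dataset=test_dataset,
    save_dir='./interpret_results')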
Example #4
    def _preprocess(images,
                    transforms,
                    model_type,
                    class_name,
                    thread_pool=None):
        arrange_transforms(model_type=model_type,
                           class_name=class_name,
                           transforms=transforms,
                           mode='test')
        if thread_pool is not None:
            batch_data = thread_pool.map(transforms, images)
        else:
            batch_data = list()
            for image in images:
                batch_data.append(transforms(image))
        padding_batch = generate_minibatch(batch_data)
        im = np.array([data[0] for data in padding_batch])

        return im
Example #5
def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
    """使用LIME算法将模型预测结果的可解释性可视化。

    LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME的思想是以输入样本为中心,
    在其附近的空间中进行随机采样,每个采样通过原模型得到新的输出,这样得到一系列的输入
    和对应的输出,LIME用一个简单的、可解释的模型(比如线性回归模型)来拟合这个映射关系,
    得到每个输入维度的权重,以此来解释模型。

    注意:LIME可解释性结果可视化目前只支持分类模型。

    Args:
        img_file (str): 预测图像路径。
        model (paddlex.cv.models): paddlex中的模型。
        num_samples (int): LIME用于学习线性模型的采样数,默认为3000。
        batch_size (int): 预测数据batch大小,默认为50。
        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
    """
    assert model.model_type == 'classifier', \
        'Interpretation visualization is currently only supported for classifiers!'
    if model.status != 'Normal':
        raise Exception(
            'Interpretation can only be applied to a model whose status is Normal')
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    arrange_transforms(model.model_type,
                       model.__class__.__name__,
                       transforms=model.test_transforms,
                       mode='test')
    tmp_transforms = copy.deepcopy(model.test_transforms)
    tmp_transforms.transforms = tmp_transforms.transforms[:-2]
    img = tmp_transforms(img_file)[0]
    img = np.around(img).astype('uint8')
    img = np.expand_dims(img, axis=0)
    interpreter = get_lime_interpreter(img,
                                       model,
                                       num_samples=num_samples,
                                       batch_size=batch_size)
    img_name = osp.splitext(osp.split(img_file)[-1])[0]
    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
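The core idea summarized in the docstring (sample around the input, query the model, fit an interpretable surrogate) can be checked in isolation. A toy sketch, assuming a black-box predict_fn over binary feature masks; this illustrates the principle, not the implementation behind get_lime_interpreter:

import numpy as np
from sklearn.linear_model import Ridge

def fit_local_surrogate(predict_fn, num_features, num_samples=3000,
                        kernel_width=0.25):
    # Randomly switch features on/off around the original input
    # (the all-ones mask) and query the black-box model.
    masks = np.random.randint(0, 2, size=(num_samples, num_features))
    preds = np.array([predict_fn(m) for m in masks])
    # Weight samples by proximity to the original input.
    distances = 1.0 - masks.mean(axis=1)
    weights = np.exp(-distances ** 2 / kernel_width ** 2)
    # The coefficients of the weighted linear fit are the explanation.
    surrogate = Ridge(alpha=1.0).fit(masks, preds, sample_weight=weights)
    return surrogate.coef_

# Toy black box that only reacts to features 1 and 3.
print(fit_local_surrogate(lambda m: 2.0 * m[1] + m[3], num_features=6).round(2))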
Example #6
    def _preprocess(images,
                    transforms,
                    model_type,
                    class_name,
                    thread_pool=None,
                    input_channel=3):
        arrange_transforms(model_type=model_type,
                           class_name=class_name,
                           transforms=transforms,
                           mode='test',
                           input_channel=input_channel)
        if thread_pool is not None:
            batch_data = thread_pool.map(transforms, images)
        else:
            batch_data = list()
            for image in images:
                batch_data.append(transforms(image))
        padding_batch = generate_minibatch(batch_data)
        im = np.array([data[0] for data in padding_batch],
                      dtype=padding_batch[0][0].dtype)
        im_info = [data[1] for data in padding_batch]
        return im, im_info
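generate_minibatch is not shown in these snippets; from its use here it must make every sample in the batch share one spatial shape so np.array can stack them. A rough sketch of that idea, assuming CHW arrays (a hypothetical helper, not the library's code):

import numpy as np

def pad_to_common_shape(batch_data):
    # Zero-pad every CHW image to the maximum height/width in the batch.
    max_h = max(sample[0].shape[1] for sample in batch_data)
    max_w = max(sample[0].shape[2] for sample in batch_data)
    padded = []
    for im, *rest in batch_data:
        canvas = np.zeros((im.shape[0], max_h, max_w), dtype=im.dtype)
        canvas[:, :im.shape[1], :im.shape[2]] = im
        padded.append((canvas, *rest))
    return padded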
Example #7
    def evaluate(self,
                 eval_dataset,
                 batch_size=1,
                 epoch_id=None,
                 metric=None,
                 return_details=False):
        """评估。

        Args:
            eval_dataset (paddlex.datasets): 验证数据读取器。
            batch_size (int): 验证数据批大小。默认为1。当前只支持设置为1。
            epoch_id (int): 当前评估模型所在的训练轮数。
            metric (bool): 训练过程中评估的方式,取值范围为['COCO', 'VOC']。默认为None,
                根据用户传入的Dataset自动选择,如为VOCDetection,则metric为'VOC';
                如为COCODetection,则metric为'COCO'。
            return_details (bool): 是否返回详细信息。默认值为False。

        Returns:
            tuple (metrics, eval_details) /dict (metrics): 当return_details为True时,返回(metrics, eval_details),
                当return_details为False时,返回metrics。metrics为dict,包含关键字:'bbox_mmap'和'segm_mmap'
                或者’bbox_map‘和'segm_map',分别表示预测框和分割区域平均准确率平均值在
                各个IoU阈值下的结果取平均值的结果(mmAP)、平均准确率平均值(mAP)。eval_details为dict,
                包含bbox、mask和gt三个关键字。其中关键字bbox的键值是一个列表,列表中每个元素代表一个预测结果,
                一个预测结果是一个由图像id,预测框类别id, 预测框坐标,预测框得分组成的列表。
                关键字mask的键值是一个列表,列表中每个元素代表各预测框内物体的分割结果,分割结果由图像id、
                预测框类别id、表示预测框内各像素点是否属于物体的二值图、预测框得分。
                而关键字gt的键值是真实标注框的相关信息。
        """
        input_channel = getattr(self, 'input_channel', 3)
        arrange_transforms(model_type=self.model_type,
                           class_name=self.__class__.__name__,
                           transforms=eval_dataset.transforms,
                           mode='eval',
                           input_channel=input_channel)
        if metric is None:
            if hasattr(self, 'metric') and self.metric is not None:
                metric = self.metric
            else:
                if isinstance(eval_dataset, paddlex.datasets.CocoDetection):
                    metric = 'COCO'
                else:
                    raise Exception(
                        "eval_dataset should be datasets.CocoDetection.")
        assert metric in ['COCO', 'VOC'], "metric only supports 'VOC' or 'COCO'"
        if batch_size > 1:
            batch_size = 1
            logging.warning(
                "Mask R-CNN only supports batch_size=1 during evaluation, so batch_size is forced to 1."
            )
        data_generator = eval_dataset.generator(batch_size=batch_size,
                                                drop_last=False)

        total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
        results = list()
        logging.info(
            "Start evaluating (total_samples={}, total_steps={})...".format(
                eval_dataset.num_samples, total_steps))
        for step, data in tqdm.tqdm(enumerate(data_generator()),
                                    total=total_steps):
            images = np.array([d[0] for d in data]).astype('float32')
            im_infos = np.array([d[1] for d in data]).astype('float32')
            im_shapes = np.array([d[3] for d in data]).astype('float32')
            feed_data = {
                'image': images,
                'im_info': im_infos,
                'im_shape': im_shapes,
            }
            with fluid.scope_guard(self.scope):
                outputs = self.exe.run(self.test_prog,
                                       feed=[feed_data],
                                       fetch_list=list(
                                           self.test_outputs.values()),
                                       return_numpy=False)
            res = {
                'bbox': (np.array(outputs[0]),
                         outputs[0].recursive_sequence_lengths()),
                'mask':
                (np.array(outputs[1]), outputs[1].recursive_sequence_lengths())
            }
            res_im_id = [d[2] for d in data]
            res['im_info'] = (im_infos, [])
            res['im_shape'] = (im_shapes, [])
            res['im_id'] = (np.array(res_im_id), [])
            results.append(res)
            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
                epoch_id, step + 1, total_steps))

        ap_stats, eval_details = eval_results(
            results,
            'COCO',
            eval_dataset.coco_gt,
            with_background=True,
            resolution=self.mask_head_resolution)
        if metric == 'VOC':
            if isinstance(ap_stats[0], np.ndarray) and isinstance(
                    ap_stats[1], np.ndarray):
                metrics = OrderedDict(
                    zip(['bbox_map', 'segm_map'],
                        [ap_stats[0][1], ap_stats[1][1]]))
            else:
                metrics = OrderedDict(zip(['bbox_map', 'segm_map'],
                                          [0.0, 0.0]))
        elif metric == 'COCO':
            if isinstance(ap_stats[0], np.ndarray) and isinstance(
                    ap_stats[1], np.ndarray):
                metrics = OrderedDict(
                    zip(['bbox_mmap', 'segm_mmap'],
                        [ap_stats[0][0], ap_stats[1][0]]))
            else:
                metrics = OrderedDict(
                    zip(['bbox_mmap', 'segm_mmap'], [0.0, 0.0]))
        if return_details:
            return metrics, eval_details
        return metrics
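A hedged usage sketch of this method; the paths are hypothetical, and a trained PaddleX 1.x Mask R-CNN checkpoint plus a COCO-format validation set are assumed:

import paddlex as pdx

model = pdx.load_model('output/mask_rcnn_r50_fpn/best_model')  # hypothetical
eval_dataset = pdx.datasets.CocoDetection(
    data_dir='coco/val2017',
    ann_file='coco/annotations/instances_val2017.json',
    transforms=model.test_transforms)
metrics, eval_details = model.evaluate(
    eval_dataset, batch_size=1, return_details=True)
print(metrics)  # e.g. OrderedDict([('bbox_mmap', ...), ('segm_mmap', ...)])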
Example #8
    def evaluate(self,
                 eval_dataset,
                 batch_size=1,
                 epoch_id=None,
                 return_details=False):
        """评估。
        Args:
            eval_dataset (paddlex.datasets): 验证数据读取器。
            batch_size (int): 验证数据批大小。默认为1。
            epoch_id (int): 当前评估模型所在的训练轮数。
            return_details (bool): 是否返回详细信息。
        Returns:
          dict: 当return_details为False时,返回dict, 包含关键字:'acc1'、'acc5',
              分别表示最大值的accuracy、前5个最大值的accuracy。
          tuple (metrics, eval_details): 当return_details为True时,增加返回dict,
              包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。
        """
        arrange_transforms(model_type=self.model_type,
                           class_name=self.__class__.__name__,
                           transforms=eval_dataset.transforms,
                           mode='eval')
        data_generator = eval_dataset.generator(batch_size=batch_size,
                                                drop_last=False)
        k = min(5, self.num_classes)
        total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
        true_labels = list()
        pred_scores = list()
        if not hasattr(self, 'parallel_test_prog'):
            with fluid.scope_guard(self.scope):
                self.parallel_test_prog = fluid.CompiledProgram(
                    self.test_prog).with_data_parallel(
                        share_vars_from=self.parallel_train_prog)
        batch_size_each_gpu = self._get_single_card_bs(batch_size)
        logging.info(
            "Start evaluating (total_samples={}, total_steps={})...".format(
                eval_dataset.num_samples, total_steps))
        for step, data in tqdm.tqdm(enumerate(data_generator()),
                                    total=total_steps):
            images = np.array([d[0] for d in data]).astype('float32')
            labels = [d[1] for d in data]
            num_samples = images.shape[0]
            if num_samples < batch_size:
                num_pad_samples = batch_size - num_samples
                pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
                images = np.concatenate([images, pad_images])
            with fluid.scope_guard(self.scope):
                outputs = self.exe.run(self.parallel_test_prog,
                                       feed={'image': images},
                                       fetch_list=list(
                                           self.test_outputs.values()))
            outputs = [outputs[0][:num_samples]]
            true_labels.extend(labels)
            pred_scores.extend(outputs[0].tolist())
            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
                epoch_id, step + 1, total_steps))

        pred_top1_label = np.argsort(pred_scores)[:, -1]
        pred_topk_label = np.argsort(pred_scores)[:, -k:]
        acc1 = sum(pred_top1_label == true_labels) / len(true_labels)
        acck = sum(
            [np.isin(x, y)
             for x, y in zip(true_labels, pred_topk_label)]) / len(true_labels)
        metrics = OrderedDict([('acc1', acc1), ('acc{}'.format(k), acck)])
        if return_details:
            eval_details = {
                'true_labels': true_labels,
                'pred_scores': pred_scores
            }
            return metrics, eval_details
        return metrics
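The top-1/top-k bookkeeping at the end is easy to verify in isolation; a self-contained toy example with three samples and k=2:

import numpy as np

true_labels = [1, 2, 2]
pred_scores = [[0.1, 0.7, 0.2],
               [0.5, 0.3, 0.2],
               [0.2, 0.2, 0.6]]
k = 2
pred_top1_label = np.argsort(pred_scores)[:, -1]   # [1, 0, 2]
pred_topk_label = np.argsort(pred_scores)[:, -k:]  # [[2, 1], [1, 0], [1, 2]]
acc1 = sum(pred_top1_label == true_labels) / len(true_labels)
acck = sum(np.isin(x, y)
           for x, y in zip(true_labels, pred_topk_label)) / len(true_labels)
print(acc1, acck)  # 0.666..., 0.666...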
Example #9
    def evaluate(self,
                 eval_dataset,
                 batch_size=1,
                 epoch_id=None,
                 return_details=False):
        """评估。

        Args:
            eval_dataset (paddlex.datasets): 评估数据读取器。
            batch_size (int): 评估时的batch大小。默认1。
            epoch_id (int): 当前评估模型所在的训练轮数。
            return_details (bool): 是否返回详细信息。默认False。

        Returns:
            dict: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
                'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
            tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
                包含关键字:'confusion_matrix',表示评估的混淆矩阵。
        """
        arrange_transforms(model_type=self.model_type,
                           class_name=self.__class__.__name__,
                           transforms=eval_dataset.transforms,
                           mode='eval')
        total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
        conf_mat = ConfusionMatrix(self.num_classes, streaming=True)
        data_generator = eval_dataset.generator(batch_size=batch_size,
                                                drop_last=False)
        if not hasattr(self, 'parallel_test_prog'):
            with fluid.scope_guard(self.scope):
                self.parallel_test_prog = fluid.CompiledProgram(
                    self.test_prog).with_data_parallel(
                        share_vars_from=self.parallel_train_prog)
        logging.info(
            "Start evaluating (total_samples={}, total_steps={})...".format(
                eval_dataset.num_samples, total_steps))
        for step, data in tqdm.tqdm(enumerate(data_generator()),
                                    total=total_steps):
            images = np.array([d[0] for d in data])
            im_info = [d[1] for d in data]
            labels = [d[2] for d in data]

            num_samples = images.shape[0]
            if num_samples < batch_size:
                num_pad_samples = batch_size - num_samples
                pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
                images = np.concatenate([images, pad_images])
            feed_data = {'image': images}
            with fluid.scope_guard(self.scope):
                outputs = self.exe.run(self.parallel_test_prog,
                                       feed=feed_data,
                                       fetch_list=list(
                                           self.test_outputs.values()),
                                       return_numpy=True)
            pred = outputs[0]
            if num_samples < batch_size:
                pred = pred[0:num_samples]

            for i in range(num_samples):
                one_pred = np.squeeze(pred[i]).astype('uint8')
                one_label = labels[i]
                for info in im_info[i][::-1]:
                    if info[0] == 'resize':
                        w, h = info[1][1], info[1][0]
                        one_pred = cv2.resize(
                            one_pred, (w, h),
                            interpolation=cv2.INTER_NEAREST)
                    elif info[0] == 'padding':
                        w, h = info[1][1], info[1][0]
                        one_pred = one_pred[0:h, 0:w]
                one_pred = one_pred.astype('int64')
                one_pred = one_pred[np.newaxis, :, :, np.newaxis]
                one_label = one_label[np.newaxis, np.newaxis, :, :]
                mask = one_label != self.ignore_index
                conf_mat.calculate(pred=one_pred, label=one_label, ignore=mask)
            _, iou = conf_mat.mean_iou()
            logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format(
                epoch_id, step + 1, total_steps, iou))

        category_iou, miou = conf_mat.mean_iou()
        category_acc, oacc = conf_mat.accuracy()
        category_f1score = conf_mat.f1_score()

        metrics = OrderedDict(
            zip([
                'miou', 'category_iou', 'oacc', 'category_acc', 'kappa',
                'category_F1-score'
            ], [
                miou, category_iou, oacc, category_acc,
                conf_mat.kappa(), category_f1score
            ]))
        if return_details:
            eval_details = {
                'confusion_matrix': conf_mat.confusion_matrix.tolist()
            }
            return metrics, eval_details
        return metrics
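The reported metrics reduce to confusion-matrix arithmetic; a minimal numpy check of per-category IoU, mean IoU and overall accuracy for a 3-class matrix (not the ConfusionMatrix class used above, whose conventions may differ):

import numpy as np

cm = np.array([[50, 2, 3],    # rows: ground truth
               [4, 40, 1],    # columns: prediction
               [6, 0, 44]], dtype=np.float64)
tp = np.diag(cm)
iou = tp / (cm.sum(axis=0) + cm.sum(axis=1) - tp)
print('category_iou:', iou.round(3))
print('miou:', iou.mean().round(3))
print('oacc:', (tp.sum() / cm.sum()).round(3))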
Example #10
    def train_loop(self,
                   num_epochs,
                   train_dataset,
                   train_batch_size,
                   eval_dataset=None,
                   save_interval_epochs=1,
                   log_interval_steps=10,
                   save_dir='output',
                   use_vdl=False,
                   early_stop=False,
                   early_stop_patience=5):
        if train_dataset.num_samples < train_batch_size:
            raise Exception(
                'The number of training samples must be larger than the batch size.')
        if not osp.isdir(save_dir):
            if osp.exists(save_dir):
                os.remove(save_dir)
            os.makedirs(save_dir)
        if use_vdl:
            from visualdl import LogWriter
            vdl_logdir = osp.join(save_dir, 'vdl_log')
        # Append the arrange operation to the transforms
        input_channel = getattr(self, 'input_channel', 3)
        arrange_transforms(
            model_type=self.model_type,
            class_name=self.__class__.__name__,
            transforms=train_dataset.transforms,
            mode='train',
            input_channel=input_channel)
        # Build the train data loader
        self.build_train_data_loader(
            dataset=train_dataset, batch_size=train_batch_size)

        if eval_dataset is not None:
            self.eval_transforms = eval_dataset.transforms
            self.test_transforms = copy.deepcopy(eval_dataset.transforms)

        # Fetch the learning rate, which may change during training
        lr = self.optimizer._learning_rate
        if isinstance(lr, fluid.framework.Variable):
            self.train_outputs['lr'] = lr

        # Run training on multiple cards
        if self.parallel_train_prog is None:
            build_strategy = fluid.compiler.BuildStrategy()
            build_strategy.fuse_all_optimizer_ops = False
            if paddlex.env_info['place'] != 'cpu' and len(self.places) > 1:
                build_strategy.sync_batch_norm = self.sync_bn
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.num_iteration_per_drop_scope = 1
            self.parallel_train_prog = fluid.CompiledProgram(
                self.train_prog).with_data_parallel(
                    loss_name=self.train_outputs['loss'].name,
                    build_strategy=build_strategy,
                    exec_strategy=exec_strategy)

        total_num_steps = math.floor(train_dataset.num_samples /
                                     train_batch_size)
        num_steps = 0
        time_stat = list()
        time_train_one_epoch = None
        time_eval_one_epoch = None

        total_num_steps_eval = 0
        # Total number of model evaluations during training
        total_eval_times = math.ceil(num_epochs / save_interval_epochs)
        # Detection currently only supports single-card evaluation; the evaluation
        # batch size is the training batch size divided by the number of cards.
        eval_batch_size = train_batch_size
        if self.model_type == 'detector':
            eval_batch_size = self._get_single_card_bs(train_batch_size)
        if eval_dataset is not None:
            total_num_steps_eval = math.ceil(eval_dataset.num_samples /
                                             eval_batch_size)

        if use_vdl:
            # VisualDL component
            log_writer = LogWriter(vdl_logdir)

        thresh = 0.0001
        if early_stop:
            earlystop = EarlyStop(early_stop_patience, thresh)
        best_accuracy_key = ""
        best_accuracy = -1.0
        best_model_epoch = -1
        start_epoch = self.completed_epochs
        # task_id: currently assigned by the PaddleX GUI,
        # used to tag the owning task id in the VisualDL logs
        task_id = getattr(paddlex, "task_id", "")
        for i in range(start_epoch, num_epochs):
            records = list()
            step_start_time = time.time()
            epoch_start_time = time.time()
            for step, data in enumerate(self.train_data_loader()):
                outputs = self.exe.run(
                    self.parallel_train_prog,
                    feed=data,
                    fetch_list=list(self.train_outputs.values()))
                outputs_avg = np.mean(np.array(outputs), axis=1)
                records.append(outputs_avg)

                # Estimate the time remaining until training finishes
                current_time = time.time()
                step_cost_time = current_time - step_start_time
                step_start_time = current_time
                if len(time_stat) < 20:
                    time_stat.append(step_cost_time)
                else:
                    time_stat[num_steps % 20] = step_cost_time

                # Log loss information every log_interval_steps steps
                num_steps += 1
                if num_steps % log_interval_steps == 0:
                    step_metrics = OrderedDict(
                        zip(list(self.train_outputs.keys()), outputs_avg))

                    if use_vdl:
                        for k, v in step_metrics.items():
                            log_writer.add_scalar(
                                '{}-Metrics/Training(Step): {}'.format(
                                    task_id, k), v, num_steps)

                    # Estimate the remaining time
                    avg_step_time = np.mean(time_stat)
                    if time_train_one_epoch is not None:
                        eta = (num_epochs - i - 1) * time_train_one_epoch + (
                            total_num_steps - step - 1) * avg_step_time
                    else:
                        eta = ((num_epochs - i) * total_num_steps - step - 1
                               ) * avg_step_time
                    if time_eval_one_epoch is not None:
                        eval_eta = (
                            total_eval_times - i // save_interval_epochs
                        ) * time_eval_one_epoch
                    else:
                        eval_eta = (
                            total_eval_times - i // save_interval_epochs
                        ) * total_num_steps_eval * avg_step_time
                    eta_str = seconds_to_hms(eta + eval_eta)

                    logging.info(
                        "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
                        .format(i + 1, num_epochs, step + 1, total_num_steps,
                                dict2str(step_metrics),
                                round(avg_step_time, 2), eta_str))
            train_metrics = OrderedDict(
                zip(list(self.train_outputs.keys()), np.mean(
                    records, axis=0)))
            logging.info('[TRAIN] Epoch {} finished, {} .'.format(
                i + 1, dict2str(train_metrics)))
            time_train_one_epoch = time.time() - epoch_start_time
            epoch_start_time = time.time()

            # Every save_interval_epochs, evaluate on the validation set and save the model
            self.completed_epochs += 1
            eval_epoch_start_time = time.time()
            if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
                current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
                if not osp.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                if getattr(self, 'use_ema', False):
                    self.exe.run(self.ema.apply_program)
                if eval_dataset is not None and eval_dataset.num_samples > 0:
                    self.eval_metrics, self.eval_details = self.evaluate(
                        eval_dataset=eval_dataset,
                        batch_size=eval_batch_size,
                        epoch_id=i + 1,
                        return_details=True)
                    logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
                        i + 1, dict2str(self.eval_metrics)))
                    # Save the best model so far
                    best_accuracy_key = list(self.eval_metrics.keys())[0]
                    current_accuracy = self.eval_metrics[best_accuracy_key]
                    if current_accuracy > best_accuracy:
                        best_accuracy = current_accuracy
                        best_model_epoch = i + 1
                        best_model_dir = osp.join(save_dir, "best_model")
                        self.save_model(save_dir=best_model_dir)
                    if use_vdl:
                        for k, v in self.eval_metrics.items():
                            if isinstance(v, list):
                                continue
                            if isinstance(v, np.ndarray):
                                if v.size > 1:
                                    continue
                            log_writer.add_scalar(
                                "{}-Metrics/Eval(Epoch): {}".format(
                                    task_id, k), v, i + 1)
                self.save_model(save_dir=current_save_dir)
                if getattr(self, 'use_ema', False):
                    self.exe.run(self.ema.restore_program)
                time_eval_one_epoch = time.time() - eval_epoch_start_time
                eval_epoch_start_time = time.time()
                if best_model_epoch > 0:
                    logging.info(
                        'Current evaluated best model in eval_dataset is epoch_{}, {}={}'
                        .format(best_model_epoch, best_accuracy_key,
                                best_accuracy))
                if eval_dataset is not None and early_stop:
                    if earlystop(current_accuracy):
                        break
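The remaining-time estimate in the loop above is plain arithmetic; the same computation with hypothetical numbers:

# 0-based epoch index i and step index within the epoch.
num_epochs, i = 20, 4
total_num_steps, step = 500, 99
avg_step_time = 0.8              # rolling mean over recent steps, seconds
time_train_one_epoch = 410.0     # measured on the previous epoch, seconds

# Full epochs still to run, plus the remainder of the current epoch.
eta = (num_epochs - i - 1) * time_train_one_epoch \
    + (total_num_steps - step - 1) * avg_step_time
print('eta: {:.0f}s'.format(eta))  # 6470s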