def cocoapi_eval(anns,
                 style,
                 coco_gt=None,
                 anno_file=None,
                 max_dets=(100, 300, 1000)):
    """
    Args:
        anns: Evaluation result.
        style: COCOeval style, can be `bbox`, `segm` or `proposal`.
        coco_gt: A loaded COCO ground-truth object, e.g.
            coco_gt = COCO(anno_file). If None, it is loaded from anno_file.
        anno_file: COCO annotations file.
        max_dets: COCO evaluation maxDets.
    """
    assert coco_gt is not None or anno_file is not None
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    if coco_gt is None:
        coco_gt = COCO(anno_file)
    logging.debug("Start evaluating...")
    coco_dt = loadRes(coco_gt, anns)
    if style == 'proposal':
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = list(max_dets)
    else:
        coco_eval = COCOeval(coco_gt, coco_dt, style)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats
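# A minimal usage sketch for cocoapi_eval. The annotation path and the single
# detection below are hypothetical placeholders; `anns` must follow the
# standard COCO results format (one dict per detection).
def _example_cocoapi_eval():
    anns = [{
        'image_id': 1,
        'category_id': 1,
        'bbox': [10.0, 20.0, 50.0, 60.0],  # [x, y, w, h]
        'score': 0.9,
    }]
    stats = cocoapi_eval(
        anns, 'bbox', anno_file='annotations/instances_val.json')
    # stats[0] is mAP@[0.50:0.95]; stats[1] is mAP@0.50.
    print('mAP(0.50:0.95) = {:.4f}'.format(stats[0]))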
def quantize(self):
    '''
    Quantize the fp32 model.

    Use calibration data to calculate the scale factors of the quantized
    variables, and insert fake quant/dequant ops to obtain the quantized
    model.

    Args:
        None
    Returns:
        the program of the quantized model.
    '''
    self._load_model_data()
    self._collect_target_varnames()
    self._set_activation_persistable()

    batch_ct = 0
    for data in self._data_loader():
        batch_ct += 1
        if self._batch_nums and batch_ct >= self._batch_nums:
            break

    batch_id = 0
    logging.info("Start to run batch!")
    for data in self._data_loader():
        start = time.time()
        self._executor.run(program=self._program,
                           feed=data,
                           fetch_list=self._fetch_list,
                           return_numpy=False)
        if self._algo == "KL":
            self._sample_data(batch_id)
        else:
            self._sample_threshold()
        end = time.time()
        logging.debug(
            '[Run batch data] Batch={}/{}, time_each_batch={} s.'.format(
                str(batch_id + 1), str(batch_ct), str(end - start)))
        batch_id += 1
        if self._batch_nums and batch_id >= self._batch_nums:
            break
    logging.info("All run batch: {}".format(batch_id))

    self._reset_activation_persistable()
    logging.info("Calculate scale factor ...")
    if self._algo == "KL":
        self._calculate_kl_threshold()
    logging.info("Update the program ...")
    if self._algo in ["KL", "abs_max"]:
        self._update_program()
    else:
        self._save_input_threhold()
    logging.info("Save ...")
    self._save_output_threshold()
    logging.info("Finish quant!")
    return self._program
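# A hedged driver sketch for quantize(), assuming this method lives on a
# PaddleSlim-style PostTrainingQuantization class; the import path and
# constructor arguments follow Paddle 1.x and may differ across versions.
def _example_quantize(executor, model_dir, sample_generator):
    from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
    ptq = PostTrainingQuantization(
        executor=executor,
        model_dir=model_dir,
        sample_generator=sample_generator,
        batch_size=16,
        batch_nums=10,
        algo='KL')  # or 'abs_max' / 'min_max'
    quantized_program = ptq.quantize()
    ptq.save_quantized_model('./quant_model')
    return quantized_program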
def generator(self, batch_size=1, drop_last=True):
    self.batch_size = batch_size
    parallel_reader = multithread_reader
    if self.parallel_method == "process":
        if platform.platform().startswith("Windows"):
            logging.debug(
                "multiprocess_reader is not supported on the Windows platform, falling back to multithread_reader."
            )
        else:
            parallel_reader = multiprocess_reader
    return parallel_reader(self.transforms,
                           self.iterator,
                           num_workers=self.num_workers,
                           buffer_size=self.buffer_size,
                           batch_size=batch_size,
                           drop_last=drop_last)
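# A short usage sketch: generator() returns a callable reader, and calling it
# yields lists of preprocessed samples. The dataset object is a placeholder.
def _example_generator(dataset):
    reader = dataset.generator(batch_size=8, drop_last=False)
    for batch in reader():
        # each batch is a list of up to 8 transformed samples
        print(len(batch))
        break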
def __init__(self,
             predict_fn,
             label_names,
             num_samples=3000,
             batch_size=50,
             kmeans_model_for_normlime=None,
             normlime_weights=None):
    root_path = gen_user_home()
    root_path = osp.join(root_path, '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
    if not osp.exists(h_pre_models):
        if not osp.exists(root_path):
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
    if kmeans_model_for_normlime is None:
        try:
            self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
        except Exception:
            raise ValueError(
                "NormLIME needs the KMeans model, where we provided a default one in "
                "pre_models/kmeans_model.pkl.")
    else:
        logging.debug("Warning: it is *strongly* suggested to use the "
                      "default KMeans model in pre_models/kmeans_model.pkl. "
                      "Using another one will change the final result.")
        self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)

    self.num_samples = num_samples
    self.batch_size = batch_size

    try:
        self.normlime_weights = np.load(normlime_weights,
                                        allow_pickle=True).item()
    except Exception:
        self.normlime_weights = None
        logging.debug(
            "Warning: could not find the correct precomputed NormLIME result."
        )

    self.predict_fn = predict_fn
    self.labels = None
    self.image = None
    self.label_names = label_names
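# A hedged construction sketch, assuming the enclosing class is NormLIME;
# predict_fn takes a batch of images and returns class scores, and the label
# names here are illustrative.
def _example_normlime_init(predict_fn):
    interpreter = NormLIME(predict_fn,
                           label_names=['cat', 'dog'],
                           num_samples=3000,
                           batch_size=50)
    return interpreter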
def eval_results(results,
                 metric,
                 coco_gt,
                 with_background=True,
                 resolution=None,
                 is_bbox_normalized=False,
                 map_type='11point'):
    """Evaluation for evaluation program results"""
    box_ap_stats = []
    coco_gt_data = copy.deepcopy(coco_gt)
    eval_details = {'gt': copy.deepcopy(coco_gt.dataset)}
    if metric == 'COCO':
        np.linspace = fixed_linspace
        if 'proposal' in results[0]:
            proposal_eval(results, coco_gt_data)
        if 'bbox' in results[0]:
            box_ap_stats, xywh_results = coco_bbox_eval(
                results,
                coco_gt_data,
                with_background,
                is_bbox_normalized=is_bbox_normalized)
            eval_details['bbox'] = xywh_results
        if 'mask' in results[0]:
            mask_ap_stats, segm_results = mask_eval(results, coco_gt_data,
                                                    resolution)
            ap_stats = [box_ap_stats, mask_ap_stats]
            eval_details['mask'] = segm_results
            # restore the patched linspace before the early return
            np.linspace = backup_linspace
            return ap_stats, eval_details
        np.linspace = backup_linspace
    else:
        if 'accum_map' in results[-1]:
            res = np.mean(results[-1]['accum_map'][0])
            logging.debug('mAP: {:.2f}'.format(res * 100.))
            box_ap_stats.append(res * 100.)
        elif 'bbox' in results[0]:
            box_ap, xywh_results = voc_bbox_eval(
                results,
                coco_gt_data,
                with_background,
                is_bbox_normalized=is_bbox_normalized,
                map_type=map_type)
            box_ap_stats.append(box_ap)
            eval_details['bbox'] = xywh_results
    return box_ap_stats, eval_details
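# Why np.linspace is patched above (a sketch): older pycocotools versions call
# np.linspace with a float `num`, which newer NumPy rejects. fixed_linspace
# and backup_linspace are assumed to be module-level helpers; a minimal
# stand-in consistent with that use could look like this:
def _fixed_linspace_sketch(start, stop, num=50, **kwargs):
    import numpy as np
    # cast the possibly-float sample count to int before delegating
    return np.linspace(start, stop, int(round(num)), **kwargs)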
def build_transforms_v1(model_type, transforms_info, batch_transforms_info):
    """Load transforms of old-version models; only models exported by the PaddleX GUI frontend are supported."""
    logging.debug("Use build_transforms_v1 to reconstruct transforms")
    if model_type == "classifier":
        import paddlex.cv.transforms.cls_transforms as T
    elif model_type == "detector":
        import paddlex.cv.transforms.det_transforms as T
    elif model_type == "segmenter":
        import paddlex.cv.transforms.seg_transforms as T
    transforms = list()
    for op_info in transforms_info:
        op_name = op_info[0]
        op_attr = op_info[1]
        if op_name == 'DecodeImage':
            continue
        if op_name == 'Permute':
            continue
        if op_name == 'ResizeByShort':
            op_attr_new = dict()
            if 'short_size' in op_attr:
                op_attr_new['short_size'] = op_attr['short_size']
            else:
                op_attr_new['short_size'] = op_attr['target_size']
            op_attr_new['max_size'] = op_attr.get('max_size', -1)
            op_attr = op_attr_new
        if op_name.startswith('Arrange'):
            continue
        if not hasattr(T, op_name):
            raise Exception(
                "There's no operator named '{}' in transforms of {}".format(
                    op_name, model_type))
        transforms.append(getattr(T, op_name)(**op_attr))
    if model_type == "detector" and len(batch_transforms_info) > 0:
        op_name = batch_transforms_info[0][0]
        op_attr = batch_transforms_info[0][1]
        assert op_name == "PaddingMiniBatch", \
            "Only the PaddingMiniBatch transform is supported as a batch transform"
        padding = T.Padding(coarsest_stride=op_attr['coarsest_stride'])
        transforms.append(padding)
    eval_transforms = T.Compose(transforms)
    return eval_transforms
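# A hedged sketch of the expected transforms_info layout: a list of
# [op_name, op_attr] pairs as exported by the old PaddleX frontend. The
# concrete operator attributes below are illustrative.
def _example_build_transforms_v1():
    transforms_info = [
        ['DecodeImage', {}],  # skipped during the rebuild
        ['ResizeByShort', {'short_size': 800, 'max_size': 1333}],
        ['Normalize', {'mean': [0.485, 0.456, 0.406],
                       'std': [0.229, 0.224, 0.225]}],
        ['Permute', {}],  # skipped during the rebuild
    ]
    batch_transforms_info = [['PaddingMiniBatch', {'coarsest_stride': 32}]]
    return build_transforms_v1('detector', transforms_info,
                               batch_transforms_info)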
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             metric=None,
             return_details=False):
    """Evaluation.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size for validation. Defaults to 1; only 1
            is currently supported.
        epoch_id (int): training epoch the evaluated model comes from.
        metric (str): evaluation protocol, one of ['COCO', 'VOC']. Defaults
            to None, in which case it is chosen from the dataset type:
            'VOC' for VOCDetection, 'COCO' for CocoDetection.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        tuple (metrics, eval_details) / dict (metrics): returns
        (metrics, eval_details) when return_details is True, otherwise just
        metrics. metrics is a dict keyed by 'bbox_mmap' (mAP averaged over
        IoU thresholds) or 'bbox_map' (mAP at a single threshold).
        eval_details is a dict with key 'bbox', a list of predictions, each
        consisting of image id, predicted class id, box coordinates and box
        score, and key 'gt', the ground-truth annotations.
    """
    self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
    if metric is None:
        if hasattr(self, 'metric') and self.metric is not None:
            metric = self.metric
        else:
            if isinstance(eval_dataset, paddlex.datasets.CocoDetection):
                metric = 'COCO'
            elif isinstance(eval_dataset, paddlex.datasets.VOCDetection):
                metric = 'VOC'
            else:
                raise Exception(
                    "eval_dataset should be datasets.VOCDetection or datasets.COCODetection."
                )
    assert metric in ['COCO', 'VOC'], "Metric only supports 'VOC' or 'COCO'"
    if batch_size > 1:
        batch_size = 1
        logging.warning(
            "Faster RCNN supports batch_size=1 only during evaluation, so batch_size is forced to 1."
        )
    dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    results = list()
    logging.info(
        "Start evaluating (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        im_infos = np.array([d[1] for d in data]).astype('float32')
        im_shapes = np.array([d[3] for d in data]).astype('float32')
        feed_data = {
            'image': images,
            'im_info': im_infos,
            'im_shape': im_shapes,
        }
        outputs = self.exe.run(self.test_prog,
                               feed=[feed_data],
                               fetch_list=list(self.test_outputs.values()),
                               return_numpy=False)
        res = {
            'bbox': (np.array(outputs[0]),
                     outputs[0].recursive_sequence_lengths())
        }
        res_im_id = [d[2] for d in data]
        res['im_info'] = (im_infos, [])
        res['im_shape'] = (im_shapes, [])
        res['im_id'] = (np.array(res_im_id), [])
        if metric == 'VOC':
            res_gt_box = []
            res_gt_label = []
            res_is_difficult = []
            for d in data:
                res_gt_box.extend(d[4])
                res_gt_label.extend(d[5])
                res_is_difficult.extend(d[6])
            res_gt_box_lod = [d[4].shape[0] for d in data]
            res_gt_label_lod = [d[5].shape[0] for d in data]
            res_is_difficult_lod = [d[6].shape[0] for d in data]
            res['gt_box'] = (np.array(res_gt_box), [res_gt_box_lod])
            res['gt_label'] = (np.array(res_gt_label), [res_gt_label_lod])
            res['is_difficult'] = (np.array(res_is_difficult),
                                   [res_is_difficult_lod])
        results.append(res)
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))
    box_ap_stats, eval_details = eval_results(results,
                                              metric,
                                              eval_dataset.coco_gt,
                                              with_background=True)
    metrics = OrderedDict(
        zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], box_ap_stats))
    if return_details:
        return metrics, eval_details
    return metrics
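# A hedged usage sketch of the evaluate() API above; the model and dataset
# objects are placeholders mirroring the documented PaddleX workflow.
def _example_evaluate(model, eval_dataset):
    metrics, eval_details = model.evaluate(eval_dataset,
                                           batch_size=1,
                                           return_details=True)
    print(metrics)  # e.g. OrderedDict([('bbox_mmap', ...)])
    print(len(eval_details['bbox']))  # number of predicted boxes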
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             metric=None,
             return_details=False):
    """Evaluation.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size for validation. Defaults to 1; only 1
            is currently supported.
        epoch_id (int): training epoch the evaluated model comes from.
        metric (str): evaluation protocol, one of ['COCO', 'VOC']. Defaults
            to None, in which case it is chosen from the dataset type:
            'COCO' for CocoDetection.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        tuple (metrics, eval_details) / dict (metrics): returns
        (metrics, eval_details) when return_details is True, otherwise just
        metrics. metrics is a dict keyed by 'bbox_mmap' and 'segm_mmap' (box
        and mask mAP averaged over IoU thresholds) or 'bbox_map' and
        'segm_map' (box and mask mAP at a single threshold). eval_details is
        a dict with keys 'bbox', 'mask' and 'gt'. 'bbox' maps to a list of
        predictions, each a list of image id, predicted class id, box
        coordinates and box score. 'mask' maps to a list of per-box
        segmentation results, each consisting of image id, predicted class
        id, a binary map marking which pixels inside the box belong to the
        object, and the box score. 'gt' holds the ground-truth annotations.
    """
    input_channel = getattr(self, 'input_channel', 3)
    arrange_transforms(model_type=self.model_type,
                       class_name=self.__class__.__name__,
                       transforms=eval_dataset.transforms,
                       mode='eval',
                       input_channel=input_channel)
    if metric is None:
        if hasattr(self, 'metric') and self.metric is not None:
            metric = self.metric
        else:
            if isinstance(eval_dataset, paddlex.datasets.CocoDetection):
                metric = 'COCO'
            else:
                raise Exception(
                    "eval_dataset should be datasets.COCODetection.")
    assert metric in ['COCO', 'VOC'], "Metric only supports 'VOC' or 'COCO'"
    if batch_size > 1:
        batch_size = 1
        logging.warning(
            "Mask RCNN supports batch_size=1 only during evaluation, so batch_size is forced to 1."
        )
    data_generator = eval_dataset.generator(batch_size=batch_size,
                                            drop_last=False)
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    results = list()
    logging.info(
        "Start evaluating (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(data_generator()),
                                total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        im_infos = np.array([d[1] for d in data]).astype('float32')
        im_shapes = np.array([d[3] for d in data]).astype('float32')
        feed_data = {
            'image': images,
            'im_info': im_infos,
            'im_shape': im_shapes,
        }
        with fluid.scope_guard(self.scope):
            outputs = self.exe.run(self.test_prog,
                                   feed=[feed_data],
                                   fetch_list=list(
                                       self.test_outputs.values()),
                                   return_numpy=False)
        res = {
            'bbox': (np.array(outputs[0]),
                     outputs[0].recursive_sequence_lengths()),
            'mask': (np.array(outputs[1]),
                     outputs[1].recursive_sequence_lengths())
        }
        res_im_id = [d[2] for d in data]
        res['im_info'] = (im_infos, [])
        res['im_shape'] = (im_shapes, [])
        res['im_id'] = (np.array(res_im_id), [])
        results.append(res)
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))
    ap_stats, eval_details = eval_results(
        results,
        'COCO',
        eval_dataset.coco_gt,
        with_background=True,
        resolution=self.mask_head_resolution)
    if metric == 'VOC':
        if isinstance(ap_stats[0], np.ndarray) and isinstance(
                ap_stats[1], np.ndarray):
            metrics = OrderedDict(
                zip(['bbox_map', 'segm_map'],
                    [ap_stats[0][1], ap_stats[1][1]]))
        else:
            metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
    elif metric == 'COCO':
        if isinstance(ap_stats[0], np.ndarray) and isinstance(
                ap_stats[1], np.ndarray):
            metrics = OrderedDict(
                zip(['bbox_mmap', 'segm_mmap'],
                    [ap_stats[0][0], ap_stats[1][0]]))
        else:
            metrics = OrderedDict(
                zip(['bbox_mmap', 'segm_mmap'], [0.0, 0.0]))
    if return_details:
        return metrics, eval_details
    return metrics
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             return_details=False):
    """Evaluation.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size for validation. Defaults to 1.
        epoch_id (int): training epoch the evaluated model comes from.
        return_details (bool): whether to return detailed information.

    Returns:
        dict: when return_details is False, a dict keyed by 'acc1' and
        'acck' (k = min(5, num_classes)): top-1 and top-k accuracy.
        tuple (metrics, eval_details): when return_details is True, an
        extra dict is returned with keys 'true_labels' and 'pred_scores',
        the true class ids and the per-class prediction scores.
    """
    arrange_transforms(model_type=self.model_type,
                       class_name=self.__class__.__name__,
                       transforms=eval_dataset.transforms,
                       mode='eval')
    data_generator = eval_dataset.generator(batch_size=batch_size,
                                            drop_last=False)
    k = min(5, self.num_classes)
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    true_labels = list()
    pred_scores = list()
    if not hasattr(self, 'parallel_test_prog'):
        with fluid.scope_guard(self.scope):
            self.parallel_test_prog = fluid.CompiledProgram(
                self.test_prog).with_data_parallel(
                    share_vars_from=self.parallel_train_prog)
    batch_size_each_gpu = self._get_single_card_bs(batch_size)
    logging.info(
        "Start evaluating (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(data_generator()),
                                total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        labels = [d[1] for d in data]
        num_samples = images.shape[0]
        if num_samples < batch_size:
            # pad the last incomplete batch by repeating the first sample
            num_pad_samples = batch_size - num_samples
            pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
            images = np.concatenate([images, pad_images])
        with fluid.scope_guard(self.scope):
            outputs = self.exe.run(self.parallel_test_prog,
                                   feed={'image': images},
                                   fetch_list=list(
                                       self.test_outputs.values()))
        outputs = [outputs[0][:num_samples]]
        true_labels.extend(labels)
        pred_scores.extend(outputs[0].tolist())
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))

    pred_top1_label = np.argsort(pred_scores)[:, -1]
    pred_topk_label = np.argsort(pred_scores)[:, -k:]
    acc1 = sum(pred_top1_label == true_labels) / len(true_labels)
    acck = sum(
        [np.isin(x, y)
         for x, y in zip(true_labels, pred_topk_label)]) / len(true_labels)
    metrics = OrderedDict([('acc1', acc1), ('acc{}'.format(k), acck)])
    if return_details:
        eval_details = {
            'true_labels': true_labels,
            'pred_scores': pred_scores
        }
        return metrics, eval_details
    return metrics
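# A tiny worked example of the top-k logic above: np.argsort sorts scores
# ascending, so the last column is the top-1 class and the last k columns
# are the top-k candidates. All values here are illustrative.
def _example_topk():
    import numpy as np
    pred_scores = np.array([[0.1, 0.7, 0.2],
                            [0.5, 0.3, 0.2]])
    true_labels = np.array([1, 2])
    k = 2
    top1 = np.argsort(pred_scores)[:, -1]   # -> [1, 0]
    topk = np.argsort(pred_scores)[:, -k:]  # -> [[2, 1], [1, 0]]
    acc1 = np.mean(top1 == true_labels)     # 0.5
    acck = np.mean([np.isin(x, y) for x, y in zip(true_labels, topk)])  # 0.5
    return acc1, acck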
def sensitivity(program,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                pruned_ratios=None):
    scope = fluid.global_scope()
    graph = GraphWrapper(program)
    sensitivities = load_sensitivities(sensitivities_file)
    if pruned_ratios is None:
        pruned_ratios = np.arange(0.1, 1, step=0.1)

    total_evaluate_iters = 0
    for name in param_names:
        if name not in sensitivities:
            sensitivities[name] = {}
            total_evaluate_iters += len(list(pruned_ratios))
        else:
            total_evaluate_iters += (len(list(pruned_ratios)) -
                                     len(sensitivities[name]))
    eta = '-'
    start_time = time.time()
    baseline = eval_func(graph.program)
    cost = time.time() - start_time
    eta = cost * (total_evaluate_iters - 1)
    current_iter = 1
    for name in sensitivities:
        for ratio in pruned_ratios:
            if ratio in sensitivities[name]:
                logging.debug('{}, {} has been computed.'.format(name,
                                                                 ratio))
                continue
            progress = float(current_iter) / total_evaluate_iters
            progress = "%.2f%%" % (progress * 100)
            logging.info(
                "Total evaluate iters={}, current={}, progress={}, eta={}".
                format(total_evaluate_iters, current_iter, progress,
                       seconds_to_hms(
                           int(cost * (total_evaluate_iters - current_iter)))),
                use_color=True)
            current_iter += 1

            pruner = Pruner()
            logging.info("sensitive - param: {}; ratios: {}".format(name,
                                                                    ratio))
            pruned_program, param_backup, _ = pruner.prune(
                program=graph.program,
                scope=scope,
                params=[name],
                ratios=[ratio],
                place=place,
                lazy=True,
                only_graph=False,
                param_backup=True)
            pruned_metric = eval_func(pruned_program)
            loss = (baseline - pruned_metric) / baseline
            logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
                                                                loss))
            sensitivities[name][ratio] = loss
            with open(sensitivities_file, 'wb') as f:
                pickle.dump(sensitivities, f)

            # restore the lazily-pruned weights before trying the next ratio
            for param_name in param_backup.keys():
                param_t = scope.find_var(param_name).get_tensor()
                param_t.set(param_backup[param_name], place)
    return sensitivities
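# A hedged usage sketch: eval_func takes a program and returns a scalar
# metric; sensitivity() measures the relative metric drop for each pruned
# ratio. The parameter name and file path below are placeholders.
def _example_sensitivity(program, place, eval_program_fn):
    sens = sensitivity(program,
                       place,
                       param_names=['conv1_weights'],
                       eval_func=eval_program_fn,
                       sensitivities_file='./sensitivities.data',
                       pruned_ratios=[0.1, 0.2, 0.3])
    # sens maps param name -> {ratio: relative metric loss}
    print(sens['conv1_weights'])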
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             return_details=False):
    """Evaluation.

    Args:
        eval_dataset (paddlex.datasets): reader for the evaluation data.
        batch_size (int): batch size used for evaluation. Defaults to 1.
        epoch_id (int): training epoch the evaluated model comes from.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        dict: when return_details is False, a dict keyed by 'miou',
        'category_iou', 'macc', 'category_acc' and 'kappa': mean IoU,
        per-category IoU, mean accuracy, per-category accuracy and the
        kappa coefficient.
        tuple (metrics, eval_details): when return_details is True, an
        extra dict is returned with key 'confusion_matrix', the evaluation
        confusion matrix.
    """
    self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    conf_mat = ConfusionMatrix(self.num_classes, streaming=True)
    data_generator = eval_dataset.generator(batch_size=batch_size,
                                            drop_last=False)
    if not hasattr(self, 'parallel_test_prog'):
        self.parallel_test_prog = fluid.CompiledProgram(
            self.test_prog).with_data_parallel(
                share_vars_from=self.parallel_train_prog)
    logging.info(
        "Start evaluating (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(data_generator()),
                                total=total_steps):
        images = np.array([d[0] for d in data])
        _, _, im_h, im_w = images.shape
        labels = list()
        for d in data:
            # pad each label map to the batch image size with ignore_index
            padding_label = np.zeros(
                (1, im_h, im_w)).astype('int64') + self.ignore_index
            _, label_h, label_w = d[1].shape
            padding_label[:, :label_h, :label_w] = d[1]
            labels.append(padding_label)
        labels = np.array(labels)

        num_samples = images.shape[0]
        if num_samples < batch_size:
            num_pad_samples = batch_size - num_samples
            pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
            images = np.concatenate([images, pad_images])
        feed_data = {'image': images}
        outputs = self.exe.run(self.parallel_test_prog,
                               feed=feed_data,
                               fetch_list=list(self.test_outputs.values()),
                               return_numpy=True)
        pred = outputs[0]
        if num_samples < batch_size:
            pred = pred[0:num_samples]

        mask = labels != self.ignore_index
        conf_mat.calculate(pred=pred, label=labels, ignore=mask)
        _, iou = conf_mat.mean_iou()

        logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format(
            epoch_id, step + 1, total_steps, iou))

    category_iou, miou = conf_mat.mean_iou()
    category_acc, macc = conf_mat.accuracy()
    metrics = OrderedDict(
        zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
            [miou, category_iou, macc, category_acc, conf_mat.kappa()]))
    if return_details:
        eval_details = {
            'confusion_matrix': conf_mat.confusion_matrix.tolist()
        }
        return metrics, eval_details
    return metrics
def _calculate_kl_threshold(self):
    '''
    Calculate the KL threshold of quantized variables.
    '''
    assert self._algo == "KL", "The algo should be KL to calculate the kl threshold."
    ct = 1
    # abs_max threshold for weights
    for var_name in self._quantized_weight_var_name:
        start = time.time()
        weight_data = self._sampling_data[var_name]
        weight_threshold = None
        if self._weight_quantize_type == "abs_max":
            weight_threshold = np.max(np.abs(weight_data))
        elif self._weight_quantize_type == "channel_wise_abs_max":
            weight_threshold = []
            for i in range(weight_data.shape[0]):
                abs_max_value = np.max(np.abs(weight_data[i]))
                weight_threshold.append(abs_max_value)
        self._quantized_var_kl_threshold[var_name] = weight_threshold
        end = time.time()
        logging.debug(
            '[Calculate weight] Weight_id={}/{}, time_each_weight={} s.'.
            format(
                str(ct), str(len(self._quantized_weight_var_name)),
                str(end - start)))
        ct += 1

    ct = 1
    # KL threshold for activations
    if self._is_use_cache_file:
        for var_name in self._quantized_act_var_name:
            start = time.time()
            sampling_data = []
            filenames = [f for f in os.listdir(self._cache_dir)
                         if re.match(var_name + '_[0-9]+.npy', f)]
            for filename in filenames:
                file_path = os.path.join(self._cache_dir, filename)
                sampling_data.append(np.load(file_path))
                os.remove(file_path)
            sampling_data = np.concatenate(sampling_data)
            self._quantized_var_kl_threshold[var_name] = \
                self._get_kl_scaling_factor(np.abs(sampling_data))
            end = time.time()
            logging.debug(
                '[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.
                format(
                    str(ct), str(len(self._quantized_act_var_name)),
                    str(end - start)))
            ct += 1
    else:
        for var_name in self._quantized_act_var_name:
            start = time.time()
            self._sampling_data[var_name] = np.concatenate(
                self._sampling_data[var_name])
            self._quantized_var_kl_threshold[var_name] = \
                self._get_kl_scaling_factor(
                    np.abs(self._sampling_data[var_name]))
            end = time.time()
            logging.debug(
                '[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.
                format(
                    str(ct), str(len(self._quantized_act_var_name)),
                    str(end - start)))
            ct += 1
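# A tiny numeric illustration of the two weight-threshold modes above:
# abs_max takes one scale for the whole tensor, while channel_wise_abs_max
# takes one scale per output channel (axis 0). Values are illustrative.
def _example_weight_thresholds():
    import numpy as np
    w = np.array([[0.5, -2.0], [0.1, 0.3]])
    abs_max = np.max(np.abs(w))  # 2.0 for the whole tensor
    channel_wise = [np.max(np.abs(w[i])) for i in range(w.shape[0])]
    # channel_wise == [2.0, 0.3]: the second channel keeps a finer scale
    return abs_max, channel_wise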
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             return_details=False):
    """Evaluation.

    Args:
        eval_dataset (paddlex.datasets): reader for the evaluation data.
        batch_size (int): batch size used for evaluation. Defaults to 1.
        epoch_id (int): training epoch the evaluated model comes from.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        dict: when return_details is False, a dict keyed by 'miou',
        'category_iou', 'oacc', 'category_acc', 'kappa' and
        'category_F1-score': mean IoU, per-category IoU, overall accuracy,
        per-category accuracy, the kappa coefficient and per-category
        F1-score.
        tuple (metrics, eval_details): when return_details is True, an
        extra dict is returned with key 'confusion_matrix', the evaluation
        confusion matrix.
    """
    arrange_transforms(model_type=self.model_type,
                       class_name=self.__class__.__name__,
                       transforms=eval_dataset.transforms,
                       mode='eval')
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    conf_mat = ConfusionMatrix(self.num_classes, streaming=True)
    data_generator = eval_dataset.generator(batch_size=batch_size,
                                            drop_last=False)
    if not hasattr(self, 'parallel_test_prog'):
        with fluid.scope_guard(self.scope):
            self.parallel_test_prog = fluid.CompiledProgram(
                self.test_prog).with_data_parallel(
                    share_vars_from=self.parallel_train_prog)
    logging.info(
        "Start evaluating (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(data_generator()),
                                total=total_steps):
        images = np.array([d[0] for d in data])
        im_info = [d[1] for d in data]
        labels = [d[2] for d in data]

        num_samples = images.shape[0]
        if num_samples < batch_size:
            num_pad_samples = batch_size - num_samples
            pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
            images = np.concatenate([images, pad_images])
        feed_data = {'image': images}
        with fluid.scope_guard(self.scope):
            outputs = self.exe.run(self.parallel_test_prog,
                                   feed=feed_data,
                                   fetch_list=list(
                                       self.test_outputs.values()),
                                   return_numpy=True)
        pred = outputs[0]
        if num_samples < batch_size:
            pred = pred[0:num_samples]

        for i in range(num_samples):
            one_pred = np.squeeze(pred[i]).astype('uint8')
            one_label = labels[i]
            # undo the recorded preprocessing steps in reverse order
            for info in im_info[i][::-1]:
                if info[0] == 'resize':
                    w, h = info[1][1], info[1][0]
                    one_pred = cv2.resize(one_pred, (w, h),
                                          interpolation=cv2.INTER_NEAREST)
                elif info[0] == 'padding':
                    w, h = info[1][1], info[1][0]
                    one_pred = one_pred[0:h, 0:w]
            one_pred = one_pred.astype('int64')
            one_pred = one_pred[np.newaxis, :, :, np.newaxis]
            one_label = one_label[np.newaxis, np.newaxis, :, :]
            mask = one_label != self.ignore_index
            conf_mat.calculate(pred=one_pred, label=one_label, ignore=mask)
        _, iou = conf_mat.mean_iou()

        logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format(
            epoch_id, step + 1, total_steps, iou))

    category_iou, miou = conf_mat.mean_iou()
    category_acc, oacc = conf_mat.accuracy()
    category_f1score = conf_mat.f1_score()

    metrics = OrderedDict(
        zip([
            'miou', 'category_iou', 'oacc', 'category_acc', 'kappa',
            'category_F1-score'
        ], [
            miou, category_iou, oacc, category_acc, conf_mat.kappa(),
            category_f1score
        ]))
    if return_details:
        eval_details = {
            'confusion_matrix': conf_mat.confusion_matrix.tolist()
        }
        return metrics, eval_details
    return metrics
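# A hedged sketch of the im_info bookkeeping the loop above relies on: each
# entry is assumed to record an op name and the (h, w) shape *before* that
# op, so walking the list in reverse restores the original resolution. The
# shapes below are illustrative.
def _example_undo_im_info():
    import numpy as np
    import cv2
    pred = np.zeros((512, 512), dtype='uint8')  # padded network output
    im_info = [('resize', [375, 500]), ('padding', [500, 500])]
    for op, shape in im_info[::-1]:
        h, w = shape[0], shape[1]
        if op == 'padding':
            pred = pred[0:h, 0:w]  # crop away the padding
        elif op == 'resize':
            pred = cv2.resize(pred, (w, h),
                              interpolation=cv2.INTER_NEAREST)
    return pred.shape  # (375, 500), the original image size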
def voc_bbox_eval(results,
                  coco_gt,
                  with_background=False,
                  overlap_thresh=0.5,
                  map_type='11point',
                  is_bbox_normalized=False,
                  evaluate_difficult=False):
    """
    Bounding box evaluation for VOC dataset

    Args:
        results (list): prediction bounding box results.
        coco_gt: loaded COCO-format ground truth.
        with_background (bool): whether class id 0 is reserved for
            background.
        overlap_thresh (float): the positive threshold of bbox overlap.
        map_type (str): method for mAP calculation, can only be '11point'
            or 'integral'.
        is_bbox_normalized (bool): whether bbox is normalized to range
            [0, 1].
        evaluate_difficult (bool): whether to evaluate difficult gt bboxes.
    """
    assert 'bbox' in results[0]
    logging.debug("Start evaluating...")

    # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
    # or matplotlib.backends is imported for the first time;
    # pycocotools imports matplotlib
    import matplotlib
    matplotlib.use('Agg')
    from pycocotools.coco import COCO

    cat_ids = coco_gt.getCatIds()

    # when with_background = True, map categories to class ids, like:
    # background:0, first_class:1, second_class:2, ...
    clsid2catid = dict(
        {i + int(with_background): catid
         for i, catid in enumerate(cat_ids)})

    class_num = len(clsid2catid) + int(with_background)
    detection_map = DetectionMAP(class_num=class_num,
                                 overlap_thresh=overlap_thresh,
                                 map_type=map_type,
                                 is_bbox_normalized=is_bbox_normalized,
                                 evaluate_difficult=evaluate_difficult)

    xywh_res = []
    det_nums = 0
    gt_nums = 0
    for t in results:
        bboxes = t['bbox'][0]
        bbox_lengths = t['bbox'][1][0]
        im_ids = np.array(t['im_id'][0]).flatten()
        # check for None before touching .shape
        if bboxes is None or bboxes.shape == (1, 1):
            continue
        gt_boxes = t['gt_box'][0]
        gt_labels = t['gt_label'][0]
        difficults = t['is_difficult'][0] if not evaluate_difficult \
            else None

        if len(t['gt_box'][1]) == 0:
            # gt_box, gt_label, difficult read as zero-padded Tensor
            bbox_idx = 0
            for i in range(len(gt_boxes)):
                gt_box = gt_boxes[i]
                gt_label = gt_labels[i]
                difficult = None if difficults is None \
                    else difficults[i]
                bbox_num = bbox_lengths[i]
                bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
                gt_box, gt_label, difficult = prune_zero_padding(
                    gt_box, gt_label, difficult)
                detection_map.update(bbox, gt_box, gt_label, difficult)
                bbox_idx += bbox_num
                det_nums += bbox_num
                gt_nums += gt_box.shape[0]
                im_id = int(im_ids[i])
                for b in bbox:
                    clsid, score, xmin, ymin, xmax, ymax = b.tolist()
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1
                    xywh_bbox = [xmin, ymin, w, h]
                    coco_res = {
                        'image_id': im_id,
                        'category_id': clsid2catid[clsid],
                        'bbox': xywh_bbox,
                        'score': score
                    }
                    xywh_res.append(coco_res)
        else:
            # gt_box, gt_label, difficult read as LoDTensor
            gt_box_lengths = t['gt_box'][1][0]
            bbox_idx = 0
            gt_box_idx = 0
            for i in range(len(bbox_lengths)):
                bbox_num = bbox_lengths[i]
                gt_box_num = gt_box_lengths[i]
                bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
                gt_box = gt_boxes[gt_box_idx:gt_box_idx + gt_box_num]
                gt_label = gt_labels[gt_box_idx:gt_box_idx + gt_box_num]
                difficult = None if difficults is None else \
                    difficults[gt_box_idx:gt_box_idx + gt_box_num]
                detection_map.update(bbox, gt_box, gt_label, difficult)
                bbox_idx += bbox_num
                gt_box_idx += gt_box_num
                im_id = int(im_ids[i])
                for b in bbox:
                    clsid, score, xmin, ymin, xmax, ymax = b.tolist()
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1
                    xywh_bbox = [xmin, ymin, w, h]
                    coco_res = {
                        'image_id': im_id,
                        'category_id': clsid2catid[clsid],
                        'bbox': xywh_bbox,
                        'score': score
                    }
                    xywh_res.append(coco_res)

    logging.debug("Accumulating evaluation results...")
    detection_map.accumulate()
    map_stat = 100. * detection_map.get_map()
    logging.debug("mAP({:.2f}, {}) = {:.2f}".format(overlap_thresh, map_type,
                                                    map_stat))
    return map_stat, xywh_res
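# A short illustration of the xyxy -> xywh conversion used above; the +1
# follows the VOC convention of inclusive pixel coordinates.
def _example_xyxy_to_xywh():
    xmin, ymin, xmax, ymax = 10.0, 20.0, 59.0, 99.0
    w = xmax - xmin + 1  # 50.0
    h = ymax - ymin + 1  # 80.0
    return [xmin, ymin, w, h]  # [10.0, 20.0, 50.0, 80.0]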