def __init__(self, y_true_path, y_pred_path, name_path):
    """Load ground truth, predictions and class names.

    :param y_true_path: path handed to ``decode_annotation`` with
        ``type='y_true'``; the result is stored on ``self.gt``
    :param y_pred_path: path handed to ``decode_annotation`` with
        ``type='y_pred'``; the result is stored on ``self.dt``
    :param name_path: path handed to ``decode_name``; the result is
        stored on ``self.names``
    """
    # ground truth first, then detections, then the class names
    ground_truth = decode_annotation(y_true_path, type='y_true')
    self.gt = ground_truth

    detections = decode_annotation(y_pred_path, type='y_pred')
    self.dt = detections

    class_names = decode_name(name_path)
    self.names = class_names
def __init__(self, cfg, verbose=0):
    """Read settings from ``cfg`` and load annotations and class names.

    :param cfg: mapping with a ``"yolo"`` section (``mask``, ``anchors``,
        ``max_boxes``, ``strides``) and a ``"train"`` section
        (``anno_path``, ``name_path``, ``image_size``, ``batch_size``,
        ``mix_up``, ``cut_mix``, ``mosaic``, ``label_smoothing``)
    :param verbose: stored unchanged on ``self.verbose``
    """
    self.verbose = verbose

    # model-related settings
    yolo_cfg = cfg["yolo"]
    self.mask = yolo_cfg["mask"]
    self.anchors = yolo_cfg["anchors"]
    self.max_boxes = yolo_cfg["max_boxes"]
    self.strides = yolo_cfg["strides"]

    # training-related settings
    train_cfg = cfg["train"]
    self.anno_path = train_cfg["anno_path"]
    self.name_path = train_cfg["name_path"]
    self.image_size = train_cfg["image_size"]
    self.batch_size = train_cfg["batch_size"]
    self.mix_up = train_cfg["mix_up"]
    self.cut_mix = train_cfg["cut_mix"]
    self.mosaic = train_cfg["mosaic"]
    self.label_smoothing = train_cfg["label_smoothing"]

    # dataset contents
    annotation = decode_annotation(anno_path=self.anno_path)
    self.annotation = annotation
    self.num_anno = len(annotation)

    name = decode_name(name_path=self.name_path)
    self.name = name
    self.num_classes = len(name)

    # init: draw one image size from the configured candidates and derive
    # the anchors and grid size that go with it
    chosen_size = np.random.choice(self.image_size)
    self._image_size = chosen_size
    self._anchors = self.anchors / chosen_size
    self._grid_size = chosen_size // self.strides
# -*- coding: utf-8 -*- import numpy as np import cv2 from core.utils import decode_annotation # Parameters K = 6 image_size = 416 dataset_path = './data/pascal_voc/train.txt' print('Num of Clusters is', K) print('Base Image Size is', image_size) # Read Dataset anns = decode_annotation(dataset_path) def resize_bboxes(path, bboxes): image = cv2.imread(path) h, w, _ = image.shape scale = min(image_size / w, image_size / h) nw, nh = int(scale * w), int(scale * h) dw, dh = (image_size - nw) // 2, (image_size - nh) // 2 bboxes = np.asarray(bboxes).astype(np.float32) bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * scale + dw bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * scale + dh bboxes[:, [0, 2]] = np.clip(bboxes[:, [0, 2]], 0, image_size - 1) bboxes[:, [1, 3]] = np.clip(bboxes[:, [1, 3]], 0, image_size - 1)
def _voc_image_key(image_path):
    """Return the file name of ``image_path`` without its final extension."""
    # splitext() removes only the last suffix, so 'img.1.jpg' and
    # 'img.2.jpg' stay distinct; split('.')[0] would map both to 'img'.
    return os.path.splitext(os.path.basename(image_path))[0]


def _voc_mark_detections(class_recs, image_ids, BB, ovthresh):
    """Flag confidence-sorted detections of one class as TP or FP.

    :param class_recs: per-image records with keys ``'bbox'``, ``'det'`` and
        ``'difficult'``; the ``'det'`` flags are updated in place
    :param image_ids: image key of every detection, best confidence first
    :param BB: detection boxes in the same order as ``image_ids``
    :param ovthresh: a detection matches when its best overlap exceeds this
    :return: ``(tp, fp)`` arrays holding 1.0 where a detection is a true /
        false positive
    """
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d, image_id in enumerate(image_ids):
        R = class_recs[image_id]
        bb = BB[d].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match with a 'difficult' box counts as neither TP nor FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # this ground-truth box was already claimed by a
                    # higher-confidence detection
                    fp[d] = 1.
        else:
            fp[d] = 1.
    return tp, fp


def VOCEval(y_true_path, y_pred_path, name_path, ovthresh=0.5,
            use_07_metric=False, verbose=0):
    """Compute the mean average precision over all classes.

    Images are matched between ground truth and predictions by file name
    without directory and without the final extension.

    :param y_true_path: ground-truth file, read with
        ``decode_annotation(..., type='y_true')``
    :param y_pred_path: prediction file, read with
        ``decode_annotation(..., type='y_pred')``
    :param name_path: class-name file, read with ``decode_name``
    :param ovthresh: Overlap threshold (default = 0.5)
    :param use_07_metric: Whether to use VOC07's 11 point AP computation
        (default False)
    :param verbose: when greater than 0, print the AP of every class and
        the mAP
    :return: mean of the per-class AP values
    """
    # load y_true
    y_true = decode_annotation(y_true_path, type='y_true')
    # load y_pred
    y_pred = decode_annotation(y_pred_path, type='y_pred')
    names = decode_name(name_path)

    ans = {}
    # index-based loop: only len() and integer indexing are required of
    # whatever decode_name returns
    for classname_idx in range(len(names)):
        # extract gt objects for this class
        class_recs = {}
        npos = 0
        for imgs_path, bboxes, labels in y_true:
            image_idx = _voc_image_key(imgs_path)
            bbox = np.array([box for box, label in zip(bboxes, labels)
                             if label == classname_idx])
            npos += len(bbox)
            class_recs[image_idx] = {'bbox': bbox,
                                     'det': [False] * len(bbox),
                                     'difficult': [False] * len(bbox)}

        # extract pd objects for this class
        image_ids = []
        BB = []
        confidence = []
        for imgs_path, bboxes, labels, confis in y_pred:
            image_idx = _voc_image_key(imgs_path)
            for bbox, label, confi in zip(bboxes, labels, confis):
                if label != classname_idx:
                    continue
                image_ids.append(image_idx)
                BB.append(bbox)
                confidence.append(confi)

        image_ids = np.array(image_ids)
        BB = np.array(BB)
        confidence = np.array(confidence)

        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        tp, fp = _voc_mark_detections(class_recs, image_ids, BB, ovthresh)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / np.maximum(float(npos), np.finfo(np.float64).eps)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        ans[names[classname_idx]] = {'rec': rec, 'prec': prec, 'ap': ap}

    if verbose > 0:
        print("\nOn Test Data")
        print("class          |AP@50")

    mAP = []
    for key, result in ans.items():
        ap = result['ap']
        mAP.append(ap)
        if verbose > 0:
            print("{:>15}|{:>15.2%}".format(key, ap))
    mAP = np.mean(mAP)

    if verbose > 0:
        print("{:>15}|{:>15.2%}".format('mAP', mAP))
    return mAP