def nms_fallback(boxes, thresh):
    """Perform non-maximum suppression and return the kept indices.

    Parameters
    ----------
    boxes : ndarray
        Boxes as rows of ``[x, y, xmax, ymax, score]``.

    Returns
    -------
    list of int
        Indices of the boxes that survive suppression.
    """
    # visit boxes in descending score order
    order = np.argsort(boxes[:, -1])[::-1]
    iou_mat = bbox_iou(boxes[:, :4], boxes[:, :4])
    keep = []
    while len(order) > 0:
        i = order[0]
        keep.append(i)
        IOU = iou_mat[i, order[1:]]
        # drop every remaining box that overlaps the current pick too much
        remaining = np.where(IOU <= thresh)[0]
        order = order[remaining + 1]
    return keep
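
# --- Illustrative usage sketch (not part of the original module) ---
# nms_fallback keeps the highest-scoring box in each overlapping cluster.
# With an IoU threshold of 0.5, the second box below (IoU ~0.8 with the
# first) is suppressed, while the non-overlapping third box survives.
# Assumes the module-level bbox_iou returns a pairwise IoU matrix, as its
# use inside nms_fallback implies.
def _demo_nms_fallback():
    boxes = np.array([
        [10., 10., 50., 50., 0.9],      # kept: highest score
        [12., 12., 52., 52., 0.8],      # suppressed: IoU ~0.8 with box 0
        [100., 100., 150., 150., 0.7],  # kept: no overlap with earlier picks
    ])
    keep = nms_fallback(boxes, thresh=0.5)
    assert [int(i) for i in keep] == [0, 2]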
def __call__(self, target, size, neg=False):
    anchor_num = len(self.anchor_ratios) * len(self.anchor_scales)

    # -1 ignore, 0 negative, 1 positive
    cls = -1 * np.ones((anchor_num, size, size), dtype=np.int64)
    delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
    delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32)

    def select(position, keep_num=16):
        # randomly subsample the index tuple down to keep_num entries
        num = position[0].shape[0]
        if num <= keep_num:
            return position, num
        slt = np.arange(num)
        np.random.shuffle(slt)
        slt = slt[:keep_num]
        return tuple(p[slt] for p in position), keep_num

    tcx, tcy, tw, th = corner2center(target)

    if neg:
        # negative pair: only mark a small window around the projected
        # target center as negative, everything else stays ignored
        cx = size // 2
        cy = size // 2
        cx += int(np.ceil(
            (tcx - self.train_search_size // 2) / self.anchor_stride + 0.5))
        cy += int(np.ceil(
            (tcy - self.train_search_size // 2) / self.anchor_stride + 0.5))
        l = max(0, cx - 3)
        r = min(size, cx + 4)
        u = max(0, cy - 3)
        d = min(size, cy + 4)
        cls[:, u:d, l:r] = 0

        neg, _ = select(np.where(cls == 0), self.train_neg_num)
        cls[:] = -1
        cls[neg] = 0

        overlap = np.zeros((anchor_num, size, size), dtype=np.float32)
        return cls, delta, delta_weight, overlap

    anchor_box = self.anchors.all_anchors[0]
    anchor_center = self.anchors.all_anchors[1]
    x1, y1, x2, y2 = anchor_box[0], anchor_box[1], \
        anchor_box[2], anchor_box[3]
    cx, cy, w, h = anchor_center[0], anchor_center[1], \
        anchor_center[2], anchor_center[3]

    # regression targets relative to each anchor
    delta[0] = (tcx - cx) / w
    delta[1] = (tcy - cy) / h
    delta[2] = np.log(tw / w)
    delta[3] = np.log(th / h)

    target = np.array([target[0], target[1], target[2],
                       target[3]]).reshape(1, -1)
    bbox = np.array([x1, y1, x2, y2]).reshape(4, -1).T
    overlap = bbox_iou(bbox, target)
    overlap = overlap.reshape(-1, self.train_output_size,
                              self.train_output_size)

    pos = np.where(overlap > self.train_thr_high)
    neg = np.where(overlap < self.train_thr_low)

    pos, pos_num = select(pos, self.train_pos_num)
    neg, _ = select(neg, self.train_total_num - self.train_pos_num)

    cls[pos] = 1
    delta_weight[pos] = 1. / (pos_num + 1e-6)
    cls[neg] = 0
    return cls, delta, delta_weight, overlap
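
# --- Illustrative sketch (not part of the original module) ---
# The delta encoding above is the usual anchor-relative box regression:
# center offsets are normalized by the anchor size and the scale change is
# log-encoded, so decoding with cx + dx*w, cy + dy*h, w*exp(dw), h*exp(dh)
# recovers the target exactly. Values below are made up for the demo.
def _demo_delta_roundtrip():
    cx, cy, w, h = 64.0, 64.0, 40.0, 40.0      # anchor in center form
    tcx, tcy, tw, th = 70.0, 60.0, 50.0, 30.0  # target in center form
    dx, dy = (tcx - cx) / w, (tcy - cy) / h    # encode, as in __call__
    dw, dh = np.log(tw / w), np.log(th / h)
    decoded = [cx + dx * w, cy + dy * h, w * np.exp(dw), h * np.exp(dh)]
    assert np.allclose(decoded, [tcx, tcy, tw, th])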
def update(self, new_detections: np.ndarray,
           tracking_predictions: np.ndarray,
           detection_anchor_indices: np.ndarray,
           tracking_anchor_indices: np.ndarray,
           tracking_anchor_weights: np.ndarray,
           tracking_classes: np.ndarray,
           extra_info: dict = None):
    """Update the tracks according to tracking and detection predictions.

    Parameters
    ----------
    new_detections : Nx5 ndarray
    tracking_predictions : Mx5 ndarray
    extra_info : dict
        A dictionary with extra information.
    """
    # pylint: disable=too-many-nested-blocks
    t_post_processing = time.time()

    logging.info("tracking predictions' shape is {}".format(
        tracking_predictions.shape))
    logging.debug(tracking_predictions)
    logging.debug(self.waiting_update_tracks)

    detection_landmarks = extra_info[
        'detection_landmarks'] if 'detection_landmarks' in extra_info else None
    tracking_landmarks = extra_info[
        'tracking_landmarks'] if 'tracking_landmarks' in extra_info else None

    for t in self.tracks:
        t.predict()

    # STEP 1: track-level NMS
    still_active_track_pred_indices = []
    still_active_track_indices = []
    if len(tracking_predictions) > 0:
        # class-wise NMS
        keep_set = set()
        for c in set(tracking_classes.ravel().tolist()):
            class_pick = np.nonzero(tracking_classes == c)[0]
            keep_tracking_pred_nms_indices = nms_fallback(
                tracking_predictions[class_pick, ...],
                self.track_nms_thresh)
            for i_keep in keep_tracking_pred_nms_indices:
                keep_set.add(class_pick[i_keep])

        still_active_track_pred_indices = []
        for i_pred, i_track in enumerate(self.waiting_update_tracks):
            if i_pred in keep_set:
                self.tracks[i_track].update(
                    tracking_predictions[i_pred, :],
                    (tracking_anchor_indices[i_pred, :],
                     tracking_anchor_weights[i_pred, :]),
                    tracking_landmarks[i_pred, :]
                    if tracking_landmarks is not None else None)
            else:
                # tracks suppressed in the track NMS are marked as missed
                self.tracks[i_track].mark_missed()
            if self.tracks[i_track].is_active():
                still_active_track_pred_indices.append(i_pred)
                still_active_track_indices.append(i_track)

    # STEP 2: remove new detections overlapping with active tracks
    if len(still_active_track_pred_indices) > 0 and len(new_detections) > 0:
        active_tracking_predictions = tracking_predictions[
            still_active_track_pred_indices, :]
        det_track_max_iou = bbox_iou(new_detections[:, :4],
                                     active_tracking_predictions[:, :4])
        same_class = new_detections[:, -1:] == (
            tracking_classes[still_active_track_pred_indices, :].T)
        # suppress all new detections that have high IOU with active tracks
        affinity = (det_track_max_iou * same_class).max(axis=1)
        keep_detection_indices = np.nonzero(
            affinity <= self.new_track_iou_thresh)[0]
    else:
        # otherwise simply keep all detections
        keep_detection_indices = list(range(len(new_detections)))
        active_tracking_predictions = np.array([])

    # STEP 3: new track initialization
    if len(keep_detection_indices) > 0:
        active_new_detections = new_detections[keep_detection_indices, :]

        # (optional) STEP 3.a: perform joint linking of body and head
        if self.joint_linking:
            tracking_classes = np.array(tracking_classes)
            body2face_link, face2body_link = self._link_face_body(
                active_new_detections,
                extra_info['detection_keypoints'][keep_detection_indices],
                active_tracking_predictions,
                extra_info['tracking_keypoints'][still_active_track_pred_indices],
                tracking_classes[still_active_track_pred_indices])
        else:
            body2face_link, face2body_link = None, None

        new_tracks = []
        for idx, i_new_track in enumerate(keep_detection_indices):
            new_track = Track(
                new_detections[i_new_track, :4], self.all_track_id,
                (detection_anchor_indices[i_new_track, :], np.array([1])),
                keep_alive_thresh=self.keep_alive,
                class_id=new_detections[i_new_track, -1],
                attributes=detection_landmarks[i_new_track, :]
                if detection_landmarks is not None else None)

            if self.joint_linking:
                if new_track.class_id == 0:
                    # new face track
                    if idx in face2body_link[0]:
                        logging.debug("%s %s 0", idx, i_new_track)
                        body_idx = face2body_link[0][idx]
                        if idx > body_idx:
                            new_track.link_to(new_tracks[body_idx])
                    elif idx in face2body_link[2]:
                        logging.debug("%s %s 1", idx, i_new_track)
                        body_idx = face2body_link[2][idx]
                        new_track.link_to(self.tracks[
                            still_active_track_indices[body_idx]])
                if new_track.class_id == 1:
                    # new body track
                    if idx in body2face_link[0]:
                        face_idx = body2face_link[0][idx]
                        if idx > face_idx:
                            new_track.link_to(new_tracks[face_idx])
                    elif idx in body2face_link[2]:
                        face_idx = body2face_link[2][idx]
                        new_track.link_to(self.tracks[
                            still_active_track_indices[face_idx]])

            self.all_track_id += 1
            self.tracks.append(new_track)
            new_tracks.append(new_track)

    elapsed_post_processing = time.time() - t_post_processing
    logging.info(
        "total tracklets to now is {}, post-processing time: {:.05f} sec".format(
            self.all_track_id, elapsed_post_processing))
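
# --- Illustrative sketch (not part of the original module) ---
# STEP 2 in miniature: a new detection only survives if its class-masked
# IoU against every active track stays at or below new_track_iou_thresh.
# Values below are made up; bbox_iou is assumed to return a pairwise
# IoU matrix of shape (num_detections, num_active_tracks).
def _demo_detection_suppression(new_track_iou_thresh=0.3):
    new_detections = np.array([[10., 10., 50., 50., 1.],       # overlaps track
                               [200., 200., 240., 240., 1.]])  # far away
    active_tracking_predictions = np.array([[12., 12., 52., 52.]])
    tracking_classes = np.array([[1.]])
    iou = bbox_iou(new_detections[:, :4], active_tracking_predictions)
    same_class = new_detections[:, -1:] == tracking_classes.T
    affinity = (iou * same_class).max(axis=1)
    keep = np.nonzero(affinity <= new_track_iou_thresh)[0]
    assert keep.tolist() == [1]  # only the far-away detection starts a track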
def validate(self):
    """Test on validation dataset."""
    val_data = self.val_loader
    ctx = self.ctx
    val_metric = self.val_metric
    nms_threshold = self.nms_threshold
    validation_threshold = self.validation_threshold

    val_metric.reset()
    # set NMS threshold and topk constraint;
    # post_nms = maximum number of objects per image
    self.net.set_nms(nms_thresh=nms_threshold, nms_topk=200,
                     post_nms=len(self.classes))  # default: iou=0.45 and topk=400
    # >>>> TODO: verify effectiveness
    # mx.nd.waitall()
    # allow the MXNet engine to perform graph optimization for best performance
    self.net.hybridize(static_alloc=True, static_shape=True)

    num_of_classes = len(self.classes)
    # total number of correct predictions by class
    tp = [0] * num_of_classes
    # false positives by class
    fp = [0] * num_of_classes
    # number of ground truths by class
    gt_by_class = [0] * num_of_classes
    # recall and precision by class
    rec_by_class = [0] * num_of_classes
    prec_by_class = [0] * num_of_classes
    confusion_matrix = np.zeros((num_of_classes, num_of_classes))

    for batch in val_data:
        batch_size = batch[0].shape[0]
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                          batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                           batch_axis=0, even_split=False)
        pred_bboxes_list = []
        pred_label_list = []
        pred_scores_list = []
        gt_bboxes_list = []
        gt_label_list = []
        for x, y in zip(data, label):
            # get prediction results
            ids, scores, bboxes = self.net(x)
            pred_label_list.append(ids)
            pred_scores_list.append(scores)
            # clip to image size
            pred_bboxes_list.append(bboxes.clip(0, batch[0].shape[2]))
            # split ground truths
            gt_label_list.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes_list.append(y.slice_axis(axis=-1, begin=0, end=4))

        # Uncomment the following line to plot the images in each inference
        # and visually check the tp, fp and fn:
        # self.show_images(x, pred_label_list, pred_bboxes_list,
        #                  gt_label_list, gt_bboxes_list)

        # update metric
        val_metric.update(pred_bboxes_list, pred_label_list, pred_scores_list,
                          gt_bboxes_list, gt_label_list)  # , gt_difficults)

        # micro averaging (precision and recall by class) in each batch
        for img in range(batch_size):
            # count +1 for each ground truth of this class id; gives the total
            # number of gt by class, useful for unbalanced datasets
            for gt_idx in gt_label_list[0][img]:
                index = int(gt_idx.asnumpy()[0])
                gt_by_class[index] += 1

            for (pred_label, pred_bbox) in zip(pred_label_list[0][img],
                                               list(pred_bboxes_list[0][img])):
                pred_label = int(pred_label.asnumpy()[0])
                pred_bbox = pred_bbox.asnumpy()
                pred_bbox = np.expand_dims(pred_bbox, axis=0)

                match = 0
                for (gt_bbox_label, gt_bbox_coordinates) in zip(
                        gt_label_list[0][img], list(gt_bboxes_list[0][img])):
                    gt_bbox_coord = gt_bbox_coordinates.asnumpy()
                    gt_bbox_coord = np.expand_dims(gt_bbox_coord, axis=0)
                    gt_bbox_label = int(gt_bbox_label.asnumpy()[0])
                    iou = bbox_iou(pred_bbox, gt_bbox_coord)

                    if iou > validation_threshold and pred_label == gt_bbox_label:
                        # correct inference
                        confusion_matrix[gt_bbox_label][pred_label] += 1
                        tp[gt_bbox_label] += 1  # correct classification
                        match = 1
                    elif iou > validation_threshold:
                        # incorrect inference: the box overlaps a ground truth
                        # but the predicted class is wrong
                        confusion_matrix[gt_bbox_label][pred_label] += 1
                        fp[pred_label] += 1
                        match = 1
                if not match:
                    fp[pred_label] += 1

    # calculate the recall and precision by class
    tp = np.array(tp)  # we could also sum the matrix diagonal
    fp = np.array(fp)

    # rec and prec according to micro averaging
    for i, (gt_value, tp_value) in enumerate(zip(gt_by_class, tp)):
        rec_by_class[i] += tp_value / gt_value

        # if an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan
        with np.errstate(divide='ignore', invalid='ignore'):
            prec_by_class[i] += tp_value / (tp_value + fp[i])

    return val_metric.get(), rec_by_class, prec_by_class
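
# --- Illustrative sketch (not part of the original module) ---
# The per-class micro-averaged metrics computed at the end of validate(),
# with made-up counts: rec = tp / gt_count and prec = tp / (tp + fp).
def _demo_micro_averaging():
    gt_by_class = np.array([10, 5])  # ground truths per class
    tp = np.array([8, 3])            # true positives per class
    fp = np.array([2, 0])            # false positives per class
    rec_by_class = tp / gt_by_class                  # [0.8, 0.6]
    with np.errstate(divide='ignore', invalid='ignore'):
        prec_by_class = tp / (tp + fp)               # [0.8, 1.0]
    assert np.allclose(rec_by_class, [0.8, 0.6])
    assert np.allclose(prec_by_class, [0.8, 1.0])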
def update(self, pred_bboxes, pred_labels, pred_scores,
           gt_bboxes, gt_labels, gt_difficults=None):
    """Update internal buffer with latest prediction and gt pairs.

    Parameters
    ----------
    pred_bboxes : mxnet.NDArray or numpy.ndarray
        Prediction bounding boxes with shape `B, N, 4`.
        Where B is the size of mini-batch, N is the number of bboxes.
    pred_labels : mxnet.NDArray or numpy.ndarray
        Prediction bounding boxes labels with shape `B, N`.
    pred_scores : mxnet.NDArray or numpy.ndarray
        Prediction bounding boxes scores with shape `B, N`.
    gt_bboxes : mxnet.NDArray or numpy.ndarray
        Ground-truth bounding boxes with shape `B, M, 4`.
        Where B is the size of mini-batch, M is the number of ground-truths.
    gt_labels : mxnet.NDArray or numpy.ndarray
        Ground-truth bounding boxes labels with shape `B, M`.
    gt_difficults : mxnet.NDArray or numpy.ndarray, optional, default is None
        Ground-truth bounding boxes difficulty labels with shape `B, M`.
    """
    if gt_difficults is None:
        gt_difficults = [None for _ in as_numpy(gt_labels)]

    # Not sure about this code;
    # lodged issue #872 on github: https://github.com/dmlc/gluon-cv/issues/872
    # if isinstance(gt_labels, list):
    #     if len(gt_difficults) != len(gt_labels) * gt_labels[0].shape[0]:
    #         gt_difficults = [None] * len(gt_labels) * gt_labels[0].shape[0]

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in zip(
            *[as_numpy(x) for x in [pred_bboxes, pred_labels, pred_scores,
                                    gt_bboxes, gt_labels, gt_difficults]]):
        # strip padding -1 for pred and gt
        valid_pred = np.where(pred_label.flat >= 0)[0]
        pred_bbox = pred_bbox[valid_pred, :]
        pred_label = pred_label.flat[valid_pred].astype(int)
        pred_score = pred_score.flat[valid_pred]

        # change the class ids for the ground truths
        if self.class_map is not None:
            gt_label = np.expand_dims(
                np.array([self.class_map[int(l)] for l in gt_label.flat]),
                axis=0)

        valid_gt = np.where(gt_label.flat >= 0)[0]
        gt_bbox = gt_bbox[valid_gt, :]
        gt_label = gt_label.flat[valid_gt].astype(int)

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0])
        else:
            gt_difficult = gt_difficult.flat[valid_gt]

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            self._n_pos[l] += np.logical_not(gt_difficult_l).sum()
            self._score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                self._match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:]  # += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:]  # += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < self.iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        self._match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            self._match[l].append(1)
                        else:
                            self._match[l].append(0)
                    selec[gt_idx] = True
                else:
                    self._match[l].append(0)
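
# --- Illustrative sketch (not part of the original module) ---
# The greedy matching bookkeeping above in miniature: each prediction
# (already sorted by score) claims its best-IoU ground truth once; a second
# claim on the same ground truth, or a sub-threshold best IoU, records a
# false positive (0). The IoU matrix below is made up.
def _demo_greedy_matching(iou_thresh=0.5):
    iou = np.array([[0.8, 0.1],    # pred 0 best matches gt 0
                    [0.7, 0.2],    # pred 1 also prefers gt 0 (duplicate)
                    [0.3, 0.4]])   # pred 2 is below threshold everywhere
    gt_index = iou.argmax(axis=1)
    gt_index[iou.max(axis=1) < iou_thresh] = -1  # -> [0, 0, -1]
    match, selec = [], np.zeros(iou.shape[1], dtype=bool)
    for gt_idx in gt_index:
        if gt_idx >= 0:
            match.append(1 if not selec[gt_idx] else 0)
            selec[gt_idx] = True
        else:
            match.append(0)
    assert match == [1, 0, 0]  # one true positive, two false positives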
def validate(self):
    """Test on validation dataset."""
    val_data = self.val_loader
    ctx = self.ctx
    val_metric = self.val_metric
    nms_threshold = self.nms_threshold
    validation_threshold = self.validation_threshold

    val_metric.reset()
    # set NMS threshold and topk constraint;
    # post_nms = maximum number of objects per image
    self.net.set_nms(nms_thresh=nms_threshold, nms_topk=200,
                     post_nms=len(self.classes))  # default: iou=0.45 and topk=400
    # allow the MXNet engine to perform graph optimization for best performance
    self.net.hybridize(static_alloc=True, static_shape=True)

    # total number of correct predictions by class
    tp = [0] * len(self.classes)
    # number of ground truths by class
    gt_by_class = [0] * len(self.classes)
    # false positives by class
    fp = [0] * len(self.classes)
    # recall and precision by class
    rec_by_class = [0] * len(self.classes)
    prec_by_class = [0] * len(self.classes)

    for batch in val_data:
        batch_size = batch[0].shape[0]
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                          batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                           batch_axis=0, even_split=False)
        det_bboxes = []
        det_ids = []
        det_scores = []
        gt_bboxes = []
        gt_ids = []
        gt_difficults = []
        for x, y in zip(data, label):
            # get prediction results
            ids, scores, bboxes = self.net(x)
            det_ids.append(ids)
            det_scores.append(scores)
            # clip to image size
            det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
            # split ground truths
            gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
            # gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6)
            #                      if y.shape[-1] > 5 else None)

        # update metric
        val_metric.update(det_bboxes, det_ids, det_scores,
                          gt_bboxes, gt_ids)  # , gt_difficults)

        # micro averaging (precision and recall by class) in each batch
        for img in range(batch_size):
            gt_ids_teste, gt_bboxes_teste = [], []
            for ids in det_ids[0][img]:
                det_ids_number = int(ids.asnumpy()[0])
                # check whether the predicted class is present in the image;
                # otherwise count a false positive and skip the prediction
                if det_ids_number in list(gt_ids[0][img]):
                    gt_index = list(gt_ids[0][img]).index(det_ids_number)
                    gt_ids_teste.extend(gt_ids[0][img][gt_index])
                    gt_bboxes_teste.append(gt_bboxes[0][img][gt_index])
                else:
                    fp[det_ids_number] += 1  # wrong classification

            # count +1 for each ground truth of this class id; gives the total
            # number of gt by class, useful for unbalanced datasets
            for gt_idx in gt_ids[0][img]:
                index = int(gt_idx.asnumpy()[0])
                gt_by_class[index] += 1

            for ids in range(len(gt_bboxes_teste)):
                det_bbox_ids = det_bboxes[0][img][ids]
                det_bbox_ids = det_bbox_ids.asnumpy()
                det_bbox_ids = np.expand_dims(det_bbox_ids, axis=0)
                predict_ind = int(det_ids[0][img][ids].asnumpy()[0])

                gt_bbox_ids = gt_bboxes_teste[ids]
                gt_bbox_ids = gt_bbox_ids.asnumpy()
                gt_bbox_ids = np.expand_dims(gt_bbox_ids, axis=0)
                gt_ind = int(gt_ids_teste[ids].asnumpy()[0])

                iou = bbox_iou(det_bbox_ids, gt_bbox_ids)

                # Uncomment the following line to plot the images in each
                # inference and visually check the tp, fp and fn:
                # self.show_images(x, gt_bbox_ids, det_bbox_ids, img)

                # check whether the IoU is above the threshold and the
                # class id corresponds to the ground truth
                if (iou > validation_threshold) and (predict_ind == gt_ind):
                    tp[gt_ind] += 1  # correct classification
                else:
                    fp[predict_ind] += 1  # wrong classification

    # calculate the recall and precision by class
    tp = np.array(tp)
    fp = np.array(fp)

    # rec and prec according to micro averaging
    for i, (gt_value, tp_value) in enumerate(zip(gt_by_class, tp)):
        rec_by_class[i] += tp_value / gt_value

        # if an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan
        with np.errstate(divide='ignore', invalid='ignore'):
            prec_by_class[i] += tp_value / (tp_value + fp[i])

    return val_metric.get(), rec_by_class, prec_by_class