def test_non_maximum_suppression_consistency(self):
    """Check that NMS selects the same indices for CPU and GPU inputs."""
    boxes = generate_random_bbox(6000, (600, 800), 32, 512)
    iou_thresh = 0.5

    selected_on_cpu = non_maximum_suppression(boxes, iou_thresh)
    selected_on_gpu = non_maximum_suppression(cuda.to_gpu(boxes), iou_thresh)

    # The GPU result is copied back to the host before comparing.
    np.testing.assert_equal(selected_on_cpu, cuda.to_cpu(selected_on_gpu))
def check_non_maximum_suppression(self, bbox, threshold, expect):
    """Assert that NMS on ``bbox`` returns exactly the indices in ``expect``.

    Besides the selected indices, the result must have the same array type
    as ``bbox`` and an ``int32`` dtype.
    """
    result = non_maximum_suppression(bbox, threshold)

    self.assertIsInstance(result, type(bbox))
    self.assertEqual(result.dtype, np.int32)

    # Compare on the host regardless of where the arrays live.
    actual = cuda.to_cpu(result)
    expected = cuda.to_cpu(expect)
    np.testing.assert_equal(actual, expected)
def _suppress(raw_bbox, raw_score, nms_thresh, score_thresh):
    """Threshold by score and run NMS independently for every class column.

    Index 0 along axis 1 of both inputs is skipped: iteration ``k`` reads
    ``raw_bbox[:, k + 1]`` and ``raw_score[:, k + 1]`` and emits label ``k``.

    Args:
        raw_bbox (array): Boxes, indexed as ``raw_bbox[:, class]``.
        raw_score (array): Scores, indexed as ``raw_score[:, class]``.
        nms_thresh (float): Threshold forwarded to
            ``utils.non_maximum_suppression``.
        score_thresh (float): Entries scoring below this value are dropped.

    Returns:
        tuple: ``(bbox, label, score)`` cast to ``float32``, ``int32`` and
        ``float32`` respectively.
    """
    xp = cuda.get_array_module(raw_bbox, raw_score)
    n_fg = raw_score.shape[1] - 1

    kept_bbox, kept_label, kept_score = [], [], []
    for fg_idx in range(n_fg):
        column = fg_idx + 1
        above = raw_score[:, column] >= score_thresh
        cls_bbox = raw_bbox[:, column][above]
        cls_score = raw_score[:, column][above]

        # Order by descending score; NMS below receives no score argument.
        ranking = argsort(-cls_score)
        cls_bbox = cls_bbox[ranking]
        cls_score = cls_score[ranking]

        survivors = utils.non_maximum_suppression(cls_bbox, nms_thresh)
        cls_bbox = cls_bbox[survivors]
        cls_score = cls_score[survivors]

        kept_bbox.append(cls_bbox)
        kept_label.append(xp.array((fg_idx, ) * len(cls_bbox)))
        kept_score.append(cls_score)

    bbox = xp.vstack(kept_bbox).astype(np.float32)
    label = xp.hstack(kept_label).astype(np.int32)
    score = xp.hstack(kept_score).astype(np.float32)
    return bbox, label, score
def _suppress(self, raw_cls_bbox, raw_prob):
    """Filter per-class detections by score and then by NMS.

    Args:
        raw_cls_bbox (array): Per-class boxes; reshaped here to
            ``(-1, self.n_class, 4)``.
        raw_prob (array): Class probabilities, indexed as
            ``raw_prob[:, class]``.

    Returns:
        tuple: ``(bbox, label, score)`` as ``float32``, ``int32`` and
        ``float32`` NumPy arrays. Labels are in ``[0, self.n_class - 2]``.
    """
    boxes_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))

    kept_bbox, kept_label, kept_score = [], [], []
    # Class id 0 is the background class, so iteration starts at 1.
    for cls_id in range(1, self.n_class):
        prob = raw_prob[:, cls_id]

        # thresholding by score
        confident = prob > self.score_thresh
        cand_bbox = boxes_by_class[:, cls_id, :][confident]
        prob = prob[confident]

        # thresholding by nms
        keep = non_maximum_suppression(cand_bbox, self.nms_thresh, prob)

        kept_bbox.append(cand_bbox[keep])
        # Shift by one so that the first foreground class gets label 0.
        kept_label.append(np.full((len(keep),), cls_id - 1, dtype=np.float64))
        kept_score.append(prob[keep])

    bbox = np.concatenate(kept_bbox, axis=0).astype(np.float32)
    label = np.concatenate(kept_label, axis=0).astype(np.int32)
    score = np.concatenate(kept_score, axis=0).astype(np.float32)
    return bbox, label, score
def _suppress(self, raw_bbox, raw_score):
    """Select boxes per foreground class by score and, optionally, NMS.

    The same ``raw_bbox`` rows are considered for every class; only the score
    column changes. NMS is skipped when ``self.nms_thresh`` is ``None``.

    Args:
        raw_bbox (array): Candidate boxes, one row per candidate.
        raw_score (array): Scores; class ``k`` reads column ``k + 1``.

    Returns:
        tuple: ``(bbox, label, score)`` cast to ``float32``, ``int32`` and
        ``float32`` respectively.
    """
    xp = self.xp
    kept_bbox, kept_label, kept_score = [], [], []

    for fg_idx in range(self.n_fg_class):
        # The fg_idx-th class corresponds to the (fg_idx + 1)-th column.
        cls_score = raw_score[:, fg_idx + 1]
        passed = cls_score >= self.score_thresh
        cls_bbox = raw_bbox[passed]
        cls_score = cls_score[passed]

        if self.nms_thresh is not None:
            survivors = utils.non_maximum_suppression(
                cls_bbox, self.nms_thresh, cls_score)
            cls_bbox = cls_bbox[survivors]
            cls_score = cls_score[survivors]

        kept_bbox.append(cls_bbox)
        kept_label.append(xp.array((fg_idx,) * len(cls_bbox)))
        kept_score.append(cls_score)

    bbox = xp.vstack(kept_bbox).astype(np.float32)
    label = xp.hstack(kept_label).astype(np.int32)
    score = xp.hstack(kept_score).astype(np.float32)
    return bbox, label, score
def _suppress(self, raw_cls_bbox, raw_cls_roi, raw_prob, raw_mask):
    """Filter per-class boxes, RoIs and masks by score and then by NMS.

    Args:
        raw_cls_bbox (array): Per-class boxes; reshaped here to
            ``(-1, self.n_class, 4)``.
        raw_cls_roi (array): Per-class RoIs; reshaped the same way.
        raw_prob (array): Class probabilities, indexed ``raw_prob[:, class]``.
        raw_mask (array): Masks, indexed ``raw_mask[:, class]``.

    Returns:
        tuple: ``(bbox, roi, label, score, mask)``, every array cast to
        ``float32``.
    """
    kept = {'bbox': [], 'roi': [], 'label': [], 'score': [], 'mask': []}

    # Class index 0 is never visited; the loop starts at 1.
    for cls_id in range(1, self.n_class):
        prob = raw_prob[:, cls_id]
        confident = prob > self.score_thresh

        cand_bbox = raw_cls_bbox.reshape(
            (-1, self.n_class, 4))[:, cls_id, :][confident]
        cand_roi = raw_cls_roi.reshape(
            (-1, self.n_class, 4))[:, cls_id, :][confident]
        cand_mask = raw_mask[:, cls_id][confident]
        prob = prob[confident]

        keep = non_maximum_suppression(cand_bbox, self.nms_thresh, prob)

        kept['bbox'].append(cand_bbox[keep])
        kept['roi'].append(cand_roi[keep])
        # Labels are in [0, self.n_class - 2].
        kept['label'].append(
            np.full((len(keep), ), cls_id - 1, dtype=np.float64))
        kept['score'].append(prob[keep])
        kept['mask'].append(cand_mask[keep])

    bbox = np.concatenate(kept['bbox'], axis=0).astype(np.float32)
    roi = np.concatenate(kept['roi'], axis=0).astype(np.float32)
    # NOTE(review): labels are returned as float32 here, whereas similar
    # helpers cast labels to int32 -- confirm that callers expect floats.
    label = np.concatenate(kept['label'], axis=0).astype(np.float32)
    score = np.concatenate(kept['score'], axis=0).astype(np.float32)
    mask = np.concatenate(kept['mask'], axis=0).astype(np.float32)
    return bbox, roi, label, score, mask
def _suppress(self, raw_cls_bbox, raw_prob, raw_roi, raw_level):
    """Filter per-class detections and carry their RoIs and levels along.

    Args:
        raw_cls_bbox (array): Per-class boxes; reshaped here to
            ``(-1, self.n_class, 4)``.
        raw_prob (array): Class probabilities, indexed ``raw_prob[:, class]``.
        raw_roi (array): RoIs, indexed ``raw_roi[:, class, :]``.
        raw_level (array): One entry per candidate row.

    Returns:
        tuple: ``(bbox, label, score, roi, level)``. ``bbox`` and ``score``
        are ``float32``; ``label`` and ``level`` are ``int32``; ``roi`` keeps
        the dtype produced by concatenation.
    """
    kept_bbox, kept_label, kept_score = [], [], []
    kept_roi, kept_level = [], []

    # Skip cls_id = 0 because it is the background class.
    # -> the mask index starts from 0, so (cls_id - 1) is used.
    # -> Oops: in TrainChain the last class (ToothBlush) ends up out of range.
    for cls_id in range(1, self.n_class):
        if self.predict_mask and cls_id == self.n_class - 1:
            # Not essential at all, but the offset was wrong during the
            # training for mask estimation, so cls_id == self.n_class - 1
            # may cause an index-out-of-bounds error? Needs verification.
            continue

        prob = raw_prob[:, cls_id]
        confident = prob > self.score_thresh
        cand_bbox = raw_cls_bbox.reshape(
            (-1, self.n_class, 4))[:, cls_id, :][confident]
        prob = prob[confident]

        keep = non_maximum_suppression(cand_bbox, self.nms_thresh, prob)

        kept_bbox.append(cand_bbox[keep])
        # The labels are in [0, self.n_class - 2].
        kept_label.append(np.full((len(keep), ), cls_id - 1, dtype=np.float64))
        kept_score.append(prob[keep])

        cand_roi = raw_roi[:, cls_id, :][confident]
        kept_roi.append(cand_roi[keep])
        cand_level = raw_level[confident]
        kept_level.append(cand_level[keep])

    bbox = np.concatenate(kept_bbox, axis=0).astype(np.float32)
    label = np.concatenate(kept_label, axis=0).astype(np.int32)
    score = np.concatenate(kept_score, axis=0).astype(np.float32)
    roi = np.concatenate(kept_roi, axis=0)
    level = np.concatenate(kept_level, axis=0).astype(np.int32)
    return bbox, label, score, roi, level
def check_non_maximum_suppression_options(
        self, bbox, threshold, score, limit):
    """Check the ``score`` and ``limit`` options against a manual reference.

    The reference result is built by sorting the boxes by descending score
    before calling NMS without options, truncating to ``limit`` and mapping
    the indices back to the original order.
    """
    # Result under test: all options are passed to the function.
    with_options = non_maximum_suppression(bbox, threshold, score, limit)
    self.assertIsInstance(with_options, type(bbox))

    # Reference: reorder the inputs before passing them to the function,
    # then translate the outputs back through the same ordering.
    by_score_desc = score.argsort()[::-1]
    plain = non_maximum_suppression(
        bbox[by_score_desc], threshold, score=None, limit=None)
    reference = by_score_desc[plain[:limit]]

    np.testing.assert_equal(
        cuda.to_cpu(with_options), cuda.to_cpu(reference))
def decode(self, mb_loc, mb_conf, nms_thresh, score_thresh):
    """Decode multibox outputs into boxes, labels and scores.

    Scores are a softmax over ``mb_conf``. Boxes are first filtered per class
    (score threshold, then NMS) and afterwards a second NMS is run over the
    boxes of all classes together.

    Unlike the previous implementation, ``mb_conf`` is not modified in
    place; the overflow clamp is applied to a temporary and on every backend.

    Args:
        mb_loc (array): Localization offsets, one row per default box.
        mb_conf (array): Class logits; class ``k`` reads column ``k + 1``.
        nms_thresh (float): Threshold used by both NMS passes.
        score_thresh (float): Entries scoring below this value are dropped.

    Returns:
        tuple: ``(bbox, label, score)`` cast to ``float32``, ``int32`` and
        ``float32`` respectively.
    """
    xp = self.xp

    # Apply the predicted offsets to a copy of the default boxes.
    # NOTE(review): presumably (center, size) -> (min, max) corners; confirm
    # against the layout of self._default_bbox.
    mb_bbox = self._default_bbox.copy()
    mb_bbox[:, :2] += (
        mb_loc[:, :2] * self._variance[0] * self._default_bbox[:, 2:])
    mb_bbox[:, 2:] *= xp.exp(mb_loc[:, 2:] * self._variance[1])
    mb_bbox[:, :2] -= mb_bbox[:, 2:] / 2
    mb_bbox[:, 2:] += mb_bbox[:, :2]

    # Softmax. Logits are clamped to avoid overflow in exp(); xp.minimum
    # returns a new array, so the caller's mb_conf stays untouched.
    mb_score = xp.exp(xp.minimum(mb_conf, 88.72))
    mb_score /= mb_score.sum(axis=1, keepdims=True)

    # intra-class non-maximum suppression
    bbox = []
    label = []
    score = []
    for l in range(mb_conf.shape[1] - 1):
        score_l = mb_score[:, l + 1]
        mask = score_l >= score_thresh
        bbox_l = mb_bbox[mask]
        score_l = score_l[mask]

        indices = utils.non_maximum_suppression(bbox_l, nms_thresh, score_l)
        bbox_l = bbox_l[indices]
        score_l = score_l[indices]

        bbox.append(bbox_l)
        label.append(xp.array((l, ) * len(bbox_l)))
        score.append(score_l)

    # inter-class non-maximum suppression
    bbox = xp.vstack(bbox)
    label = xp.hstack(label)
    score = xp.hstack(score)
    indices = utils.non_maximum_suppression(bbox, nms_thresh, score)

    bbox = bbox[indices].astype(np.float32)
    label = label[indices].astype(np.int32)
    score = score[indices].astype(np.float32)
    return bbox, label, score
def check_non_maximum_suppression_options(self, bbox, threshold, score,
                                          limit):
    """Verify that ``score``/``limit`` match sort-then-truncate semantics."""
    # Pass all options to the tested function.
    scored_selec = non_maximum_suppression(bbox, threshold, score, limit)
    self.assertIsInstance(scored_selec, type(bbox))

    # Build the expected selection by hand: sort the boxes by descending
    # score, run NMS with no options, keep the first ``limit`` indices and
    # map them back to positions in the unsorted input.
    order = score.argsort()[::-1]
    sorted_bbox = bbox[order]
    expected = non_maximum_suppression(
        sorted_bbox, threshold, score=None, limit=None)
    expected = expected[:limit]
    expected = order[expected]

    got_cpu = cuda.to_cpu(scored_selec)
    expected_cpu = cuda.to_cpu(expected)
    np.testing.assert_equal(got_cpu, expected_cpu)
def get_humans_by_feature(model, feature_map, detection_thresh=0.15,
                          min_num_keypoints=-1):
    """Assemble per-person keypoint boxes from a network feature map.

    Root candidates are grid cells whose ``resp * conf`` value for node 0
    exceeds ``detection_thresh``; overlapping roots are removed with NMS
    (threshold 0.3). From every surviving root, the chains listed in
    ``DIRECTED_GRAPHS`` are followed: for each edge the arg-max of the local
    edge map gives the offset to the next cell, and the chain stops when the
    next cell falls outside the grid or scores below ``detection_thresh``.

    Args:
        model: Object providing ``outsize``, ``restore_xy``,
            ``restore_size`` and ``local_grid_size``.
        feature_map (tuple): Seven arrays ``(resp, conf, x, y, w, h, e)``.
        detection_thresh (float): Minimum ``resp * conf`` for a detection.
        min_num_keypoints (int): A person is kept only when it has at least
            this many entries besides the root.

    Returns:
        list of dict: One dict per person, mapping a node index to its box
        ``(ymin, xmin, ymax, xmax)``.
    """
    resp, conf, x, y, w, h, e = feature_map
    delta = resp * conf
    outW, outH = model.outsize
    ROOT_NODE = 0  # instance

    # Boxes for every node at every grid cell; the coordinate axis is moved
    # to the end so that bbox[node, row, col] is a 4-vector.
    rx, ry = model.restore_xy(x, y)
    rw, rh = model.restore_size(w, h)
    ymin, ymax = ry - rh / 2, ry + rh / 2
    xmin, xmax = rx - rw / 2, rx + rw / 2
    bbox = np.array([ymin, xmin, ymax, xmax])
    bbox = bbox.transpose(1, 2, 3, 0)

    # Root candidates above the threshold, de-duplicated with NMS.
    score = delta[ROOT_NODE]
    candidate = np.where(score > detection_thresh)
    score = score[candidate]
    root_bbox = bbox[ROOT_NODE][candidate]
    selected = non_maximum_suppression(
        bbox=root_bbox, thresh=0.3, score=score)

    humans = []
    e = e.transpose(0, 3, 4, 1, 2)
    for hxw in zip(candidate[0][selected], candidate[1][selected]):
        human = {ROOT_NODE: bbox[(ROOT_NODE, hxw[0], hxw[1])]}  # initial
        for graph in DIRECTED_GRAPHS:
            eis, ts = graph
            i_h, i_w = hxw
            for ei, t in zip(eis, ts):
                index = (ei, i_h, i_w)  # must be tuple
                u_ind = np.unravel_index(
                    np.argmax(e[index]), e[index].shape)
                # Offset within the local grid, relative to its center.
                j_h = i_h + u_ind[0] - model.local_grid_size[1] // 2
                j_w = i_w + u_ind[1] - model.local_grid_size[0] // 2
                if j_h < 0 or j_w < 0 or j_h >= outH or j_w >= outW:
                    break
                if delta[t, j_h, j_w] < detection_thresh:
                    break
                human[t] = bbox[(t, j_h, j_w)]
                i_h, i_w = j_h, j_w
        # The root entry itself does not count as a keypoint.
        if min_num_keypoints <= len(human) - 1:
            humans.append(human)

    logger.info('num humans = {}'.format(len(humans)))
    return humans
def mask_voting(
        rois, cls_probs, mask_probs, n_class, H, W,
        score_thresh=0.7, nms_thresh=0.3,
        mask_merge_thresh=0.5, binary_thresh=0.4):
    """Merge overlapping RoI masks per class and keep the confident ones.

    For every class column ``1 .. n_class - 1`` the RoIs are filtered by a
    fixed probability floor (0.001) and NMS (at most 100 kept). For every
    surviving RoI, all RoIs whose IoU with it exceeds ``mask_merge_thresh``
    are aggregated by ``mask_aggregation`` with normalized class
    probabilities as weights. Finally only entries whose probability exceeds
    ``score_thresh`` are returned.

    Returns:
        tuple: ``(v_labels, v_masks, v_bboxes, v_cls_probs)``.
    """
    mask_size = mask_probs.shape[-1]
    out_labels = np.empty((0, ), dtype=np.int32)
    out_masks = np.empty((0, mask_size, mask_size), dtype=np.float32)
    out_bboxes = np.empty((0, 4), dtype=np.float32)
    out_probs = np.empty((0, ), dtype=np.float32)

    for fg_idx in range(n_class - 1):
        prob_col = fg_idx + 1

        # non maximum suppression
        cls_prob = cls_probs[:, prob_col]
        above_floor = cls_prob >= 0.001
        cand_bbox = rois[above_floor]
        cls_prob = cls_prob[above_floor]
        keep = non_maximum_suppression(
            cand_bbox, nms_thresh, cls_prob, limit=100)
        cand_bbox = cand_bbox[keep]
        cls_prob = cls_prob[keep]

        n_cand = len(cand_bbox)
        merged_masks = np.zeros((n_cand, mask_size, mask_size))
        merged_bboxes = np.zeros((n_cand, 4))
        for i, box in enumerate(cand_bbox):
            # Every RoI overlapping this box contributes to the merge.
            iou = bbox_iou(rois, box[np.newaxis, :])
            neighbours = np.where(iou > mask_merge_thresh)[0]
            weights = cls_probs[neighbours, prob_col]
            weights = weights / weights.sum()
            orig_mask, merged_bboxes[i] = mask_aggregation(
                rois[neighbours], mask_probs[neighbours], weights,
                H, W, binary_thresh)
            merged_masks[i] = cv2.resize(
                orig_mask.astype(np.float32), (mask_size, mask_size))

        confident = cls_prob > score_thresh
        sel_probs = cls_prob[confident]
        sel_masks = merged_masks[confident]
        sel_bboxes = merged_bboxes[confident]
        sel_labels = np.repeat(fg_idx, sel_bboxes.shape[0])

        out_probs = np.concatenate((out_probs, sel_probs))
        out_masks = np.concatenate((out_masks, sel_masks))
        out_bboxes = np.concatenate((out_bboxes, sel_bboxes))
        out_labels = np.concatenate((out_labels, sel_labels))

    return out_labels, out_masks, out_bboxes, out_probs
def detect(
    self, image: Image.Image, nms_iou_threshold: float = 0.5
) -> Tuple[np.ndarray, np.ndarray]:
    """Detect characters from the image.

    Args:
        image: Input image.
        nms_iou_threshold: Threshold passed to non-maximum suppression.

    Returns:
        A ``(bboxes, scores)`` pair of host arrays. Box coordinates are
        rescaled from heatmap resolution to the original image size.
    """
    img_w, img_h = image.size

    # Resize so that the shorter side equals ``image_min_side``; the longer
    # side keeps the aspect ratio, rounded to a multiple of 32.
    if img_w < img_h:
        w = self.image_min_side
        h = img_h * self.image_min_side / img_w
        h = 32 * int(round(h / 32))
    else:
        h = self.image_min_side
        w = img_w * self.image_min_side / img_h
        w = 32 * int(round(w / 32))
    image = image.resize((w, h), resample=Image.BILINEAR)

    # HWC image -> batch of one CHW array, scaled by (x - 127.5) / 128.
    img = np.asarray(image, dtype=np.float32).transpose(2, 0, 1)
    imgs = img.reshape(1, *img.shape)
    if self.xp != np:
        imgs = cuda.to_gpu(imgs)
    imgs = (imgs - 127.5) / 128.0

    with chainer.using_config('train', False), chainer.no_backprop_mode():
        heatmap = self(imgs)
    heatmap = heatmap.array

    # NOTE(review): this slices the FIRST axis of ``heatmap``. If that axis
    # is the batch axis (size 1 here), the slice is empty and the sigmoid is
    # never applied -- confirm whether ``heatmap[:, :-4]`` was intended.
    heatmap[:-4] = _sigmoid(heatmap[:-4])

    bboxes, _, scores = heatmap_to_labeled_bboxes(
        heatmap, self.score_threshold)
    bboxes, scores = bboxes[0], scores[0]

    # Map coordinates from heatmap resolution back to the original image.
    hm_h, hm_w = heatmap.shape[2:4]
    bboxes[:, 0::2] *= img_w / hm_w
    bboxes[:, 1::2] *= img_h / hm_h

    keep = non_maximum_suppression(bboxes, nms_iou_threshold, score=scores)
    bboxes = bboxes[keep]
    scores = scores[keep]

    if self.xp != np:
        bboxes = cuda.to_cpu(bboxes)
        scores = cuda.to_cpu(scores)
    return bboxes, scores
def _filter_overlapping_bboxs(self, mb_boxs, mb_confs):
    """Drop overlapping boxes per sample and pair labels with confidences.

    NMS (threshold ``self.nms_thresh``) is run on each sample's boxes; the
    surviving indices select the matching confidences and ground-truth
    labels from ``self.gt_mb_labels``.

    Args:
        mb_boxs: Per-sample box arrays.
        mb_confs: Per-sample confidences, indexable by the NMS result.

    Returns:
        zip: Pairs of ``(label, conf)`` over all samples.
    """
    confs = []
    labels = []
    for box, conf, label in zip(mb_boxs, mb_confs, self.gt_mb_labels):
        indices = utils.non_maximum_suppression(box, self.nms_thresh)
        confs.append(conf[indices])
        # to_cpu() copies device arrays to the host and passes NumPy arrays
        # through, so the result no longer depends on whether CUDA merely
        # happens to be installed.
        labels.append(chainer.cuda.to_cpu(label[indices]))
    confs = F.concat(confs, axis=0)
    labels = np.concatenate(labels)
    return zip(labels, confs)
def filter_overlapping_bboxs(self, mb_boxs, mb_confs, gt_labels,
                             nms_thresh=0.5):
    """Drop overlapping boxes per sample and pair labels with confidences.

    Args:
        mb_boxs: Per-sample box arrays.
        mb_confs: Per-sample confidences, indexable by the NMS result.
        gt_labels: Per-sample label arrays.
        nms_thresh (float): Threshold passed to NMS. Defaults to ``0.5``,
            the value that used to be hard-coded.

    Returns:
        zip: Pairs of ``(label, conf)`` over all samples.
    """
    confs = []
    labels = []
    for box, conf, label in zip(mb_boxs, mb_confs, gt_labels):
        indices = non_maximum_suppression(box, nms_thresh)
        confs.append(conf[indices])
        # Labels are indexed with host indices. to_cpu() handles both device
        # and NumPy index arrays, so this also works when CUDA is installed
        # but the data lives on the CPU.
        labels.append(label[chainer.cuda.to_cpu(indices)])
    confs = F.concat(confs, axis=0)
    labels = np.concatenate(labels)
    return zip(labels, confs)
def _decode(self, loc, obj, conf):
    """Decode raw outputs into boxes, labels, scores and layer ids.

    The score of a box for a class is ``sigmoid(obj) * sigmoid(conf)``.
    Each kept box is also tagged with the position of its ``self._step``
    value among the distinct step values (in order of first appearance).

    Returns:
        tuple: ``(bbox, label, score, layer_id)`` cast to ``float32``,
        ``int32``, ``float32`` and ``int32`` respectively.
    """
    xp = self.xp

    # Apply offsets to a copy of the default boxes.
    raw_bbox = self._default_bbox.copy()
    raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
    raw_bbox[:, :2] *= self._step[:, None]
    raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
    raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
    raw_bbox[:, 2:] += raw_bbox[:, :2]

    objectness = 1 / (1 + xp.exp(-obj))
    class_prob = 1 / (1 + xp.exp(-conf))
    raw_score = objectness[:, None] * class_prob

    # Distinct step values, ordered by first appearance.
    host_step = cuda.to_cpu(self._step)
    step_list = sorted(set(host_step), key=host_step.tolist().index)

    kept_bbox, kept_label, kept_score, kept_layer = [], [], [], []
    for fg_idx in range(self.n_fg_class):
        cls_score = raw_score[:, fg_idx]
        passed = cls_score >= self.score_thresh
        cls_bbox = raw_bbox[passed]
        cls_score = cls_score[passed]
        cls_step = self._step[passed]
        cls_layer = np.array([step_list.index(step) for step in cls_step])

        survivors = utils.non_maximum_suppression(
            cls_bbox, self.nms_thresh, cls_score)
        cls_bbox = cls_bbox[survivors]
        cls_score = cls_score[survivors]
        # The layer ids live on the host, so index them with host indices.
        cls_layer = cls_layer[cuda.to_cpu(survivors)]

        kept_bbox.append(cls_bbox)
        kept_label.append(xp.array((fg_idx,) * len(cls_bbox)))
        kept_score.append(cls_score)
        kept_layer.append(cls_layer)

    bbox = xp.vstack(kept_bbox).astype(np.float32)
    label = xp.hstack(kept_label).astype(np.int32)
    score = xp.hstack(kept_score).astype(np.float32)
    layer_id = xp.hstack(kept_layer).astype(np.int32)
    return bbox, label, score, layer_id
def _suppress_each_box(self, raw_cls_bbox, raw_prob):
    """Keep, for every candidate, only its arg-max class and then run NMS.

    Each row is assigned its highest-probability class. Rows whose best
    class is 0 or whose probability does not exceed ``self.score_thresh``
    are dropped, and a single NMS pass is run over the remainder.

    Several intermediate values are stored on ``self`` (``raw_cls_bbox``,
    ``prob_l``, ``out``, ``best_class``, ``mask``, ``keep``, ``raw_prob``);
    they are kept for callers that inspect them.

    Args:
        raw_cls_bbox (array): Per-class boxes; reshaped here to
            ``(-1, self.n_class, 4)``.
        raw_prob (array): Class probabilities, one row per candidate.

    Returns:
        tuple: ``(bbox, label, prob)`` as ``float32``, ``int32`` and
        ``float32`` arrays. Labels are the best class index minus one.
    """
    best_class = raw_prob.argmax(axis=1)
    self.raw_cls_bbox = raw_cls_bbox

    best_class = best_class[:len(raw_cls_bbox)]
    rows = np.arange(len(best_class))

    # Pick box and probability of the best class of each row in one indexing
    # operation. Unlike a per-row list, this keeps the (0, 4) box shape when
    # there are no candidates.
    cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[rows, best_class]
    prob_l = raw_prob[rows, best_class]
    self.prob_l = prob_l

    # Class 0 is excluded here; see the ``- 1`` applied to the labels below.
    mask = np.logical_and(prob_l > self.score_thresh, best_class > 0)
    cls_bbox_l = cls_bbox_l[mask]
    prob_l = prob_l[mask]

    keep = non_maximum_suppression(cls_bbox_l, self.nms_thresh, prob_l)

    self.out = prob_l
    self.best_class = best_class
    self.mask = mask
    self.keep = keep
    self.raw_prob = raw_prob

    bbox = cls_bbox_l[keep].astype(np.float32)
    label = (best_class[mask][keep] - 1).astype(np.int32)
    prob = prob_l[keep].astype(np.float32)
    return bbox, label, prob
def detect(self, image: Image.Image):
    """Combine the detections of all detectors by bounding-box voting.

    All detections are pooled and reduced with NMS (threshold 0.3) to a set
    of base boxes. Every base box is then replaced by the score-weighted
    mean of all pooled boxes whose IoU with it is at least 0.5; its score
    becomes the mean of their scores. Base boxes supported by fewer than
    ``self.min_votes`` pooled boxes are discarded.

    Returns:
        tuple: ``(refined_bboxes, refined_scores)``.
    """
    # Collect the predictions of every detector.
    bbox_parts = []
    score_parts = []
    for detector in self.detectors:
        det_bboxes, det_scores = detector.detect(image)
        bbox_parts.append(det_bboxes)
        score_parts.append(det_scores)
    all_bboxes = np.concatenate(bbox_parts)
    all_scores = np.concatenate(score_parts)

    # NMS yields the base boxes that will be refined.
    keep = non_maximum_suppression(all_bboxes, thresh=0.3, score=all_scores)
    base_bboxes = all_bboxes[keep]
    base_scores = all_scores[keep]

    # Row i marks the pooled boxes that vote for base box i.
    match_mat = calc_iou_mat(base_bboxes, all_bboxes) >= 0.5
    votes = np.sum(match_mat, axis=1)

    refined_bboxes = np.empty_like(base_bboxes)
    refined_scores = np.empty_like(base_scores)
    for i in range(len(base_bboxes)):
        voters = match_mat[i]
        weights = all_scores[voters]
        voter_bboxes = all_bboxes[voters]
        weighted_sum = np.sum(weights[:, None] * voter_bboxes, axis=0)
        refined_bboxes[i] = weighted_sum / weights.sum()
        refined_scores[i] = np.average(weights)

    enough_votes = votes >= self.min_votes
    return refined_bboxes[enough_votes], refined_scores[enough_votes]
def _suppress(self, raw_cls_bbox, raw_prob):
    """Apply the score threshold and NMS to every foreground class.

    Args:
        raw_cls_bbox (array): Per-class boxes; reshaped here to
            ``(-1, self.n_class, 4)``.
        raw_prob (array): Class probabilities, indexed ``raw_prob[:, class]``.

    Returns:
        tuple: ``(bbox, label, score)`` as ``float32``, ``int32`` and
        ``float32`` NumPy arrays.
    """
    results = []
    # skip cls_id = 0 because it is the background class
    for cls_id in range(1, self.n_class):
        cls_bbox = raw_cls_bbox.reshape((-1, self.n_class, 4))[:, cls_id, :]
        cls_prob = raw_prob[:, cls_id]

        selected = cls_prob > self.score_thresh
        cls_bbox = cls_bbox[selected]
        cls_prob = cls_prob[selected]

        keep = non_maximum_suppression(cls_bbox, self.nms_thresh, cls_prob)

        # The labels are in [0, self.n_class - 2].
        cls_label = (cls_id - 1) * np.ones((len(keep),))
        results.append((cls_bbox[keep], cls_label, cls_prob[keep]))

    bbox = np.concatenate([r[0] for r in results], axis=0).astype(np.float32)
    label = np.concatenate([r[1] for r in results], axis=0).astype(np.int32)
    score = np.concatenate([r[2] for r in results], axis=0).astype(np.float32)
    return bbox, label, score
def _decode(self, loc, obj, conf):
    """Decode raw outputs into boxes, labels and scores.

    The score of a box for a class is ``sigmoid(obj) * softmax(conf)``.
    Boxes are scaled by ``self.insize / self.extractor.grid`` and then
    filtered per class by score threshold and NMS.

    Returns:
        tuple: ``(bbox, label, score)`` cast to ``float32``, ``int32`` and
        ``float32`` respectively.
    """
    xp = self.xp

    raw_bbox = self._default_bbox.copy()
    raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
    raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
    raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
    raw_bbox[:, 2:] += raw_bbox[:, :2]
    raw_bbox *= self.insize / self.extractor.grid

    objectness = 1 / (1 + xp.exp(-obj))
    # Softmax over the class axis.
    class_prob = xp.exp(conf)
    class_prob /= class_prob.sum(axis=1, keepdims=True)
    raw_score = objectness[:, None] * class_prob

    kept_bbox, kept_label, kept_score = [], [], []
    for fg_idx in range(self.n_fg_class):
        cls_score = raw_score[:, fg_idx]
        passed = cls_score >= self.score_thresh
        cls_bbox = raw_bbox[passed]
        cls_score = cls_score[passed]

        survivors = utils.non_maximum_suppression(
            cls_bbox, self.nms_thresh, cls_score)
        cls_bbox = cls_bbox[survivors]
        cls_score = cls_score[survivors]

        kept_bbox.append(cls_bbox)
        kept_label.append(xp.array((fg_idx, ) * len(cls_bbox)))
        kept_score.append(cls_score)

    bbox = xp.vstack(kept_bbox).astype(np.float32)
    label = xp.hstack(kept_label).astype(np.int32)
    score = xp.hstack(kept_score).astype(np.float32)
    return bbox, label, score
def _decode(self, loc, conf):
    """Decode raw outputs into host-side boxes, labels and scores.

    After a sigmoid, column 0 of ``conf`` is multiplied into the remaining
    columns to form the per-class scores. Results are copied to the host
    with ``cuda.to_cpu`` before being stacked.

    Returns:
        tuple: ``(bbox, label, score)`` as ``float32``, ``int32`` and
        ``float32`` NumPy arrays.
    """
    xp = self.xp

    raw_bbox = self._default_bbox.copy()
    raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
    raw_bbox[:, :2] *= self._step[:, None]
    raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
    raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
    raw_bbox[:, 2:] += raw_bbox[:, :2]

    activated = 1 / (1 + xp.exp(-conf))
    raw_score = activated[:, 0, None] * activated[:, 1:]

    kept_bbox, kept_label, kept_score = [], [], []
    for fg_idx in range(self.n_fg_class):
        cls_score = raw_score[:, fg_idx]
        passed = cls_score >= self.score_thresh
        cls_bbox = raw_bbox[passed]
        cls_score = cls_score[passed]

        survivors = utils.non_maximum_suppression(
            cls_bbox, self.nms_thresh, cls_score)
        cls_bbox = cls_bbox[survivors]
        cls_score = cls_score[survivors]

        kept_bbox.append(cuda.to_cpu(cls_bbox))
        kept_label.append(np.array((fg_idx,) * len(cls_bbox)))
        kept_score.append(cuda.to_cpu(cls_score))

    bbox = np.vstack(kept_bbox).astype(np.float32)
    label = np.hstack(kept_label).astype(np.int32)
    score = np.hstack(kept_score).astype(np.float32)
    return bbox, label, score
def decode(self, mb_loc, mb_conf, nms_thresh=0.45, score_thresh=0.6):
    r"""Decodes back to coordinates and classes of bounding boxes.

    This method decodes :obj:`mb_loc` and :obj:`mb_conf` returned
    by a SSD network back to :obj:`bbox`, :obj:`label` and :obj:`score`.

    The softmax over :obj:`mb_conf` is computed on logits shifted by their
    row maximum, so large logits no longer overflow to ``inf`` and turn
    every score of that row into ``nan``.

    Args:
        mb_loc (array): A float array whose shape is
            :math:`(K, 4)`, :math:`K` is the number of
            default bounding boxes.
        mb_conf (array): A float array whose shape is
            :math:`(K, n\_fg\_class + 1)`.
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`. If :obj:`None`,
            non-maximum suppression is skipped.
        score_thresh (float): The threshold value for confidence score.
            If a bounding box whose confidence score is lower than
            this value, the bounding box will be suppressed.
            The default value is :obj:`0.6`.

    Returns:
        tuple of three arrays:
        This method returns a tuple of three arrays,
        :obj:`(bbox, label, score)`.

        * **bbox**: A float array of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in a image.
          Each bounding box is organized by
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
          in the second axis.
        * **label** : An integer array of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
        * **score** : A float array of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.

    """
    xp = self.xp

    # (center_y, center_x, height, width)
    mb_bbox = self._default_bbox.copy()
    mb_bbox[:, :2] += mb_loc[:, :2] * self._variance[0] \
        * self._default_bbox[:, 2:]
    mb_bbox[:, 2:] *= xp.exp(mb_loc[:, 2:] * self._variance[1])

    # (center_y, center_x, height, width) -> (y_min, x_min, height, width)
    mb_bbox[:, :2] -= mb_bbox[:, 2:] / 2
    # (y_min, x_min, height, width) -> (y_min, x_min, y_max, x_max)
    mb_bbox[:, 2:] += mb_bbox[:, :2]

    # softmax; subtracting the row maximum leaves the result unchanged
    # mathematically but keeps exp() within range.
    mb_score = xp.exp(mb_conf - mb_conf.max(axis=1, keepdims=True))
    mb_score /= mb_score.sum(axis=1, keepdims=True)

    bbox = []
    label = []
    score = []
    for l in range(mb_conf.shape[1] - 1):
        bbox_l = mb_bbox
        # the l-th class corresponds for the (l + 1)-th column.
        score_l = mb_score[:, l + 1]

        mask = score_l >= score_thresh
        bbox_l = bbox_l[mask]
        score_l = score_l[mask]

        if nms_thresh is not None:
            indices = utils.non_maximum_suppression(
                bbox_l, nms_thresh, score_l)
            bbox_l = bbox_l[indices]
            score_l = score_l[indices]

        bbox.append(bbox_l)
        label.append(xp.array((l, ) * len(bbox_l)))
        score.append(score_l)

    bbox = xp.vstack(bbox).astype(np.float32)
    label = xp.hstack(label).astype(np.int32)
    score = xp.hstack(score).astype(np.float32)
    return bbox, label, score
def __call__(self, bbox, score):
    """Return the NMS result for ``bbox`` and ``score`` using ``self._thresh``."""
    return non_maximum_suppression(bbox, self._thresh, score)
def check_non_maximum_suppression_zero_legnth_bbox(
        self, bbox, threshold):
    """Assert that NMS on ``bbox`` returns an empty index array.

    The result must have the same array type as ``bbox`` and shape ``(0,)``.
    """
    result = non_maximum_suppression(bbox, threshold)

    self.assertIsInstance(result, type(bbox))
    expected_shape = (0,)
    self.assertEqual(result.shape, expected_shape)
def mask_voting(roi_cmask_prob, bbox, roi_cls_prob, size, score_thresh,
                nms_thresh, mask_merge_thresh, binary_thresh,
                limit=100, bg_label=0):
    r"""Refine mask probabilities by merging multiple masks.

    First, this function discards invalid masks with non maximum
    suppression. Then, it merges masks with weights calculated from class
    probabilities and iou. This function improves the mask qualities by
    merging overlapped masks predicted as the same object class.

    Here are notations used.

    * :math:`R` is the total number of RoIs produced across batches.
    * :math:`L` is the number of classes excluding the background.
    * :math:`RH` is the height of pooled image.
    * :math:`RW` is the width of pooled image.

    Args:
        roi_cmask_prob (array): A mask probability array whose shape is
            :math:`(R, RH, RW)`.
        bbox (array): A bounding box array whose shape is
            :math:`(R, 4)`.
        roi_cls_prob (array): A class probability array whose shape is
            :math:`(R, L + 1)`.
        size (tuple of int): Original image size.
        score_thresh (float): A threshold value of the class score.
        nms_thresh (float): A threshold value of non maximum suppression.
        mask_merge_thresh (float): A threshold value of the bounding box iou
            for mask merging.
        binary_thresh (float): A threshold value of mask score
            for mask merging.
        limit (int): The maximum number of outputs.
        bg_label (int): The id of the background label.

    Returns:
        array, array, array, array:

        * **v_cmask_prob**: Merged mask probability. Its shape is
          :math:`(N, RH, RW)`.
        * **v_bbox**: Bounding boxes for the merged masks. Its shape is
          :math:`(N, 4)`.
        * **v_label**: Class labels for the merged masks. Its shape is
          :math:`(N, )`. A label is the position of the class among the
          foreground classes, i.e. the class id minus one when
          :obj:`bg_label` is 0.
        * **v_score**: Class probabilities for the merged masks. Its shape
          is :math:`(N, )`.

    """
    roi_cmask_size = roi_cmask_prob.shape[1:]
    n_class = roi_cls_prob.shape[1]
    # Foreground class ids, in order. Indexing the per-class results by the
    # position in this list keeps both passes aligned for any bg_label; for
    # bg_label == 0 the position equals ``label - 1`` as before.
    fg_labels = [label for label in range(n_class) if label != bg_label]

    def _empty_result():
        # Returned whenever no mask survives.
        return (
            np.empty((0, roi_cmask_size[0], roi_cmask_size[1])),
            np.empty((0, 4)),
            np.empty((0, )),
            np.empty((0, )))

    cls_score = []
    cls_bbox = []
    for label in fg_labels:
        # non maximum suppression
        score_l = roi_cls_prob[:, label]
        keep_indices = non_maximum_suppression(bbox, nms_thresh, score_l)
        cls_bbox.append(bbox[keep_indices])
        cls_score.append(score_l[keep_indices])

    if not cls_score:
        return _empty_result()
    sorted_score = np.sort(np.concatenate(cls_score))[::-1]
    if len(sorted_score) == 0:
        return _empty_result()

    # Raise the score threshold so that at most ``limit`` candidates pass.
    n_keep = min(len(sorted_score), limit)
    score_thresh = max(sorted_score[n_keep - 1], score_thresh)

    v_cmask_prob = []
    v_bbox = []
    v_label = []
    v_cls_prob = []
    for fg_index, label in enumerate(fg_labels):
        bbox_l = cls_bbox[fg_index]
        score_l = cls_score[fg_index]
        keep_indices = np.where(score_l >= score_thresh)
        bbox_l = bbox_l[keep_indices]
        score_l = score_l[keep_indices]

        v_cmask_prob_l = []
        v_bbox_l = []
        v_score_l = []
        for i, bb in enumerate(bbox_l):
            # Merge the masks of all RoIs overlapping this box, weighted by
            # their normalized probability for the current class.
            iou = bbox_iou(bbox, bb[np.newaxis, :])
            keep_indices = np.where(iou >= mask_merge_thresh)[0]
            cmask_weight = roi_cls_prob[keep_indices, label]
            cmask_weight = cmask_weight / cmask_weight.sum()
            cmask_prob_i = roi_cmask_prob[keep_indices]
            bbox_i = bbox[keep_indices]
            m_cmask, m_bbox = _mask_aggregation(
                bbox_i, cmask_prob_i, cmask_weight, size, binary_thresh)
            if m_cmask is not None and m_bbox is not None:
                m_cmask = resize(m_cmask.astype(np.float32), roi_cmask_size)
                v_cmask_prob_l.append(m_cmask)
                v_bbox_l.append(m_bbox)
                v_score_l.append(score_l[i])

        if len(v_cmask_prob_l) > 0:
            v_cmask_prob_l = np.concatenate(v_cmask_prob_l)
            v_bbox_l = np.concatenate(v_bbox_l)
            v_score_l = np.array(v_score_l)
            v_label_l = np.repeat(fg_index, v_bbox_l.shape[0])
            v_label_l = v_label_l.astype(np.int32)
            v_cmask_prob.append(v_cmask_prob_l)
            v_bbox.append(v_bbox_l)
            v_label.append(v_label_l)
            v_cls_prob.append(v_score_l)

    if len(v_cmask_prob) == 0:
        return _empty_result()

    v_cmask_prob = np.concatenate(v_cmask_prob)
    v_bbox = np.concatenate(v_bbox)
    v_label = np.concatenate(v_label)
    v_cls_prob = np.concatenate(v_cls_prob)
    return v_cmask_prob, v_bbox, v_label, v_cls_prob
def decode(self, arm_loc, arm_conf, odm_loc, odm_conf,
           nms_thresh=0.45, score_thresh=0.6):
    """Decode two-stage (ARM + ODM) outputs into boxes, labels and scores.

    The default boxes are offset first by ``arm_loc`` and then by
    ``odm_loc``. Class scores are a softmax over ``odm_conf``; rows whose
    objectness derived from ``arm_conf`` is at most 0.01 get a score of 0.
    NMS (at most 400 boxes per class) is skipped when ``nms_thresh`` is
    ``None``.

    Returns:
        tuple: ``(bbox, label, score)`` cast to ``float32``, ``int32`` and
        ``float32`` respectively.
    """
    xp = self.xp
    loc_var, size_var = self._variance[0], self._variance[1]

    # (center_y, center_x, height, width)
    mb_bbox = self._default_bbox.copy()
    mb_bbox[:, :2] += arm_loc[:, :2] * loc_var * self._default_bbox[:, 2:]
    mb_bbox[:, 2:] *= xp.exp(arm_loc[:, 2:] * size_var)

    # Anchor refinement
    mb_bbox[:, :2] += odm_loc[:, :2] * loc_var * mb_bbox[:, 2:]
    mb_bbox[:, 2:] *= xp.exp(odm_loc[:, 2:] * size_var)

    # (center_y, center_x, height, width) -> (y_min, x_min, height, width)
    mb_bbox[:, :2] -= mb_bbox[:, 2:] / 2
    # (y_min, x_min, height, width) -> (y_min, x_min, y_max, x_max)
    mb_bbox[:, 2:] += mb_bbox[:, :2]

    # softmax
    mb_score = xp.exp(odm_conf)
    mb_score /= mb_score.sum(axis=1, keepdims=True)

    positive = xp.exp(arm_conf)
    negative = xp.exp(1 - arm_conf)
    objectness = positive / (positive + negative)

    # negative anchor filtering
    mb_score[objectness <= 0.01] = 0

    kept_bbox, kept_label, kept_score = [], [], []
    for fg_idx in range(odm_conf.shape[1] - 1):
        # The fg_idx-th class corresponds to the (fg_idx + 1)-th column.
        cls_score = mb_score[:, fg_idx + 1]
        passed = cls_score >= score_thresh
        cls_bbox = mb_bbox[passed]
        cls_score = cls_score[passed]

        if nms_thresh is not None:
            survivors = utils.non_maximum_suppression(
                cls_bbox, nms_thresh, cls_score, limit=400)
            cls_bbox = cls_bbox[survivors]
            cls_score = cls_score[survivors]

        kept_bbox.append(cls_bbox)
        kept_label.append(xp.array((fg_idx, ) * len(cls_bbox)))
        kept_score.append(cls_score)

    bbox = xp.vstack(kept_bbox).astype(np.float32)
    label = xp.hstack(kept_label).astype(np.int32)
    score = xp.hstack(kept_score).astype(np.float32)
    return bbox, label, score
def check_non_maximum_suppression_zero_legnth_bbox(self, bbox, threshold):
    """Check the type and the ``(0,)`` shape of the NMS result for ``bbox``."""
    selected = non_maximum_suppression(bbox, threshold)

    input_type = type(bbox)
    self.assertIsInstance(selected, input_type)
    self.assertEqual(selected.shape, (0, ))
def decode(self, locs, confs, anchors, in_shape):
    """Decode per-level outputs into RoIs and their batch indices.

    For every image and every level, anchors are offset by ``locs``, clipped
    to the input extent, cut to the ``nms_limit_pre`` highest-confidence
    entries, stripped of empty boxes and reduced with NMS. The levels are
    then merged and the ``nms_limit_post`` most confident RoIs are kept.
    The limits depend on ``chainer.config.train``.

    Returns:
        tuple: ``(rois, roi_indices)`` as ``float32`` and ``int32`` arrays.
    """
    xp = self.xp
    nms_limit_pre, nms_limit_post = (
        (self._train_nms_limit_pre, self._train_nms_limit_post)
        if chainer.config.train
        else (self._test_nms_limit_pre, self._test_nms_limit_post))

    rois = []
    roi_indices = []
    for img_idx in range(in_shape[0]):
        level_rois = []
        level_confs = []
        for lvl in range(len(self._scales)):
            loc_l = locs[lvl].array[img_idx]
            conf_l = confs[lvl].array[img_idx]

            roi_l = anchors[lvl].copy()
            # tlbr -> yxhw
            roi_l[:, 2:] -= roi_l[:, :2]
            roi_l[:, :2] += roi_l[:, 2:] / 2
            # offset
            roi_l[:, :2] += loc_l[:, :2] * roi_l[:, 2:]
            roi_l[:, 2:] *= xp.exp(xp.minimum(loc_l[:, 2:], exp_clip))
            # yxhw -> tlbr
            roi_l[:, :2] -= roi_l[:, 2:] / 2
            roi_l[:, 2:] += roi_l[:, :2]
            # clip
            roi_l[:, :2] = xp.maximum(roi_l[:, :2], 0)
            roi_l[:, 2:] = xp.minimum(roi_l[:, 2:], xp.array(in_shape[2:]))

            # Keep the most confident candidates before NMS.
            top = _argsort(-conf_l)[:nms_limit_pre]
            roi_l = roi_l[top]
            conf_l = conf_l[top]

            # Drop boxes without positive height and width.
            non_empty = (roi_l[:, 2:] - roi_l[:, :2] > 0).all(axis=1)
            roi_l = roi_l[non_empty]
            conf_l = conf_l[non_empty]

            survivors = utils.non_maximum_suppression(
                roi_l, self._nms_thresh, limit=nms_limit_post)
            level_rois.append(roi_l[survivors])
            level_confs.append(conf_l[survivors])

        merged_roi = xp.vstack(level_rois).astype(np.float32)
        merged_conf = xp.hstack(level_confs).astype(np.float32)

        best = _argsort(-merged_conf)[:nms_limit_post]
        merged_roi = merged_roi[best]

        rois.append(merged_roi)
        roi_indices.append(xp.array((img_idx, ) * len(merged_roi)))

    rois = xp.vstack(rois).astype(np.float32)
    roi_indices = xp.hstack(roi_indices).astype(np.int32)
    return rois, roi_indices
def check_non_maximum_suppression(self, bbox, threshold, expect):
    """Run NMS on ``bbox`` and compare the selection with ``expect``.

    The selection must keep the array type of ``bbox``, use ``int32`` and
    match ``expect`` element-wise.
    """
    selection = non_maximum_suppression(bbox, threshold)

    input_type = type(bbox)
    self.assertIsInstance(selection, input_type)
    self.assertEqual(selection.dtype, np.int32)

    # Both sides are moved to the host before the element-wise comparison.
    np.testing.assert_equal(cuda.to_cpu(selection), cuda.to_cpu(expect))
def mask_voting(
        rois, mask_probs, cls_probs, n_class, H, W,
        score_thresh=0.7, nms_thresh=0.3,
        mask_merge_thresh=0.5, binary_thresh=0.4, max_num=100):
    """Merge overlapping RoI masks per class with a global candidate limit.

    Pass 1 runs NMS per class (class 0 is background and skipped) and
    derives a score threshold that lets at most ``max_num`` candidates
    through over all classes (never below 1e-3). Pass 2 aggregates, for every
    remaining candidate, the masks of all RoIs whose IoU with it is at least
    ``mask_merge_thresh`` and keeps results whose probability exceeds
    ``score_thresh``.

    Returns:
        tuple: ``(v_bboxes, v_masks, v_labels, v_cls_probs)``.
    """
    mask_size = mask_probs.shape[-1]
    out_labels = np.empty((0, ), dtype=np.int32)
    out_masks = np.empty((0, mask_size, mask_size), dtype=np.float32)
    out_bboxes = np.empty((0, 4), dtype=np.float32)
    out_probs = np.empty((0, ), dtype=np.float32)

    # Pass 1: per-class NMS; label 0 is background, so start at 1.
    pooled_scores = np.empty((0, ), dtype=np.float32)
    nms_probs = []
    nms_bboxes = []
    for label in range(1, n_class):
        cls_prob = cls_probs[:, label]
        keep = non_maximum_suppression(
            rois, nms_thresh, cls_prob, limit=max_num)
        cls_prob = cls_prob[keep]
        nms_bboxes.append(rois[keep])
        nms_probs.append(cls_prob)
        pooled_scores = np.concatenate((pooled_scores, cls_prob))

    # Score of the max_num-th best candidate, floored at 1e-3.
    ranked = np.sort(pooled_scores)[::-1]
    keep_num = min(len(ranked), max_num)
    thresh = max(ranked[keep_num - 1], 1e-3)

    # Pass 2: merge masks around every remaining candidate.
    for label in range(1, n_class):
        cand_bboxes = nms_bboxes[label - 1]
        cand_probs = nms_probs[label - 1]
        passed = np.where(cand_probs >= thresh)
        cand_bboxes = cand_bboxes[passed]
        cand_probs = cand_probs[passed]

        cls_masks = np.empty((0, mask_size, mask_size), dtype=np.float32)
        cls_bboxes = np.empty((0, 4), dtype=np.float32)
        cls_scores = np.empty((0, ), dtype=np.float32)

        for i, box in enumerate(cand_bboxes):
            iou = bbox_iou(rois, box[np.newaxis, :])
            neighbours = np.where(iou >= mask_merge_thresh)[0]
            weights = cls_probs[neighbours, label]
            weights = weights / weights.sum()
            clipped_bbox, clipped_mask = mask_aggregation(
                rois[neighbours], mask_probs[neighbours], weights,
                H, W, binary_thresh)
            if clipped_bbox is None or clipped_mask is None:
                continue
            clipped_mask = cv2.resize(
                clipped_mask.astype(np.float32), (mask_size, mask_size))
            cls_masks = np.concatenate((cls_masks, clipped_mask[None]))
            cls_bboxes = np.concatenate((cls_bboxes, clipped_bbox[None]))
            cls_scores = np.concatenate((cls_scores, cand_probs[i][None]))

        confident = cls_scores > score_thresh
        cls_masks = cls_masks[confident]
        cls_bboxes = cls_bboxes[confident]
        cls_scores = cls_scores[confident]
        cls_labels = np.repeat(label, cls_bboxes.shape[0])

        out_masks = np.concatenate((out_masks, cls_masks))
        out_bboxes = np.concatenate((out_bboxes, cls_bboxes))
        out_labels = np.concatenate((out_labels, cls_labels))
        out_probs = np.concatenate((out_probs, cls_scores))

    return out_bboxes, out_masks, out_labels, out_probs
def decode(self, locs, confs, anchors, in_shape):
    """Decodes back to coordinates of RoIs.

    This method decodes :obj:`locs` and :obj:`confs` returned
    by a FPN network back to :obj:`rois` and :obj:`roi_indices`.

    Args:
        locs (list of arrays): A list of arrays whose shape is
            :math:`(N, K_l, 4)`, where :math:`N` is the size of batch and
            :math:`K_l` is the number of the anchor boxes
            of the :math:`l`-th level.
        confs (list of arrays): A list of array whose shape is
            :math:`(N, K_l)`.
        anchors (list of arrays): Anchor boxes returned by :meth:`anchors`.
        in_shape (tuple of ints): The shape of input of array
            the feature extractor.

    Returns:
        tuple of two arrays:
        :obj:`rois` and :obj:`roi_indices`.

        * **rois**: An array of shape :math:`(R, 4)`,
          where :math:`R` is the total number of RoIs in the given batch.
        * **roi_indices** : An array of shape :math:`(R,)`.
    """
    xp = self.xp

    # Candidate limits before and after NMS depend on the run mode.
    if chainer.config.train:
        limit_pre = self._train_nms_limit_pre
        limit_post = self._train_nms_limit_post
    else:
        limit_pre = self._test_nms_limit_pre
        limit_post = self._test_nms_limit_post

    n_level = len(self._scales)
    batch_rois = []
    batch_indices = []
    for sample in range(in_shape[0]):
        per_level_roi = []
        per_level_conf = []
        for level in range(n_level):
            loc = locs[level].array[sample]
            conf = confs[level].array[sample]
            box = anchors[level].copy()

            # tlbr -> yxhw
            box[:, 2:] -= box[:, :2]
            box[:, :2] += box[:, 2:] / 2
            # offset
            box[:, :2] += loc[:, :2] * box[:, 2:]
            box[:, 2:] *= xp.exp(xp.minimum(loc[:, 2:], exp_clip))
            # yxhw -> tlbr
            box[:, :2] -= box[:, 2:] / 2
            box[:, 2:] += box[:, :2]
            # clip
            box[:, :2] = xp.maximum(box[:, :2], 0)
            box[:, 2:] = xp.minimum(box[:, 2:], xp.array(in_shape[2:]))

            # Pre-NMS cut by descending confidence.
            order = argsort(-conf)[:limit_pre]
            box = box[order]
            conf = conf[order]

            # Remove boxes whose height or width is not positive.
            valid = (box[:, 2:] - box[:, :2] > 0).all(axis=1)
            box = box[valid]
            conf = conf[valid]

            indices = utils.non_maximum_suppression(
                box, self._nms_thresh, limit=limit_post)
            box = box[indices]
            conf = conf[indices]

            per_level_roi.append(box)
            per_level_conf.append(conf)

        sample_roi = xp.vstack(per_level_roi).astype(np.float32)
        sample_conf = xp.hstack(per_level_conf).astype(np.float32)

        # Post-NMS cut over all levels of this sample.
        order = argsort(-sample_conf)[:limit_post]
        sample_roi = sample_roi[order]

        batch_rois.append(sample_roi)
        batch_indices.append(xp.array((sample, ) * len(sample_roi)))

    rois = xp.vstack(batch_rois).astype(np.float32)
    roi_indices = xp.hstack(batch_indices).astype(np.int32)
    return rois, roi_indices