def predict(self, imgs):
    """Detect objects and predict per-RoI masks for each image.

    Args:
        imgs (iterable of numpy.ndarray): Input images. ``img.shape[1:]``
            is used as the ``(height, width)`` of each image, so a
            channel-first layout is assumed.

    Returns:
        tuple of lists: ``(bboxes, out_rois, labels, scores, masks)``, each
        holding one entry per input image. The entries are whatever
        :meth:`_suppress` returns for that image.

    """
    xp = self.xp
    n_class = self.n_class

    # Single pass over ``imgs`` so that one-shot iterables keep working.
    # The original size is recorded before ``prepare`` transforms the image.
    inputs = [(img.shape[1:], self.prepare(img.astype(np.float32)))
              for img in imgs]

    # Loop-invariant constants used to de-normalise the predicted offsets.
    mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
    std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)

    bboxes, out_rois, labels, scores, masks = [], [], [], [], []
    for size, img in inputs:
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(xp.asarray(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _, roi_masks = self.__call__(
                img_var, scale=scale)

        # Batch size is assumed to be 1.
        roi_mask = F.sigmoid(roi_masks).data
        # Map the RoIs back to the coordinates of the original image.
        roi = rois / scale

        roi_cls_loc = (roi_cls_locs.data * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, n_class, 4))
        # Repeat every RoI once per class so it lines up with the offsets.
        roi = xp.broadcast_to(
            roi[:, None], roi_cls_loc.shape).reshape((-1, 4))
        cls_bbox = loc2bbox(roi, roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))
        cls_roi = roi.reshape((-1, n_class * 4))

        # Clip boxes and RoIs to the original image extent.
        cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])
        cls_roi[:, 0::2] = xp.clip(cls_roi[:, 0::2], 0, size[0])
        cls_roi[:, 1::2] = xp.clip(cls_roi[:, 1::2], 0, size[1])

        prob = F.softmax(roi_scores.data).data

        bbox, out_roi, label, score, mask = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(cls_roi),
            cuda.to_cpu(prob), cuda.to_cpu(roi_mask))
        bboxes.append(bbox)
        out_rois.append(out_roi)
        labels.append(label)
        scores.append(score)
        masks.append(mask)

    return bboxes, out_rois, labels, scores, masks
def _to_bbox_label_score(
        self, roi_cls_locs, roi_scores, rois, roi_indices, scale, size):
    """Turn raw head outputs into final boxes, labels and scores.

    Batch size is assumed to be 1.

    Args:
        roi_cls_locs: Object whose ``.data`` holds the normalised per-class
            box offsets; it is reshaped to ``(-1, n_class, 4)``.
        roi_scores: Object whose ``.data`` holds the per-class scores that
            are fed to softmax.
        rois: Proposal boxes in the coordinates of the scaled image.
        roi_indices: Accepted for interface compatibility; not used.
        scale: Factor by which the image was resized; ``rois`` are divided
            by it.
        size: Original image size; ``size[0]`` bounds the even box
            columns and ``size[1]`` the odd ones.

    Returns:
        tuple: ``(bbox, label, score)`` as returned by :meth:`_suppress`,
        with zero-area boxes removed and at most
        ``self._detections_per_im`` highest-scoring entries kept (when
        that limit is positive).

    """
    xp = self.xp
    n_class = self.n_class

    roi = rois / scale

    # Convert predictions to bounding boxes in image coordinates.
    mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
    std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
    roi_cls_loc = (roi_cls_locs.data * std + mean).astype(np.float32)
    roi_cls_loc = roi_cls_loc.reshape((-1, n_class, 4))
    roi_cls = xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
    cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                        roi_cls_loc.reshape((-1, 4)))
    cls_bbox = cls_bbox.reshape((-1, n_class * 4))

    # Clip bounding boxes to the original image extent.
    cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
    cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])

    raw_cls_bbox = cuda.to_cpu(cls_bbox)
    raw_prob = cuda.to_cpu(F.softmax(roi_scores.data).data)

    if self.context:
        # Zero out foreground classes that are not part of the context;
        # column 0 is skipped, so foreground label ``k`` is column ``k + 1``.
        for fg_label in range(n_class - 1):
            if fg_label not in self.context:
                raw_prob[:, fg_label + 1] = 0
        # Renormalise each RoI's distribution over the remaining classes.
        # Summing over axis 0 (across RoIs) would divide the zeroed columns
        # by zero and rescale scores by unrelated RoIs.
        raw_prob = raw_prob / raw_prob.sum(axis=1, keepdims=True)

    bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

    # Drop boxes that collapse to zero area once rounded to whole pixels.
    bbox_int = np.round(bbox).astype(np.int32)
    bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                  (bbox_int[:, 3] - bbox_int[:, 1]))
    keep = bbox_sizes > 0
    bbox, label, score = bbox[keep], label[keep], score[keep]

    # Keep only the highest-scoring detections. A boolean mask is used so
    # the surviving entries stay in their original order.
    limit = self._detections_per_im
    if 0 < limit < len(score):
        keep = np.zeros(len(score), dtype=bool)
        keep[np.argsort(score)[-limit:]] = True
        bbox, label, score = bbox[keep], label[keep], score[keep]

    return bbox, label, score
def __call__(self, x, rois, roi_indices, img_size):
    """Run the head on a feature map and a set of proposals.

    :math:`N` denotes the number of images in the batch.

    Args:
        x (~chainer.Variable): 4D image variable.
        rois (array): Proposal boxes of all images in the batch,
            concatenated. Its shape is :math:`(R', 4)` where
            :math:`R' = \\sum _{i=1} ^ N R_i` and :math:`R_i` is the
            number of proposals of the :math:`i` th image.
        roi_indices (array): Index of the image each proposal belongs to.
            Its shape is :math:`(R',)`.
        img_size (tuple of int): Image size; ``img_size[0]`` bounds the
            even box columns and ``img_size[1]`` the odd ones when the
            refined proposals are clipped.

    Returns:
        tuple: ``(roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, rois,
        roi_indices)``. When ``self.iter2`` is set, every element is the
        concatenation of the first- and second-iteration results.

    """
    feat = F.relu(self.conv1(x))
    cls_seg_map = self.cls_seg(feat)
    ag_loc_map = self.ag_loc(feat)

    # First round of PSROI pooling and regression.
    roi_ag_seg_scores, roi_ag_locs, roi_cls_scores = self._pool(
        cls_seg_map, ag_loc_map, rois, roi_indices)

    if not self.iter2:
        return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, \
            rois, roi_indices

    # Second round: refine the proposals with the offsets just predicted
    # (index 1 on the second axis) and pool again on the refined boxes.
    xp = self.xp
    roi_ag_locs = roi_ag_locs.array
    mean = xp.array(self.loc_normalize_mean)
    std = xp.array(self.loc_normalize_std)
    refined_locs = (roi_ag_locs[:, 1, :] * std + mean).astype(np.float32)
    rois2 = loc2bbox(rois, refined_locs)
    rois2[:, 0::2] = xp.clip(rois2[:, 0::2], 0, img_size[0])
    rois2[:, 1::2] = xp.clip(rois2[:, 1::2], 0, img_size[1])

    roi_ag_seg_scores2, roi_ag_locs2, roi_cls_scores2 = self._pool(
        cls_seg_map, ag_loc_map, rois2, roi_indices)

    # Stack the results of both rounds along the RoI axis.
    rois = xp.concatenate((rois, rois2))
    roi_indices = xp.concatenate((roi_indices, roi_indices))
    roi_ag_seg_scores = F.concat(
        (roi_ag_seg_scores, roi_ag_seg_scores2), axis=0)
    roi_ag_locs = F.concat((roi_ag_locs, roi_ag_locs2), axis=0)
    roi_cls_scores = F.concat((roi_cls_scores, roi_cls_scores2), axis=0)

    return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, \
        rois, roi_indices
def predict_each_box(self, imgs):
    """Predict boxes for each image using :meth:`_suppress_each_box`.

    Args:
        imgs (iterable of numpy.ndarray): Input images. ``img.shape[1:]``
            is used as the ``(height, width)`` of each image.

    Returns:
        tuple of lists: ``(bboxes, labels, scores)`` with one entry per
        input image, taken from the three values returned by
        :meth:`_suppress_each_box`.

    """
    xp = self.xp
    n_class = self.n_class

    # One pass over ``imgs``: remember the original size, then prepare.
    inputs = [(img.shape[1:], self.prepare(img.astype(np.float32)))
              for img in imgs]

    bboxes, labels, scores = [], [], []
    for size, prepared in inputs:
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(xp.asarray(prepared[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _ = self.forward(
                img_var, scales=[scale])

        # Batch size is assumed to be 1.
        # De-normalise the offsets and apply them to the RoIs, expressed in
        # the coordinates of the original image.
        mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
        std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
        locs = (roi_cls_locs.array * std + mean).astype(np.float32)
        locs = locs.reshape((-1, n_class, 4))
        src = xp.broadcast_to((rois / scale)[:, None], locs.shape)
        cls_bbox = loc2bbox(src.reshape((-1, 4)), locs.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))

        # Clip to the original image: even columns by size[0], odd by
        # size[1].
        for axis in (0, 1):
            cls_bbox[:, axis::2] = xp.clip(
                cls_bbox[:, axis::2], 0, size[axis])

        prob = chainer.functions.softmax(roi_scores.array).array

        bbox, label, box_prob = self._suppress_each_box(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(prob))
        bboxes.append(bbox)
        labels.append(label)
        scores.append(box_prob)

    return bboxes, labels, scores
def __call__(self, x, rois, roi_indices, img_size, iter2,
             gt_roi_labels=None):
    """Pool features for the proposals and predict scores and offsets.

    Args:
        x: Feature map fed to ``psroi_conv1``.
        rois (array): Proposal boxes, one row of four values per RoI.
        roi_indices (array): Per-RoI index; cast to ``float32`` and
            prepended to ``rois`` as an extra column.
        img_size (tuple): ``(H, W)`` used to clip the refined proposals.
        iter2 (bool): If true, run a second pooling round on proposals
            refined with the first round's offsets and concatenate both
            rounds.
        gt_roi_labels: Forwarded unchanged to :meth:`_pool`.

    Returns:
        tuple: ``(rois, roi_indices, roi_seg_scores, roi_cls_locs,
        roi_cls_scores)``.

    """
    xp = self.xp
    roi_indices = roi_indices.astype(np.float32)
    indices_and_rois = xp.concatenate(
        (roi_indices[:, None], rois), axis=1)

    feat = F.relu(self.psroi_conv1(x))
    cls_seg_map = self.psroi_conv2(feat)
    loc_map = self.psroi_conv3(feat)

    # First round of PSROI pooling and regression.
    roi_seg_scores, roi_cls_locs, roi_cls_scores = self._pool(
        indices_and_rois, cls_seg_map, loc_map,
        gt_roi_labels=gt_roi_labels)

    if not iter2:
        return rois, roi_indices, roi_seg_scores, roi_cls_locs, \
            roi_cls_scores

    # Second round: shift the proposals by the de-normalised offsets at
    # index 1 of the second axis, clip them, and pool once more.
    roi_cls_locs = roi_cls_locs.array
    mean = xp.array(self.loc_normalize_mean, np.float32)
    std = xp.array(self.loc_normalize_std, np.float32)
    rois2 = loc2bbox(rois, roi_cls_locs[:, 1, :] * std + mean)
    H, W = img_size
    rois2[:, 0::2] = xp.clip(rois2[:, 0::2], 0, H)
    rois2[:, 1::2] = xp.clip(rois2[:, 1::2], 0, W)

    indices_and_rois2 = xp.concatenate(
        (roi_indices[:, None], rois2), axis=1)
    roi_seg_scores2, roi_cls_locs2, roi_cls_scores2 = self._pool(
        indices_and_rois2, cls_seg_map, loc_map,
        gt_roi_labels=gt_roi_labels)

    # Stack both rounds along the RoI axis.
    rois = xp.concatenate((rois, rois2))
    roi_indices = xp.concatenate((roi_indices, roi_indices))
    roi_cls_scores = F.concat((roi_cls_scores, roi_cls_scores2), axis=0)
    roi_cls_locs = F.concat((roi_cls_locs, roi_cls_locs2), axis=0)
    roi_seg_scores = F.concat((roi_seg_scores, roi_seg_scores2), axis=0)

    return rois, roi_indices, roi_seg_scores, roi_cls_locs, roi_cls_scores
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in an image.
          Each bounding box is organized by
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
          in the second axis.
        * **labels** : A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores** : A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.

    """
    xp = self.xp
    n_class = self.n_class

    # One pass over ``imgs``: remember the original (H, W), then prepare.
    originals = [(img.shape[1:], self.prepare(img.astype(np.float32)))
                 for img in imgs]

    bboxes, labels, scores = [], [], []
    for size, prepared in originals:
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(xp.asarray(prepared[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _ = self.__call__(
                img_var, scale=scale)

        # Batch size is assumed to be 1.
        # De-normalise the offsets, then decode them against the RoIs
        # expressed in the coordinates of the input image.
        mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
        std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
        locs = (roi_cls_locs.array * std + mean).astype(np.float32)
        locs = locs.reshape((-1, n_class, 4))
        src = xp.broadcast_to((rois / scale)[:, None], locs.shape)
        cls_bbox = loc2bbox(src.reshape((-1, 4)), locs.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))

        # Clip to the image: y coordinates by height, x by width.
        for axis in (0, 1):
            cls_bbox[:, axis::2] = xp.clip(
                cls_bbox[:, axis::2], 0, size[axis])

        prob = F.softmax(roi_scores.array).array

        bbox, label, score = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(prob))
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in an image.
          Each bounding box is organized by
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
          in the second axis.
        * **labels** : A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores** : A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.

    """
    # Record the original (H, W) of every image before it is prepared.
    sizes, prepared_imgs = [], []
    for raw in imgs:
        sizes.append(raw.shape[1:])
        prepared_imgs.append(self.prepare(raw.astype(np.float32)))

    results = []
    for prepared, (height, width) in zip(prepared_imgs, sizes):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(self.xp.asarray(prepared[None]))
            scale = img_var.shape[3] / width
            roi_cls_locs, roi_scores, rois, _ = self.__call__(
                img_var, scale=scale)

        # Batch size is assumed to be 1.
        offsets = roi_cls_locs.array
        logits = roi_scores.array
        boxes = rois / scale

        # De-normalise the offsets and decode one box per (RoI, class).
        mean = self.xp.tile(
            self.xp.asarray(self.loc_normalize_mean), self.n_class)
        std = self.xp.tile(
            self.xp.asarray(self.loc_normalize_std), self.n_class)
        offsets = (offsets * std + mean).astype(np.float32)
        offsets = offsets.reshape((-1, self.n_class, 4))
        boxes = self.xp.broadcast_to(boxes[:, None], offsets.shape)
        cls_bbox = loc2bbox(
            boxes.reshape((-1, 4)), offsets.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))

        # Clip to the image: y coordinates by height, x by width.
        cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, height)
        cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, width)

        prob = F.softmax(logits).array

        results.append(
            self._suppress(cuda.to_cpu(cls_bbox), cuda.to_cpu(prob)))

    bboxes = [bbox for bbox, _, _ in results]
    labels = [label for _, label, _ in results]
    scores = [score for _, _, score in results]
    return bboxes, labels, scores
def predict(self, imgs):
    """Detect objects and predict an instance mask for each detection.

    Args:
        imgs (iterable of numpy.ndarray): Input images. ``img.shape[1:]``
            is used as the ``(height, width)`` of each image, so a
            channel-first layout is assumed.

    Returns:
        tuple of lists: ``(bboxes, masks, labels, scores)`` with one entry
        per input image. ``masks`` holds the output of ``segm_results``,
        or an empty ``(0, height, width)`` boolean array when an image
        has no detections.

    """
    xp = self.xp
    n_class = self.n_class

    prepared_imgs = []
    sizes = []
    scales = []
    for img in imgs:
        size = img.shape[1:]
        img = self.prepare(img.astype(np.float32))
        prepared_imgs.append(img)
        # Keep the original size of *every* image: it is needed below for
        # clipping and for pasting the masks, and images may differ in
        # size.
        sizes.append(size)
        scales.append(img.shape[2] / size[1])

    # Loop-invariant constants used to de-normalise the predicted offsets.
    mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
    std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)

    bboxes, masks, labels, scores = [], [], [], []
    for img, size, scale in zip(prepared_imgs, sizes, scales):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(xp.asarray(img[None]))
            img_size = img_var.shape[2:]
            h = self.extractor(img_var)
            _, _, rois, roi_indices, _ = self.rpn(h, img_size, [scale])
            roi_cls_locs, roi_scores, _ = self.head(
                h, rois, roi_indices, pred_mask=False)

        # We are assuming that batch size is 1.
        roi = rois / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        roi_cls_loc = (roi_cls_locs.data * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, n_class, 4))
        roi_cls = xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                            roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))

        # Clip bounding boxes to the original image extent.
        cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])

        prob = F.softmax(roi_scores.data).data

        bbox, label, score = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(prob))

        # Drop boxes that collapse to zero area once rounded to pixels.
        bbox_int = np.round(bbox).astype(np.int32)
        bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                      (bbox_int[:, 3] - bbox_int[:, 1]))
        keep = bbox_sizes > 0
        bbox, label, score = bbox[keep], label[keep], score[keep]

        # Keep only the highest-scoring detections. A boolean mask is used
        # so the surviving entries stay in their original order.
        limit = self._detections_per_im
        if 0 < limit < len(score):
            keep = np.zeros(len(score), dtype=bool)
            keep[np.argsort(score)[-limit:]] = True
            bbox, label, score = bbox[keep], label[keep], score[keep]

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

        if len(bbox) == 0:
            masks.append(np.zeros((0, size[0], size[1]), dtype=bool))
            continue

        # Use the predicted boxes (back in the scaled frame) as RoIs for
        # the mask branch.
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            rois = xp.asarray(bbox) * scale
            roi_indices = xp.zeros((len(bbox),), dtype=np.int32)
            _, _, roi_masks = self.head(
                x=h, rois=rois, roi_indices=roi_indices,
                pred_bbox=False, pred_mask=True)
            roi_masks = F.sigmoid(roi_masks)
        roi_mask = cuda.to_cpu(roi_masks.data)

        mask = segm_results(
            bbox, label, roi_mask, size[0], size[1],
            mask_size=self.head.mask_size,
        )
        masks.append(mask)

    return bboxes, masks, labels, scores
def _to_bboxes(self, roi_cls_locs, roi_scores, rois, roi_indices,
               sizes, scales):
    """Convert batched head outputs into per-image detections.

    Args:
        roi_cls_locs: Normalised per-class box offsets for all RoIs in the
            batch, either an array or a :class:`chainer.Variable`.
        roi_scores: Per-class scores for all RoIs; fed to softmax.
        rois: Proposal boxes for all RoIs, in scaled-image coordinates.
        roi_indices: For every RoI, the index of the image it belongs to.
        sizes: Original size of each image; ``size[0]`` bounds the even
            box columns and ``size[1]`` the odd ones.
        scales: Resize factor of each image; that image's RoIs are
            divided by it.

    Returns:
        tuple of lists: ``(bboxes, labels, scores)`` with one entry per
        element of ``sizes``. Zero-area boxes are removed and at most
        ``self._detections_per_im`` highest-scoring detections are kept
        per image (when that limit is positive).

    """
    xp = self.xp
    n_class = self.n_class

    if isinstance(roi_cls_locs, chainer.Variable):
        roi_cls_locs = roi_cls_locs.array
    probs = F.softmax(roi_scores).array
    # Drop the local reference to the scores once they are converted.
    del roi_scores

    # Loop-invariant constants used to de-normalise the predicted offsets.
    mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
    std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
    limit = self._detections_per_im

    bboxes, labels, scores = [], [], []
    for index, size in enumerate(sizes):
        scale = scales[index]

        # Select the RoIs that belong to this image.
        in_image = roi_indices == index
        roi_cls_loc = roi_cls_locs[in_image]
        prob = probs[in_image]
        roi = rois[in_image] / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, n_class, 4))
        roi_cls = xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                            roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))

        # Clip bounding boxes to the original image extent.
        cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])

        bbox, label, score = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(prob))

        # Drop boxes that collapse to zero area once rounded to pixels.
        bbox_int = np.round(bbox).astype(np.int32)
        bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                      (bbox_int[:, 3] - bbox_int[:, 1]))
        keep = bbox_sizes > 0
        bbox, label, score = bbox[keep], label[keep], score[keep]

        # Keep only the highest-scoring detections. A boolean mask is used
        # so the surviving entries stay in their original order.
        if 0 < limit < len(score):
            keep = np.zeros(len(score), dtype=bool)
            keep[np.argsort(score)[-limit:]] = True
            bbox, label, score = bbox[keep], label[keep], score[keep]

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def predict(self, imgs):
    """Detect objects and predict masks, formatted per ``self.preset``.

    Args:
        imgs (iterable of numpy.ndarray): Input images. ``img.shape[1:]``
            is used as the ``(height, width)`` of each image, so a
            channel-first layout is assumed.

    Returns:
        tuple of lists: ``(bboxes, labels, scores, masks)`` with one entry
        per input image.

        * With ``self.preset == 'evaluate'`` boxes are converted with
          ``bbox_yxyx2xywh`` and masks are encoded with
          ``pycocotools.mask.encode`` (``counts`` decoded to ``str``).
          An image without detections gets a single all-zero encoded
          mask.
        * With ``self.preset == 'visualize'`` boxes are returned as they
          come out of :meth:`_suppress` and masks are the arrays returned
          by ``im_mask``.
        * ``labels`` entries are lists of ``self.class_ids`` values.

    """
    xp = self.xp
    n_class = self.n_class

    prepared_imgs = []
    sizes = []
    for img in imgs:
        sizes.append(img.shape[1:])
        prepared_imgs.append(self.prepare(img.astype(np.float32)))

    bboxes, labels, scores, masks = [], [], [], []
    for img, size in zip(prepared_imgs, sizes):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(xp.asarray(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _, h = self.__call__(
                img_var, scale=scale)

        # Batch size is assumed to be 1.
        roi = rois / scale
        mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
        std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
        roi_cls_loc = (roi_cls_locs.data * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, n_class, 4))
        # Repeat every RoI once per class so it lines up with the offsets.
        roi = xp.broadcast_to(
            roi[:, None], roi_cls_loc.shape).reshape((-1, 4))
        cls_bbox = loc2bbox(roi, roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))
        cls_roi = roi.reshape((-1, n_class * 4))

        # Clip boxes and RoIs to the original image extent.
        cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])
        cls_roi[:, 0::2] = xp.clip(cls_roi[:, 0::2], 0, size[0])
        cls_roi[:, 1::2] = xp.clip(cls_roi[:, 1::2], 0, size[1])

        prob = F.softmax(roi_scores.data).data

        # The suppressed RoIs are not part of this method's result.
        bbox, _, label, score = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(cls_roi), cuda.to_cpu(prob))

        mask = []
        if len(bbox) > 0:
            # Mask head, run on the detected boxes in the scaled frame.
            roi_indices = xp.zeros((len(bbox),), dtype=np.int32)
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                # ``xp.asarray`` keeps the boxes on the same kind of
                # device as ``roi_indices``, so CPU inference works too.
                hres5 = self.head.res5head(
                    h, xp.asarray(bbox * scale), roi_indices)
                roi_masks = self.head.maskhead(hres5)
            raw_mask = cuda.to_cpu(F.sigmoid(roi_masks).data)

            # Post-process: channel ``label + 1`` of each RoI mask is
            # passed to ``im_mask`` together with the image size and box.
            if self.preset == 'evaluate':
                bboxes.append(bbox_yxyx2xywh(bbox))
                for m, b, cls in zip(raw_mask, bbox, label):
                    wm = im_mask(m[int(cls + 1)], size, b)
                    # Encode the mask and make ``counts`` a ``str``.
                    wm = pycocotools.mask.encode(np.asfortranarray(wm))
                    wm['counts'] = wm['counts'].decode('ascii')
                    mask.append(wm)
            elif self.preset == 'visualize':
                bboxes.append(bbox)
                for m, b, cls in zip(raw_mask, bbox, label):
                    mask.append(im_mask(m[int(cls + 1)], size, b))
        elif self.preset == 'evaluate':
            # No detections: emit one all-zero encoded mask.
            wm = np.zeros((size[0], size[1]), dtype=np.uint8)
            wm = pycocotools.mask.encode(np.asfortranarray(wm))
            wm['counts'] = wm['counts'].decode('ascii')
            mask.append(wm)
            bboxes.append(bbox_yxyx2xywh(bbox))
        elif self.preset == 'visualize':
            # No detections: still record the (empty) boxes so that
            # ``bboxes`` stays aligned with the other per-image lists.
            bboxes.append(bbox)

        labels.append([self.class_ids[int(cls)] for cls in label.tolist()])
        scores.append(score)
        masks.append(mask)

    return bboxes, labels, scores, masks
def predict(self, imgs):
    """Detect objects and run the mask head on the detected boxes.

    Args:
        imgs (iterable of numpy.ndarray): Input images. ``img.shape[1:]``
            is used as the ``(height, width)`` of each image, so a
            channel-first layout is assumed.

    Returns:
        tuple of lists: ``(bboxes, labels, scores, masks)`` with one entry
        per input image. Each ``masks`` entry is a list: when
        ``self.predict_mask`` is true it holds one thresholded ``uint8``
        mask per detection, otherwise a single array with the reshaped
        raw head output. It is empty when the image has no detections.

    """
    prepared_imgs = []
    sizes = []
    for img in imgs:
        size = img.shape[1:]
        img = self.prepare(img.astype(np.float32))
        prepared_imgs.append(img)
        sizes.append(size)

    bboxes = []
    labels = []
    scores = []
    masks = []
    for img, size in zip(prepared_imgs, sizes):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(self.xp.asarray(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _, levels = self.__call__(
                img_var, scale=scale)

        # We are assuming that batch size is 1.
        roi = rois / scale
        roi_cls_loc = roi_cls_locs.data
        roi_score = roi_scores.data

        # If the loc prediction layer uses shared weights (only four
        # outputs per RoI), repeat them for every class. Not an optimised
        # way of doing it.
        if roi_cls_loc.shape[1] == 4:
            roi_cls_loc = self.xp.tile(roi_cls_loc, self.n_class)

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)
        roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
        roi = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi.reshape((-1, 4)),
                            roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))

        # Clip bounding boxes to the original image extent.
        cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])

        prob = F.softmax(roi_score).data

        raw_cls_bbox = cuda.to_cpu(cls_bbox)
        raw_prob = cuda.to_cpu(prob)
        raw_roi = cuda.to_cpu(roi)
        raw_levels = cuda.to_cpu(levels)

        bbox, label, score, roi, levels = self._suppress(
            raw_cls_bbox, raw_prob, raw_roi, raw_levels)

        # Predict masks only for the detected boxes.
        mask_per_image = []
        if len(label) > 0:
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                # Because batch size is assumed to be 1, every element of
                # roi_indices is zero.
                roi_indices = self.xp.zeros(roi.shape[0], dtype=np.float32)
                bbox_gpu = cuda.to_gpu(
                    bbox) if chainer.cuda.available else bbox
                indices_and_rois = self.xp.concatenate(
                    (roi_indices[:, None], bbox_gpu * scale), axis=1)
                mask = self.head.predict_mask(
                    levels, indices_and_rois,
                    self.extractor.spatial_scales)

                if self.predict_mask:
                    mask = F.sigmoid(mask).data
                    # Pick, for every detection, the channel of its label.
                    mask = mask[np.arange(mask.shape[0]), label]
                    # Resize each mask to the size of its bounding box and
                    # binarise it.
                    # NOTE(review): cv2.resize expects an integer
                    # (width, height) and a host array; confirm that
                    # ``bbox`` is integral and ``mask`` is on the CPU here.
                    for b, m in zip(bbox, mask):
                        w = b[3] - b[1]
                        h = b[2] - b[0]
                        m = cv2.resize(m, (w, h)) * 255
                        m = m.astype(np.uint8)
                        _, m = cv2.threshold(
                            m, 127, 255, cv2.THRESH_BINARY)
                        mask_per_image.append(m)
                else:
                    # 17 channels per detection; presumably one per
                    # keypoint, to be confirmed against the head.
                    mask = mask.reshape((mask.shape[0], 17, -1)).data
                    mask = cuda.to_cpu(mask)
                    mask_per_image.append(mask)

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)
        masks.append(mask_per_image)

    return bboxes, labels, scores, masks
def __call__(self, loc, score, anchor, img_size, scale=1.):
    """Propose RoIs.

    ``loc``, ``score`` and ``anchor`` describe the same anchor when they
    are indexed by the same index. :math:`R` is the total number of
    anchors, i.e. the product of the height and the width of an image and
    the number of anchor bases per pixel.

    The output lives on the same kind of device as the inputs.

    Args:
        loc (array): Predicted offsets and scaling to anchors.
            Its shape is :math:`(R, 4)`.
        score (array): Predicted foreground probability for anchors.
            Its shape is :math:`(R,)`.
        anchor (array): Coordinates of anchors. Its shape is
            :math:`(R, 4)`.
        img_size (tuple of ints): A tuple :obj:`height, width`,
            which contains image size after scaling.
        scale (float): The scaling factor used to scale an image after
            reading it from a file.

    Returns:
        array:
        An array of coordinates of proposal boxes.
        Its shape is :math:`(S, 4)`. :math:`S` is at most
        :obj:`self.n_test_post_nms` in test time and at most
        :obj:`self.n_train_post_nms` in train time (when those limits are
        positive). :math:`S` depends on the size of the predicted
        bounding boxes and the number of bounding boxes discarded by NMS.

    """
    # Pick the pre/post-NMS budgets that match the current mode.
    if chainer.config.train:
        n_pre_nms, n_post_nms = (
            self.n_train_pre_nms, self.n_train_post_nms)
    else:
        n_pre_nms, n_post_nms = (
            self.n_test_pre_nms, self.n_test_post_nms)

    xp = cuda.get_array_module(loc)
    loc, score, anchor = (
        cuda.to_cpu(arr) for arr in (loc, score, anchor))

    # Turn anchors into proposals and clip them to the image.
    roi = loc2bbox(anchor, loc)
    roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[0])
    roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[1])

    # Discard proposals whose height or width is below the threshold.
    min_size = self.min_size * scale
    heights = roi[:, 2] - roi[:, 0]
    widths = roi[:, 3] - roi[:, 1]
    big_enough = np.where(
        (heights >= min_size) & (widths >= min_size))[0]
    roi = roi[big_enough, :]
    score = score[big_enough]

    # Order proposals from the highest score to the lowest and keep the
    # first ``n_pre_nms`` of them (e.g. 6000).
    order = score.ravel().argsort()[::-1]
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    roi = roi[order, :]
    score = score[order]

    # Apply NMS (e.g. threshold = 0.7), on the GPU unless the inputs were
    # on the CPU or CPU NMS is forced.
    if xp != np and not self.force_cpu_nms:
        keep = cuda.to_cpu(non_maximum_suppression(
            cuda.to_gpu(roi), thresh=self.nms_thresh))
    else:
        keep = non_maximum_suppression(roi, thresh=self.nms_thresh)

    # Keep the first ``n_post_nms`` survivors (e.g. 300).
    if n_post_nms > 0:
        keep = keep[:n_post_nms]
    roi = roi[keep]

    return cuda.to_gpu(roi) if xp != np else roi
def __call__(self, x, scale=1.0, iter2=True):
    """Run the full network on a batch of images.

    Args:
        x: 4D image variable; ``x.shape[2:]`` is used as the image size.
        scale (float): Scaling factor forwarded to the RPN.
        iter2 (bool): If true, a second pooling round is run on proposals
            refined with the first round's offsets, and the results of
            both rounds are concatenated.

    Returns:
        tuple: ``(roi_indices, rois, roi_seg_probs, roi_cls_probs)``.
        ``roi_indices`` is cast to ``float32``; the two probability
        outputs are arrays obtained by softmax over the pooled scores.

    """
    xp = self.xp
    img_size = x.shape[2:]

    # Feature extractor.
    feat = x
    for stage in (self.res1, self.res2, self.res3, self.res4):
        feat = stage(feat)

    # RPN. Only the proposals and their image indices are used below.
    rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(
        feat, img_size, scale)
    roi_indices = roi_indices.astype(np.float32)
    indices_and_rois = xp.concatenate(
        (roi_indices[:, None], rois), axis=1)

    # ResNet101C5 with dilated convolution.
    feat = self.res5(feat)

    # Convolutions feeding PSROI pooling.
    feat = F.relu(self.psroi_conv1(feat))
    seg_map = self.psroi_conv2(feat)
    loc_map = self.psroi_conv3(feat)

    # First round of PSROI pooling and regression.
    roi_seg_scores, roi_cls_locs, roi_cls_scores = self._pool_and_predict(
        indices_and_rois, seg_map, loc_map)
    roi_cls_probs = F.softmax(roi_cls_scores).array
    roi_seg_probs = F.softmax(roi_seg_scores).array

    if not iter2:
        return roi_indices, rois, roi_seg_probs, roi_cls_probs

    # Second round: shift the proposals by the de-normalised offsets at
    # index 1 of the second axis, clip them, and pool once more.
    roi_cls_locs = roi_cls_locs.array
    mean = xp.array(self.loc_normalize_mean)
    std = xp.array(self.loc_normalize_std)
    rois2 = loc2bbox(rois, roi_cls_locs[:, 1, :] * std + mean)
    H, W = img_size
    rois2[:, 0::2] = xp.clip(rois2[:, 0::2], 0, H)
    rois2[:, 1::2] = xp.clip(rois2[:, 1::2], 0, W)

    indices_and_rois2 = xp.concatenate(
        (roi_indices[:, None], rois2), axis=1).astype(xp.float32)
    roi_seg_scores2, _, roi_cls_scores2 = self._pool_and_predict(
        indices_and_rois2, seg_map, loc_map)
    roi_cls_probs2 = F.softmax(roi_cls_scores2).array
    roi_seg_probs2 = F.softmax(roi_seg_scores2).array

    # Stack both rounds along the RoI axis.
    rois = xp.concatenate((rois, rois2))
    roi_indices = xp.concatenate((roi_indices, roi_indices))
    roi_cls_probs = xp.concatenate((roi_cls_probs, roi_cls_probs2))
    roi_seg_probs = xp.concatenate((roi_seg_probs, roi_seg_probs2))

    return roi_indices, rois, roi_seg_probs, roi_cls_probs
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in an image.
          Each bounding box is organized by
          :obj:`(x_min, y_min, x_max, y_max)`
          in the second axis.
        * **labels** : A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores** : A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.

    """
    prepared_imgs = []
    scales = []
    for img in imgs:
        _, _, W = img.shape
        img = self.prepare(img.astype(np.float32))
        # Ratio between the prepared width and the original width.
        scales.append(img.shape[2] / W)
        prepared_imgs.append(img)

    bboxes = []
    labels = []
    scores = []
    for img, scale in zip(prepared_imgs, scales):
        img_var = chainer.Variable(self.xp.asarray(img[None]),
                                   volatile=chainer.flag.ON)
        # Size of the prepared (scaled) image.
        H, W = img_var.shape[2:]
        roi_cls_locs, roi_scores, rois, _ = self.__call__(
            img_var, scale=scale, test=True)

        # We are assuming that batch size is 1.
        roi_cls_loc = roi_cls_locs.data
        roi_score = roi_scores.data
        roi = rois / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)
        roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape(-1, self.n_class, 4)
        roi = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi.reshape(-1, 4), roi_cls_loc.reshape(-1, 4))
        cls_bbox = cls_bbox.reshape(-1, self.n_class * 4)

        # Clip the boxes of *every* class to the original image extent.
        # ``cls_bbox`` has ``n_class * 4`` columns, so the strided slices
        # must run over the whole row, not just the first four columns.
        cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, W / scale)
        cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, H / scale)

        prob = F.softmax(roi_score).data

        raw_cls_bbox = cuda.to_cpu(cls_bbox)
        raw_prob = cuda.to_cpu(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores