def demo_net(detector, image_name):
    """
    wrapper for detector: detect objects in one demo image using precomputed
    proposals and visualize the detections that survive thresholding and NMS

    Reads ``image_name + '.jpg'`` and the ``'boxes'`` entry of
    ``image_name + '_boxes.mat'``.
    :param detector: Detector
    :param image_name: image name (path without extension)
    :return: None
    :raises IOError: if the image cannot be read
    """
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    # load demo data
    image_path = image_name + '.jpg'
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError(image_path + ' could not be read')
    im_array, im_scale = resize(im, config.TEST.SCALES[0], config.TRAIN.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)

    # scale the loaded boxes by the resize factor and prepend a column of
    # zeros as batch index (only one image is processed)
    roi_array = sio.loadmat(image_name + '_boxes.mat')['boxes']
    batch_index_array = np.zeros((roi_array.shape[0], 1))
    projected_rois = roi_array * im_scale
    roi_array = np.hstack((batch_index_array, projected_rois))

    scores, boxes = detector.im_detect(im_array, roi_array)

    all_boxes = [[] for _ in CLASSES]
    # iterate by position; looking the index up with CLASSES.index() on every
    # pass is quadratic and picks the wrong slot for repeated names
    for cls_ind in range(len(CLASSES)):
        # boxes holds 4 columns per class, scores one column per class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        # rows of (x1, y1, x2, y2, score)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    # slot 0 is replaced by an empty list (presumably the background class)
    boxes_this_image = [[]] + all_boxes[1:]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
def demo_net(detector, image_name):
    """
    wrapper for detector: run RPN-based detection on one image and visualize
    the detections that survive thresholding and NMS
    :param detector: Detector
    :param image_name: image name (path to the image file)
    :return: None
    :raises IOError: if the file does not exist or cannot be decoded
    """
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    # NOTE: this switches a global config flag and does not restore it
    config.TEST.HAS_RPN = True

    # explicit checks instead of `assert`, which disappears under `python -O`
    if not os.path.exists(image_name):
        raise IOError(image_name + ' not found')
    im = cv2.imread(image_name)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError(image_name + ' could not be decoded as an image')

    im_array, im_scale = resize(im, config.SCALES[0], config.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    # presumably (height, width, scale); assumes transform returns an
    # (n, c, h, w) array -- TODO confirm
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)

    scores, boxes = detector.im_detect(im_array, im_info)

    all_boxes = [[] for _ in CLASSES]
    # iterate by position; CLASSES.index() per pass is a quadratic lookup
    for cls_ind in range(len(CLASSES)):
        # boxes holds 4 columns per class, scores one column per class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        # rows of (x1, y1, x2, y2, score); already float32, so no second cast
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    # slot 0 is replaced by an empty list (presumably the background class)
    boxes_this_image = [[]] + all_boxes[1:]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
def demo_net(detector, image_name):
    """
    wrapper for detector: detect objects in a single image with the RPN
    pipeline, keep confident non-overlapping boxes and display them
    :param detector: Detector
    :param image_name: image name (path to the image file)
    :return: None
    :raises IOError: if the file does not exist or cannot be decoded
    """
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    # NOTE: global config is modified here and left modified afterwards
    config.TEST.HAS_RPN = True

    # raise explicitly: an `assert` would be removed by `python -O`
    if not os.path.exists(image_name):
        raise IOError(image_name + ' not found')
    im = cv2.imread(image_name)
    if im is None:
        # a None result is how cv2.imread signals an unreadable file
        raise IOError(image_name + ' could not be decoded as an image')

    im_array, im_scale = resize(im, config.SCALES[0], config.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    # presumably (height, width, scale); assumes im_array is laid out as
    # (n, c, h, w) after transform -- TODO confirm
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]],
                       dtype=np.float32)

    scores, boxes = detector.im_detect(im_array, im_info)

    all_boxes = [[] for _ in CLASSES]
    # walk class positions directly rather than calling CLASSES.index()
    for cls_ind in range(len(CLASSES)):
        first_col = 4 * cls_ind  # each class owns 4 consecutive box columns
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = boxes[keep, first_col:first_col + 4]
        cls_scores = cls_scores[keep]
        # rows of (x1, y1, x2, y2, score), cast once to float32 for nms
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    # slot 0 is replaced by an empty list (presumably the background class)
    boxes_this_image = [[]] + all_boxes[1:]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
def main():
    """
    Run a two-class detection network on ``args.img`` and save the image
    with the detected boxes drawn on it as ``result.jpg``.

    Reads the module-level ``args`` (img, gpu, prefix, epoch, thresh,
    nms_thresh, nest_thresh).
    :return: None
    :raises IOError: if the input image cannot be read
    """
    color = cv2.imread(args.img)  # read image in b,g,r order
    if color is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError('cannot read image: {}'.format(args.img))
    img, scale = resize(color.copy(), 640, 1024)
    im_info = np.array([[img.shape[0], img.shape[1], scale]],
                       dtype=np.float32)  # (h, w, scale)
    # (h, w, c) -> (c, h, w): only the memory layout changes here, the
    # channel order stays exactly as loaded
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]  # extend to (n, c, h, w)

    ctx = mx.gpu(args.gpu)
    _, arg_params, aux_params = mx.model.load_checkpoint(
        args.prefix, args.epoch)
    arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
    # the network architecture is chosen from the checkpoint prefix
    if 'resnet' in args.prefix:
        sym = resnet_50(num_class=2, bn_mom=0.99, bn_global=True,
                        is_train=False)
    else:
        sym = get_vgg_test(num_classes=2)

    # inputs are bound alongside the weights in the same argument dict
    arg_params["data"] = mx.nd.array(img, ctx)
    arg_params["im_info"] = mx.nd.array(im_info, ctx)
    exe = sym.bind(ctx, arg_params, args_grad=None, grad_req="null",
                   aux_states=aux_params)
    exe.forward(is_train=False)

    output_dict = dict(zip(sym.list_outputs(), exe.outputs))
    rois = output_dict['rpn_rois_output'].asnumpy()[:, 1:]  # first column is index
    scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, (im_info[0][0], im_info[0][1]))

    # columns 4:8 / score column 1 belong to class index 1 of the two classes
    cls_boxes = pred_boxes[:, 4:8]
    cls_scores = scores[:, 1]
    keep = np.where(cls_scores >= args.thresh)[0]
    cls_boxes = cls_boxes[keep, :]
    cls_scores = cls_scores[keep]
    # rows of (x1, y1, x2, y2, score); already float32, no second cast needed
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, args.nms_thresh)
    dets = dets[keep, :]
    keep = nest(dets, thresh=args.nest_thresh)
    dets = dets[keep, :]

    # boxes are in resized-image coordinates; divide by scale to draw them
    # on the original image
    for bbox in dets[:, :4]:
        cv2.rectangle(
            color,
            (int(round(bbox[0] / scale)), int(round(bbox[1] / scale))),
            (int(round(bbox[2] / scale)), int(round(bbox[3] / scale))),
            (0, 255, 0), 2)
    cv2.imwrite("result.jpg", color)
def main():
    """
    Detect objects in ``args.img`` with a two-class network loaded from a
    checkpoint and write the annotated image to ``result.jpg``.

    Reads the module-level ``args`` (img, gpu, prefix, epoch, thresh,
    nms_thresh, nest_thresh).
    :return: None
    :raises IOError: if the input image cannot be read
    """
    color = cv2.imread(args.img)  # read image in b,g,r order
    if color is None:
        # a None result is how cv2.imread signals an unreadable file
        raise IOError('cannot read image: {}'.format(args.img))
    img, scale = resize(color.copy(), 640, 1024)
    im_info = np.array([[img.shape[0], img.shape[1], scale]], dtype=np.float32)  # (h, w, scale)
    # the two swaps turn (h, w, c) into (c, h, w); they reorder axes only and
    # do not convert the channel order
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]  # extend to (n, c, h, w)

    ctx = mx.gpu(args.gpu)
    _, arg_params, aux_params = mx.model.load_checkpoint(args.prefix, args.epoch)
    arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
    # the network architecture is chosen from the checkpoint prefix
    if 'resnet' in args.prefix:
        sym = resnet_50(num_class=2, bn_mom=0.99, bn_global=True, is_train=False)
    else:
        sym = get_vgg_test(num_classes=2)

    # the input tensors travel in the same dict as the weights
    arg_params["data"] = mx.nd.array(img, ctx)
    arg_params["im_info"] = mx.nd.array(im_info, ctx)
    exe = sym.bind(ctx, arg_params, args_grad=None, grad_req="null", aux_states=aux_params)
    exe.forward(is_train=False)

    output_dict = dict(zip(sym.list_outputs(), exe.outputs))
    rois = output_dict['rpn_rois_output'].asnumpy()[:, 1:]  # first column is index
    scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, (im_info[0][0], im_info[0][1]))

    # keep only class index 1: box columns 4:8 and score column 1
    cls_scores = scores[:, 1]
    keep = np.where(cls_scores >= args.thresh)[0]
    cls_boxes = pred_boxes[:, 4:8][keep, :]
    cls_scores = cls_scores[keep]
    # rows of (x1, y1, x2, y2, score), cast once to float32
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    dets = dets[nms(dets, args.nms_thresh), :]
    dets = dets[nest(dets, thresh=args.nest_thresh), :]

    # undo the resize factor so the rectangles land on the original image
    for bbox in dets[:, :4]:
        top_left = (int(round(bbox[0] / scale)), int(round(bbox[1] / scale)))
        bottom_right = (int(round(bbox[2] / scale)), int(round(bbox[3] / scale)))
        cv2.rectangle(color, top_left, bottom_right, (0, 255, 0), 2)
    cv2.imwrite("result.jpg", color)
def demo_net(detector, image_name):
    """
    wrapper for detector: run detection on one demo image whose proposals
    were computed beforehand, then show the confident, NMS-filtered boxes

    The image is read from ``image_name + '.jpg'`` and the proposals from the
    ``'boxes'`` entry of ``image_name + '_boxes.mat'``.
    :param detector: Detector
    :param image_name: image name (path without extension)
    :return: None
    :raises IOError: if the image cannot be read
    """
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    # load demo data
    image_path = image_name + '.jpg'
    im = cv2.imread(image_path)
    if im is None:
        # a None result is how cv2.imread signals an unreadable file
        raise IOError(image_path + ' could not be read')
    im_array, im_scale = resize(im, config.TEST.SCALES[0], config.TRAIN.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)

    # multiply the loaded boxes by the resize factor and put a zero batch
    # index in front of every row (a single image is processed)
    roi_array = sio.loadmat(image_name + '_boxes.mat')['boxes']
    batch_index_array = np.zeros((roi_array.shape[0], 1))
    roi_array = np.hstack((batch_index_array, roi_array * im_scale))

    scores, boxes = detector.im_detect(im_array, roi_array)

    all_boxes = [[] for _ in CLASSES]
    # walk class positions directly rather than calling CLASSES.index()
    for cls_ind in range(len(CLASSES)):
        first_col = 4 * cls_ind  # each class owns 4 consecutive box columns
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = boxes[keep, first_col:first_col + 4]
        cls_scores = cls_scores[keep]
        # rows of (x1, y1, x2, y2, score)
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    # slot 0 is replaced by an empty list (presumably the background class)
    boxes_this_image = [[]] + all_boxes[1:]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
def pred_eval(detector, test_data, imdb, vis=False):
    """
    Run the detector over the whole test set, cache the detections on disk
    and hand them to the image database for evaluation.

    All thresholds in this example are set by hand.
    :param detector: Detector
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :return:
    """
    assert not test_data.shuffle

    thresh = 0.1
    # cap on the number of detections kept per image, over all classes
    max_per_image = 100

    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    for i, databatch in enumerate(test_data):
        if i % 10 == 0:
            print('testing {}/{}'.format(i, imdb.num_images))
        batch = databatch.data
        scores, boxes = detector.im_detect(batch['data'], batch['rois'])

        # the network ran on a rescaled image and rois; recover the factor
        # from the image heights so boxes can be mapped back to original size
        im = cv2.imread(imdb.image_path_from_index(imdb.image_set_index[i]))
        scale = float(batch['data'].shape[2]) / float(im.shape[0])
        im = image_processing.transform(im, config.PIXEL_MEANS)

        # class 0 is skipped everywhere below
        fg_classes = range(1, imdb.num_classes)
        for j in fg_classes:
            selected = np.where(scores[:, j] > thresh)[0]
            cls_dets = np.hstack((boxes[selected, j * 4:(j + 1) * 4] / scale,
                                  scores[selected, j][:, np.newaxis]))
            all_boxes[j][i] = cls_dets[nms(cls_dets, config.TEST.NMS), :]

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1] for j in fg_classes])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in fg_classes:
                    dets = all_boxes[j][i]
                    all_boxes[j][i] = dets[np.where(dets[:, -1] >= image_thresh)[0], :]

        if vis:
            boxes_this_image = [[]] + [all_boxes[j][i] for j in fg_classes]
            vis_all_detection(im, boxes_this_image,
                              imdb_classes=imdb.classes)

    cache_folder = os.path.join(imdb.cache_path, imdb.name)
    if not os.path.exists(cache_folder):
        os.mkdir(cache_folder)
    with open(os.path.join(cache_folder, 'detections.pkl'), 'wb') as f:
        cPickle.dump(all_boxes, f)

    imdb.evaluate_detections(all_boxes)
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Turn RPN outputs into a set of region proposals.

    Steps:
      1. place the A base anchors on every cell of the (H, W) score map and
         apply the predicted bbox deltas to them
      2. clip the predicted boxes to the image
      3. drop boxes whose height or width is below the minimum size
      4. sort all (proposal, score) pairs by score, highest first
      5. keep the top pre_nms_topN
      6. apply NMS
      7. keep the top post_nms_topN, padding by resampling kept indices when
         fewer survive
      8. write the rois (and optionally their scores) to the outputs

    in_data holds (class probabilities, bbox deltas, im_info). out_data[0]
    receives the rois with a leading batch-index column; out_data[1] receives
    the scores when self._output_score is set.
    """
    cfg = config[self.cfg_key]
    pre_nms_topN = cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg.RPN_POST_NMS_TOP_N
    nms_thresh = cfg.RPN_NMS_THRESH
    min_size = cfg.RPN_MIN_SIZE

    num_anchors = self._num_anchors
    stride = self._feat_stride

    # the first set of anchor channels are background probabilities;
    # keep the second part
    scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]
    height, width = scores.shape[-2:]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # one (x, y, x, y) offset per score-map cell
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                 np.arange(0, height) * stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # broadcast anchors (1, A, 4) against shifts (K, 1, 4) to get
    # (K, A, 4), then flatten to (K * A, 4)
    num_cells = shifts.shape[0]
    anchors = (self._anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # bbox deltas: (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4),
    # rows ordered by (h, w, a) so they line up with the anchors
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1), same row order
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (min_size is converted to the input image scale stored in im_info[2])
    keep = ProposalOperator._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4./5. sort by score from highest to lowest and take the top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6./7. apply nms and take the top post_nms_topN
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 8. output rois array; only a single input image is supported, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Turn RPN outputs into a set of region proposals.

    Steps:
      1. place the A base anchors on every cell of the score map and apply
         the predicted bbox deltas to them
      2. clip the predicted boxes to the image
      3. drop boxes whose height or width is below the minimum size
      4. sort all (proposal, score) pairs by score, highest first
      5. keep the top pre_nms_topN
      6. apply NMS
      7. keep the top post_nms_topN, padding by resampling kept indices when
         fewer survive
      8. write the rois (and optionally their scores) to the outputs

    in_data holds (class probabilities, bbox deltas, im_info). out_data[0]
    receives the rois with a leading batch-index column; out_data[1] receives
    the scores when self._output_score is set.

    Raises ValueError if the inputs contain NaN or if no proposal survives
    NMS while a fixed output size is requested.
    """
    pre_nms_topN = config[self.cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = config[self.cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = config[self.cfg_key].RPN_NMS_THRESH
    min_size = config[self.cfg_key].RPN_MIN_SIZE

    # the first set of anchors are background probabilities
    # keep the second part
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    if np.isnan(scores).any():
        raise ValueError("there is nan in input scores")

    bbox_deltas = in_data[1].asnumpy()
    if np.isnan(bbox_deltas).any():
        raise ValueError("there is nan in input bbox_deltas")
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox_deltas and shifted anchors
    height, width = scores.shape[-2:]
    if self.cfg_key == 'TRAIN':
        # in training the grid size is derived from the image size in
        # im_info instead of the score map
        height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)

    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        # same text as the two-item print statement this replaces
        print('resudial =  {} {}'.format(scores.shape[2] - height, scores.shape[3] - width))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    #
    # The scores must be cropped to the same (height, width) grid as the
    # deltas; otherwise they keep the full score-map size and their rows no
    # longer correspond to the proposals when the grid is smaller.
    # NOTE(review): assumes clip_pad treats the last two axes of scores the
    # way it does for bbox_deltas -- confirm against its definition
    scores = clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = ProposalOperator._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        if len(keep) == 0:
            logging.log(logging.ERROR, "currently len(keep) is zero")
            # nothing to resample from; npr.choice would raise a ValueError
            # here anyway, so fail with a message that names the cause
            raise ValueError("no proposals left after NMS; cannot pad to "
                             "{} rois".format(post_nms_topN))
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def pred_eval(detector, test_data, imdb, vis=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Detections are stored in original-image coordinates, written to
    ``detections.pkl`` in the imdb cache folder and passed to
    ``imdb.evaluate_detections``. Visualization never modifies the stored
    detections.
    :param detector: Detector
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :return:
    """
    assert not test_data.shuffle

    thresh = 0.05
    # limit detections to max_per_image over all classes
    max_per_image = 100

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    for i, databatch in enumerate(test_data):
        if i % 10 == 0:
            print('testing {}/{}'.format(i, imdb.num_images))
        if config.TEST.HAS_RPN:
            scores, boxes = detector.im_detect(databatch.data['data'], im_info=databatch.data['im_info'])
            # the resize factor is carried in the third entry of im_info
            scale = databatch.data['im_info'][0, 2]
        else:
            scores, boxes = detector.im_detect(databatch.data['data'], roi_array=databatch.data['rois'])
            # we used scaled image & roi to train, so it is necessary to transform them back
            # visualization should also be from the original size
            im_path = imdb.image_path_from_index(imdb.image_set_index[i])
            im = cv2.imread(im_path)
            im_height = im.shape[0]
            scale = float(databatch.data['data'].shape[2]) / float(im_height)

        # class 0 is skipped; dividing by scale maps boxes back to the
        # original image
        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4] / scale
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
            keep = nms(cls_dets, config.TEST.NMS)
            all_boxes[j][i] = cls_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            # visualize at the testing scale. Scale COPIES of the detections:
            # multiplying the stored arrays in place would corrupt the
            # original-scale results that are pickled and evaluated below.
            boxes_this_image = [[]]
            for j in range(1, imdb.num_classes):
                scaled = all_boxes[j][i].copy()
                scaled[:, :4] *= scale
                boxes_this_image.append(scaled)
            vis_all_detection(databatch.data['data'], boxes_this_image,
                              imdb_classes=imdb.classes)

    cache_folder = os.path.join(imdb.cache_path, imdb.name)
    if not os.path.exists(cache_folder):
        os.mkdir(cache_folder)
    det_file = os.path.join(cache_folder, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f)

    imdb.evaluate_detections(all_boxes)