def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor and decode boxes for the two halves of the rois.

    For every output the rois are split into two equal parts with
    ``np.split``. The first part is decoded with index 0 of the class/bbox
    outputs, the second part (the ``ref_*`` values) with index 1.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        mapping per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` attribute is iterated per entry
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale per entry; decoded boxes are divided by it
    :param cfg: config object; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``. The reference
        scores/boxes are computed but are not part of the returned tuple
        (kept that way so existing callers keep working).
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            # Fix: this branch used to bind `rois`, which was immediately
            # overwritten by the split below while `concat_rois` stayed
            # undefined (NameError, or stale rois from the previous entry).
            # NOTE(review): assumes precomputed rois hold both halves - confirm.
            concat_rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]

        im_shape = data_dict['data'].shape
        rois, ref_rois = np.split(concat_rois, 2)

        cls_prob = output['cls_prob_reshape_output'].asnumpy()
        bbox_out = output['bbox_pred_reshape_output'].asnumpy()
        scores, ref_scores = cls_prob[0], cls_prob[1]
        bbox_deltas, ref_bbox_deltas = bbox_out[0], bbox_out[1]

        # post processing: decode, clip to the network input size, undo scaling
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes /= scale

        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
        ref_pred_boxes /= scale

        pred_boxes_all.append(pred_boxes)
        scores_all.append(scores)
        ref_pred_boxes_all.append(ref_pred_boxes)
        ref_scores_all.append(ref_scores)

    return scores_all, pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and return per-entry scores and rescaled boxes.

    When ``cfg.TEST.LEARN_NMS`` is set, boxes and scores are taken directly
    from the ``learn_nms_sorted_bbox`` / ``nms_final_score_output`` outputs;
    otherwise boxes are decoded from rois and deltas and clipped. In both
    cases the boxes are divided by the entry's scale before being stored.

    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, entry)) for entry in data_batch.data]
    scores_all, pred_boxes_all = [], []

    for net_out, inputs, scale in zip(net_outputs, data_dict_all, scales):
        # rois come from the network unless they were supplied as input
        if cfg.TEST.HAS_RPN or cfg.network.ROIDispatch:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = inputs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = inputs['data'].shape

        if cfg.TEST.LEARN_NMS:
            boxes = net_out['learn_nms_sorted_bbox'].asnumpy()
            scores = net_out['nms_final_score_output'].asnumpy()
        else:
            scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
            deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]
            boxes = clip_boxes(bbox_pred(rois, deltas), im_shape[-2:])

        # inputs were rescaled, so map the boxes back by dividing by the scale
        boxes = boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)

    return scores_all, pred_boxes_all, data_dict_all
def detect(self, batch, scales):
    """Forward a batch and decode per-image boxes for every device output.

    :param batch: batch whose ``data`` is zipped with ``self.data_names``
    :param scales: per-device sequences of per-image scales
    :return: ``(scores, preds, data, im_ids)`` where ``scores``/``preds`` are
        flat lists with one entry per image and ``im_ids`` is the
        concatenation of every device's ``im_ids`` output cast to int
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    scores = []
    preds = []
    # last two dims of every data entry, grouped as (device, image, 2)
    im_shapes = np.array([im.shape[-2:] for im in data['data']]).reshape(-1, self.batch_size, 2)
    im_ids = np.array([], dtype=int)

    for dev_out, dev_scales, dev_shapes in zip(outputs, scales, im_shapes):
        dev_rois = dev_out[self.rpn_output_names['rois']].asnumpy()
        rois_per_image = dev_rois.shape[0] / self.batch_size
        dev_scores = dev_out[self.rcnn_output_names['cls']].asnumpy()
        dev_deltas = dev_out[self.rcnn_output_names['bbox']].asnumpy()
        dev_ids = dev_out[self.rcnn_output_names['im_ids']].asnumpy().astype(int)
        im_ids = np.hstack((im_ids, dev_ids))

        for idx in range(self.batch_size):
            # column 0 of the rois holds the image index inside this device batch
            rows = np.flatnonzero(dev_rois[:, 0] == idx)
            assert len(rows) == rois_per_image, 'The number of rois per GPU should be fixed!'

            # decode, clip to the image shape, then undo the image rescaling
            boxes = bbox_pred(dev_rois[rows, 1:], dev_deltas[idx])
            boxes = clip_boxes(boxes, dev_shapes[idx])
            boxes = boxes / dev_scales[idx]

            scores.append(dev_scores[idx])
            preds.append(boxes)

    return scores, preds, data, im_ids
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode rois with predicted deltas and write clipped proposals.

    Reads from ``in_data``: [0] rois (first column dropped), [1] class
    probabilities, [2] bbox deltas, [3] im_info (row 0 only; its first two
    values are handed to ``clip_boxes``).

    Writes to ``out_data[0]``: the clipped proposals as float32, prefixed
    with a column of zeros.

    :raises AssertionError: if ``self._cfg.CLASS_AGNOSTIC`` is falsy
    """
    rois = in_data[0].asnumpy()[:, 1:]
    cls_prob = in_data[1].asnumpy()

    assert self._cfg.CLASS_AGNOSTIC, 'Currently only support class agnostic'
    if self._cfg.CLASS_AGNOSTIC:
        # columns 4:8 of the delta output are used in the class-agnostic case
        bbox_deltas = in_data[2].asnumpy()[:, 4:8]
    else:
        # Only reachable when assertions are disabled (python -O).
        # Fix: `np.int` no longer exists in current NumPy; use builtin `int`.
        fg_cls_prob = cls_prob[:, 1:]
        fg_cls_idx = np.argmax(fg_cls_prob, axis=1).astype(int)
        row_idx = np.arange(fg_cls_idx.shape[0], dtype=int)
        all_deltas = in_data[2].asnumpy()
        # Gather the 4 delta columns starting at fg_cls_idx * 4 for each row.
        # NOTE(review): fg_cls_idx is relative to cls_prob[:, 1:], so this may
        # be off by one class w.r.t. the delta layout - confirm before use.
        col_idx = fg_cls_idx[:, np.newaxis] * 4 + np.arange(4)
        bbox_deltas = all_deltas[row_idx[:, np.newaxis], col_idx]

    im_info = in_data[3].asnumpy()[0, :]

    # post processing (no mean/std de-normalisation of the deltas is applied here)
    proposals = bbox_pred(rois, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
def get_image(roidb, config):
    """Load, optionally flip, resize and normalise every image of a roidb.

    For each record a scale is drawn at random from ``config.SCALES``, the
    image is resized and transformed, and a shallow copy of the record is
    extended with ``'boxes'`` (original boxes times the image scale, rounded
    and clipped) and ``'im_info'``
    (``[im_tensor.shape[2], im_tensor.shape[3], im_scale]``).

    :param roidb: sequence of records with ``'image'``, ``'flipped'`` and
        ``'boxes'`` entries
    :param config: config providing ``SCALES`` and ``network.IMAGE_STRIDE``,
        ``network.PIXEL_MEANS``, ``network.PIXEL_STDS``
    :return: ``(processed_ims, processed_roidb)``
    :raises AssertionError: if an image path does not exist
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # Fix: the message used '%s' together with str.format, so the path
        # was never interpolated into the assertion message.
        assert os.path.exists(roi_rec['image']), '{0} does not exist'.format(
            roi_rec['image'])
        im = cv2.imread(roi_rec['image'],
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]  # reverse the second axis
        new_rec = roi_rec.copy()

        scale_ind = random.randrange(len(config.SCALES))
        target_size, max_size = config.SCALES[scale_ind][0], config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS,
                              config.network.PIXEL_STDS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and return class scores with rescaled boxes.

    :param predictor: object whose ``predict`` yields one output mapping per
        entry of ``data_batch.data``
    :param scales: one scale per entry; decoded boxes are divided by it
    :param cfg: config object; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    predictions = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, entry)) for entry in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for net_out, inputs, scale in zip(predictions, data_dict_all, scales):
        # without an RPN the rois are part of the input data
        if not cfg.TEST.HAS_RPN:
            rois = inputs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        else:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        im_shape = inputs['data'].shape

        scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # decode, clip to the input size, then map back to the unscaled image
        boxes = clip_boxes(bbox_pred(rois, deltas), im_shape[-2:])
        boxes = boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)

    return scores_all, pred_boxes_all, data_dict_all
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records ('image', 'flipped', 'boxes')
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: list of img as in mxnet format, and the processed roidb;
        each processed record gains 'im_info' =
        [im_tensor.shape[2], im_tensor.shape[3], im_scale]
    :raises AssertionError: if an image path does not exist
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # Fix: '%s' combined with str.format never inserted the path.
        assert os.path.exists(roi_rec['image']), '{0} does not exist'.format(roi_rec['image'])
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # pick one (target_size, max_size) pair at random
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # boxes follow the image rescaling and are clipped to the tensor size
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch, print the output/batch lengths, and decode boxes.

    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # diagnostic output: number of predictor outputs vs. batch entries
    print('output length: {}'.format(len(output_all)))
    print('data batch length: {}'.format(len(data_batch.data)))

    data_dict_all = [dict(zip(data_names, entry)) for entry in data_batch.data]
    scores_all, pred_boxes_all = [], []

    for net_out, inputs, scale in zip(output_all, data_dict_all, scales):
        rois_source = net_out['rois_output'].asnumpy() if cfg.TEST.HAS_RPN \
            else inputs['rois'].asnumpy().reshape((-1, 5))
        rois = rois_source[:, 1:]
        im_shape = inputs['data'].shape

        scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # decode and clip, then divide by the scale to undo the input resizing
        decoded = bbox_pred(rois, deltas)
        clipped = clip_boxes(decoded, im_shape[-2:])
        rescaled = clipped / scale

        scores_all.append(scores)
        pred_boxes_all.append(rescaled)

    return scores_all, pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and return scores, rescaled boxes and a feature map.

    :param predictor: object whose ``predict`` yields one output mapping per
        entry of ``data_batch.data``
    :param scales: one scale per entry; decoded boxes are divided by it
    :param cfg: config object; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all, feat)`` where
        ``feat`` is the first output's ``'feat_conv_3x3_relu_output'`` entry
        (returned as stored, not converted) or ``None`` when absent
    """
    output_all = predictor.predict(data_batch)
    # Fix: iterate directly instead of xrange(len(...)) (xrange is Python 2 only).
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # inputs were rescaled, so transform the boxes back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    # Fix: dict.has_key() does not exist on Python 3; `in` works on both.
    if 'feat_conv_3x3_relu_output' in output_all[0]:
        feat = output_all[0]['feat_conv_3x3_relu_output']
    else:
        feat = None
    return scores_all, pred_boxes_all, data_dict_all, feat
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on multi-image batches and decode boxes image by image.

    Each output's rois carry the image index in column 0; rows are grouped
    by that index, decoded against the matching ``im_info`` row and divided
    by the matching entry of ``scale``.

    :param scales: per-entry sequences indexed by image index
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, data_dict_all)`` with one
        score/box entry per image
    """
    output_all = predictor.predict(data_batch)
    # Fix: xrange is Python 2 only; iterate the entries directly.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()

        for im_idx in range(im_infos.shape[0]):
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # Fix: np.int was removed from NumPy; the builtin int is equivalent.
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # inputs were rescaled, so transform the boxes back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Run batched detection and split the results per image.

    Column 0 of each output's rois is compared against the image index to
    select that image's rows; the selected rois/deltas are decoded, clipped
    to the first two values of the image's ``im_info`` row, and divided by
    ``scale[im_idx]``.

    :param scales: per-entry sequences indexed by image index
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # Fix: avoid the Python-2-only xrange by iterating the entries directly.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()

        for im_idx in range(im_infos.shape[0]):
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # Fix: the np.int alias no longer exists in current NumPy releases.
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # inputs were rescaled, so transform the boxes back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records ('image', 'flipped', 'boxes')
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: list of img as in mxnet format, and the processed roidb;
        each processed record gains 'im_info' =
        [im_tensor.shape[2], im_tensor.shape[3], im_scale]
    :raises AssertionError: if an image path does not exist
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        image_path = roi_rec['image']
        # Fix: the old message mixed '%s' with str.format, so the offending
        # path never showed up in the AssertionError.
        assert os.path.exists(image_path), '{0} does not exist'.format(image_path)
        im = cv2.imread(image_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # pick one (target_size, max_size) pair at random
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # boxes follow the image rescaling and are clipped to the tensor size
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def detect(self, batch, scales):
    """Run the network on a batch and decode boxes image by image.

    :param batch: batch whose ``data`` is zipped with ``self.data_names``
    :param scales: per-device sequences of per-image scales
    :return: ``(scores, preds, data, im_ids)``; ``scores`` and ``preds`` hold
        one entry per image, ``im_ids`` concatenates each device's
        ``im_ids`` output as ints
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    rois_key = self.rpn_output_names['rois']
    scores, preds = [], []
    shapes_2d = [im.shape[-2:] for im in data['data']]
    im_shapes = np.array(shapes_2d).reshape(-1, self.batch_size, 2)
    im_ids = np.array([], dtype=int)

    for gpu_out, gpu_scales, gpu_shapes in zip(outputs, scales, im_shapes):
        gpu_rois = gpu_out[rois_key].asnumpy()
        expected_rois = gpu_rois.shape[0] / self.batch_size
        gpu_scores = gpu_out[self.rcnn_output_names['cls']].asnumpy()
        gpu_deltas = gpu_out[self.rcnn_output_names['bbox']].asnumpy()
        gpu_ids = gpu_out[self.rcnn_output_names['im_ids']].asnumpy().astype(int)
        im_ids = np.hstack((im_ids, gpu_ids))

        for idx in range(self.batch_size):
            # select the rois whose first column equals this image index
            selected = np.where(gpu_rois[:, 0] == idx)[0]
            assert len(selected) == expected_rois, 'The number of rois per GPU should be fixed!'

            # apply deltas, clip to the image shape and undo the rescaling
            decoded = bbox_pred(gpu_rois[selected, 1:], gpu_deltas[idx])
            clipped = clip_boxes(decoded, gpu_shapes[idx])

            scores.append(gpu_scores[idx])
            preds.append(clipped / gpu_scales[idx])

    return scores, preds, data, im_ids
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode rois with bbox deltas and emit the clipped proposals.

    :param is_train: unused here
    :param req: write request; ``req[0]`` is passed to ``self.assign``
    :param in_data:
        in_data[0] rois; the first column is dropped before decoding
            (per the original notes: (128, 5), first column all zeros)
        in_data[1] bbox_deltas (per the original notes: (128, 8))
        in_data[2] im_info; row 0 is used, ``im_info[:2]`` is the clip
            shape and ``im_info[2]`` is printed as the scale when DEBUG
    :param out_data: ``out_data[0]`` receives the proposals prefixed with a
        column of zeros
    :param aux: unused here
    :return: None
    """
    rois = in_data[0].asnumpy()[:, 1:]  # drop the leading column
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    # Fix: Python 2 print statements replaced by the call form, which
    # behaves the same on Python 2 for a single argument and parses on 3.
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Convert rois into proposals via bbox transformations
    proposals = bbox_pred(rois, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Keep columns 4 onward (original note: first 4 cols are background,
    # last 4 cols are object).
    proposals = proposals[:, 4:]

    zeros = np.zeros((proposals.shape[0], 1), dtype=proposals.dtype)
    proposals = np.hstack((zeros, proposals))

    self.assign(out_data[0], req[0], proposals)

    if DEBUG:
        print(proposals)
def det(mod, fn):
    """Run one image through ``mod``, apply NMS and show the class-1 boxes.

    Reads the image at ``fn``, pads and resizes it, forwards it through the
    module, decodes and thresholds the detections (score > 0.1, NMS 0.3),
    draws the surviving class-1 boxes and blocks on ``cv2.waitKey()``.

    :param mod: module exposing ``forward``, ``output_names`` and
        ``get_outputs``
    :param fn: path of the image to read with ``cv2.imread``
    :return: None (results are only displayed)
    """
    raw_img = cv2.imread(fn)
    # NOTE(review): cv2.imread returns None for unreadable files; that case
    # is not handled and would fail on the .shape access below.
    if raw_img.shape[0] < raw_img.shape[1]:
        # pad the bottom with (width - height) rows when the image is wider than tall
        raw_img = cv2.copyMakeBorder(raw_img,0 ,raw_img.shape[1]-raw_img.shape[0], 0, 0, cv2.BORDER_CONSTANT)
    im_shape = [IMG_H,IMG_W] # reverse order
    # NOTE(review): cv2.resize takes dsize as (width, height); confirm that
    # passing (IMG_H, IMG_W) is intended.
    img = cv2.resize(raw_img, (IMG_H,IMG_W))
    raw_h = img.shape[0]  # not used below
    raw_w = img.shape[1]  # not used below
    im_tensor = image.transform(img, [124,117,104], 0.0167)
    # third im_info value is a hard-coded constant - presumably the image
    # scale; TODO confirm
    im_info = np.array([[ IMG_H, IMG_W, 4.18300658e-01]])
    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])
    start = time.time()
    mod.forward(batch)
    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    # wait on the first output before reading the clock
    mod.get_outputs()[0].wait_to_read()
    print ("time", time.time()-start, "secs.")
    output = dict(zip(output_names ,output_tensor))
    rois = output['rois_output'].asnumpy()[:, 1:]  # drop the first column
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
    num_classes = 2
    all_cls_dets = [[] for _ in range(num_classes)]
    # index 0 is skipped; with num_classes == 2 only class 1 is processed
    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        all_cls_dets[j] = cls_dets
    for idx_class in range(1, num_classes):
        nms = py_nms_wrapper(0.3)
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]
    # draw every kept class-1 detection as a 1-px rectangle in colour (0, 0, 255)
    for i in range(all_cls_dets[1].shape[0]):
        cv2.rectangle(img, (int(all_cls_dets[1][i][0]), int(all_cls_dets[1][i][1])) ,(int(all_cls_dets[1][i][2]), int(all_cls_dets[1][i][3])),(0,0,255),1)
    cv2.imshow("w", img)
    cv2.waitKey()
def get_image_m(scenedb, config):
    """
    preprocess multi-view images and return processed roidb
    :param scenedb: a list of scene records with 'image_views' (paths for
        the 'top', 'left' and 'right' views), 'boxes_views', 'flipped' and
        optionally 'aug_gen'
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: list of img as in mxnet format, the processed roidb (each
        record gains 'boxes' and 'im_info'), and the list of every image
        path that was read
    :raises AssertionError: if a view image path does not exist
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    views_list = ['top', 'left', 'right']
    processed_ims = []
    processed_roidb = []
    processed_img_names = []
    for roi_rec in scenedb:
        im = None
        for view in views_list:
            img_fname = roi_rec['image_views'][view]
            assert os.path.exists(img_fname), '{0} does not exist'.format(
                img_fname)
            im_view = cv2.imread(
                img_fname, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            processed_img_names.append(img_fname)
            if view in ['left', 'right']:
                # transpose + flip = rotate counterclockwise by 90 degrees
                im_view = im_view.transpose(1, 0, 2)
                im_view = im_view[::-1, :, :]  # flip about the horizontal axis
            if roi_rec['flipped']:
                im_view = im_view[:, ::-1, :]
            # stack the views along the last axis
            im = np.concatenate((im, im_view), axis=2) if im is not None else im_view
        new_rec = roi_rec.copy()

        # Externally controlled augmentation hook.
        # Fix: dict.has_key() is Python 2 only; `in` works on both versions.
        # NOTE(review): new_rec is a shallow copy, so the in-place write
        # below also changes the array stored in the caller's scenedb.
        if 'aug_gen' in new_rec and (new_rec['aug_gen'] is not None):
            im, boxes_ = new_rec['aug_gen'](im, new_rec['boxes_views']['top'])
            new_rec['boxes_views']['top'][:, :4] = boxes_

        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform3(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes_views']['top'].copy() * im_scale),
            im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb, processed_img_names
def coco_results_one_category_kernel(data_pack):
    """Build the COCO-style result dicts of one category for all images.

    :param data_pack: dict with keys ``'cat_id'``, ``'ann_type'``
        (``'bbox'``, ``'segm'`` or ``'keypoints'``), ``'binary_thresh'``,
        ``'all_im_info'`` (per-image dicts with ``'index'``, plus
        ``'width'``/``'height'`` for ``'segm'``), ``'boxes'`` (per-image
        arrays whose last column is the score) and, depending on
        ``ann_type``, ``'masks'`` or ``'keypoints'``
    :return: list of result dicts; images with no detections are skipped.
        An unknown ``ann_type`` prints a message and yields an empty list.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']

    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    elif ann_type == 'keypoints':
        keypoints = data_pack['keypoints']
    else:
        # Fix: previously execution continued and crashed later on the
        # unbound name `result`; report and return no results instead.
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # Fix: np.float was removed from NumPy; builtin float is equivalent.
        dets = boxes[im_ind].astype(float)
        if dets.size == 0:
            continue
        scores = dets[:, -1]

        if ann_type == 'bbox':
            # convert (x1, y1, x2, y2) corners to (x, y, w, h), inclusive pixels
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'bbox': [xs[k], ys[k], ws[k], hs[k]],
                       'score': scores[k]} for k in range(dets.shape[0])]
        elif ann_type == 'segm':
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height, width, binary_thresh)
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'segmentation': mask_encode[k],
                       'score': scores[k]} for k in range(len(mask_encode))]
        else:  # 'keypoints'
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'keypoints': keypoints[im_ind][k, :].astype(np.uint16).tolist(),
                       'score': scores[k]} for k in range(dets.shape[0])]
        cat_results.extend(result)
    return cat_results
def double_im_detect(predictor, data_batch, data_names, cfg):
    """Predict on image pairs and decode boxes for both image and reference.

    The scale is read from ``im_info[0, 2]`` of each entry. Index 0 of the
    class/bbox outputs is paired with ``rois_output``; index 1 is paired
    with ``ref_rois_output``.

    :param predictor: object whose ``predict`` yields one output mapping per
        entry of ``data_batch.data``
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, ref_scores_all,
        ref_pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # Fix: xrange is Python 2 only; iterate the entries directly.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict in zip(output_all, data_dict_all):
        # Fix: convert to numpy before indexing so the boxes are divided by
        # a plain scalar rather than by a framework array.
        scale = data_dict['im_info'].asnumpy()[0, 2]
        rois = output['rois_output'].asnumpy()[:, 1:]
        ref_rois = output['ref_rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape
        # Fix: `.shape` was missing, so the reference boxes were clipped
        # against a slice of the image data instead of its dimensions.
        ref_im_shape = data_dict['ref_data'].shape

        # save output
        cls_prob = output['cls_prob_output'].asnumpy()
        bbox_out = output['bbox_pred_output'].asnumpy()
        scores, ref_scores = cls_prob[0], cls_prob[1]
        bbox_deltas, ref_bbox_deltas = bbox_out[0], bbox_out[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        ref_pred_boxes = clip_boxes(ref_pred_boxes, ref_im_shape[-2:])

        # inputs were rescaled, so transform the boxes back
        pred_boxes = pred_boxes / scale
        ref_pred_boxes = ref_pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        ref_scores_all.append(ref_scores)
        ref_pred_boxes_all.append(ref_pred_boxes)

    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and keep, per roi, only size-matched predictions.

    The score/delta outputs are indexed as three consecutive blocks of
    ``rois.shape[0]`` rows. A roi is kept for block 1 when
    ``sqrt(w * h) <= 90``, for block 2 when it lies in ``[30, 160]`` and for
    block 3 when it is ``>= 90``; a roi can therefore appear more than once.

    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, entry)) for entry in data_batch.data]
    scores_all, pred_boxes_all = [], []

    for net_out, inputs, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = inputs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = inputs['data'].shape

        scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # side length of the square with the same area as each roi
        widths = rois[:, 2] - rois[:, 0] + 1
        heights = rois[:, 3] - rois[:, 1] + 1
        side = np.sqrt(widths * heights)
        num_rois = rois.shape[0]

        small = np.flatnonzero(side <= 90)
        medium = np.flatnonzero((side >= 30) & (side <= 160))
        large = np.flatnonzero(side >= 90)

        # roi rows to keep, and the matching rows inside the stacked outputs
        roi_rows = np.hstack((small, medium, large))
        out_rows = np.hstack((small, medium + num_rois, large + num_rois * 2))

        rois = rois[roi_rows, :]
        scores = scores[out_rows, :]
        bbox_deltas = bbox_deltas[out_rows, :]
        # (an alternative that averaged the three blocks per roi was left
        # disabled by the original author and is not performed here)

        # decode, clip, then undo the input rescaling
        boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])
        boxes = boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)

    return scores_all, pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on multi-image batches; optionally decode keypoints as well.

    Each image of an entry is processed until an ``im_info`` row with a
    scale below ``1e-6`` is met (processing of that entry then stops).

    :param scales: unused here (scales are read from ``im_info[:, 2]``);
        kept for interface compatibility
    :param cfg: config; reads ``TEST.HAS_RPN``, ``TEST.BATCH_IMAGES`` and
        ``network.PREDICT_KEYPOINTS``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``, or
        ``(scores_all, pred_boxes_all, pred_kps_all, data_dict_all)`` when
        ``cfg.network.PREDICT_KEYPOINTS`` is set
    :raises AssertionError: if keypoints are requested with
        ``cfg.TEST.BATCH_IMAGES != 1``
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    pred_kps_all = []
    for output, data_dict in zip(output_all, data_dict_all):
        if cfg.TEST.HAS_RPN:
            batch_rois = output['rois_output'].asnumpy()
        else:
            # Fix: this branch used to bind only `rois`, leaving `batch_rois`
            # undefined for the per-image selection below. Keep all five
            # columns so column 0 can be matched against the image index.
            # NOTE(review): assumes column 0 of precomputed rois is the
            # image index - confirm against the data loader.
            batch_rois = data_dict['rois'].asnumpy().reshape((-1, 5))
        im_shape = data_dict['data'].shape

        # save output
        batch_scores = output['cls_prob_reshape_output'].asnumpy()
        batch_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()
        batch_im_info = data_dict['im_info'].asnumpy()

        for i in range(cfg.TEST.BATCH_IMAGES):
            scale = batch_im_info[i, 2]
            if scale < 1e-6:
                # presumably a padding slot of the batch - TODO confirm
                break
            indices = np.where(batch_rois[:, 0] == i)[0]
            rois = batch_rois[indices, 1:]
            scores = batch_scores[i]
            bbox_deltas = batch_bbox_deltas[i]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # inputs were rescaled, so transform the boxes back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)

            # Keypoints are decoded with this image's rois and scale, so a
            # skipped (padding) image never contributes stale values.
            if cfg.network.PREDICT_KEYPOINTS:
                assert cfg.TEST.BATCH_IMAGES == 1, "only support batch_size=1"
                # original notes: deltas are [N, 2*K, G, G], probs [N*K, G*G]
                kps_deltas = output['kps_pos_pred_reshape_output'].asnumpy()
                kps_probs = output['kps_prob_output'].asnumpy()
                pred_kps = predict_keypoints(rois, kps_probs, kps_deltas, scale=scale)
                pred_kps_all.append(pred_kps)

    if cfg.network.PREDICT_KEYPOINTS:
        return scores_all, pred_boxes_all, pred_kps_all, data_dict_all
    return scores_all, pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a single-entry batch; optionally dump a debug image.

    The bbox deltas are multiplied by ``cfg.TRAIN.BBOX_STDS`` tiled
    ``cfg.dataset.NUM_CLASSES`` times before decoding. When the module-level
    ``DEBUG`` flag is truthy, the top-scoring non-background boxes are drawn
    on the de-normalised input image and written under ``./det_images/``.

    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # a single data dict is built from the whole batch.data, so the zip
    # below yields at most one iteration
    data_dict_all = [dict(zip(data_names, data_batch.data))]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all,scales):
        rois = output['rois_output'].asnumpy()[:,1:]
        im_shape = data_dict['data'].shape
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        # scale the deltas by the configured stds, repeated once per class
        stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS),cfg.dataset.NUM_CLASSES)
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] *stds
        pred_boxes = bbox_pred(rois,bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes = pred_boxes / scale
        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        if DEBUG:
            print("im shape: ",im_shape)
            print(pred_boxes.shape)
            print(scores.shape)
            # best class per box and its score
            max_scores = scores.argmax(axis = 1)
            max_scores_val = scores[np.arange(pred_boxes.shape[0]),max_scores]
            # keep boxes whose best class index is > 0
            keep = np.where(max_scores>0)[0]
            max_scores = max_scores[keep]
            print(pred_boxes)
            # multiply by scale again to draw in the network-input coordinates
            bboxes = pred_boxes.copy()[keep]*scale
            max_scores_val = max_scores_val[keep]
            # rebuild a uint8 image from the first element of the input tensor
            # NOTE(review): assumes the input was normalised with these
            # per-channel std/mean constants on a 0-1 range - confirm.
            img = data_dict['data'].asnumpy().transpose((0,2,3,1))[0]
            img = (img * np.array([[[0.229, 0.224, 0.225]]]) +np.array([[[0.485, 0.456, 0.406]]])) * 255
            img = np.clip(img,0,255)
            img = img.astype(np.uint8)
            print(type(img))
            image = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
            print(img.shape)
            print(max_scores_val)
            # indices of the (up to) 30 highest-scoring kept boxes
            maxid = max_scores_val.argsort()[-30:]
            for i, boxxes in enumerate(bboxes):
                if not i in maxid:
                    continue
                # pick the 4 coordinates that belong to the best class
                box = boxxes[max_scores[i]*4:(max_scores[i]+1)*4]
                box = box.astype(np.int64)
                print(box)
                cv2.rectangle(image,tuple(box[:2]),tuple(box[2:]),(255,0,0),1)
                # `names` is looked up at module level (class-name table)
                cv2.putText(image,names[max_scores[i]]+" "+str(max_scores_val[i]),tuple(box[:2]),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),1)
            # NOTE(review): the file name uses a random normal sample and the
            # format spec '{:3f}' (width 3), possibly meant to be '{:.3f}'.
            cv2.imwrite("./det_images/det_img_{:3f}.png".format(np.random.randn()),image)
    return scores_all, pred_boxes_all, data_dict_all
def inference(predictor, data_batch, data_names, num_classes, BINARY_THRESH = 0.4, CONF_THRESH=0.7, gpu_id=0):
    """Detect boxes and masks for the first batch entry and filter by score.

    Depending on ``config.TEST.USE_MASK_MERGE`` (module-level ``config``),
    results are produced either by per-class NMS or by
    ``gpu_mask_voting``. In both cases detections whose last column (score)
    is not above ``CONF_THRESH`` are dropped at the end.

    :param num_classes: number of classes including index 0, which is skipped
    :param BINARY_THRESH: forwarded to ``gpu_mask_voting``
    :param CONF_THRESH: score threshold
    :param gpu_id: forwarded to ``gpu_mask_voting``
    :return: ``(dets, masks)``, two lists with one entry per class
        ``1 .. num_classes - 1``
    """
    # Fix: xrange is Python 2 only; iterate the batch entries directly.
    scales = [entry[1].asnumpy()[0, 2] for entry in data_batch.data]
    im_shapes = [entry[0].shape[2:4] for entry in data_batch.data]

    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    if not config.TEST.USE_MASK_MERGE:
        all_boxes = [[] for _ in range(num_classes)]
        all_masks = [[] for _ in range(num_classes)]
        nms = py_nms_wrapper(config.TEST.NMS)
        for j in range(1, num_classes):
            indexes = np.where(scores[0][:, j] > CONF_THRESH)[0]
            cls_scores = scores[0][indexes, j, np.newaxis]
            cls_masks = masks[0][indexes, 1, :, :]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[0][indexes, :]
            else:
                cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j] = cls_dets[keep, :]
            all_masks[j] = cls_masks[keep, :]
        dets = [all_boxes[j] for j in range(1, num_classes)]
        masks = [all_masks[j] for j in range(1, num_classes)]
    else:
        masks = masks[0][:, 1:, :, :]
        # network-input size divided by the scale, rounded to int
        im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
        im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
        boxes = clip_boxes(boxes[0], (im_height, im_width))
        result_masks, result_dets = gpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                    100, im_width, im_height,
                                                    config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                    BINARY_THRESH, gpu_id)
        dets = [result_dets[j] for j in range(1, num_classes)]
        masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]

    # final confidence filter applied to whichever branch produced the results
    for i in range(len(dets)):
        keep = np.where(dets[i][:, -1] > CONF_THRESH)
        dets[i] = dets[i][keep]
        masks[i] = masks[i][keep]

    return dets, masks
def im_detect(predictor, data_batch, data_names, scales, cfg, aggr_feats=False):
    """Predict on a batch and return zipped (scores, boxes, data) triples.

    Outputs that expose ``'blockgrad0_output'`` are first re-keyed: the
    entries ``blockgrad0 .. blockgrad4`` are copied to ``'_'``,
    ``'rois_output'``, ``'cls_prob_reshape_output'``,
    ``'bbox_pred_reshape_output'`` and
    ``'_plus{K}_output'`` with ``K = cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1``.

    :param aggr_feats: when true, also collect every output's
        ``'_plus{K}_output'`` entry
    :return: a list of ``(scores, pred_boxes, data_dict)`` tuples, or that
        list together with the collected features when ``aggr_feats`` is set
    """
    output_all = predictor.predict(data_batch)
    # Fix: xrange is Python 2 only; iterate the entries directly.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    aggr_feats_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if 'blockgrad0_output' in output:
            renamed_keys = [
                '_', 'rois_output', 'cls_prob_reshape_output',
                'bbox_pred_reshape_output',
                '_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1),
            ]
            for i, key in enumerate(renamed_keys):
                output[key] = output['blockgrad{}_output'.format(i)]

        if aggr_feats:
            aggr_feats_all.append(
                output['_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1)])

        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # inputs were rescaled, so transform the boxes back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    # Fix: on Python 3 zip() is a one-shot iterator; materialise it so the
    # result can be indexed and iterated repeatedly as on Python 2.
    results = list(zip(scores_all, pred_boxes_all, data_dict_all))
    if aggr_feats:
        return results, aggr_feats_all
    return results
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records ('image', 'flipped', 'boxes' and
        optionally 'aug_gen', a callable ``(im, boxes) -> (im, boxes)``)
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: list of img as in mxnet format, the processed roidb (each
        record gains 'im_info' =
        [im_tensor.shape[2], im_tensor.shape[3], im_scale]), and the list of
        image paths in processing order
    :raises AssertionError: if an image path does not exist
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    processed_img_names = []
    for roi_rec in roidb:
        img_name = roi_rec['image']
        assert os.path.exists(img_name), '{0} does not exist'.format(img_name)
        im = cv2.imread(img_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # Externally controlled augmentation hook.
        # Fix: dict.has_key() is Python 2 only; `in` works on both versions.
        # NOTE(review): new_rec is a shallow copy, so the in-place write
        # below also updates the 'boxes' array stored in the caller's roidb.
        if 'aug_gen' in new_rec and (new_rec['aug_gen'] is not None):
            im, boxes_ = new_rec['aug_gen'](im, new_rec['boxes'])
            new_rec['boxes'][:, :4] = boxes_

        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        processed_img_names.append(img_name)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb, processed_img_names
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor on a batch and decode the detections of each image.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` attribute holds, per image, the
        inputs matching ``data_names`` positionally
    :param data_names: names paired with each image's inputs
    :param scales: per-image factor the decoded boxes are divided by
    :param cfg: unused here; kept for signature parity with the other
        ``im_detect`` variants
    :return: (scores_all, pred_boxes_all, data_dict_all), three per-image lists
    """
    output_all = predictor.predict(data_batch)
    # One name -> array dict per image. ``dict()`` accepts a single iterable of
    # pairs, so the names must be zipped with each image's inputs.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        # drop the leading column of each roi row
        rois = output['rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape

        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        # boxes were predicted on the scaled image; map them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def detect(self, batch, scales):
    """Forward a batch and decode the boxes of every image on every device.

    :param batch: batch whose ``data`` is paired with ``self.data_names``
    :param scales: per-device sequences of per-image scale factors
    :return: (scores, preds, data, im_ids, maps, chip_ids); ``maps`` stays
        empty unless the 'scale_map' output is present in the first device's
        outputs
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    scores, preds, maps = [], [], []
    im_ids = np.array([], dtype=int)
    chip_ids = np.array([], dtype=int)
    has_focus_maps = self.rcnn_output_names['scale_map'] in outputs[0]

    for gpu_out, gpu_scales in zip(outputs, scales):
        gpu_rois = gpu_out[self.rpn_output_names['rois']].asnumpy()
        # every image on a device is expected to own the same number of rois
        nper_gpu = gpu_rois.shape[0] / self.batch_size

        if has_focus_maps:
            scale_prob = gpu_out[self.rcnn_output_names['scale_map']].asnumpy()
        gpu_scores = gpu_out[self.rcnn_output_names['cls']].asnumpy()
        gpu_deltas = gpu_out[self.rcnn_output_names['bbox']].asnumpy()
        gpu_infos = gpu_out[self.rcnn_output_names['im_info']].asnumpy()
        gpu_shapes = gpu_infos[:, :2]

        device_im_ids = gpu_out[self.rcnn_output_names['im_ids']].asnumpy()
        im_ids = np.hstack((im_ids, device_im_ids.astype(int)))
        device_chip_ids = gpu_out[self.rcnn_output_names['chip_ids']].asnumpy()
        chip_ids = np.hstack((chip_ids, device_chip_ids.astype(int)))

        for idx in range(self.batch_size):
            # rows whose first column equals this image's index in the batch
            cids = np.where(gpu_rois[:, 0] == idx)[0]
            assert len(cids) == nper_gpu, \
                'The number of rois per GPU should be fixed!'

            # Apply deltas, clip to the image, then undo the input scaling
            cboxes = bbox_pred(gpu_rois[cids, 1:], gpu_deltas[idx])
            cboxes = clip_boxes(cboxes, gpu_shapes[idx])
            cboxes = cboxes / gpu_scales[idx]

            scores.append(gpu_scores[idx])
            preds.append(cboxes)
            if has_focus_maps:
                maps.append(scale_prob[idx])
    return scores, preds, data, im_ids, maps, chip_ids
def get_image(roidb, target_size, max_size, stride=0):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records; each needs 'image', 'flipped' and
        'boxes', and may carry 'keypoints'
    :param target_size: forwarded to ``resize``
    :param max_size: forwarded to ``resize``
    :param stride: forwarded to ``resize``
    :return: (list of resized images,
              list of record copies with scaled 'boxes' / 'keypoints' and a
              new 'im_info' = [im.shape[0], im.shape[1], im_scale])
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # '{}' placeholder: the former '%s' was never substituted by .format()
        assert os.path.exists(roi_rec['image']), '{} does not exist'.format(
            roi_rec['image'])
        # 128 is the numeric value of cv2.IMREAD_IGNORE_ORIENTATION
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | 128)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        im, im_scale = resize(im, target_size, max_size, stride=stride)
        processed_ims.append(im)

        im_info = [im.shape[0], im.shape[1], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info

        if 'keypoints' in roi_rec:
            # Scale every 1st and 2nd value of each triplet; the 3rd value of
            # each triplet is copied unchanged.
            keypoints = roi_rec['keypoints'].copy()
            keypoints[:, 0::3] = np.round(
                roi_rec['keypoints'][:, 0::3] * im_scale)
            keypoints[:, 1::3] = np.round(
                roi_rec['keypoints'][:, 1::3] * im_scale)
            new_rec['keypoints'] = keypoints

        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def im_detect_feats_stats(predictor, data_batch, data_names, scales, cfg,
                          stat_type, scores_field='cls_prob_reshape'):
    """Decode detections and additionally collect roi embeddings and statistics.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` attribute holds, per image, the
        inputs matching ``data_names`` positionally
    :param data_names: names paired with each image's inputs
    :param scales: per-image factor the decoded boxes are divided by
    :param cfg: config; ``TEST.HAS_RPN`` selects where rois come from
    :param stat_type: 'ratio_val' collects ``fc_val_reg_2_output``,
        'feat_pred' collects ``fc_score_hist_3_output``; any other value
        collects nothing
    :param scores_field: output name (without the '_output' suffix) the class
        scores are read from
    :return: (scores_all, pred_boxes_all, data_dict_all, rois_feats_all,
        stats_all)
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all, pred_boxes_all = [], []
    rois_feats_all, stats_all = [], []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            roi_rows = output['rois_output'].asnumpy()
        else:
            roi_rows = data_dict['rois'].asnumpy().reshape((-1, 5))
        # drop the leading column of each roi row
        rois = roi_rows[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output[scores_field + '_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        # shape: [#rois, Embed_dim]
        rois_feats_all.append(output['psp_final_embed_output'].asnumpy())

        if stat_type == 'ratio_val':
            stats_all.append(output['fc_val_reg_2_output'].asnumpy())
        if stat_type == 'feat_pred':
            stats_all.append(output['fc_score_hist_3_output'].asnumpy())

        # post processing
        clipped = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])
        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = clipped / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all, rois_feats_all, stats_all
def forward(self, is_train, req, in_data, out_data, aux):
    """Refine the input rois with their predicted deltas and write them to output 0.

    Inputs, in order: rois (first column is passed through untouched), bbox
    deltas, class probabilities, im_info.
    """
    bottom_rois = in_data[0].asnumpy()
    bbox_delta = in_data[1].asnumpy()
    cls_prob = in_data[2].asnumpy()
    im_info = in_data[3].asnumpy()
    num_rois = bottom_rois.shape[0]

    # Choose, per roi, the class whose four delta columns are used: class 1
    # for every roi when class-agnostic, otherwise the best-scoring class
    # other than class 0.
    if self._bbox_class_agnostic:
        picked_cls = np.ones((num_rois))
    else:
        picked_cls = np.argmax(cls_prob[:, 1:], axis=1) + 1
    first_col = picked_cls[:, np.newaxis] * 4
    delta_cols = np.hstack((first_col, first_col + 1, first_col + 2,
                            first_col + 3))

    # Gather the four selected columns of every row
    row_idx = np.arange(num_rois, dtype=np.intp)[:, np.newaxis]
    bbox_delta = bbox_delta[row_idx, delta_cols.astype(np.intp)]

    # De-normalise each delta column: delta[i] * std[i] + mean[i]
    means = np.array(self._bbox_means)
    stds = np.array(self._bbox_stds)
    restored = [
        (bbox_delta[:, k] * stds[k] + means[k])[:, np.newaxis]
        for k in range(4)
    ]
    bbox_delta = np.hstack((restored[0], restored[1], restored[2],
                            restored[3]))

    # Apply the deltas to the roi coordinates, optionally clipping to the image
    proposal = bbox_pred(bottom_rois[:, 1:], bbox_delta)
    if self._b_clip_boxes:
        proposal = clip_boxes(proposal, im_info[0, :2])

    # Reuse the roi array so its first column is carried over unchanged
    bottom_rois[:, 1:] = proposal
    self.assign(out_data[0], req[0], bottom_rois)
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Decode detections and also return the rois ranked by their roi score.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` attribute holds, per image, the
        inputs matching ``data_names`` positionally
    :param data_names: names paired with each image's inputs
    :param scales: per-image factor boxes and rois are divided by
    :param cfg: config; ``TEST.HAS_RPN`` selects where rois come from
    :return: (scores_all, pred_boxes_all, roi_score_all, rois_all,
        roi_feat_all, data_dict_all). ``scores`` and ``pred_boxes`` are in the
        network's roi order; ``roi_score``, ``rois`` and ``roi_feat`` are
        sorted by descending roi score.
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    roi_score_all = []
    rois_all = []
    roi_feat_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            roi_rows = output['rois_output'].asnumpy()
            # the leading column must be 0 for every roi
            assert np.all(roi_rows[:, 0] == 0.)
            rois = roi_rows[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing -- done BEFORE the rois are re-ordered: the deltas
        # and class scores are in the network's roi order, so decoding against
        # the sorted rois would pair each delta with the wrong box.
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        # rank rois (with their score and feature) by descending roi score
        roi_score = output['rois_score'].asnumpy()
        indice = roi_score.flatten().argsort()[::-1]
        roi_score = roi_score[indice]
        roi_feat = output['roi_feat_output_output'].asnumpy()[indice]
        rois = rois[indice] / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        roi_score_all.append(roi_score)
        rois_all.append(rois)
        roi_feat_all.append(roi_feat)
    return (scores_all, pred_boxes_all, roi_score_all, rois_all, roi_feat_all,
            data_dict_all)
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode all rois and emit the top-scoring fraction of them.

    Inputs, in order: rois (first column dropped), bbox deltas (columns 4..7
    are used), im_info (first row), class probabilities (column 0 ignored).
    Output 0 receives the kept rows as [0, box...]; output 1 the kept indices.
    """
    roi_boxes = in_data[0].asnumpy()[:, 1:]
    deltas = in_data[1].asnumpy()[:, 4:8]
    im_info = in_data[2].asnumpy()[0, :]
    fg_prob = in_data[3].asnumpy()[:, 1:]  # ignore bg

    # keep the ``self._top`` fraction of rois with the highest best-class score
    keep_count = int(roi_boxes.shape[0] * self._top)
    best_score = np.amax(fg_prob, axis=1)
    keep_index = np.argsort(-best_score)[:keep_count]

    decoded = clip_boxes(bbox_pred(roi_boxes, deltas), im_info[:2])

    # prepend an all-zero first column to every decoded box
    zero_col = np.zeros((decoded.shape[0], 1), dtype=np.float32)
    blob = np.hstack((zero_col, decoded.astype(np.float32, copy=False)))

    self.assign(out_data[0], req[0], blob[keep_index, :])
    self.assign(out_data[1], req[1], keep_index)
def coco_results_one_category_kernel(data_pack):
    """Build the COCO-style result entries of one category.

    :param data_pack: dict with
        'cat_id'        category id copied into every entry,
        'ann_type'      'bbox' or 'segm',
        'binary_thresh' forwarded to ``mask_voc2coco`` for 'segm',
        'all_im_info'   per-image dicts with 'index' (and 'width' / 'height'
                        for 'segm'),
        'boxes'         per-image arrays whose rows are [x1, y1, x2, y2, score],
        'masks'         per-image masks, read only for 'segm'
    :return: list of result dicts; 'bbox' entries carry [x, y, w, h] with
        w = x2 - x1 + 1 and h = y2 - y1 + 1. An unsupported ``ann_type`` is
        reported on stdout and yields an empty list.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']

    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # Nothing can be produced for an unknown type; bail out here instead
        # of failing later on the never-assigned per-image result.
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # builtin float: the ``np.float`` alias no longer exists in NumPy
        dets = boxes[im_ind].astype(float)
        if len(dets) == 0:
            continue
        scores = dets[:, -1]

        if ann_type == 'bbox':
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'bbox': [xs[k], ys[k], ws[k], hs[k]],
                       'score': scores[k]}
                      for k in range(dets.shape[0])]
        else:  # 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'segmentation': mask_encode[k],
                       'score': scores[k]}
                      for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Decode detections, re-weighting class scores with the DCR scores.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` attribute holds, per image, the
        inputs matching ``data_names`` positionally
    :param data_names: names paired with each image's inputs
    :param scales: per-image factor the decoded boxes are divided by
    :param cfg: config; reads ``TEST.HAS_RPN`` and ``DCR.top``.
        ``DCR.top == 1``: every score row is multiplied by its DCR score;
        ``DCR.top > 0`` otherwise: only the rows listed in
        ``keep_index_reshape_output`` are multiplied, the rest are unchanged;
        any other value: scores are left as they are.
    :return: (scores_all, pred_boxes_all, data_dict_all), three per-image lists
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        if cfg.DCR.top == 1:
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            scores = scores * dcr_scores
        elif cfg.DCR.top > 0:
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            # builtin int: the ``np.int`` alias no longer exists in NumPy
            keep_index = output['keep_index_reshape_output'].asnumpy().astype(
                int)[0]
            # rows that were not kept are multiplied by 1, i.e. left unchanged
            dcr_final_scores = np.ones_like(scores)
            dcr_final_scores[keep_index, :] = dcr_scores
            scores = scores * dcr_final_scores
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate proposals from five feature levels and write them to output 0.

    Inputs: ``in_data[0..4]`` class probabilities for strides 4, 8, 16, 32, 64;
    ``in_data[5..9]`` bbox deltas for the same strides; ``in_data[-1]``
    im_info. Output 0 receives rows of [0, box...]; output 1 receives the
    scores when ``self._output_score`` is set.

    Per level: build shifted anchors, apply the deltas, clip to the image and
    drop boxes that are too small. The levels are then merged, sorted by
    score, cut to ``pre_nms_topN``, passed through NMS and cut / padded to
    ``post_nms_topN``.

    :raises ValueError: if the batch holds more than one image
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    level_names = ('stride4', 'stride8', 'stride16', 'stride32', 'stride64')
    cls_prob_dict = {name: in_data[k] for k, name in enumerate(level_names)}
    bbox_pred_dict = {name: in_data[k + 5] for k, name in enumerate(level_names)}

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        level = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                       ratios=self._ratios)
        # the first set of channels is skipped; keep the second part
        scores = cls_prob_dict[level].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict[level].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height = int(im_info[0] / stride)
        width = int(im_info[1] / stride)

        # Enumerate all shifts
        grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                     np.arange(0, height) * stride)
        shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                            grid_x.ravel(), grid_y.ravel())).transpose()

        # A anchors (1, A, 4) + K cell shifts (K, 1, 4) -> (K, A, 4),
        # flattened to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        cell_shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = (sub_anchors.reshape((1, A, 4)) + cell_shifts).reshape((K * A, 4))

        # bbox deltas: (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4),
        # rows ordered by (h, w, a) to match the anchors
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1), same ordering
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposal_list.append(proposals[keep, :])
        score_list.append(scores[keep])

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    missing = post_nms_topN - len(keep)
    if missing > 0:
        keep = np.hstack((keep, npr.choice(keep, size=missing)))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def _cgenerate(boxes, width, height, chipsize, stride):
    """Clip ``boxes`` to the image and delegate chip generation to ``chips.generate``.

    The boxes are clipped against (height - 1, width - 1) and handed over as a
    contiguous float32 array; the remaining arguments are forwarded unchanged.
    """
    limits = np.array([height - 1, width - 1])
    clipped = clip_boxes(boxes, limits)
    packed = np.ascontiguousarray(clipped, dtype=np.float32)
    return chips.generate(packed, width, height, chipsize, stride)
def _pygenerate(boxes, width, height, chipsize, stride):
    """Greedily pick chips (sub-windows) that together cover the given boxes.

    Candidate chips are three corner chips plus a sliding grid with the given
    ``stride`` and strips along the right and bottom edges. The candidates
    are shuffled, then the chip matching the most not-yet-covered boxes is
    picked repeatedly until no candidate matches any remaining box.

    :param boxes: boxes to cover; clipped to (height - 1, width - 1) first
    :param width: image width
    :param height: image height
    :param chipsize: side length of a chip
    :param stride: step of the sliding grid
    :return: list of selected chips, each an array [x1, y1, x2, y2]
    """
    boxes = clip_boxes(boxes, np.array([height - 1, width - 1]))

    # ensure coverage of image for worst case
    # corners
    chips = [
        [max(width - chipsize, 0), 0, width - 1, min(chipsize, height - 1)],
        [0, max(height - chipsize, 0), min(chipsize, width - 1), height - 1],
        [max(width - chipsize, 0), max(height - chipsize, 0), width - 1, height - 1],
    ]

    x_starts = range(0, width - int(chipsize), stride)
    y_starts = range(0, height - int(chipsize), stride)

    # regular grid
    for i in x_starts:
        for j in y_starts:
            chips.append([i, j, i + chipsize - 1, j + chipsize - 1])
    # strip along the right edge
    for j in y_starts:
        chips.append([max(width - chipsize - 1, 0), j,
                      width - 1, j + chipsize - 1])
    # strip along the bottom edge
    for i in x_starts:
        chips.append([i, max(height - chipsize - 1, 0),
                      i + chipsize - 1, height - 1])

    # builtin float: the ``np.float`` alias no longer exists in NumPy
    chips = np.array(chips).astype(float)
    chips = chips[np.random.permutation(chips.shape[0])]

    # overlaps[j, k] == 1 marks box k as matched by chip j
    overlaps = ignore_overlaps(chips, boxes.astype(float))
    chip_matches = [set(np.where(overlaps[j, :] == 1)[0].tolist())
                    for j in range(len(chips))]
    num_matches = [len(matched) for matched in chip_matches]

    fchips = []
    while True:
        # first chip with the largest number of still-uncovered boxes
        best = int(np.argmax(np.array(num_matches)))
        if num_matches[best] == 0:
            break
        covered = chip_matches[best]
        fchips.append(chips[best])
        # now remove all rois in the chosen chip from every candidate
        chip_matches = [matched - covered for matched in chip_matches]
        num_matches = [len(matched) for matched in chip_matches]
    return fchips
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate proposals from one feature level and write them to output 0.

    Inputs: ``in_data[0]`` class probabilities, ``in_data[1]`` bbox deltas,
    ``in_data[2]`` im_info. Output 0 receives rows of [0, box...]; output 1
    receives the scores when ``self._output_score`` is set.

    Steps: build shifted anchors, apply the deltas, clip to the image, drop
    boxes that are too small, sort by score, cut to ``pre_nms_topN``, run NMS
    and cut / pad to ``post_nms_topN``.

    :raises ValueError: if the batch holds more than one image
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # use real image size instead of padded feature map sizes
    height = int(im_info[0] / self._feat_stride)
    width = int(im_info[1] / self._feat_stride)

    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        print("resudial: {}".format(
            (scores.shape[2] - height, scores.shape[3] - width)))

    # Enumerate all shifts
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * self._feat_stride,
                                 np.arange(0, height) * self._feat_stride)
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                        grid_x.ravel(), grid_y.ravel())).transpose()

    # A anchors (1, A, 4) + K cell shifts (K, 1, 4) -> (K, A, 4),
    # flattened to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    cell_shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = (self._anchors.reshape((1, A, 4)) + cell_shifts).reshape((K * A, 4))

    # bbox deltas: (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4),
    # rows ordered by (h, w, a) to match the anchors
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1), same ordering
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    missing = post_nms_topN - len(keep)
    if missing > 0:
        keep = np.hstack((keep, npr.choice(keep, size=missing)))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate proposals from the configured feature levels and write them to output 0.

    Which entries of ``in_data`` hold class probabilities and bbox deltas for
    which stride is chosen by a hard-coded layout id (see ``layer_num``
    below); ``in_data[-1]`` is im_info. Output 0 receives rows of
    [0, box...]; output 1 receives the scores when ``self._output_score`` is
    set. With the module-level ``DEBUG`` flag on, intermediate values are
    printed and the stride of every kept proposal is written to
    'channels.txt'.

    Per level: build shifted anchors, apply the deltas, clip to the image and
    drop boxes that are too small. The levels are then merged, sorted by
    score, cut to ``pre_nms_topN``, passed through NMS and cut / padded to
    ``post_nms_topN``.

    :raises ValueError: if the batch holds more than one image
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    # NOTE(review): this id used to be computed as len(in_data) / 2 and was
    # then unconditionally overwritten with 11, so only layout 11 is ever
    # used. The value is kept to preserve behaviour -- confirm whether it
    # should be derived from the inputs instead.
    layer_num = 11

    # layout id -> (strides, index of first cls_prob input, index of first
    # bbox_pred input); the inputs of one kind are consecutive in stride order
    input_layouts = {
        7: ((1, 2, 4, 8, 16, 32, 64), 0, 7),
        6: ((2, 4, 8, 16, 32, 64), 0, 6),
        5: ((4, 8, 16, 32, 64), 0, 5),
        2: ((32, 64), 3, 8),
        11: ((64,), 0, 1),
        1: ((1,), 0, 1),
        3: ((1, 32, 64), 0, 3),
    }
    level_strides, cls_start, bbox_start = input_layouts[layer_num]
    cls_prob_dict = {}
    bbox_pred_dict = {}
    for k, level_stride in enumerate(level_strides):
        cls_prob_dict['stride' + str(level_stride)] = in_data[cls_start + k]
        bbox_pred_dict['stride' + str(level_stride)] = in_data[bbox_start + k]

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    proposal_list = []
    score_list = []
    channel_list = []
    for s in self._feat_stride:
        stride = int(s)
        level = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                       ratios=self._ratios)
        # the first set of channels is skipped; keep the second part
        scores = cls_prob_dict[level].asnumpy()[:, self._num_anchors:, :, :]
        if DEBUG:
            scores1 = cls_prob_dict[level].asnumpy()
            print("scores.shape:" + str(scores.shape))
            print("scores1.shape:" + str(scores1.shape))
        bbox_deltas = bbox_pred_dict[level].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        if DEBUG:
            print("scores[:100]:" + str(scores[:50]))

        # remember which stride every candidate came from (debug output only)
        channels = np.ones((scores.shape)) * stride

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        if DEBUG:
            print(str(min_size))
            print(str(im_info[2]))
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        if DEBUG:
            print("proposals3:" + str(proposals[0:10]))
        scores = scores[keep]
        channels = channels[keep]

        proposal_list.append(proposals)
        score_list.append(scores)
        channel_list.append(channels)

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    channels = np.vstack(channel_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    channels = channels[order]
    if DEBUG:
        print('-------1-------')
        print(channels.shape)
        for s in self._feat_stride:
            print("stride:" + str(s))
            print(len(np.where(channels == float(s))[0]))
        print("proposals:" + str(proposals[0:20]))

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    channels = channels[keep]
    if DEBUG:
        print('-------2-------')
        print(channels.shape)
        for s in self._feat_stride:
            print("stride:" + str(s))
            print(len(np.where(channels == float(s))[0]))
        print("proposals:" + str(proposals[0:20]))
        print("scores:" + str(scores[0:20]))
        # ``with`` closes the dump file even if a write fails
        with open('channels.txt', 'w') as f_chan:
            for ii in range(channels.shape[0]):
                f_chan.write(str(channels[ii][0]) + ' ')

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))