def post_process(self, im, sim_ops, scale_factor=1):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!

    Post-process the raw network outputs and draw the detections on the image.

    Args:
        im: image array indexed as [row, col, channel]; the channel axis is
            reversed before display (presumably BGR -> RGB -- confirm).
        sim_ops: sequence read by position: [0] cls_score (not used here),
            [1] cls_prob, [2] bbox_pred, [3] rois.
        scale_factor: divisor applied to roi columns 1..4.

    Returns:
        None. Detections are drawn on a new 12x12 matplotlib figure; this
        method does not call ``plt.show()``.
    """
    conf_thresh = 0.6
    nms_thresh = 0.4

    im = im[:, :, (2, 1, 0)]
    _cls_score, class_probs, deltas, rois = (sim_ops[k] for k in range(4))

    # Column 0 of each roi is skipped; columns 1..4 hold the box.
    proposals = rois[:, 1:5] / scale_factor
    pred_boxes = self._clip_boxes(
        bbox_transform_inv(proposals, deltas, False), im.shape)

    _, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    # Start at 1 because class 0 (background) is skipped.
    for cls_ind, cls in enumerate(self.classes[1:], start=1):
        first_col = 4 * cls_ind
        cls_boxes = pred_boxes[:, first_col:first_col + 4]
        cls_scores = class_probs[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[nms(dets, nms_thresh), :]
        self._vis_detections(im, cls, dets, ax, thresh=conf_thresh)
def _unmap(label, target, tois):
    """Pick each sample's own 4 regression values and apply them to `tois`.

    For sample ``i`` with (1-based) class ``label[i]``, the four columns
    ``target[i, (label[i]-1)*4 : (label[i]-1)*4 + 4]`` are selected.

    Args:
        label: array of 1-based class labels; ``label.size`` rows are processed.
        target: 2-D array of per-class regression values, 4 columns per class.
        tois: boxes passed unchanged to ``bbox_transform_inv``.

    Returns:
        Whatever ``bbox_transform_inv(tois, selected_deltas)`` returns.
    """
    num_labels = label.size
    r_diff = np.zeros((num_labels, 4))
    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    for i in range(num_labels):
        first_col = int(label[i] - 1) * 4
        r_diff[i] = target[i, first_col:first_col + 4]
    return bbox_transform_inv(tois, r_diff)
def post_process(self, im, sim_ops, scale_factor=1):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!

    Post-process the raw network outputs, dump them to stdout for debugging,
    and display the detections on the image.

    Expected call:
        self.post_process(im, [cls_score, cls_prob, bbox_pred, rois], scale_factor)

    Args:
        im: image array indexed as [row, col, channel]; the channel axis is
            reversed before display (presumably BGR -> RGB -- confirm).
        sim_ops: [cls_score, cls_prob, bbox_pred, rois]. cls_score and
            bbox_pred are passed through ``convert_to_float_py`` using the
            'fl' entries of layers 77 and 78 in ``self._layer_map``.
        scale_factor: divisor applied to roi columns 1..4.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    conf_thresh = 0.6
    nms_thresh = 0.4

    # Debug dump of every input, in positional order.
    headers = ("cls_score:\n", "cls_prob:\n", "bbox_pred:\n", "rois:\n")
    for position, header in enumerate(headers):
        print(header)
        print(sim_ops[position])
    print("scale_factor:\n")
    print(scale_factor)

    im = im[:, :, (2, 1, 0)]

    # The converted cls_score is not used below; scores come from cls_prob.
    cls_score = convert_to_float_py(sim_ops[0], self._layer_map[77]['fl'])
    scores = sim_ops[1]
    box_deltas = convert_to_float_py(sim_ops[2], self._layer_map[78]['fl'])
    rois = sim_ops[3]

    proposals = rois[:, 1:5] / scale_factor
    pred_boxes = self._clip_boxes(
        bbox_transform_inv(proposals, box_deltas, False), im.shape)

    _, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    # Start at 1 because class 0 (background) is skipped.
    for cls_ind, cls in enumerate(self.classes[1:], start=1):
        first_col = 4 * cls_ind
        cls_boxes = pred_boxes[:, first_col:first_col + 4]
        print("TL DEBUG, pred_boxes shape: %s, cls_boxes shape: %s, scores shape: %s, cls_scores index: %d\n" %(str(pred_boxes.shape),str(cls_boxes.shape),str(scores.shape), cls_ind))
        cls_scores = scores[:, cls_ind]
        print(cls_scores)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[nms(dets, nms_thresh), :]
        self._vis_detections(im, cls, dets, ax, thresh=conf_thresh)
    plt.show()
def pred_bbox(anchors, diff):
    """Clamp the regression deltas, decode them, and clamp the decoded boxes.

    Only the first 40 rows are clamped (hard-coded row count).

    Args:
        anchors: anchor boxes passed to ``bbox_transform_inv``.
        diff: 2-D array of deltas; MODIFIED IN PLACE. Columns 0-1 are clamped
            to [-0.3, 0.3] and columns 2-3 to [-0.5, 0.5].

    Returns:
        The decoded boxes, with columns 0 and 2 clamped to [0, 19] and
        columns 1 and 3 clamped to [0, 14]
        (presumably the feature-map extent -- confirm).
    """
    num_rows = 40

    def _clamp(value, low, high):
        # Same expression the original used inline for every element.
        return max(min(value, high), low)

    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    for j in range(num_rows):
        diff[j, 0] = _clamp(diff[j, 0], -0.3, 0.3)
        diff[j, 1] = _clamp(diff[j, 1], -0.3, 0.3)
        diff[j, 2] = _clamp(diff[j, 2], -0.5, 0.5)
        diff[j, 3] = _clamp(diff[j, 3], -0.5, 0.5)

    pred = bbox_transform_inv(anchors, diff)

    for j in range(num_rows):
        pred[j, 0] = _clamp(pred[j, 0], 0, 19)
        pred[j, 1] = _clamp(pred[j, 1], 0, 14)
        pred[j, 2] = _clamp(pred[j, 2], 0, 19)
        pred[j, 3] = _clamp(pred[j, 3], 0, 14)
    return pred
def proposal_layer(bbox_pred, iou_pred, cls_pred, anchors, ls):
    """Turn raw predictions into final, per-class NMS-filtered detections.

    Args:
        bbox_pred: box regression output, decoded via ``bbox_transform_inv``.
        iou_pred: objectness / IoU predictions, flattened to one column.
        cls_pred: class scores, reshaped to ``cfg.NUM_CLASSES`` columns.
        anchors: anchor priors.
        ls: passed twice to ``bbox_transform_inv``
            (presumably the grid size -- confirm).

    Returns:
        (box_pred, cls_inds, scores): boxes clipped to the
        ``cfg.INP_SIZE`` square, int8 class indices, and 1-D scores.
    """
    decoded = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32),
        ls, ls)
    box_pred = np.reshape(decoded * cfg.INP_SIZE, [-1, 4])
    iou_pred = np.reshape(iou_pred, [-1, 1])
    cls_pred = np.reshape(cls_pred, [-1, cfg.NUM_CLASSES])

    # Score of a box = IoU prediction * probability of its best class.
    cls_inds = np.argmax(cls_pred, axis=1)
    row_ids = np.arange(cls_pred.shape[0])
    best_cls_prob = cls_pred[row_ids, cls_inds][:, np.newaxis]
    scores = iou_pred * best_cls_prob

    # Keep boxes scoring at least COEF_THRESH, then the top-N of those.
    candidates = np.where(scores >= cfg.COEF_THRESH)[0]
    ranking = np.argsort(-scores[candidates, 0])[:cfg.PRE_NMS_TOP_N]
    candidates = candidates[ranking]

    box_pred = box_pred[candidates]
    cls_inds = cls_inds[candidates]
    scores = scores[candidates]

    # Run NMS independently inside each class.
    survives = np.zeros(len(box_pred), dtype=np.int8)
    for cls_id in range(cfg.NUM_CLASSES):
        members = np.where(cls_inds == cls_id)[0]
        if len(members) != 0:
            cls_dets = np.hstack([box_pred[members], scores[members]])
            kept = nms_detection(cls_dets, cfg.NMS_THRESH)
            survives[members[kept]] = 1

    selected = np.where(survives > 0)
    box_pred = box_pred[selected]
    cls_inds = cls_inds[selected].astype(np.int8)
    scores = scores[selected][:, 0]

    # Clip boxes inside the image.
    box_pred = clip_boxes(
        np.ascontiguousarray(box_pred, dtype=np.float32),
        cfg.INP_SIZE, cfg.INP_SIZE)
    return box_pred, cls_inds, scores
def main(args):
    """Run the R-CNN style demo on one image and show the predicted boxes.

    Pipeline: region proposals -> CNN features -> SVM classification and
    bounding-box regression -> box refinement -> display.

    Args:
        args: object with ``img_path`` (image to process) and ``weights``
            (path of the model weights file).

    Raises:
        IOError: if the image cannot be read.
        Exception: if the weights file does not exist.
    """
    image_path = args.img_path
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise IOError('cannot read image: %s' % image_path)

    # Resize to the network input size, then extract region proposals.
    img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)
    imgs, rects = get_proposal(img)

    # Build the model and load the feature extractor weights.
    input_tensor = Input(shape=im_size + (3, ))
    model = get_model(input_tensor, classes_num)
    features_model = get_features_model(model)
    if not os.path.exists(args.weights):
        raise Exception('model weights not exists, please check it')
    features_model.load_weights(args.weights, by_name=True)

    # FIX: predict_on_batch needs the input batch; it was called with no
    # arguments. Assumes `imgs` from get_proposal is already a batch the
    # model accepts -- TODO confirm.
    features = features_model.predict_on_batch(imgs)

    # Load the SVM classifier and the ridge bounding-box regressor.
    svm_fit = joblib.load('./svm/svm.pkl')
    bbox_fit = joblib.load('./svm/bbox_train.pkl')
    svm_pred = svm_fit.predict(features)
    bbox_pred = bbox_fit.predict(features)

    # FIX: keep the proposals predicted as an object (label != 0) using a
    # boolean mask. The original indexed with the label VALUES themselves.
    keep = svm_pred != 0
    rects = rects[keep]
    bbox_pred = bbox_pred[keep]

    # Refine the proposals with the regressed offsets.
    pred_boxes = bbox_transform_inv(rects, bbox_pred)

    # Non-maximum suppression. NOTE(review): the result is computed but not
    # applied (the filtering line was already disabled in the original).
    keep_ind = py_cpu_nms(pred_boxes, 0.5)
    # pred_boxes = pred_boxes[keep_ind, :]

    # Convert (x1, y1, x2, y2) to (x1, y1, w, h) for display.
    pred_boxes[:, 2] = pred_boxes[:, 2] - pred_boxes[:, 0]
    pred_boxes[:, 3] = pred_boxes[:, 3] - pred_boxes[:, 1]

    show_rect(image_path, pred_boxes)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN.

    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D RPN class probabilities; the channels from
            ``num_anchors`` onward are used as the scores.
        rpn_bbox_pred: RPN box deltas, reshaped to (-1, 4).
        im_info: image info; the first two entries are passed to ``clip_boxes``.
        cfg_key: config section name (str or bytes), e.g. 'TRAIN' / 'TEST'.
        _feat_stride: unused here; kept for interface compatibility.
        anchors: anchors aligned row-for-row with the reshaped deltas.
        num_anchors: number of anchors per location.

    Returns:
        (blob, scores): ``blob`` has a leading all-zero batch-index column
        followed by the 4 box coordinates; ``scores`` are the kept scores.
    """
    # isinstance also accepts bytes subclasses, unlike `type(x) == bytes`.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes.
    # (Earlier TF formulation, kept for reference:
    #    scores = tf.reshape(rpn_cls_prob, shape=(-1, 2)); scores = scores[:, 1:])
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only a single image is supported as input, so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """Select the top-scoring region proposals without running NMS.

    For details please see the technical report.

    Returns:
        (blob, scores): ``blob`` has a leading all-zero batch-index column
        followed by the 4 clipped box coordinates; ``scores`` are the scores
        of the selected proposals.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))

    num_candidates = fg_scores.shape[0]
    if num_candidates >= rpn_top_n:
        ranked = fg_scores.argsort(0)[::-1]
        top_inds = ranked[:rpn_top_n].reshape(rpn_top_n, )
    else:
        # Fewer candidates than requested: sample with replacement.
        # Maybe unnecessary and loses good proposals, but this rarely happens.
        top_inds = npr.choice(num_candidates, size=rpn_top_n, replace=True)

    # Do the selection here.
    chosen_anchors = anchors[top_inds, :]
    chosen_deltas = deltas[top_inds, :]
    scores = fg_scores[top_inds]

    # Convert anchors into proposals, then clip them to the image.
    proposals = clip_boxes(
        bbox_transform_inv(chosen_anchors, chosen_deltas), im_info[:2])

    # Output rois blob. Only a single input image is supported, so all
    # batch inds are 0.
    zeros_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((zeros_col, proposals.astype(np.float32, copy=False)))
    return blob, scores
def bbox_reg_target(fastrcnn_reg_output, labels, rois, scores):
    """Return the refined boxes for every roi with a foreground label.

    Args:
        fastrcnn_reg_output: 2-D regression output, 4 columns per foreground
            class (class ``c`` uses columns ``(c-1)*4 : (c-1)*4 + 4``).
        labels: 1-D array of predicted class ids; ``> 0`` means foreground.
        rois: region proposals, indexed along the first axis like ``labels``.
        scores: 1-D array of scores, indexed like ``labels``.

    Returns:
        Array with one row per foreground roi:
        the decoded box columns, then score, then class label.
    """
    # FIX: the original used np.where(labels[labels > 0]), which yields
    # positions inside the FILTERED array (always 0..k-1), not positions in
    # `labels`. Index the original array instead.
    inds = np.where(labels > 0)[0]

    bbox_reg = np.zeros((len(inds), 5))
    # `row` indexes the compact output; `ind` indexes the original inputs.
    for row, ind in enumerate(inds):
        cls = labels[ind]
        start = (int(cls) - 1) * 4
        bbox_reg[row, 0] = cls
        bbox_reg[row, 1:] = fastrcnn_reg_output[ind, start:start + 4]

    rois = rois[inds, ...]
    # len(rois) == len(bbox_reg)
    # NOTE(review): `rois[:, 1]` selects a single index on axis 1; if rois is
    # (N, 5) with a leading batch column this should probably be `rois[:, 1:]`
    # -- left unchanged, confirm against the caller.
    pred_boxes = bbox_transform_inv(rois[:, 1], bbox_reg[:, 1:])

    # (None, 6): x1, y1, x2, y2, score, cls
    final_pred_boxes = np.hstack(
        (pred_boxes, scores[inds, np.newaxis], labels[inds, np.newaxis]))
    return final_pred_boxes
def test_net(test_split, net, batchsize, use_kld=cfg.USE_KLD, use_reg=cfg.USE_REG,
             threshold=cfg.OVERLAP_THRESHOLD, topk=cfg.TOPK, vis=False):
    """Evaluate `net` on a data split and return the grounding accuracy.

    A query counts as correct when one of its `topk` highest-scoring boxes
    has IoU >= `threshold` with the ground-truth box.

    Args:
        test_split: name of the data split to evaluate.
        net: network exposing ``blobs`` and ``forward()``.
        batchsize: number of queries per forward pass.
        use_kld: unused in this function; kept for interface compatibility.
        use_reg: if true, refine the rois with the predicted box deltas.
        threshold: IoU threshold for a correct prediction.
        topk: number of top-scoring boxes tried per query.
        vis: if true, dump a debug visualisation for every valid query.

    Returns:
        num_right / num_query as a float.
    """
    print('validate split: %s' % test_split)
    rpn_topn = cfg.RPN_TOPN
    dp = DDPNDataProvider(data_split=test_split, batchsize=batchsize)
    num_query = dp.get_num_query()
    num_right = 0

    if cfg.NTHREADS > 1:
        try:
            import torch
            dataloader = torch.utils.data.DataLoader(dp, batch_size=batchsize, shuffle=False,
                                                     num_workers=int(cfg.NTHREADS))
        except Exception:
            # Best-effort fallback to single-threaded loading. Narrowed from
            # a bare `except:` so Ctrl-C / SystemExit are not swallowed.
            cfg.NTHREADS = 1
            dataloader = dp
    else:
        dataloader = dp

    # Loop-invariant, so built once.
    my_complete_data = functools.partial(complete_data, batchsize=batchsize)

    count = 0
    for data in dataloader:
        if data is None:
            break
        data = map(np.array, data)
        gt_boxes, qvec, cvec, img_feat, bbox, img_shape, spt_feat, query_label, query_label_mask, \
            query_bbox_targets, query_bbox_inside_weights, query_bbox_outside_weights, valid_data, iid_list = map(
                my_complete_data, data)

        # Untransposed copies are kept for the debug visualisation.
        tp_qvec = qvec.copy()
        tp_cvec = cvec.copy()
        qvec = np.transpose(qvec, (1, 0))
        cvec = np.transpose(cvec, (1, 0))
        query_bbox_targets = query_bbox_targets.reshape(-1, 4)
        query_bbox_inside_weights = query_bbox_inside_weights.reshape(-1, 4)
        query_bbox_outside_weights = query_bbox_outside_weights.reshape(-1, 4)

        # Copy every input into its blob, in the original order.
        # NOTE(review): `.data.reshape(...)` returns a reshaped array and does
        # not resize the blob itself; kept as-is (it still raises on a size
        # mismatch). Confirm whether `net.blobs[name].reshape` was intended.
        net_inputs = (
            ('qvec', qvec),
            ('cvec', cvec),
            ('img_feat', img_feat),
            ('spt_feat', spt_feat),
            ('query_label', query_label),
            ('query_label_mask', query_label_mask),
            ('query_bbox_targets', query_bbox_targets),
            ('query_bbox_inside_weights', query_bbox_inside_weights),
            ('query_bbox_outside_weights', query_bbox_outside_weights),
        )
        for blob_name, value in net_inputs:
            net.blobs[blob_name].data.reshape(*value.shape)
            net.blobs[blob_name].data[...] = value

        net.forward()

        rois = bbox.copy()
        rois = rois.reshape(-1, 4)
        query_score_pred = net.blobs['query_score_pred'].data
        if use_reg:
            query_bbox_pred = net.blobs['query_bbox_pred'].data
            query_bbox_pred = bbox_transform_inv(rois, query_bbox_pred)
        else:
            query_bbox_pred = rois

        # Box indices per query, best score first.
        query_inds = np.argsort(-query_score_pred, axis=1)
        rois = rois.reshape(batchsize, rpn_topn, 4)
        query_bbox_pred = query_bbox_pred.reshape(batchsize, rpn_topn, 4)

        for i in range(batchsize):
            if valid_data[i] == 0:
                # Padding entry added to fill the batch.
                continue

            t_query_bbox_pred = clip_boxes(query_bbox_pred[i], img_shape[i])
            t_rois = clip_boxes(rois[i], img_shape[i])
            for j in range(topk):
                query_ind = query_inds[i, j]
                iou = calc_iou(t_query_bbox_pred[query_ind], gt_boxes[i])
                if iou >= threshold:
                    num_right += 1
                    break

            # debug pred: uses the last box tried (the hit, if there was one).
            if vis:
                debug_dir = 'visual_pred_%s_%s' % (cfg.IMDB_NAME, test_split)
                img_path = dp.get_img_path(int(iid_list[i]))
                img = cv2.imread(img_path)
                debug_pred(debug_dir, count, tp_qvec[i], tp_cvec[i], img, gt_boxes[i],
                           t_rois[query_ind], t_query_bbox_pred[query_ind], iou)

            percent = 100 * float(count) / num_query
            sys.stdout.write('\r' + ('%.2f' % percent) + '%')
            sys.stdout.flush()
            count += 1
            if count >= num_query:
                break

        # Stop fetching batches once every query has been evaluated.
        if count >= num_query:
            break

    accuracy = num_right / float(num_query)
    print('accuracy: %f\n' % accuracy)
    return accuracy
def forward(self, results, results2):
    """Run box regression on the next validation video.

    For every sliding window of ``self._depth`` frames, the two score sets
    are averaged; for each foreground class that is the arg-max of at least
    one row, the ``self.top`` best anchors are regressed by the network.

    Args:
        results: per-window score arrays from the first model.
        results2: per-window score arrays from the second model, same layout.

    Returns:
        (is_last, r): ``is_last`` comes from the dataset iterator; ``r`` is a
        dict with 'dets' (one detection dict per window), 'gt_bboxes' and
        'gt_label'.
    """
    self._net.blobs['data'].reshape(self._batch_size, 3, self._depth,
                                    self._height, self._width)
    self._net.blobs['tois'].reshape(self._batch_size * self.top * 8, 5)
    self._net.blobs['toi2'].reshape(self._batch_size * self.top * 8, 5)

    [clip, gt_bboxes, labels, all_pred, _, is_last] = self.dataset.next_val_video()
    labels = int(labels)
    n = int(clip.shape[0])
    all_dets = []

    # `range` / `print(...)` replace the Python-2-only `xrange` / `print j`.
    for i in range(n - self._depth + 1):
        batch_clip = clip[i:i + 1 * self._depth].transpose([3, 0, 1, 2])
        batch_clip = np.expand_dims(batch_clip, axis=0)
        pred = all_pred[i:i + 1 * self._depth]
        pred_anchors = np.reshape(pred, (-1, 4)) * 1.25

        # Average the two models' outputs; the first 22 columns and the
        # remaining columns are two separate score sets.
        curr_results = (results[i] + results2[i]) * 0.5
        r1 = curr_results[:, :22]
        r2 = curr_results[:, 22:]

        curr_dets = {
            'boxes': np.empty((0, self._depth, 4)),
            'pred_label': np.empty((0)),
            'pred_scores': np.empty((0, 2)),
            'label_length': np.empty((0)),
        }
        tmp = r1.argmax(axis=1)
        for j in range(1, self.dataset._num_classes):
            ttmp = tmp[tmp == j]
            if ttmp.size > 0:
                print('pred_labe')
                print(j)
                # Indices of the `top` highest r1 scores for class j.
                argsort_r = np.argsort(r1[:, j])[-self.top:]
                curr_scores = np.vstack((r1[argsort_r, j],
                                         r2[argsort_r, j])).transpose()
                curr_boxes = pred_anchors[argsort_r]
                curr_boxes = np.repeat(curr_boxes, 8, axis=0)
                batch_tois = np.hstack((np.zeros(
                    (curr_boxes.shape[0], 1)), curr_boxes))
                curr_idx = np.arange(self._depth).reshape(1, self._depth)
                curr_idx = np.repeat(curr_idx, self.top, axis=0).reshape(-1, 1)
                batch_toi2 = np.hstack((curr_idx, curr_boxes))

                self._net.blobs['data'].data[...] = batch_clip.astype(
                    np.float32, copy=False)
                self._net.blobs['tois'].data[...] = batch_tois.astype(
                    np.float32, copy=False)
                self._net.blobs['toi2'].data[...] = batch_toi2.astype(
                    np.float32, copy=False)
                self._net.forward()

                # The 4 regression outputs that belong to class j.
                diff = self._net.blobs['fc8-2'].data[...][:, (j - 1) * 4:j * 4]
                boxes = bbox_transform_inv(batch_tois[:, 1:5], diff).reshape(
                    (self.top, 8, 4)) * 16
                # Clamp x coordinates to [0, 398.75] and y to [0, 298.75].
                boxes[:, :, 0::2] = np.maximum(
                    0, np.minimum(398.75, boxes[:, :, 0::2]))
                boxes[:, :, 1::2] = np.maximum(
                    0, np.minimum(298.75, boxes[:, :, 1::2]))

                curr_dets['boxes'] = np.vstack((curr_dets['boxes'], boxes))
                curr_dets['pred_label'] = np.hstack(
                    (curr_dets['pred_label'], np.ones(self.top) * j))
                curr_dets['pred_scores'] = np.vstack(
                    (curr_dets['pred_scores'], curr_scores))
                curr_dets['label_length'] = np.hstack(
                    (curr_dets['label_length'], ttmp.size))
        all_dets.append(curr_dets)

    r = {'dets': all_dets, 'gt_bboxes': gt_bboxes, 'gt_label': labels}
    # NOTE: a disabled per-frame overlap computation (bbox_overlaps against
    # gt_bboxes, stacked over depth) used to live here as a string literal.
    return is_last, r
def test_gallery(net, dataloader, output_dir, thresh=0.):
    """Detect persons in all gallery images and save boxes and features.

    Args:
        net: detection / re-id network; put in eval mode here.
        dataloader: yields ``(im, (orig_shape, im_info))`` per image;
            ``dataloader.dataset`` must support ``len()``.
        output_dir: directory that receives 'gboxes.pkl' and 'gfeatures.pkl'.
        thresh: minimum class-1 score for a detection to be kept.

    Returns:
        (all_boxes, all_features): one entry per image, in loader order.
    """
    with open('config.yml', 'r') as f:
        # safe_load: yaml.load without a Loader is unsafe and is rejected by
        # PyYAML >= 6.
        config = yaml.safe_load(f)

    num_images = len(dataloader.dataset)
    all_boxes = []
    all_features = []
    end = time.time()
    time_cost = AverageMeter()
    net.eval()

    for i, data in enumerate(dataloader):
        with torch.no_grad():
            im, (orig_shape, im_info) = data
            im = im.to(device)
            im_info = im_info.numpy().squeeze(0)
            orig_shape = [x.item() for x in orig_shape]
            scores, bbox_pred, rois, features = net.forward(im, None, im_info)

        # Undo the input scaling (im_info[2]) on the roi coordinates.
        boxes = rois[:, 1:5] / im_info[2]
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

        if config['test_bbox_reg']:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(
                torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy()
            pred_boxes = clip_boxes(pred_boxes, orig_shape)
        else:
            # Simply repeat the boxes, once for each class.
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        boxes = pred_boxes

        # Skip j = 0, because it's the background class.
        j = 1
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32, copy=False)
        # NMS is skipped when nothing passed the threshold.
        keep = nms(torch.from_numpy(cls_dets),
                   config['test_nms']).numpy() if cls_dets.size > 0 else []
        cls_dets = cls_dets[keep, :]

        all_boxes.append(cls_dets)
        all_features.append(features[inds][keep])

        time_cost.update(time.time() - end)
        end = time.time()
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    time_cost.avg))

    det_file = os.path.join(output_dir, 'gboxes.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    feature_file = os.path.join(output_dir, 'gfeatures.pkl')
    with open(feature_file, 'wb') as f:
        pickle.dump(all_features, f, pickle.HIGHEST_PROTOCOL)

    return all_boxes, all_features
def forward(self, scores, bbox_delta, im_info, cfg_key):
    """Generate region proposals from RPN scores and box deltas.

    Args:
        scores: RPN class scores; channels from ``self._num_anchors`` onward
            are used as foreground scores.
        bbox_delta: RPN box deltas, laid out channel-first; batch size
            must be 1.
        im_info: image info passed to ``clip_boxes``.
        cfg_key: config section to read the NMS settings from.

    Returns:
        (output, anchors_reshape): ``output`` is a zero-initialised
        (rows, 5) tensor whose column 0 is the batch id (always 0) and
        columns 1..4 hold the kept proposals; ``rows`` is
        ``RPN_POST_NMS_TOP_N`` when that is positive (unused rows stay
        zero), otherwise the number of proposals. ``anchors_reshape`` is the
        float32 numpy array of all shifted anchors.
    """
    scores = scores[:, self._num_anchors:, :, :]
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    batch_size = bbox_delta.size(0)
    assert (batch_size == 1)  # Only support batch size = 1

    # Get the full anchor set: every base anchor shifted to every cell.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchor.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors_reshape = anchors.reshape((K * A, 4)).astype(np.float32, copy=False)

    # Convert the anchors into proposals.
    bbox_delta = bbox_delta.permute(0, 2, 3, 1).contiguous()
    bbox_delta = bbox_delta.view(-1, 4)
    proposals = bbox_transform_inv(
        torch.from_numpy(anchors_reshape).type_as(bbox_delta), bbox_delta)
    proposals = clip_boxes(proposals, im_info)

    # Flatten the scores in the same (h, w, a) order as the deltas.
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(1, -1)

    # Pick the top region proposals.
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # FIX: always make scores a column so torch.cat below also works when
    # pre_nms_topN <= 0 (previously the reshape only happened in the branch).
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression.
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, :]

    # TODO: batch_size > 1
    # Padding batch ids at the first column.
    num_proposal = proposals.size(0)
    # FIX: with post_nms_topN <= 0 (no limit) size the buffer to the data
    # instead of allocating an empty / invalid tensor.
    num_rows = post_nms_topN if post_nms_topN > 0 else num_proposal
    output = scores.new(num_rows, 5).zero_()
    output[:num_proposal, 1:] = proposals
    return output, anchors_reshape
def forward(self, bottom, top):
    """Turn RPN outputs into RoI proposals (and optionally their scores).

    Args:
        bottom: input blobs: [0] class probabilities (background channels
            first, then foreground), [1] box deltas, [2] image info whose
            first row is (height, width, scale).
        top: output blobs: [0] receives the rois, rows of
            (batch_ind, x1, y1, x2, y2); [1], if present, receives the scores.

    Algorithm:
        for each (H, W) location i
          generate A anchor boxes centered on cell i
          apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals
        (training phase only) apply NMS and take top post_nms_topN
        return the top proposals (-> RoIs top, scores top)
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    if self.phase == 0:
        cfg_key = 'TRAIN'
    elif self.phase == 1:
        cfg_key = 'TEST'
    else:
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

    if cfg_key == 'TRAIN':
        nms_thresh = cfg[cfg_key].NMS_THRESH
        post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
        pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS
    if cfg_key == 'TEST':
        pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
    min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

    # The first set of _num_anchors channels are bg probs;
    # the second set are the fg probs, which we want.
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    # Debug output uses print(...) with a single argument, which behaves the
    # same on Python 2 and 3 (the bare print statement is Python-2-only).
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2]).
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. Sort all (proposal, score) pairs by score from highest to lowest.
    # 5. Take top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. Apply nms (if in training mode).
    # 7. Take after_nms_topN.
    # 8. Return the top proposals (-> RoIs top).
    if self.phase == 0:
        # DO NMS ONLY IN TRAINING TIME
        # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Output rois blob.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    if proposals.shape[0] == 0:
        # No proposal survived: emit one fixed 16x16 placeholder roi.
        blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob.
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
def pose_target_layer(rois, bbox_prob, bbox_pred, gt_boxes, poses, is_training):
    """Refine the rois and build pose (quaternion) regression targets.

    Args:
        rois: tensor of rois; column 0 is the batch id, column 1 the class,
            columns 2..5 the box, column 6 receives the class probability.
        bbox_prob: tensor of per-class probabilities, one column per class.
        bbox_pred: tensor of per-class box deltas, 4 columns per class.
        gt_boxes: tensor indexed [image, box, field]; fields 0..3 are the
            box, field 4 the class; a box is used when its last field is > 0.
        poses: tensor indexed [image, box, 9]; entry 0 of every used pose is
            OVERWRITTEN with the image index. Columns 2..5 are read as the
            quaternion.
        is_training: when true, only positive rois are kept (if any exist).

    Returns:
        (rois, poses_target, poses_weight) as CUDA tensors.
    """
    rois = rois.detach().cpu().numpy()
    bbox_prob = bbox_prob.detach().cpu().numpy()
    bbox_pred = bbox_pred.detach().cpu().numpy()
    gt_boxes = gt_boxes.detach().cpu().numpy()
    num_classes = bbox_prob.shape[1]

    # Process boxes: undo the target normalisation, then decode.
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes))
        bbox_pred *= stds
        bbox_pred += means
    boxes = rois[:, 2:6].copy()
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)

    # Assign each roi the decoded box and probability of its own class.
    for i in range(rois.shape[0]):
        cls = int(rois[i, 1])
        rois[i, 2:6] = pred_boxes[i, cls * 4:cls * 4 + 4]
        rois[i, 6] = bbox_prob[i, cls]

    # Convert boxes to (batch_ids, x1, y1, x2, y2, cls).
    roi_blob = rois[:, (0, 2, 3, 4, 5, 1)]

    # Flatten the valid ground-truth boxes / poses of all images.
    gt_box_blob = np.zeros((0, 6), dtype=np.float32)
    pose_blob = np.zeros((0, 9), dtype=np.float32)
    for i in range(gt_boxes.shape[0]):
        for j in range(gt_boxes.shape[1]):
            if gt_boxes[i, j, -1] > 0:
                gt_box = np.zeros((1, 6), dtype=np.float32)
                gt_box[0, 0] = i
                gt_box[0, 1:5] = gt_boxes[i, j, :4]
                gt_box[0, 5] = gt_boxes[i, j, 4]
                gt_box_blob = np.concatenate((gt_box_blob, gt_box), axis=0)
                poses[i, j, 0] = i
                pose_blob = np.concatenate(
                    (pose_blob, poses[i, j, :].cpu().reshape(1, 9)), axis=0)

    if gt_box_blob.shape[0] == 0:
        # No ground truth at all: all-zero targets and weights.
        num = rois.shape[0]
        poses_target = np.zeros((num, 4 * num_classes), dtype=np.float32)
        poses_weight = np.zeros((num, 4 * num_classes), dtype=np.float32)
    else:
        # overlaps: (rois x gt_boxes)
        # np.float64 replaces the `np.float` alias removed in NumPy 1.24
        # (it was an alias of the builtin float, i.e. float64).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(roi_blob[:, :5], dtype=np.float64),
            np.ascontiguousarray(gt_box_blob[:, :5], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_box_blob[gt_assignment, 5]
        quaternions = pose_blob[gt_assignment, 2:6]

        # Foreground RoIs are those with >= FG_THRESH_POSE overlap...
        bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH_POSE)[0]
        labels[bg_inds] = 0
        # ...whose predicted class also matches the assigned gt class.
        bg_inds = np.where(roi_blob[:, -1] != labels)[0]
        labels[bg_inds] = 0

        # In training, only use the positive boxes for pose regression.
        if is_training:
            fg_inds = np.where(labels > 0)[0]
            if len(fg_inds) > 0:
                rois = rois[fg_inds, :]
                quaternions = quaternions[fg_inds, :]
                labels = labels[fg_inds]

        # Pose regression targets and weights.
        poses_target, poses_weight = _compute_pose_targets(
            quaternions, labels, num_classes)

    return torch.from_numpy(rois).cuda(), torch.from_numpy(
        poses_target).cuda(), torch.from_numpy(poses_weight).cuda()
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, fl_cls_prob, fl_bbox_pred, feat_stride=[16,], anchor_scales = [8, 16, 32], base_size = 10, ratios =[0.333, 0.5, 0.667, 1.0, 1.5, 2.0, 3.0], pre_nms_topN = 2000, max_nms_topN = 400, isHardware=False, num_stddev=2.0):
    """
    Generate RPN region proposals, optionally modelling the hardware NMS.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg,
                          given as fixed point; converted to float with fl_cls_prob
                          NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN, given as fixed
                   point; converted to float with fl_bbox_pred
    im_info: a list of [image_height, image_width, scale_ratios]; only im_info[0] is used
    cfg_key: 'TRAIN' or 'TEST'
    fl_cls_prob, fl_bbox_pred: second argument of convert_to_float_py for the two inputs
                               (presumably the fractional bit length -- confirm)
    feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales, base_size, ratios: passed to generate_anchors
    pre_nms_topN: number of top scoring proposals kept before NMS (software path only)
    max_nms_topN, num_stddev: passed to nms_hw (hardware path only)
    isHardware: select the hardware-model path (nms_hw) instead of plain nms
    ----------
    Returns
    ----------
    rpn_rois : rows of [0, x1, y1, x2, y2]
    """
    _anchors = generate_anchors(base_size, ratios, anchor_scales)
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Convert fixed point int to floats for internal calculations!
    rpn_cls_prob_reshape = convert_to_float_py(rpn_cls_prob_reshape, fl_cls_prob)
    rpn_bbox_pred = convert_to_float_py(rpn_bbox_pred, fl_bbox_pred)

    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1], [1, height, width, _num_anchors])

    # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line
    # NOTE(review): the line below is live, so it overrides the `scores`
    # computed just above (old-model channel ordering is in effect).
    scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])
        print 'min_size: {}'.format(min_size)
        print 'max_nms_topN: {}'.format(max_nms_topN)
        print 'post_nms_topN: {}'.format(post_nms_topN)

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Reshape predicted bbox transformations to get them into the same
    # order as the anchors: the input is already (1, H, W, 4 * A), so a plain
    # reshape gives (1 * H * W * A, 4) with rows ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

    # Same story for the scores:
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, isHardware)
    proposals = proposals.astype(bbox_deltas.dtype)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    #KM: Move filtering into NMS (after estimating parameters
    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    #keep = _filter_boxes(proposals, min_size * im_info[2])
    #proposals = proposals[keep, :]
    #
    #print '[Ref Model Log] Num total Proposals before NMS : ' + str(proposals.shape)
    #scores = scores[keep]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # Hardware modeling
    if (isHardware):
    #if (0):
        #proposals1 = np.copy(proposals)
        #scores1 = np.copy(scores)
        #KM: Proposal inputs to NMS need to be in same order as HW or final results will be different!
        proposals1 = np.zeros(proposals.shape)
        scores1 = np.zeros(scores.shape)
        idy = 0
        # Re-order from (h, w, a) row order to anchor-major, then column,
        # then row, which is the order fed to nms_hw.
        for k in range(0,A):
            for j in range(0,width):
                for i in range(0,height):
                    idx = (i*width*A)+(j*A)+k
                    scores1[idy] = scores[idx]
                    proposals1[idy] = proposals[idx]
                    print_msg(str(k) + '.' + str(j) + '.' + str(i) + ' Proposal ' + str(idy) + ' -> [' + str(int(8*scores1[idy])) + '] ' + str((16*proposals1[idy,:]).astype(int)),1)
                    idy = idy+1

        prop, score = nms_hw(proposals1, scores1, num_stddev, nms_thresh, min_size, im_info[2], max_nms_topN, post_nms_topN)
        batch_inds = np.zeros((prop.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, prop.astype(np.float32, copy=False)))
    else:
        # Software path: sort by score, keep pre_nms_topN, NMS, keep post_nms_topN.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        print 'Number of proposals : ' + str(len(keep))
        # Only a single image is supported, so every batch index is 0.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob
def demo_search(net, im_dir, images, use_cuda, thresh=.75):
    """Search a hard-coded query person in every gallery image and plot it.

    The query crop is embedded once. Each gallery image is then run through
    the network, class-1 detections scoring above ``thresh`` are kept after
    NMS, and every detection feature is compared with the query feature by a
    dot product. The best match of an image is drawn in green, the others in
    blue. Figures are saved into ``im_dir`` and also shown.

    Args:
        net: model exposing ``forward(im, roi, im_info[, mode])``.
        im_dir: directory holding the query and gallery images; the output
            figures are written there as well.
        images: gallery file names. The query file name is skipped; the
            caller's sequence is not modified.
        use_cuda: move the input tensors to the GPU when true.
        thresh: minimum class-1 score for a detection to be kept.
    """
    with open('config.yml', 'r') as f:
        # safe_load: a plain config needs no arbitrary object construction,
        # and yaml.load() without an explicit Loader fails on PyYAML >= 6.
        config = yaml.safe_load(f)

    q_name = 's15166.jpg'
    # x1, y1, width, height -- passed to Rectangle in that order below.
    q_roi = [29, 5, 164, 439]
    x1, y1, box_w, box_h = q_roi

    q_path = os.path.join(im_dir, q_name)
    q_im, q_scale, _ = pre_process_image(q_path)
    q_roi = np.array(q_roi) * q_scale
    q_info = np.array([q_im.shape[1], q_im.shape[2], q_scale],
                      dtype=np.float32)
    q_im = q_im.transpose([0, 3, 1, 2])
    # Prepend the batch index column expected in front of the four coords.
    q_roi = np.hstack(([[0]], q_roi.reshape(1, 4)))

    # Everything below is inference only, so no autograd bookkeeping needed.
    with torch.no_grad():
        if use_cuda:
            q_im = torch.from_numpy(q_im).cuda()
            q_roi = torch.from_numpy(q_roi).float().cuda()
        else:
            q_im = torch.from_numpy(q_im)
            q_roi = torch.from_numpy(q_roi).float()

        q_feat = net.forward(q_im, q_roi, q_info, 'query')[0]

        # Show query
        fig, ax = plt.subplots(figsize=(16, 9))
        ax.imshow(plt.imread(q_path))
        plt.axis('off')
        ax.add_patch(
            plt.Rectangle((x1, y1), box_w, box_h, fill=False,
                          edgecolor='#F92672', linewidth=3.5))
        ax.add_patch(
            plt.Rectangle((x1, y1), box_w, box_h, fill=False,
                          edgecolor='white', linewidth=1))
        ax.text(x1 + 5, y1 - 15, '{}'.format('Query'),
                bbox=dict(facecolor='#F92672', linewidth=0),
                fontsize=20, color='white')
        plt.tight_layout()
        fig.savefig(os.path.join(im_dir, 'query.jpg'))
        plt.show()
        plt.close(fig)

        # Get gallery images (a filtered copy: do not mutate the argument).
        gallery = [name for name in images if name != q_name]

        for im_name in gallery:
            im_path = os.path.join(im_dir, im_name)
            im, im_scale, orig_shape = pre_process_image(im_path, copy=True)
            im_info = np.array([im.shape[1], im.shape[2], im_scale],
                               dtype=np.float32)
            im = im.transpose([0, 3, 1, 2])

            if use_cuda:
                im = torch.from_numpy(im).cuda()
            else:
                im = torch.from_numpy(im)

            scores, bbox_pred, rois, features = net.forward(im, None, im_info)

            # Undo the pre-processing scale to get original-image coordinates.
            boxes = rois[:, 1:5] / im_info[2]
            scores = np.reshape(scores, [scores.shape[0], -1])
            bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
            if config['test_bbox_reg']:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred
                pred_boxes = bbox_transform_inv(
                    torch.from_numpy(boxes),
                    torch.from_numpy(box_deltas)).numpy()
                pred_boxes = clip_boxes(pred_boxes, orig_shape)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            boxes = pred_boxes

            # skip j = 0, because it's the background class
            j = 1
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(
                    np.float32, copy=False)

            # An empty array is never None, so test the size explicitly;
            # otherwise max() below would fail on an empty similarity list.
            if cls_dets.size == 0:
                print('There are no detections in image {}'.format(im_name))
                continue

            keep = nms(torch.from_numpy(cls_dets), config['test_nms']).numpy()
            cls_dets = cls_dets[keep, :]
            features = features[inds][keep]

            similarities = features.dot(q_feat)

            fig, ax = plt.subplots(figsize=(16, 9))
            ax.imshow(plt.imread(im_path))
            plt.axis('off')

            # Highlight the most similar detection, others share one colour.
            best_sim = similarities.max()
            for box, sim in zip(cls_dets, similarities):
                color = '#4CAF50' if sim == best_sim else '#66D9EF'
                bx1, by1, bx2, by2, _ = box
                ax.add_patch(
                    plt.Rectangle((bx1, by1), bx2 - bx1, by2 - by1,
                                  fill=False, edgecolor=color,
                                  linewidth=3.5))
                ax.add_patch(
                    plt.Rectangle((bx1, by1), bx2 - bx1, by2 - by1,
                                  fill=False, edgecolor='white',
                                  linewidth=1))
                ax.text(bx1 + 5, by1 - 15, '{:.2f}'.format(sim),
                        bbox=dict(facecolor=color, linewidth=0),
                        fontsize=20, color='white')
            plt.tight_layout()
            fig.savefig(os.path.join(im_dir, 'result_' + im_name))
            plt.show()
            plt.close(fig)
def demo_detection(net, im_dir, images, use_cuda, thresh=.75):
    """Run the detector on each image and display the class-1 detections.

    For every file name in ``images`` the image is pre-processed, passed
    through ``net``, detections of class 1 scoring above ``thresh`` are kept
    after NMS and drawn with their score. Images without any detection are
    reported on stdout and skipped.

    Args:
        net: model exposing ``forward(im, roi, im_info)``.
        im_dir: directory holding the images.
        images: iterable of image file names inside ``im_dir``.
        use_cuda: move the input tensor to the GPU when true.
        thresh: minimum class-1 score for a detection to be kept.
    """
    with open('config.yml', 'r') as f:
        # safe_load: a plain config needs no arbitrary object construction,
        # and yaml.load() without an explicit Loader fails on PyYAML >= 6.
        config = yaml.safe_load(f)

    with torch.no_grad():
        for im_name in images:
            im_path = os.path.join(im_dir, im_name)
            im, im_scale, orig_shape = pre_process_image(im_path, copy=True)
            im_info = np.array([im.shape[1], im.shape[2], im_scale],
                               dtype=np.float32)
            im = im.transpose([0, 3, 1, 2])

            if use_cuda:
                im = torch.from_numpy(im).cuda()
            else:
                im = torch.from_numpy(im)

            scores, bbox_pred, rois, _ = net.forward(im, None, im_info)

            # Undo the pre-processing scale to get original-image coordinates.
            boxes = rois[:, 1:5] / im_info[2]
            scores = np.reshape(scores, [scores.shape[0], -1])
            bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
            if config['test_bbox_reg']:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred
                pred_boxes = bbox_transform_inv(
                    torch.from_numpy(boxes),
                    torch.from_numpy(box_deltas)).numpy()
                pred_boxes = clip_boxes(pred_boxes, orig_shape)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            boxes = pred_boxes

            # skip j = 0, because it's the background class
            j = 1
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(
                    np.float32, copy=False)

            # An empty array is never None, so test the size explicitly.
            if cls_dets.size == 0:
                print('There are no detections in image {}'.format(im_name))
                continue

            keep = nms(torch.from_numpy(cls_dets), config['test_nms']).numpy()
            cls_dets = cls_dets[keep, :]

            fig, ax = plt.subplots(figsize=(16, 9))
            ax.imshow(plt.imread(im_path))
            plt.axis('off')
            for box in cls_dets:
                x1, y1, x2, y2, score = box
                ax.add_patch(
                    plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False,
                                  edgecolor='#66D9EF', linewidth=3.5))
                ax.add_patch(
                    plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False,
                                  edgecolor='white', linewidth=1))
                ax.text(x1 + 5, y1 - 15, '{:.2f}'.format(score),
                        bbox=dict(facecolor='#66D9EF', linewidth=0),
                        fontsize=20, color='white')
            plt.tight_layout()
            plt.show()
            plt.close(fig)
def forward(self, bottom, top):
    """Decode anchor scores and box deltas into a blob of RoI proposals.

    Inputs are read from ``bottom``: ``bottom[-3]`` holds the class scores,
    ``bottom[-2]`` the box deltas, ``bottom[-1]`` the image info, and
    ``bottom[0 .. _num_refine - 1]`` the successive refinement deltas.

    Steps:
      1. build the shifted anchors for every feature-map cell,
      2. apply the deltas (plus refinements) and clip to the image,
      3. optionally keep only the sub-sampled anchor positions,
      4. drop boxes smaller than the configured minimum size,
      5. rank by best non-background score, keep the top ``pre_nms_topN``
         that also reach ``score_thresh`` (always at least one),
      6. in the training phase only, run NMS and keep ``post_nms_topN``.

    Outputs: ``top[0]`` receives the ``(N, 5)`` RoI blob (batch index 0
    followed by the box); ``top[1]``, when present, receives the scores.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    if self.phase == 0:
        cfg_key = 'TRAIN'
    elif self.phase == 1:
        cfg_key = 'TEST'
    else:
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

    if cfg_key == 'TRAIN':
        nms_thresh = cfg[cfg_key].NMS_THRESH
        post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
        pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS
    if cfg_key == 'TEST':
        pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
    # Both values are used unconditionally below, so read them in any phase.
    score_thresh = cfg[cfg_key].SCORE_THRESH
    min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

    scores = bottom[-3].data  # multi-class scores
    bbox_deltas = bottom[-2].data
    im_info = bottom[-1].data[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride[0]
    shift_y = np.arange(0, height) * self._feat_stride[0]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    # Floor division: the result is used as a reshape dimension and must be
    # an int on Python 3 as well as Python 2.
    num_classes = scores.shape[1] // (A * self._num_feats)
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    self.anchors = anchors

    # Move channels last and flatten so that rows follow the anchor order
    # (h, w, a from slowest to fastest).
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores, ending with one row per box and one
    # column per class.
    scores = scores.transpose((0, 2, 3, 1)).reshape(
        (-1, num_classes, A * self._num_feats)).transpose(
            (0, 2, 1)).reshape((-1, num_classes))

    # Convert anchors into proposals via bbox transformations; every anchor
    # is repeated once per feature.
    new_anchors = np.concatenate(
        [anchors[:, np.newaxis, :]] * self._num_feats,
        axis=1).reshape((-1, 4))
    proposals = bbox_transform_inv(new_anchors, bbox_deltas)

    for i in range(self._num_refine):
        # Do this because a combination of bbox_transform_inv and
        # _compute_targets will cause a larger 3rd and 4th entry of
        # coordinates. We do not do this at the last regression, just to
        # follow the original code.
        proposals[:, 2:4] -= 1
        refine_delta = bottom[i].data
        refine_delta = refine_delta.transpose((0, 2, 3, 1)).reshape((-1, 4))
        proposals = bbox_transform_inv(proposals, refine_delta)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    if self._subsampled:
        anchor_map = np.zeros((height, width, A))
        for i in range(A):
            # Floor division keeps the list index an int on Python 3.
            stride = self._feat_stride[i // len(self._shifts) ** 2] \
                // self._feat_stride[0]
            anchor_map[::stride, ::stride, i] = 1
        anchor_map = anchor_map.reshape((K * A))
        subsampled_inds = np.where(anchor_map)[0]
        proposals = proposals[subsampled_inds, :]
        scores = scores[subsampled_inds, :]

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep, :]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    max_score = np.max(scores[:, 1:], axis=1).ravel()
    order = max_score.argsort()[::-1]
    above = np.where(max_score[order] >= score_thresh)[0]
    # Nothing reaches score_thresh: just keep the single best proposal.
    thresh_idx = above.max() if above.size else 0

    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    order = order[:thresh_idx + 1]
    proposals = proposals[order, :]
    scores = scores[order, :]

    # 6. apply nms (if in training mode)
    # 7. take after_nms_topN
    # 8. return the top proposals (-> RoIs top)
    if self.phase == 0:
        # NMS only at training time; at test time it runs outside of this
        # function.
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    if proposals.shape[0] == 0:
        # Emit one dummy RoI so the blob is never empty.
        blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
def forward(self, input):
    """Produce a fixed-size batch of RPN proposals.

    ``input`` is indexed as ``(class scores, bbox deltas, im_info)``. The
    second half of the score channels (from ``_num_anchors`` on) is used as
    the per-anchor score. Anchors are shifted over the feature map, decoded
    with the deltas, clipped, ranked by score, cut to ``rpn_pre_nms_top_n``,
    passed through NMS and cut to ``rpn_post_nms_top_n``.

    Returns a ``(batch_size, rpn_post_nms_top_n, 5)`` tensor; column 0 is
    the batch index, columns 1-4 the box, and unused rows stay zero.
    """
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]

    settings = self.cf
    max_before_nms = settings.rpn_pre_nms_top_n
    max_after_nms = settings.rpn_post_nms_top_n
    overlap_thresh = settings.rpn_nms_thresh

    batch_size = bbox_deltas.size(0)
    feat_height = scores.size(2)
    feat_width = scores.size(3)

    # One (x, y, x, y) offset per feature-map cell.
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    offsets = np.vstack((grid_x.ravel(), grid_y.ravel(),
                         grid_x.ravel(), grid_y.ravel())).transpose()
    shifts = torch.from_numpy(offsets)
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast (1, A, 4) base anchors against (K, 1, 4) shifts.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Channels-last, then flatten so rows line up with the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Decode the deltas and clip the boxes to the image.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # Descending score order for every batch item.
    _, order = torch.sort(scores, 1, True)

    output = scores.new(batch_size, max_after_nms, 5).zero_()
    for b in range(batch_size):
        ranked = order[b]
        if 0 < max_before_nms < scores.numel():
            ranked = ranked[:max_before_nms]

        boxes_b = proposals[b][ranked, :]
        scores_b = scores[b][ranked].view(-1, 1)

        kept = nms(torch.cat((boxes_b, scores_b), 1), overlap_thresh)
        kept = kept.long().view(-1)
        if max_after_nms > 0:
            kept = kept[:max_after_nms]
        boxes_b = boxes_b[kept, :]

        # Rows beyond the kept proposals remain zero padding.
        count = boxes_b.size(0)
        output[b, :, 0] = b
        output[b, :count, 1:] = boxes_b

    return output
def test_model(imdb, valroidb, model_test, output_dir):
    """Run the detector over the validation set and evaluate the result.

    For every image the predicted box deltas are de-normalised, applied to
    the RoIs, clipped, and filtered per class with NMS; at most
    ``max_per_image`` detections are kept per image. All detections are
    pickled to ``<output_dir>/detections.pkl`` and handed to
    ``imdb.evaluate_detections``.

    Args:
        imdb: dataset object providing ``num_classes``, ``image_index`` and
            ``evaluate_detections``.
        valroidb: roidb passed to the data generator.
        model_test: model whose ``predict_on_batch([image, im_info])``
            returns ``(rois, _, cls_prob, bbox_pred)``.
        output_dir: directory receiving the detection file.
    """
    # --------------------data gen-------------------
    data_test = DataGen(imdb.num_classes, shuffle=False)
    data_test_gen = data_test.generator(valroidb)

    # --------------------start testing-------------------
    num_images = len(imdb.image_index)
    num_classes = imdb.num_classes
    # all_boxes[class][image] -> (N, 5) array of x1, y1, x2, y2, score
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    thresh = 0.
    max_per_image = 100
    progbar = generic_utils.Progbar(num_images)

    # Loop-invariant: per-class normalisation constants of the box targets.
    stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes))
    means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes))

    for i_batch in range(num_images):
        input_image, im_info, _ = next(data_test_gen)
        rois_test, _, cls_prob_test, bbox_pred_test \
            = model_test.predict_on_batch([input_image, im_info])

        im_scale = im_info[0, 2]
        bbox_pred_test *= stds
        bbox_pred_test += means
        boxes = rois_test[:, 1:5] / im_scale
        scores = np.reshape(cls_prob_test, [cls_prob_test.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred_test,
                               [bbox_pred_test.shape[0], -1])

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            image_shape = np.floor(im_info[0, 0:2] / im_scale)
            pred_boxes = _clip_boxes(pred_boxes, image_shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # skip j = 0, because it's the background class.
        # The upper bound is the class count (image_index is a list of
        # images and cannot be passed to range()).
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i_batch] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i_batch][:, -1]
                                      for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(
                        all_boxes[j][i_batch][:, -1] >= image_thresh)[0]
                    all_boxes[j][i_batch] = all_boxes[j][i_batch][keep, :]
        progbar.update(i_batch)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def compute_targets(feed_data, anchors, ls):
    """Build regression, IoU and class targets with their loss masks.

    Args:
        feed_data: 4-tuple ``(bbox_pred, iou_pred, gt_boxes, gt_cls)``.
            Ground-truth entries whose class id is negative are ignored.
        anchors: anchor sizes, indexed per anchor; ``anchors[a]`` divides
            the width/height part of a box target.
        ls: feature-map side length used to derive the stride
            (``cfg.INP_SIZE / ls``) and the flat cell index.

    Returns:
        ``(bbox_target, bbox_mask, iou_target, iou_mask, cls_target,
        cls_mask)``, float32 arrays of shape ``(hw, num_anchors, k)`` with
        ``k`` equal to 4, 1, 1, 1, ``cfg.NUM_CLASSES`` and 1 respectively.
    """
    bbox_pred, iou_pred, gt_boxes, gt_cls = feed_data

    # filter ignored groundtruth boxes
    gt_inds = np.where(gt_cls >= 0)[0]
    gt_boxes = gt_boxes[gt_inds]
    gt_cls = gt_cls[gt_inds]

    # transform bbox and rescale to inp_size
    box_pred = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32),
        ls, ls) * cfg.INP_SIZE

    hw, num_anchors, _ = box_pred.shape

    cls_target = np.zeros((hw, num_anchors, cfg.NUM_CLASSES),
                          dtype=np.float32)
    cls_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    iou_target = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    iou_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    bbox_target = np.zeros((hw, num_anchors, 4), dtype=np.float32)
    bbox_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)

    # compute overlaps btw prediction and groundtruth boxes
    box_pred = np.reshape(box_pred, [-1, 4])
    box_ious = box_overlaps(
        np.ascontiguousarray(box_pred, dtype=np.float32),
        np.ascontiguousarray(gt_boxes, dtype=np.float32))
    box_ious = np.reshape(box_ious, [hw, num_anchors, -1])

    # select boxes with best iou smaller than thresh to assign negative
    neg_box_inds = np.where(np.max(box_ious, axis=2) < cfg.IOU_THRESH)
    iou_mask[neg_box_inds] = cfg.NO_OBJECT_SCALE * \
        (0 - iou_pred[neg_box_inds])

    # locate groundtruth cells, compute bbox target
    feat_stride = cfg.INP_SIZE / ls
    cx = (gt_boxes[:, 0] + gt_boxes[:, 2]) * 0.5 / feat_stride
    cy = (gt_boxes[:, 1] + gt_boxes[:, 3]) * 0.5 / feat_stride
    cell_inds = np.floor(cx) * ls + np.floor(cy)
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    cell_inds = cell_inds.astype(int)

    # Target = offset of the centre inside its cell plus size in cells.
    box_target = np.empty(gt_boxes.shape, dtype=np.float32)
    box_target[:, 0] = cx - np.floor(cx)
    box_target[:, 1] = cy - np.floor(cy)
    box_target[:, 2] = (gt_boxes[:, 2] - gt_boxes[:, 0]) / feat_stride
    box_target[:, 3] = (gt_boxes[:, 3] - gt_boxes[:, 1]) / feat_stride

    # select best anchor for each groundtruth boxes
    gt_boxes /= feat_stride  # rescale to anchors' scale
    anchor_ious = anchor_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float32),
        np.ascontiguousarray(gt_boxes, dtype=np.float32))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # compute targets, masks
    for i, cell_i in enumerate(cell_inds):
        # Ground truth whose centre falls outside the grid is skipped.
        if cell_i >= hw or cell_i < 0:
            continue
        a = anchor_inds[i]

        iou_mask[cell_i, a, :] = cfg.OBJECT_SCALE * \
            (1 - iou_pred[cell_i, a, :])
        iou_target[cell_i, a, :] = box_ious[cell_i, a, i]

        bbox_mask[cell_i, a, :] = cfg.BBOX_SCALE
        # Express width/height relative to the matched anchor.
        box_target[i, 2:4] /= anchors[a]
        bbox_target[cell_i, a, :] = box_target[i]

        cls_mask[cell_i, a, :] = cfg.CLS_SCALE
        cls_target[cell_i, a, gt_cls[i]] = 1

    return bbox_target, bbox_mask, iou_target, iou_mask, cls_target, cls_mask
# Forward pass. Only rois, cls_prob and bbox_pred are read below; the loss
# terms and rois_label are unpacked but not used in this fragment.
rois, cls_prob, bbox_pred, \
rpn_loss_cls, rpn_loss_box, \
RCNN_loss_cls, RCNN_loss_bbox, \
rois_label = fasterRCNN(image, info, gt_boxes)

scores = cls_prob.data
boxes = rois.data[:, 1:5]  # skip column 0, keep the four box columns
box_deltas = bbox_pred.data

# Undo the target normalisation (scale by the stds, add the means) on rows
# of four values, then restore the per-row layout: 4 columns when class
# agnostic, 4 per class otherwise.
if cfg.TRAIN.CLASS_AGNOSTIC:
    box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
    box_deltas = box_deltas.view(-1, 4)
else:
    box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
    box_deltas = box_deltas.view(-1, 4 * len(imdb.classes))

# Decode the deltas into boxes, clip them, and divide by the image scale.
pred_boxes = bbox_transform_inv(boxes, box_deltas)
pred_boxes = clip_boxes(pred_boxes, info)
pred_boxes /= im_scales[0]

im2show = np.copy(im)
# Class 0 is skipped; for every other class keep detections above thresh.
for j in range(1, imdb.num_classes):
    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
    if inds.numel() > 0:
        cls_scores = scores[:, j][inds]
        # Descending sort order of the kept scores (not used in this fragment).
        _, order = torch.sort(cls_scores, 0, True)
        if cfg.TRAIN.CLASS_AGNOSTIC:
            cls_boxes = pred_boxes[inds, :]
        else:
            # Pick the four columns belonging to class j.
            cls_boxes = pred_boxes[inds][:, j*4:(j+1)*4]
        # One row per detection: four box columns followed by the score.
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride, eval_mode):
    """
    Decode RPN outputs into a blob of region proposals.

    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :param eval_mode: truthy selects the ``cfg.test_rpn_*`` settings,
        otherwise the ``cfg.train_rpn_*`` ones are used
    :return: float32 array with one row per proposal; column 0 is the batch
        index (always 0), followed by the four box coordinates
    """
    cls_prob = rpn_softmax(rpn_bbox_cls)

    # all_anchors (A * H * W, 4)
    anchors, num_anchors = Anchors(feat_stride=feat_stride).get_anchors()

    # (1, 2 * k, H, W)
    cls_prob = np.transpose(cls_prob, [0, 3, 1, 2])
    # (1, 4 * k, H, W)
    deltas = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert cls_prob.shape[0] == 1, 'Only support 1 batch_size'

    if eval_mode:
        # evaluation mode
        limits = (cfg.test_rpn_pre_nms_top_n,
                  cfg.test_rpn_post_nms_top_n,
                  cfg.test_rpn_nms_thresh,
                  cfg.test_rpn_min_size)
    else:
        # training mode
        limits = (cfg.train_rpn_pre_nms_top_n,
                  cfg.train_rpn_post_nms_top_n,
                  cfg.train_rpn_nms_thresh,
                  cfg.train_rpn_min_size)
    pre_nms_topN, post_nms_topN, nms_thresh, min_size = limits

    # In the class prediction the first num_anchors channels are background
    # and the remaining ones foreground; keep the foreground part.
    fg_scores = cls_prob[:, num_anchors:, :, :]

    # (1, 4 * k, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4)
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1), rows ordered (h, w, a)
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Regress the actual boxes from the anchors and the predicted deltas,
    # then clip them to the image.
    boxes = clip_boxes(bbox_transform_inv(anchors, deltas), im_size)

    # Remove boxes with either height or width below the threshold.
    big_enough = _filter_boxes(boxes, min_size)
    boxes = boxes[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # Sort by score, highest first, and keep the top pre_nms_topN.
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    boxes = boxes[ranking, :]
    fg_scores = fg_scores[ranking]

    # NMS, then keep the top post_nms_topN survivors.
    survivors = nms(np.hstack((boxes, fg_scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    boxes = boxes[survivors, :]

    # Only a single input image is supported, so all batch inds are 0.
    batch_col = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_col, boxes.astype(np.float32, copy=False)))
def forward(self, results):
    """Refine the top-scoring anchors of one validation video.

    The next validation video is fetched from ``self.dataset`` and cut into
    clips of ``self._depth`` frames. For clip ``i``, ``results[i]`` is split
    into two score blocks (columns ``[:11]`` and ``[11:]``). For every
    non-background class that is either the arg-max of some row or the
    ground-truth label, the ``self.top`` best anchors are pushed through the
    network and decoded with the class-specific slice of blob ``fc8-2``.

    Args:
        results: per-clip score arrays, indexable by clip number.

    Returns:
        ``(is_last, r)`` where ``r`` is a dict with keys ``dets`` (one dict
        per clip holding ``boxes``, ``pred_label`` and ``pred_scores``),
        ``gt_bboxes`` and ``gt_label``.
    """
    self._net.blobs['data'].reshape(self._batch_size, 3, self._depth,
                                    self._height, self._width)
    self._net.blobs['tois'].reshape(self._batch_size * self.top * 8, 5)
    self._net.blobs['toi2'].reshape(self._batch_size * self.top * 8, 5)

    [clip, labels, gt_bboxes, is_last] = \
        self.dataset.next_val_video(random=False)
    labels = int(labels)

    n = int(np.floor(clip.shape[0] / 8.0))
    all_dets = []
    for i in range(n):
        # Move the last axis first and add a leading batch axis of size 1.
        batch_clip = clip[i * self._depth:(i + 1) * self._depth].transpose(
            [3, 0, 1, 2])
        batch_clip = np.expand_dims(batch_clip, axis=0)

        curr_results = results[i]
        r1 = curr_results[:, :11]
        r2 = curr_results[:, 11:]
        curr_dets = {
            'boxes': np.empty((0, self._depth, 4)),
            'pred_label': np.empty((0)),
            'pred_scores': np.empty((0, 2)),
        }

        # Arg-max class of every row. Kept intact across the class loop:
        # filtering it in place would leave nothing to match for j >= 2.
        predicted = r1.argmax(axis=1)
        for j in range(1, self.dataset.num_classes):
            if not np.any(predicted == j) and j != labels:
                continue

            # Indices of the self.top highest scores for class j.
            argsort_r = np.argsort(r1[:, j])[-self.top:]
            curr_scores = np.vstack(
                (r1[argsort_r, j], r2[argsort_r, j])).transpose()
            curr_boxes = self.anchors[argsort_r]
            curr_boxes = np.repeat(curr_boxes, 8, axis=0)

            # 'tois' rows carry index 0 in column 0, 'toi2' rows carry the
            # frame index within the clip.
            batch_tois = np.hstack(
                (np.zeros((curr_boxes.shape[0], 1)), curr_boxes))
            curr_idx = np.arange(self._depth).reshape(1, self._depth)
            curr_idx = np.repeat(curr_idx, self.top, axis=0).reshape(-1, 1)
            batch_toi2 = np.hstack((curr_idx, curr_boxes))

            self._net.blobs['data'].data[...] = batch_clip.astype(
                np.float32, copy=False)
            self._net.blobs['tois'].data[...] = batch_tois.astype(
                np.float32, copy=False)
            self._net.blobs['toi2'].data[...] = batch_toi2.astype(
                np.float32, copy=False)
            self._net.forward()

            # Four regression outputs per class; class j uses slice j - 1.
            diff = self._net.blobs['fc8-2'].data[...][:, (j - 1) * 4:j * 4]
            boxes = bbox_transform_inv(batch_tois[:, 1:5], diff).reshape(
                (self.top, 8, 4)) * 16

            curr_dets['boxes'] = np.vstack((curr_dets['boxes'], boxes))
            curr_dets['pred_label'] = np.hstack(
                (curr_dets['pred_label'], np.ones(self.top) * j))
            curr_dets['pred_scores'] = np.vstack(
                (curr_dets['pred_scores'], curr_scores))

        all_dets.append(curr_dets)

    r = {'dets': all_dets, 'gt_bboxes': gt_bboxes, 'gt_label': labels}
    return is_last, r