def __init__(self, feat_stride, scales, ratios, output_score, rpn_pre_nms_top_n, rpn_post_nms_top_n, nms_threshold, rpn_min_size):
    """Configure the proposal operator.

    `scales` and `ratios` arrive as bracketed comma-separated strings
    (e.g. "(8,16,32)"); the surrounding bracket characters are stripped
    before parsing them into float arrays.
    """
    super(ProposalOperator, self).__init__()
    # decode "(a,b,c)"-style strings into float arrays
    parsed_scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
    parsed_ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
    self._feat_stride = feat_stride
    self._scales = parsed_scales
    self._ratios = parsed_ratios
    # base anchors for a single feature-map cell, shape (A, 4)
    self._anchors = generate_anchors(base_size=feat_stride,
                                     scales=parsed_scales,
                                     ratios=parsed_ratios)
    self._num_anchors = self._anchors.shape[0]
    # proposal-selection / NMS configuration
    self._output_score = output_score
    self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
    self._rpn_post_nms_top_n = rpn_post_nms_top_n
    self._nms_thresh = nms_threshold
    self._rpn_min_size = rpn_min_size
def __init__(self, feat_stride, scales, ratios, output_score, rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
    """Configure the proposal operator.

    `scales` and `ratios` arrive as bracketed comma-separated strings
    (e.g. "(8,16,32)"); the surrounding bracket characters are stripped
    before parsing them into float arrays.
    """
    super(ProposalOperator, self).__init__()
    # decode "(a,b,c)"-style strings into float arrays
    parsed_scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
    parsed_ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
    self._feat_stride = feat_stride
    self._scales = parsed_scales
    self._ratios = parsed_ratios
    # base anchors for a single feature-map cell, shape (A, 4)
    self._anchors = generate_anchors(base_size=feat_stride,
                                     scales=parsed_scales,
                                     ratios=parsed_ratios)
    self._num_anchors = self._anchors.shape[0]
    # proposal-selection / NMS configuration
    self._output_score = output_score
    self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
    self._rpn_post_nms_top_n = rpn_post_nms_top_n
    self._threshold = threshold
    self._rpn_min_size = rpn_min_size
    if DEBUG:
        print('feat_stride: {}'.format(self._feat_stride))
        print('anchors:')
        print(self._anchors)
def __init__(self, feat_stride, scales, ratios, output_score, rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
    """Configure the quadrangle proposal operator.

    `scales` and `ratios` arrive as bracketed comma-separated strings
    (e.g. "(8,16,32)"); the surrounding bracket characters are stripped
    before parsing them into float arrays.
    """
    super(ProposalQuadrangleOperator, self).__init__()
    # decode "(a,b,c)"-style strings into float arrays
    parsed_scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
    parsed_ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
    self._feat_stride = feat_stride
    self._scales = parsed_scales
    self._ratios = parsed_ratios
    # base anchors for a single feature-map cell, shape (A, 4)
    self._anchors = generate_anchors(base_size=feat_stride,
                                     scales=parsed_scales,
                                     ratios=parsed_ratios)
    self._num_anchors = self._anchors.shape[0]
    # proposal-selection / NMS configuration
    self._output_score = output_score
    self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
    self._rpn_post_nms_top_n = rpn_post_nms_top_n
    self._threshold = threshold
    self._rpn_min_size = rpn_min_size
    if DEBUG:
        print('feat_stride: {}'.format(self._feat_stride))
        print('anchors:')
        print(self._anchors)
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoI proposals from multi-level RPN outputs (pyramid proposal).

    Expected in_data layout for N pyramid levels: the first N entries are
    per-stride class probabilities, the next N entries are the matching bbox
    deltas, and in_data[-1] is im_info (height, width, scale).

    Writes to out_data[0] the (post_nms_top_n, 5) RoI blob of
    [batch_idx, x1, y1, x2, y2]; optionally writes scores to out_data[1].
    """
    before_pyramid_proposal = datetime.now()
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    # for each (H, W) location i
    # generate A anchor boxes centered on cell i
    # apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # NOTE(review): in_data also contains im_info, so len(in_data)/2 is
    # (2N+1)/2 == N only under Python 2 integer division.
    LAYER_NUM = len(in_data) / 2
    # NOTE(review): hard-coded override of the value computed just above —
    # forces the single-level 'stride64' branch. Looks like debug leftover;
    # confirm before reusing this operator with other pyramid depths.
    LAYER_NUM = 11
    if LAYER_NUM == 7:
        cls_prob_dict = {
            'stride64': in_data[6],
            'stride32': in_data[5],
            'stride16': in_data[4],
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[13],
            'stride32': in_data[12],
            'stride16': in_data[11],
            'stride8': in_data[10],
            'stride4': in_data[9],
            'stride2': in_data[8],
            'stride1': in_data[7],
        }
    elif LAYER_NUM == 6:
        cls_prob_dict = {
            'stride64': in_data[5],
            'stride32': in_data[4],
            'stride16': in_data[3],
            'stride8': in_data[2],
            'stride4': in_data[1],
            'stride2': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[11],
            'stride32': in_data[10],
            'stride16': in_data[9],
            'stride8': in_data[8],
            'stride4': in_data[7],
            'stride2': in_data[6],
        }
    elif LAYER_NUM == 5:
        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }
    elif LAYER_NUM == 2:
        # NOTE(review): these indices (4, 3 / 9, 8) match the 5-level layout,
        # not a 2-level one — verify before enabling this branch.
        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
        }
    elif LAYER_NUM == 11:
        cls_prob_dict = {
            'stride64': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[1],
        }
    elif LAYER_NUM == 1:
        cls_prob_dict = {
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride1': in_data[1],
        }
    elif LAYER_NUM == 3:
        cls_prob_dict = {
            'stride64': in_data[2],
            'stride32': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[5],
            'stride32': in_data[4],
            'stride1': in_data[3],
        }
    '''
    cls_prob_dict = {
        'stride8': in_data[3],
        'stride4': in_data[2],
        'stride2': in_data[1],
        'stride1': in_data[0],
    }
    bbox_pred_dict = {
        'stride8': in_data[7],
        'stride4': in_data[6],
        'stride2': in_data[5],
        'stride1': in_data[4],
    }
    '''
    '''
    cls_prob_dict = {
        'stride2': in_data[1],
        'stride1': in_data[0],
    }
    bbox_pred_dict = {
        'stride2': in_data[3],
        'stride1': in_data[2],
    }
    '''
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    proposal_list = []
    score_list = []
    # per-proposal record of which stride produced it (debug bookkeeping)
    channel_list = []
    before_feat = datetime.now()
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
        #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
        # first _num_anchors channels are background scores; keep foreground
        scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
        if DEBUG:
            scores1 = cls_prob_dict['stride' + str(s)].asnumpy()
            print "scores.shape:" + str(scores.shape)
            print "scores1.shape:" + str(scores1.shape)
        #print "scores.shape:"+str(scores.shape)
        bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
        #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
        im_info = in_data[-1].asnumpy()[0, :]
        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)
        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        before_enume = datetime.now()
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        after_enume = datetime.now()
        #print "enume time:"+str((after_enume-before_enume).seconds)
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        if DEBUG:
            print "scores[:100]:" + str(scores[:50])
        # tag every score row with its source stride
        channels = np.ones((scores.shape)) * stride
        # Convert anchors into proposals via bbox transformations
        before_pred = datetime.now()
        proposals = bbox_pred(anchors, bbox_deltas)
        after_pred = datetime.now()
        #print "pred_time:"
        #print (after_pred-before_pred).seconds
        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        if DEBUG:
            print str(min_size)
            print str(im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        if DEBUG:
            print "proposals3:" + str(proposals[0:10])
        scores = scores[keep]
        channels = channels[keep]
        proposal_list.append(proposals)
        score_list.append(scores)
        channel_list.append(channels)
    after_feat = datetime.now()
    #print "feat time:"
    #print (after_feat-before_feat).seconds
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    channels = np.vstack(channel_list)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    before_sort = datetime.now()
    order = scores.ravel().argsort()[::-1]
    after_sort = datetime.now()
    #print "sort time:"
    #print (after_sort-before_sort).seconds
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    channels = channels[order]
    if DEBUG:
        print '-------1-------'
        print channels.shape
        for s in self._feat_stride:
            print "stride:" + str(s)
            print len(np.where(channels == float(s))[0])
        print "proposals:" + str(proposals[0:20])
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    # NOTE(review): npr.choice raises ValueError if keep is empty here.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    channels = channels[keep]
    if DEBUG:
        print '-------2-------'
        print channels.shape
        for s in self._feat_stride:
            print "stride:" + str(s)
            print len(np.where(channels == float(s))[0])
        print "proposals:" + str(proposals[0:20])
        print "scores:" + str(scores[0:20])
        # dump the per-proposal stride tags for offline inspection
        f_chan = open('channels.txt', 'w')
        for ii in range(channels.shape[0]):
            f_chan.write(str(channels[ii][0]) + ' ')
        f_chan.close()
    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # if is_train:
    self.assign(out_data[0], req[0], blob)
    #print "out_data[0].shape"+str(out_data[0].shape)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
    after_pyramid_proposal = datetime.now()
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoI proposals from 5-level FPN RPN outputs.

    in_data layout: [0..4] class probabilities for strides 4/8/16/32/64,
    [5..9] the matching bbox deltas, in_data[-1] im_info (h, w, scale).

    Writes to out_data[0] a fixed-size (post_nms_top_n, 5) blob of
    [batch_idx, x1, y1, x2, y2]; optionally writes scores to out_data[1].

    Fix vs. original: when NMS keeps zero proposals, `npr.choice` on the
    empty index array would raise ValueError; we now fall back to dummy
    boxes (same strategy as the flattened-input forward in this file) so
    the output shape stays fixed.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    # for each (H, W) location i
    # generate A anchor boxes centered on cell i
    # apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    cls_prob_dict = {
        'stride64': in_data[4],
        'stride32': in_data[3],
        'stride16': in_data[2],
        'stride8': in_data[1],
        'stride4': in_data[0],
    }
    bbox_pred_dict = {
        'stride64': in_data[9],
        'stride32': in_data[8],
        'stride16': in_data[7],
        'stride8': in_data[6],
        'stride4': in_data[5],
    }
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
        # first _num_anchors channels are background scores; keep foreground
        scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]
        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)
        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4), then reshape to (K*A, 4)
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # bbox deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4),
        # rows ordered by (h, w, a) to match the anchors
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        # scores: (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1), same ordering
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)
        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        proposal_list.append(proposals)
        score_list.append(scores)
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        if len(keep) == 0:
            # Degenerate case: no proposal survived filtering/NMS.
            # npr.choice on an empty array would raise ValueError, so emit
            # dummy 16x16 boxes at the origin instead (mirrors the
            # flattened-input forward() in this file).
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            scores = np.zeros((post_nms_topN, 1), dtype=np.float32)
            keep = np.arange(post_nms_topN)
        else:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # if is_train:
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoI proposals from 5-level FPN RPN outputs.

    in_data layout: [0..4] class probabilities for strides 4/8/16/32/64,
    [5..9] the matching bbox deltas, in_data[-1] im_info (h, w, scale).

    Writes to out_data[0] the (post_nms_top_n, 5) RoI blob of
    [batch_idx, x1, y1, x2, y2]; optionally writes scores to out_data[1].

    NOTE(review): near-duplicate of another forward() in this file.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")
    # for each (H, W) location i
    # generate A anchor boxes centered on cell i
    # apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    cls_prob_dict = {
        'stride64': in_data[4],
        'stride32': in_data[3],
        'stride16': in_data[2],
        'stride8': in_data[1],
        'stride4': in_data[0],
    }
    bbox_pred_dict = {
        'stride64': in_data[9],
        'stride32': in_data[8],
        'stride16': in_data[7],
        'stride8': in_data[6],
        'stride4': in_data[5],
    }
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
        # first _num_anchors channels are background scores; keep foreground
        scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]
        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)
        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)
        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        proposal_list.append(proposals)
        score_list.append(scores)
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    # NOTE(review): npr.choice raises ValueError if keep is empty here.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # if is_train:
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode per-level RPN outputs into top-k boxes with per-class scores.

    in_data layout: [0..3] bbox deltas for strides 16/32/64/128,
    [4..7] class probabilities for the same strides, [8] im_info,
    [9] the raw image (used only by the optional visualization hook).

    Writes the top self._keep_num boxes to out_data[0] and the matching
    (num_classes-wide) class scores to out_data[1].

    Fixes vs. original: removed the unused locals (`cls_pro`,
    `proposal_list`, `score_list`) and renamed the inner per-class slice
    variable so it no longer shadows the stride loop variable `s`.
    """
    bbox_pred_dict = {
        'stride128': in_data[3],
        'stride64': in_data[2],
        'stride32': in_data[1],
        'stride16': in_data[0],
    }
    cls_prob_dict = {
        'stride128': in_data[7],
        'stride64': in_data[6],
        'stride32': in_data[5],
        'stride16': in_data[4],
    }
    im_info = in_data[8].asnumpy()[0, :]
    im = in_data[9].asnumpy()
    destore_rois_list = []
    destore_cls_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
        bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
        # im_info = in_data[-1].asnumpy()[0, :]
        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4), then reshape to (K*A, 4)
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # bbox deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4),
        # rows ordered by (h, w, a) to match the anchors
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])
        # class scores come as num_classes consecutive groups of
        # _num_anchors channels; reorder each group like the anchors and
        # stack them into an (H*W*A, num_classes) matrix
        scores = cls_prob_dict['stride' + str(s)].asnumpy()
        per_class_scores = []
        start = 0
        for i in range(self._num_classes):
            cls_slab = scores[:, start:start + self._num_anchors, :, :]
            start = start + self._num_anchors
            cls_slab = self._clip_pad(cls_slab, (height, width))
            cls_slab = cls_slab.transpose((0, 2, 3, 1)).reshape((-1, 1))
            per_class_scores.append(cls_slab)
        scores = np.concatenate(per_class_scores, axis=1)
        destore_rois_list.append(proposals)
        destore_cls_list.append(scores)
    destore_rois = np.concatenate(destore_rois_list, axis=0)
    destore_cls = np.concatenate(destore_cls_list, axis=0)
    # rank boxes by their best class score and keep the top _keep_num
    best_scores = np.max(destore_cls, axis=1)
    order = best_scores.ravel().argsort()[::-1]
    order = order[:self._keep_num]
    destore_cls = destore_cls[order, :]
    destore_rois = destore_rois[order, :]
    # debug visualization toggle
    vis = False
    if vis:
        vis_all_detection(im, destore_rois[:, :])
    self.assign(out_data[0], req[0], mx.nd.array(destore_rois))
    self.assign(out_data[1], req[1], mx.nd.array(destore_cls))
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg,
                          feat_strides=(4, 8, 16, 16, 16),
                          scales=(8, 8, 8, 16, 32), ratios=(0.5, 1, 2),
                          allowed_border=0, balance_scale_bg=False):
    """Assign RPN training labels and bbox-regression targets over an FPN.

    For each pyramid level, anchors are enumerated over the feature map,
    anchors outside the image (beyond `allowed_border`) are dropped, and
    the surviving anchors across all levels are labeled jointly against
    `gt_boxes` (1 = fg, 0 = bg, -1 = ignore) with fg/bg subsampling per
    cfg.TRAIN.  Results are unmapped back to the full per-level grids.

    Returns a dict:
      'label':       (1, sum_i A*h_i*w_i)
      'bbox_target': (1, 4A, sum_i h_i*w_i)
      'bbox_weight': (1, 4A, sum_i h_i*w_i)

    NOTE(review): the defaults repeat stride 16 three times with scales
    8/8/8 — presumably intentional for this network, but confirm.
    """

    def _unmap(data, count, inds, fill=0):
        # Scatter `data` (defined on the subset `inds`) back onto a full
        # array of `count` rows, filling the rest with `fill`.
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    fpn_args = []
    # per-level count of inside-anchors (turned into offsets below)
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # one scale per level, shared ratios
        base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                        ratios=ratios,
                                        scales=[scales[feat_id]])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # broadcast base anchors over all cell shifts: (K*A, 4)
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)
        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        # start every label as -1 (ignore)
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)
        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])
    if gt_boxes.size > 0:
        # anchor/gt IoU matrix over all levels at once
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float), gt_boxes.astype(np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor tying the per-gt best overlap becomes positive
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        fpn_labels[gt_argmax_overlaps] = 1
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # negatives applied last so they can override positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        # no ground truth: everything is background
        fpn_labels[:] = 0
    # subsample foreground down to the configured budget
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic variant for debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1
    # subsample background to fill the remainder of the batch
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level counts into cumulative offsets: level i occupies
    # rows [fpn_anchors_fid[i], fpn_anchors_fid[i+1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))
    if balance_scale_bg:
        # spread the bg budget evenly across pyramid levels
        num_bg_scale = num_bg / len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1
    # regression targets only for positive anchors; everything else zero
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)
    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        # scatter this level's slice back onto the full anchor grid
        labels = _unmap(fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]], total_anchors, fpn_inds_inside[feat_id], fill=0)
        # reshape (h*w*A) row order into network layout (1, A, h, w) etc.
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))
        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)
    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label  # label['label'] = 1,(A*w1*h1+A*w2*h2 +...),label['bbox_target'] = (1,4A,(w1h1+w2h2+...))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoI proposals from flattened multi-level RPN outputs.

    in_data layout: [0] all per-level fg/bg scores flattened to (1, n),
    [1] all per-level bbox deltas flattened to (1, 2n), [2] im_info,
    [3..7] the P2..P6 feature maps (used only for their shapes, to slice
    the flattened arrays back into per-level (1, C, H, W) tensors).

    Writes to out_data[0] a fixed-size (post_nms_top_n, 5) blob of
    [batch_idx, x1, y1, x2, y2]; optionally writes scores to out_data[1].

    Fix vs. original: the bare `except:` around the padding `npr.choice`
    is narrowed to `except ValueError:` (what npr.choice raises when the
    kept-index array is empty) so that unrelated errors and interrupts
    are no longer swallowed.
    """
    nms = gpu_nms_wrapper(self._threshold, 0)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    # for each (H, W) location i
    # generate A anchor boxes centered on cell i
    # apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    # NOTE: min_size is indexed per level below — one entry per stride
    min_size = self._rpn_min_size
    # the first set of anchors are background probabilities
    # keep the second part
    scores_list = in_data[0].asnumpy()  # [1,n]
    bbox_deltas_list = in_data[1].asnumpy()  # [1,n*2]
    im_info = in_data[2].asnumpy()[0, :]
    p2_shape = in_data[3].asnumpy().shape
    p3_shape = in_data[4].asnumpy().shape
    p4_shape = in_data[5].asnumpy().shape
    p5_shape = in_data[6].asnumpy().shape
    p6_shape = in_data[7].asnumpy().shape
    feat_shape = []
    feat_shape.append(p2_shape)
    feat_shape.append(p3_shape)
    feat_shape.append(p4_shape)
    feat_shape.append(p5_shape)
    feat_shape.append(p6_shape)
    num_feat = len(feat_shape)  # [1,5,4]
    # running offsets into the flattened score/delta arrays
    score_index_start = 0
    bbox_index_start = 0
    keep_proposal = []
    keep_scores = []
    for i in range(num_feat):
        feat_stride = int(self._feat_stride[i])  # 4,8,16,32,64
        anchor = generate_anchors(feat_stride, scales=self._scales, ratios=self._ratios)
        num_anchors = anchor.shape[0]  # 3
        height = feat_shape[i][2]
        width = feat_shape[i][3]
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = num_anchors  # 3
        K = shifts.shape[0]  # height*width
        anchors = anchor.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))  # 3*height*widht,4
        # slice this level's fg/bg scores out of the flat array and keep
        # only the foreground half
        scores = (scores_list[
            0, int(score_index_start):int(score_index_start + K * A * 2)]).reshape(
                (1, int(2 * num_anchors), -1, int(width)))  # 1,2*3,h,w
        scores = scores[:, num_anchors:, :, :]  # 1,3,h,w
        bbox_deltas = (bbox_deltas_list[
            0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]).reshape(
                (1, int(4 * num_anchors), -1, int(width)))  # 1,4*3,h,w
        score_index_start += K * A * 2
        bbox_index_start += K * A * 4
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
            (-1, 4))  # [1,h,w,12]--->[1*h*w*3,4]
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape(
            (-1, 1))  # [1,h,w,3]--->[1*h*w*3,1]
        proposals = bbox_pred(anchors, bbox_deltas)  # debug here, corresponding?
        proposals = clip_boxes(proposals, im_info[:2])
        keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
        keep_proposal.append(proposals[keep, :])
        keep_scores.append(scores[keep])
    proposals = keep_proposal[0]
    scores = keep_scores[0]
    for i in range(1, num_feat):
        proposals = np.vstack((proposals, keep_proposal[i]))
        scores = np.vstack((scores, keep_scores[i]))
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except ValueError:
            # keep was empty: fabricate dummy 16x16 boxes at the origin so
            # the output blob keeps its fixed shape.
            # NOTE(review): scores is NOT resized here, so out_data[1] (when
            # _output_score is set) may be assigned fewer rows — confirm
            # downstream tolerates this.
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack(
                (batch_inds, proposals.astype(np.float32, copy=False)))
            self.assign(out_data[0], req[0], blob)
            if self._output_score:
                self.assign(out_data[1], req[1],
                            scores.astype(np.float32, copy=False))
            return
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))