def _compute_targets_experimental(rois, small_rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32), np.zeros( (small_rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] small_ex_rois = small_rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) small_targets = np.zeros((small_rois.shape[0], 5), dtype=np.float32) small_targets[ex_inds, 0] = labels[ex_inds] small_targets[ex_inds, 1:] = bbox_transform(small_ex_rois, gt_rois) return targets, small_targets
def _compute_targets(rois, overlaps, labels): # 计算某张图片的回归偏移量 """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions # 这样也会把gt_inds取出来啊!??? ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target # 对于每个ex roi ,与它IoU最大的gt box的索引 gt_assignment = ex_gt_overlaps.argmax(axis=1) # box的x1,y1,x2,y2 gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) # 第0列是box的索引 targets[ex_inds, 0] = labels[ex_inds] # 第1-4列是box的偏移量:x的偏移量,y的偏移量,w的伸缩量,h的伸缩量 targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _accuracy(track, gt): if len(track) < 2: return [], [], [] abs_acc = [] rel_acc = [] ious = [] st_frame = track[0]['frame'] end_frame = track[-1]['frame'] assert end_frame - st_frame + 1 == len(track) gt_seg = select_gt_segment(gt['track'], st_frame, end_frame) assert len(gt_seg) <= len(track) track_bbox1 = np.asarray([track[0]['bbox']]) gt_bbox1 = np.asarray([gt_seg[0]]) for track_box, gt_bbox in zip(track[1:len(gt_seg)], gt_seg[1:]): # current track box track_bbox = np.asarray([track_box['bbox']]) # gt motion gt_delta = bbox_transform(gt_bbox1, np.asarray([gt_bbox])) # target is the first track_bbox with gt motion track_bbox_target = bbox_transform_inv(track_bbox1, gt_delta) abs_diff = np.abs(track_bbox - track_bbox_target) cur_iou = iou(track_bbox, track_bbox_target) width = track_bbox_target[0,2] - track_bbox_target[0,0] height = track_bbox_target[0,3] - track_bbox_target[0,1] rel_diff = abs_diff / (np.asarray([width, height, width, height]) + np.finfo(float).eps) abs_acc.extend(abs_diff.flatten().tolist()) rel_acc.extend(rel_diff.flatten().tolist()) ious.extend(cur_iou.flatten().tolist()) return abs_acc, rel_acc, ious
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" #这个函数主要是计算一副图像bboxes回归信息,返回(rois.shape[0], 5) # Indices of ground-truth ROIs #那一行有1,len(gt_inds)表示所有行一共有几个1 gt_inds = np.where(overlaps == 1)[0] #GT情况:这种情况不存在,roidb已经筛选出没有任何fg与bg的图片,只要有一个,就会存在1,len(gt_inds)就不为0 if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions #cfg.TRAIN.BBOX_THRESH为0.5 #情况为GT,则全满足 ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI #建立(len(ex_inds),len(gt_inds))大小的矩阵,内容为iou ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target #找到与该ex_roi最佳匹配GT gt_assignment = ex_gt_overlaps.argmax(axis=1) #取出gt_rois与ex_rois(bboxes) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] #targets:(标签,dx,dy,dw,dh) targets = np.zeros((rois.shape[0], 5), dtype=np.float32) #gt情况:就是max_classes,ex_inds就是全部引索,因为GT情况上面的条件全满足 targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target try: gt_assignment = ex_gt_overlaps.argmax(axis=1) except: import pdb pdb.set_trace() gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs #这里可以计算出来ground-truth的个数,假设rois为[2000,5],overlaps为[2000,1],labels为[2000,1],则这样可以计算出来ground truth的所在的行,假设gt_inds为[4] gt_inds = np.where(overlaps == 1)[0] # Indices of examples for which we try to make predictions #假设这儿所对应的为[500] ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] #计算每一个ex ROI和ground truth的交集,可以得到[500,4] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target #这样可以得到[500] gt_assignment = ex_gt_overlaps.argmax(axis=1) #则这样便可以得到最后的几个可选择候选框! gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 5 #bbox_transform根据anchor和gt box生成offset的标签值,(x* - x_a)/w_a...,见论文offset变换 return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 5 return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois, labels):
    """Build ``[label, targets...]`` rows for box regression.

    ``bbox_transform(ex_rois, gt_rois)`` gives the transformation from
    ``ex_rois`` to ``gt_rois`` -- (tx, ty, tw, th) per the original comment.
    The result is optionally normalised with the precomputed means/stds from
    ``cfg.TRAIN`` and returned as float32 with ``labels`` as column 0.
    """
    deltas = bbox_transform(ex_rois, gt_rois)

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        deltas = (deltas - means) / stds

    label_col = labels[:, np.newaxis]
    stacked = np.concatenate((label_col, deltas), axis=1)
    return stacked.astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 # (Yuliang) Add 2 more for strokes and areas assert gt_rois.shape[1] == 7 #5+2 return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: array of boxes, one row per ROI. Ground-truth ROIs must occupy
            the leading rows (checked below).
        overlaps: per-ROI overlap value; entries equal to 1 mark the
            ground-truth ROIs.
        labels: per-ROI labels.

    Returns:
        Tuple ``(targets, fg_gt_ind_assignment)``. ``targets`` is a float32
        array of shape ``(rois.shape[0], 5)`` with the label in column 0 and
        the output of ``bbox_transform`` in columns 1-4.
        ``fg_gt_ind_assignment`` maps each ROI index with overlap
        ``>= cfg.TRAIN.FG_THRESH`` to the index of its assigned ground-truth
        ROI. When the image has no ground-truth ROI the result is
        ``(zeros, {})``, so both paths return the same tuple shape.
    """
    # We are sampling relations from fg rois, hence each
    # fg box must be assigned to an gt box
    assert cfg.TRAIN.FG_THRESH >= cfg.TRAIN.BBOX_THRESH

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs. This used to return
        # the bare array, which broke callers unpacking two values.
        return np.zeros((rois.shape[0], 5), dtype=np.float32), {}

    # sanity check: ground-truth rows are exactly 0, 1, ..., len(gt_inds)-1
    assert gt_inds[0] == 0
    assert np.all(np.diff(gt_inds) == 1)

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI.
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)

    # guarding against the case where a gt box doesn't get assigned to itself
    gt_to_ex_inds = [np.where(ex_inds == g)[0][0] for g in gt_inds]
    for i, g in enumerate(gt_to_ex_inds):
        # gt_inds[i] == i thanks to the sanity check above
        gt_assignment[g] = gt_inds[i]

    # assign rois
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # record target assignments for all foreground rois
    fg_gt_ind_assignment = {}
    for i, e in enumerate(ex_inds):
        if overlaps[e] >= cfg.TRAIN.FG_THRESH:
            fg_gt_ind_assignment[e] = gt_inds[gt_assignment[i]]

    # check if all gt has been assigned (build the lookup once)
    assigned_gt = set(fg_gt_ind_assignment.values())
    for g in gt_inds:
        assert g in assigned_gt

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    # transfer to center and log
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets, fg_gt_ind_assignment
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" #要求anchor与对应匹配最好GT个数相同 assert ex_rois.shape[0] == gt_rois.shape[0] #要有anchor左上角与右下角坐标,有4个元素 assert ex_rois.shape[1] == 4 #GT有标签位,所以为5个 assert gt_rois.shape[1] == 5 #返回一个用于anchor回归成target的包含每个anchor回归值(dx、dy、dw、dh)的array return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = (targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 5 # 输入rois和gt_rois 都是[x1,y1,x2,y2] # 输出各个rois相对于对应的gt_rois的偏移量 # 4列分别代表:x的偏移量,y的偏移量,w的伸缩量,h的伸缩量 return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) return targets
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    ``ex_rois`` must have 4 columns, ``gt_rois`` 5, and both the same number
    of rows. Only the first four columns of ``gt_rois`` are passed to
    ``bbox_transform``. The result is returned as float32.

    When the module-level ``DEBUG`` flag is set, the current function name,
    the caller's file and function name, and ``gt_rois`` are printed first.
    """
    if DEBUG:
        curframe = inspect.currentframe()
        calframe = inspect.getouterframes(curframe, 2)
        # Single-argument print calls give the same output whether ``print``
        # is a statement (Python 2) or a function (Python 3).
        print('current name: %s' % (inspect.stack()[0][3],))
        print('caller name: %s %s' % (calframe[1][1], calframe[1][3]))
        print('gt_rois %s' % (gt_rois,))

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) return np.hstack( (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 5 targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) if cfg.TRAIN.RPN_NORMALIZE_TARGETS: assert cfg.TRAIN.RPN_NORMALIZE_MEANS is not None assert cfg.TRAIN.RPN_NORMALIZE_STDS is not None targets -= cfg.TRAIN.RPN_NORMALIZE_MEANS targets /= cfg.TRAIN.RPN_NORMALIZE_STDS return targets
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] ###########################Print ex_rois and gt################ # f = open("./tools/rois-targets_detail.txt", "w") # nrois = ex_rois.shape[0] # for i in xrange(nrois): # ex_roi = ex_rois[i] # gt_roi = gt_rois[i] # x1 = ex_roi[0] # y1 = ex_roi[1] # x2 = ex_roi[2] # y2 = ex_roi[3] # # x1_t = gt_roi[0] # y1_t = gt_roi[1] # x2_t = gt_roi[2] # y2_t = gt_roi[3] # f.write("Roi: " + str(x1) + " " + str(y1) + " " + str(x2) + " " + str(y2)) # f.write(" GT: " + str(x1_t) + " " + str(y1_t) + " " + str(x2_t) + " " + str(y2_t)) # f.write("\n") # f.write("\n") # f.close() ############################################################### targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _compute_targets(self, ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) ########my implementation ######## if DEBUG and self._iter % 400 == 0: print "DEBUG for ProposalTargetLayer" self._sums += targets[labels != 0, :].sum(axis=0) self._squared_sums += (targets[labels != 0, :]**2).sum(axis=0) self._counts += np.sum(labels != 0) # Compute values needed for means and stds # var(x) = E(x^2) - E(x)^2 means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means**2) print 'means and stdenvs for bbox bbox_targets of ProposalTargetLayer!!!!' print 'All class means:', means print 'All class stdevs:', stds for i in xrange(self._num_classes - 1): cls_index = i + 1 self._cls_sums[i] += targets[labels == cls_index, :].sum( axis=0) self._cls_squared_sums[i] += ( targets[labels == cls_index, :]**2).sum(axis=0) self._cls_counts[i] += np.sum(labels == cls_index) # Compute values needed for means and stds # var(x) = E(x^2) - E(x)^2 cls_means = self._cls_sums[i] / self._cls_counts[i] cls_stds = np.sqrt(self._cls_squared_sums[i] / self._cls_counts[i] - cls_means**2) print 'class %d means:' % (cls_index) print cls_means print 'class %d stdevs:' % (cls_index) print cls_stds ########END OF my implementation ######## if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 #返回anchor相对于GT的(dx,dy,dw,dh)四个回归值,shape(len(rois),4) targets = bbox_transform(ex_rois, gt_rois) #cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED为False if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) #注意,labels传进来是(len(rois),)大小的,labels[:, np.newaxis]将转换成(len(rois),1)大小,之后与targets合并成(len(rois),5)大小 #内容信息为:[标签,dx,dy,dw,dh] return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def forward(self, bottom, top):
    """Fill ``top`` with box-regression targets and their weights.

    Inputs, read from ``bottom[i].data``: 0 = per-roi labels, 1 = per-roi
    ground-truth assignment (cast to int32), 2 = roi boxes with 5 columns
    whose first column must be all zero, 3 = ground-truth boxes.

    Outputs, each reshaped to ``(num_rois, 4)``: ``top[0]`` targets,
    ``top[1]`` inside weights, ``top[2]`` outside weights. Only rois with
    label 1 that pass ``filter_valid`` get non-zero entries.
    """
    rois_labels = bottom[0].data
    gt_assignments = bottom[1].data.astype(np.int32)
    rois_boxes = bottom[2].data
    gt_boxes = bottom[3].data

    num_rois = len(rois_labels)
    assert len(gt_assignments) == num_rois, \
        "{} vs {}".format(len(gt_assignments), num_rois)
    assert len(rois_boxes) == num_rois, \
        "{} vs {}".format(len(rois_boxes), num_rois)
    assert rois_boxes.shape[1] == 5, rois_boxes.shape
    # only support single image
    assert np.all(rois_boxes[:, 0] == 0), rois_boxes

    out_shape = (num_rois, 4)
    bbox_targets = np.zeros(out_shape, dtype=np.float32)
    bbox_inside_weights = np.zeros(out_shape, dtype=np.float32)
    bbox_outside_weights = np.zeros(out_shape, dtype=np.float32)

    # sample rois
    pos_inds = np.where(rois_labels == 1)[0]
    if len(pos_inds) > 0:
        # dst boxes
        dst_boxes = np.zeros(out_shape, dtype=np.float32)
        dst_boxes[pos_inds] = gt_boxes[gt_assignments[pos_inds]]

        # choose valid boxes
        pos_sel_inds = filter_valid(rois_boxes, dst_boxes)

        # targets
        src_sel = rois_boxes[pos_sel_inds, 1:]
        dst_sel = dst_boxes[pos_sel_inds]
        targets = bbox_transform(src_sel, dst_sel)
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            means = np.array(self.bbox_normalize_means)
            stds = np.array(self.bbox_normalize_stds)
            targets = (targets - means) / stds

        bbox_targets[pos_sel_inds] = targets
        bbox_inside_weights[pos_sel_inds] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    # With no positives the mask below is empty, so this is a no-op then.
    bbox_outside_weights[bbox_inside_weights > 0] = 1.

    outputs = (bbox_targets, bbox_inside_weights, bbox_outside_weights)
    for slot, values in enumerate(outputs):
        top[slot].reshape(*(values.shape))
        top[slot].data[...] = values
def _compute_targets(rois, overlaps, labels): """ compute bounding-box regression targets for an image rios [num_box, 4] overlappes [num_box] labels [num_box] """ """ Do not know where max_overlaps get bigger than gt_overlaps, becuase previously, max_overlaps = roi[i]['gt_overlaps'].max(axis=1) and where soft overlaps are calculated """ # indices of gt ROIs gt_inds = np.where(overlaps == 1)[0] # reuse for other cases if len(gt_inds) == 0: # bail if the image has no gt ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # the gt for every rois # inices of examples for which we try to make predictions # Overlap required between a ROI and ground-truth box in order for that ROI to # be used as a bounding-box regression training example ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # get IoU overlap between each exROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # gt_boxes could overlap? one box is pared with only one gt boxes. # [ num_gt_inds, num_ex_inds] # find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] # copy labels, why not ground truth label? targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) # no anchor boxes? No! not now return targets
def _compute_targets(ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" #确保rois与gt_rois的数量一致 assert ex_rois.shape[0] == gt_rois.shape[0] # 确保有x1,y1,x2,y2 assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 # 输出各个rois相对于对应的gt_rois的偏移量 # 4列分别代表:x的偏移量,y的偏移量,w的伸缩量,h的伸缩量 targets = bbox_transform(ex_rois, gt_rois) # 默认不normalization if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) # labels是个一维向量 return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _gt_propagate_boxes(boxes, annot_proto, frame_id, window, overlap_thres): pred_boxes = [] annots = [] for annot in annot_proto['annotations']: for idx, box in enumerate(annot['track']): if box['frame'] == frame_id: gt1 = box['bbox'] deltas = [] deltas.append(gt1) for offset in xrange(1, window): try: gt2 = annot['track'][idx + offset]['bbox'] except IndexError: gt2 = gt1 delta = bbox_transform(np.asarray([gt1]), np.asarray([gt2])) deltas.append(delta) annots.append(deltas) gt1s = [annot[0] for annot in annots] if not gt1s: # no grount-truth, boxes remain still return np.tile(np.asarray(boxes)[:, np.newaxis, :], [1, window - 1, 1]) overlaps = bbox_overlaps(np.require(boxes, dtype=np.float), np.require(gt1s, dtype=np.float)) assert len(overlaps) == len(boxes) for gt_overlaps, box in zip(overlaps, boxes): max_overlap = np.max(gt_overlaps) max_gt = np.argmax(gt_overlaps) sequence_box = [] if max_overlap < overlap_thres: for offset in xrange(1, window): sequence_box.append(box) else: for offset in xrange(1, window): delta = annots[max_gt][offset] sequence_box.append( bbox_transform_inv(np.asarray([box]), delta)[0].tolist()) pred_boxes.append((sequence_box)) return np.asarray(pred_boxes)
def syn_compute_targets(ex_rois, gt_rois, gt_info, labels):
    """Compute bounding-box and curve-info regression targets for an image.

    Args:
        ex_rois: example boxes, 4 columns.
        gt_rois: matched ground-truth boxes, 4 columns, same row count as
            ``ex_rois``.
        gt_info: extra per-ground-truth information, 28 columns, same row
            count as ``gt_rois``.
        labels: per-row labels.

    Returns:
        Tuple ``(bbox_targets, info_targets)``. ``bbox_targets`` is float32
        with ``labels`` as column 0 followed by the (optionally normalised)
        output of ``bbox_transform``. ``info_targets`` is the (optionally
        normalised) output of ``info_syn_transform_hw``.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4
    # curve
    assert gt_rois.shape[0] == gt_info.shape[0]
    assert gt_info.shape[1] == 28

    targets = bbox_transform(ex_rois, gt_rois)
    # curve
    targets_2 = info_syn_transform_hw(ex_rois, gt_info)

    if DEBUG:
        # Single-argument print calls behave the same on Python 2 and 3.
        print('targets after bbox_transform:')
        print(targets)
        print('targets_info after bbox_transform:')
        print(targets_2)
        print('targets_info_curve after bbox_transform:')

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
        targets_2 = ((targets_2 - np.array(cfg.TRAIN.INFO_NORMALIZE_MEANS))
                     / np.array(cfg.TRAIN.INFO_NORMALIZE_STDS))

    # NOTE(review): nesting reconstructed from flattened source; this debug
    # block may originally have sat inside the normalisation branch.
    if DEBUG:
        print('targets after normalize:')
        print(targets)
        print('targets_info after normalize:')
        print(targets_2)

    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False), targets_2
def _compute_targets(ex_rois, gt_rois, labels): """Compute bounding-box regression targets for an image.""" """ rois: (num of finally left proposal, 5) blob[:,0]=0; blob[:-2,1:5] = x1,y1,x2,y2(pred box); blob[-2:,1:5] = x1,y1,x2,y2(gt_box) [0:fg_rois_per_this_image]: the left foregound; [fg_rois_per_this_image:]:the left background gt_boxes[gt_assignment[keep_inds, :4]:the coordinates of the ground truth boxes correspounding to per pred box [num of finally left proposal,4] labels: the final classes of the ground truth correspounding to per pred box [num of finally left proposal,] : """ assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[1] == 4 assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) #(num of left box * 4)[dx,dy,dw,dh] if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: #false # Optionally normalize targets by a precomputed mean and stdev targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def forward(self, step = 1):
    """Load the next usable sample and build one training mini-batch.

    Advances ``self.iter`` by ``step`` (wrapping at ``len(self.imagelist)``)
    until a sample whose first ground-truth array is non-empty is found.
    Foreground and background rois are then sampled by overlap with that
    ground truth, and regression targets are built from the first frame's
    ground truth to each later frame's ground truth.

    Returns:
        Tuple ``(blobs, rois, labels, bbox_targets, bbox_weights)``:
        stacked image blobs, rois with a leading per-frame index column,
        1/0 labels of shape ``(batch_size, 1)``, normalised targets and
        their weights.
    """
    selected = False
    while not selected:
        index = self.index[self.iter]
        img_names = self.imagelist[index]
        proc_imgs = []
        for img_name in img_names:
            img_path = osp.join(self.root_dir, img_name)
            assert osp.isfile(img_path)
            proc_img, scale = _get_image_blob(cv2.imread(img_path))
            proc_imgs.append(proc_img)
        blobs = np.vstack(proc_imgs)
        bboxes = self.bbox[index][0][:, :4]
        gts = self.gt[index]
        self.iter += step
        if self.iter >= len(self.imagelist):
            self.iter -= len(self.imagelist)
        # Samples without ground truth in the first frame are skipped
        if gts[0].shape[0] > 0:
            selected = True

    # sample rois
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlaps = bbox_overlaps(np.require(bboxes, dtype=np.float64),
                             np.require(gts[0], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    fg_inds = np.where(max_overlaps >= self.config['select_overlap'])[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(self.config['batch_size'], fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    bg_inds = np.where(max_overlaps < self.config['select_overlap'])[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = self.config['batch_size'] - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # Foreground rows come first, so the leading rows are labelled 1
    labels = np.ones((self.config['batch_size'], 1), dtype=np.float64)
    labels[fg_rois_per_this_image:] = 0
    keep_ids = np.append(fg_inds, bg_inds)

    # n * 1 * 4
    rois = bboxes[keep_ids][:, np.newaxis, :]
    rois = np.tile(rois, (1, self.length, 1))
    rois = rois * scale  # scale rois to match image scale
    assignment = np.tile(np.arange(self.length),
                         (self.config['batch_size'], 1))[:, :, np.newaxis]
    rois = np.concatenate((assignment, rois), axis=2).reshape((-1, 5))

    # compute targets and weights
    bbox_targets = []
    bbox_weights = []
    for gt in gts[1:]:
        cur_bbox_targets = bbox_transform(gts[0][gt_assignment[keep_ids]],
                                          gt[gt_assignment[keep_ids]])
        cur_bbox_weights = np.zeros_like(cur_bbox_targets)
        cur_bbox_weights[labels.flatten().astype('bool'), ...] = 1
        bbox_targets.append(cur_bbox_targets)
        bbox_weights.append(cur_bbox_weights)

    bbox_targets = np.hstack(bbox_targets)
    bbox_weights = np.hstack(bbox_weights)
    bbox_targets = (bbox_targets - self.bbox_mean) / self.bbox_std
    return blobs, rois, labels, bbox_targets, bbox_weights
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8,16,32), feature_stride=16):
    """Compute mean and std of RPN box-regression targets over a dataset.

    Args:
        net: network exposing ``blobs`` and ``forward``; probed with square
            dummy inputs to learn the ``'rpn/output'`` size per input size.
        imdb: image database providing ``roidb`` and ``num_images``.
        anchor_scales: scales passed to ``generate_anchors``.
        feature_stride: pixel stride between neighbouring anchor positions.

    Returns:
        Tuple ``(means, stds)`` of the targets produced by
        ``bbox_transform`` over all foreground anchors.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    # Single-argument print calls behave the same on Python 2 and 3.
    print(raw_anchors.shape)
    sums = 0
    squred_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)

    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # Only the shape of the 'rpn/output' blob is needed afterwards
        net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        if cfg.TRAIN.FORMAT == 'pickle':
            # NOTE: unpickling can execute arbitrary code; only point this
            # at image files you trust.
            with open(roidb[i]['image'], 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))

        gt_rois = gt_boxes[argmax_overlaps, :]
        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32, copy=False)

        sums += targets.sum(axis=0)
        squred_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    # var(x) = E(x^2) - E(x)^2
    means = sums / counts
    stds = np.sqrt(squred_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32) bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4]).astype(np.float32, copy=False) # map up to original set of anchors #labels = _unmap(labels, total_anchors, inds_inside, fill=-1) #bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) #overlaps_ = _unmap(overlaps, total_anchors, inds_inside, fill=0) # Reshape to 1 x 1 x (A x H) x W #labels_reshape = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) #labels_reshape = labels_reshape.reshape((1, 1, A * height, width)) #bbox_targets_reshape = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) #####
# Script: compute the mean and standard deviation of bounding-box regression
# deltas between the first box set of each track and every later box set,
# then pickle the two statistics to the given output files.
import argparse
import os.path as osp
import sys

import numpy as np
import scipy.io as sio

try:
    import cPickle
except ImportError:  # Python 3: cPickle no longer exists, pickle replaces it
    import pickle as cPickle

# Make the bundled py-faster-rcnn library importable before importing from it.
this_dir = osp.dirname(__file__)
sys.path.insert(0, osp.join(this_dir, '../../external/py-faster-rcnn/lib'))
from fast_rcnn.bbox_transform import bbox_transform

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('paired_gt_file')
    parser.add_argument('save_mean_file')
    parser.add_argument('save_std_file')
    args = parser.parse_args()

    # The .mat file stores the tracks under the 'gt' key; each entry is
    # iterated as one track (presumably a sequence of per-frame box arrays;
    # verify against the file producer).
    gts = sio.loadmat(args.paired_gt_file)['gt']

    deltas = []
    for track_gts in gts:
        gt1 = track_gts[0]
        if len(gt1) == 0:
            # Nothing to use as the reference boxes for this track.
            continue
        # Deltas from the first entry to every later entry of the track,
        # concatenated column-wise so each track contributes one row block.
        # NOTE(review): a track with a single entry makes np.hstack raise
        # ValueError on the empty list; confirm tracks always have >= 2.
        cur_deltas = [bbox_transform(gt1, gt) for gt in track_gts[1:]]
        deltas.append(np.hstack(cur_deltas))

    delta = np.vstack(deltas)
    mean = np.mean(delta, axis=0)
    std = np.std(delta, axis=0)

    with open(args.save_mean_file, 'wb') as f:
        cPickle.dump(mean, f, cPickle.HIGHEST_PROTOCOL)
    with open(args.save_std_file, 'wb') as f:
        cPickle.dump(std, f, cPickle.HIGHEST_PROTOCOL)
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32),
                           feature_stride=16):
    """Compute the mean and std of RPN bbox regression targets over an imdb.

    For every image in the filtered roidb, the base anchors are tiled over
    the RPN output grid, anchors crossing the image boundary are dropped,
    foreground anchors are selected, and the regression targets of those
    anchors w.r.t. their best-overlapping ground-truth box are accumulated.

    Args:
        net: network exposing ``blobs['data']``, ``blobs['im_info']``,
            ``blobs['rpn/output']`` and ``forward``; used only to find the
            RPN output size for each input size.
        imdb: dataset object providing ``roidb`` and ``num_images``.
        anchor_scales: scales passed to ``generate_anchors``.
        feature_stride: pixel stride between neighbouring grid cells.

    Returns:
        ``(means, stds)``: two float32 arrays with one value per target
        coordinate.

    Raises:
        ZeroDivisionError: if no regression target was collected
            (for example, the filtered roidb is empty).
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    num_anchors = raw_anchors.shape[0]

    # Accumulate in float64: summing float32 values over a whole dataset and
    # then forming E[x^2] - mean^2 loses too much precision otherwise.
    sums = 0.0
    squared_sums = 0.0
    counts = 0
    roidb = filter_roidb(imdb.roidb)

    # Map every candidate square input size to the RPN output height/width by
    # running a dummy forward pass of that size.
    map_w = {}
    map_h = {}
    for size in range(50, cfg.TRAIN.MAX_SIZE + 10):
        probe_data = np.zeros((1, 3, size, size), dtype=np.float32)
        probe_info = np.asarray([[size, size, 1.0]], dtype=np.float32)
        net.blobs['data'].reshape(*probe_data.shape)
        net.blobs['im_info'].reshape(*probe_info.shape)
        net.forward(data=probe_data, im_info=probe_info)
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[size] = width
        map_h[size] = height

    for i, entry in enumerate(roidb):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))

        im = cv2.imread(entry['image'])
        im_data, im_info = _get_image_blob(im)
        # im_info[0, 2] is used as the image scale factor for the gt boxes.
        gt_boxes = entry['boxes'] * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Build the shifted anchors for every grid cell.
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Add A anchors (1, A, 4) to K cell shifts (K, 1, 4) to get the
        # shifted anchors (K, A, 4), then flatten to (K * A, 4).
        num_shifts = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, num_anchors, 4)) +
                       shifts.reshape((1, num_shifts, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((num_shifts * num_anchors, 4))

        # Only keep anchors that lie completely inside the image.
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]
        anchors = all_anchors[inds_inside, :]

        # np.float64 replaces the np.float alias removed from recent NumPy.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of foreground anchors:
        # (a) anchors whose best overlap reaches RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(
            max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]

        # (b) anchors that are the best match for some gt box (ties included)
        # NOTE(review): a gt box overlapping no inside anchor has max 0, so
        # every anchor with zero overlap to it matches here; confirm this is
        # the intended behaviour.
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))

        # Each foreground anchor regresses to its best-overlapping gt box.
        fg_anchors = anchors[fg_inds, :]
        fg_gt_rois = gt_boxes[argmax_overlaps, :][fg_inds, :]
        targets = bbox_transform(fg_anchors, fg_gt_rois[:, :4]).astype(
            np.float32, copy=False)

        targets64 = targets.astype(np.float64)
        sums = sums + targets64.sum(axis=0)
        squared_sums = squared_sums + (targets64 ** 2).sum(axis=0)
        counts += targets.shape[0]

    if counts == 0:
        raise ZeroDivisionError(
            'no RPN regression targets collected; cannot compute statistics')

    means = sums / counts
    # Rounding can push the variance marginally below zero; clamp so that
    # sqrt never yields NaN.
    variances = np.maximum(squared_sums / counts - means ** 2, 0.0)
    # Cast back to float32, the dtype this function has always returned.
    means = means.astype(np.float32)
    stds = np.sqrt(variances).astype(np.float32)
    print(means)
    print(stds)
    return means, stds