def setup(self, bottom, top):
    self._batch_size = 6
    self._depth = 8
    self._height = 300
    self._width = 400
    self.dataset = UcfSports('train', [self._height, self._width],
                             '/home/rhou/ucf_sports')
    self._feat_stride = 16
    self._pooled_height = np.round(self._height / float(self._feat_stride))
    self._pooled_width = np.round(self._width / float(self._feat_stride))
    self._root_anchors = generate_anchors(ratios=[0.5, 1, 2, 4],
                                          scales=np.array([3, 6, 8, 11, 14]))
    self.num_anchors = self._root_anchors.shape[0]
    shift_x = np.arange(0, self._pooled_width) * self._feat_stride
    shift_y = np.arange(0, self._pooled_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    K = shifts.shape[0]
    A = self.num_anchors
    all_anchors = (self._root_anchors.reshape((1, A, 4)).transpose((1, 0, 2)) +
                   shifts.reshape((1, K, 4)))
    all_anchors = all_anchors.reshape((K * A, 4))
    self.total_anchors = int(K * A)
    # keep only anchors that lie fully inside the image
    self.inds_inside = np.where(
        (all_anchors[:, 0] >= 0) &
        (all_anchors[:, 1] >= 0) &
        (all_anchors[:, 2] < self._width) &   # width
        (all_anchors[:, 3] < self._height)    # height
    )[0]
    self.anchors = all_anchors[self.inds_inside, :]
    self.len = len(self.inds_inside)
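# A minimal standalone sketch (toy numbers, not project code) of the
# broadcasting above: adding A base anchors to K grid shifts enumerates all
# K*A anchors. Note that reshaping to (1, A, 4) and transposing to (A, 1, 4)
# before adding (1, K, 4), as in setup() above, orders the result
# anchor-major; the more common variant below it orders shift-major.
import numpy as np

base = np.array([[-8, -8, 8, 8],
                 [-16, -8, 16, 8]])      # A = 2 toy anchors (x1, y1, x2, y2)
shifts = np.array([[0, 0, 0, 0],
                   [16, 0, 16, 0],
                   [0, 16, 0, 16]])      # K = 3 toy grid shifts
A, K = base.shape[0], shifts.shape[0]

anchor_major = (base.reshape((1, A, 4)).transpose((1, 0, 2)) +
                shifts.reshape((1, K, 4))).reshape((K * A, 4))
shift_major = (base.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(anchor_major)   # rows grouped by anchor
print(shift_major)    # rows grouped by grid cell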
def setup(self, bottom, top):
    # parse the layer parameter string, which must be valid YAML
    try:
        layer_params = yaml.load(self.param_str_)
    except AttributeError:
        layer_params = yaml.load(self.param_str)
    self._feat_stride = layer_params['feat_stride']
    anchor_scales = layer_params.get('scales', (8, 16, 32))
    self._anchors = generate_anchors(scales=np.array(anchor_scales))
    self._num_anchors = self._anchors.shape[0]
    # if DEBUG:
    #     print 'feat_stride: {}'.format(self._feat_stride)
    #     print 'anchors:'
    #     print self._anchors
    # rois blob: holds R regions of interest, each is a 5-tuple
    # (n, x1, y1, x2, y2) specifying an image batch index n and a
    # rectangle (x1, y1, x2, y2)
    top[0].reshape(1, 5)
    # scores blob: holds scores for R regions of interest
    # if len(top) > 1:
    #     top[1].reshape(1, 1, 1, 1)
    # ind blob:
    if len(top) > 1:
        top[1].reshape(1)
def __init__(self, feature_stride, scales, ratios):
    super(_ProposalLayer, self).__init__()
    self._feat_stride = feature_stride
    # pass the constructor argument through instead of a hard-coded 16
    self._anchors = torch.from_numpy(
        generate_anchors(feature_stride=feature_stride,
                         scales=np.array(scales),
                         ratios=np.array(ratios))).float()
    self._num_anchors = self._anchors.size(0)
def __init__(self, im_width=0, im_height=0, name=None):
    self.im_w = im_width
    self.im_h = im_height
    self.dataset_name = name
    self.basic_size = cfg.BASIC_SIZE
    self.ratios = cfg.RATIOS
    self.scales = cfg.SCALES
    self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                          self.scales)
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data,
                        _feat_stride=[16, ], anchor_scales=[4, 8, 16, 32]):
    # keyword is 'scales' (as at every other call site), not 'scale'
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
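# A quick sanity check (a sketch, assuming the canonical py-faster-rcnn
# generate_anchors with its default ratios [0.5, 1, 2]): the anchor count is
# len(ratios) * len(scales), so the four scales above yield 12 anchors per
# feature-map location.
import numpy as np
from rpn.generate_anchors import generate_anchors

anchors = generate_anchors(scales=np.array([4, 8, 16, 32]))
assert anchors.shape == (12, 4)   # 3 ratios * 4 scales, each (x1, y1, x2, y2)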
def __init__(self, feat_stride, scales, ratios):
    super(_AnchorTargetLayer, self).__init__()
    self._feat_stride = feat_stride
    self._scales = scales
    anchor_scales = scales
    self._anchors = torch.from_numpy(
        generate_anchors(self._feat_stride,
                         scales=np.array(anchor_scales),
                         ratios=np.array(ratios))).float()
    self._num_anchors = self._anchors.size(0)
    # allow boxes to sit over the edge by a small amount
    self._allowed_border = 0
def __init__(self, im_width, im_height, batch_size=8):
    self.vid_dir = '/mnt/sda7/ILSVRC2015/Data/VID/train'
    self.annot_dir = '/mnt/sda7/VID_Manual/Annotations/train/random'
    self.img_dirs = []
    self.anno_dirs = []
    self.stride = cfg.STRIDE
    self.basic_size = cfg.BASIC_SIZE
    self.ratios = cfg.RATIOS
    self.scales = cfg.SCALES
    self.K = len(self.ratios) * len(self.scales)
    for DIR in VID_SUBDIRS:
        img_dirs = sorted(os.listdir(op.join(self.vid_dir, DIR)))
        anno_dirs = sorted(os.listdir(op.join(self.annot_dir, DIR)))
        # img_dirs = [op.join(DIR, _dir) for _dir in img_dirs]
        # depend on anno dirs, not img dirs
        img_dirs = [op.join(DIR, _dir) for _dir in anno_dirs]
        anno_dirs = [op.join(DIR, _dir) for _dir in anno_dirs]
        self.img_dirs.extend(img_dirs)
        self.anno_dirs.extend(anno_dirs)
    self.index = 0
    self.vis_dir = './vis_vid'
    self.vis_index = 0
    self.margin_gain = 0.2
    self.im_w = im_width
    self.im_h = im_height
    self.batch_size = batch_size
    self.roi_size = cfg.DET_ROI_SIZE - cfg.TEMP_ROI_SIZE + 1
    # INTER_SEQ v.s. INTER_IMG
    self.method = cfg.DATA_LOADER_METHOD
    # assert len(self.img_dirs) == len(self.anno_dirs), 'Data and annotation dirs not uniformed'
    self.num_sequences = len(self.anno_dirs)
    self.num_images = 0
    self.num_visualize = 100
    self.permute_inds = np.random.permutation(np.arange(self.num_sequences))
    self.max_interval = 50 if cfg.PHASE == 'TRAIN' else 20
    # self.valid_seq_inds = np.zeros(0, dtype=np.int32)
    self.iter_stop = False
    self.enum_sequences()
    self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                          self.scales)
def __init__(self):
    self.batch_size = 2
    self.stride = STRIDE[net_type]
    self.im_w, self.im_h = RESOLUTION[net_type]
    self.display = 20
    self.snapshot = 2
    self.decay_ratio = 0.1
    self.lr_mult = 0.5
    self.fetch_config()
    self.update_config()
    self.K = len(self.ratios) * len(self.scales)
    self.TK = len(self.track_ratios) * len(self.track_scales)
    self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                          self.scales)
    self.track_raw_anchors = G.generate_anchors(self.track_basic_size,
                                                self.track_ratios,
                                                self.track_scales)
    self.model = MotFRCNN(self.im_w, self.im_h)
def __init__(self, im_width, im_height, batch_size=8):
    self.im_w = im_width
    self.im_h = im_height
    self.batch_size = batch_size
    self.stride = cfg.STRIDE
    self.bound = (im_width, im_height)
    self.out_size = (self.im_w // self.stride, self.im_h // self.stride)
    self.fetch_config()
    self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                          self.scales)
    dummy_search_box = np.array([[0, 0, self.im_w - 1, self.im_h - 1]])
    self.anchors = G.gen_region_anchors(self.raw_anchors, dummy_search_box,
                                        self.bound, K=self.K,
                                        size=self.out_size)[0]
def setup(self, bottom, top):
    layer_params = yaml.load(self.param_str)
    anchor_scales = layer_params.get('scales', (8, 16, 32))
    self._anchors = generate_anchors(scales=np.array(anchor_scales))
    self._num_anchors = self._anchors.shape[0]
    self._feat_stride = layer_params['feat_stride']
    if DEBUG:
        print('anchors:')
        print(self._anchors)
        print('anchor shapes:')
        print(np.hstack((
            self._anchors[:, 2::4] - self._anchors[:, 0::4],
            self._anchors[:, 3::4] - self._anchors[:, 1::4],
        )))
        self._counts = cfg.EPS
        self._sums = np.zeros((1, 4))
        self._squared_sums = np.zeros((1, 4))
        self._fg_sum = 0
        self._bg_sum = 0
        self._count = 0
    # allow boxes to sit over the edge by a small amount
    self._allowed_border = layer_params.get('allowed_border', 0)
    height, width = bottom[0].data.shape[-2:]
    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
    A = self._num_anchors
    # labels
    top[0].reshape(1, 1, A * height, width)
    # bbox_targets
    top[1].reshape(1, A * 4, height, width)
    # bbox_inside_weights
    top[2].reshape(1, A * 4, height, width)
    # bbox_outside_weights
    top[3].reshape(1, A * 4, height, width)
def setup(self, bottom, top):
    # parse the layer parameter string, which must be valid YAML
    layer_params = yaml.load(self.param_str_)
    self._feat_stride = layer_params['feat_stride']
    anchor_scales = layer_params.get('scales', (8, 16, 32))
    self._anchors = generate_anchors(scales=np.array(anchor_scales))
    self._num_anchors = self._anchors.shape[0]
    if DEBUG:
        print('feat_stride: {}'.format(self._feat_stride))
        print('anchors:')
        print(self._anchors)
    # rois blob: holds R regions of interest, each is a 5-tuple
    # (n, x1, y1, x2, y2) specifying an image batch index n and a
    # rectangle (x1, y1, x2, y2)
    top[0].reshape(1, 5)
    # scores blob: holds scores for R regions of interest
    if len(top) > 1:
        top[1].reshape(1, 1, 1, 1)
def inference_track(model, roidb):
    rpn_conv_size = cfg.RPN_CONV_SIZE
    basic_size = cfg.TRACK_BASIC_SIZE
    ratios = cfg.TRACK_RATIOS
    scales = cfg.TRACK_SCALES
    K = len(ratios) * len(scales)
    raw_anchors = G.generate_anchors(basic_size, ratios, scales)
    if cfg.IMAGE_NORMALIZE:
        roidb['temp_image'] -= roidb['temp_image'].min()
        roidb['temp_image'] /= roidb['temp_image'].max()
        roidb['det_image'] -= roidb['det_image'].min()
        roidb['det_image'] /= roidb['det_image'].max()
        roidb['temp_image'] = (roidb['temp_image'] - 0.5) / 0.5
        roidb['det_image'] = (roidb['det_image'] - 0.5) / 0.5
    else:
        roidb['temp_image'] -= cfg.PIXEL_MEANS
        roidb['det_image'] -= cfg.PIXEL_MEANS
    bound = roidb['bound']
    output_dict = model(roidb, task='track')
    temp_boxes = roidb['temp_boxes']
    search_boxes = roidb['search_boxes']
    configs = {}
    configs['K'] = K
    configs['temp_boxes'] = temp_boxes
    configs['search_boxes'] = search_boxes
    configs['rpn_conv_size'] = rpn_conv_size
    configs['raw_anchors'] = raw_anchors
    configs['bound'] = bound
    ret = get_track_output(output_dict, configs)
    bboxes_list = ret['bboxes_list']
    anchors_list = ret['anchors_list']
    return bboxes_list, anchors_list
def setup(self, bottom, top):
    # parse the layer parameter string, which must be valid YAML
    layer_params = yaml.load(self.param_str)
    self._feat_stride = layer_params['feat_stride']
    anchor_scales = layer_params.get('scales', (8, 16, 32))
    self._anchors = generate_anchors(scales=np.array(anchor_scales))
    self._num_anchors = self._anchors.shape[0]
    if DEBUG:
        print('feat_stride: {}'.format(self._feat_stride))
        print('anchors:')
        print(self._anchors)
    # rois blob: holds R regions of interest, each is a 5-tuple
    # (n, x1, y1, x2, y2) specifying an image batch index n and a
    # rectangle (x1, y1, x2, y2)
    top[0].reshape(1, 5)
    # scores blob: holds scores for R regions of interest
    if len(top) > 1:
        top[1].reshape(1, 1, 1, 1)
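# Note on the bare yaml.load(...) calls in the setup() methods above: they
# predate PyYAML 5.1, which deprecated yaml.load without an explicit Loader.
# A safe, equivalent call for these simple parameter strings:
import yaml

layer_params = yaml.load("{'feat_stride': 16, 'scales': [8, 16, 32]}",
                         Loader=yaml.SafeLoader)
assert layer_params['feat_stride'] == 16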
def generate_proposals(self, cls_prob, bbox_pred, im_info):
    batch_size = cls_prob[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images for each device is not implemented.")
    pre_nms_topN = self.rpn_pre_nms_top_n
    post_nms_topN = self.rpn_post_nms_top_n
    min_size = self.rpn_min_size
    proposal_list = []
    score_list = []
    for idx in range(len(self.feat_stride)):
        stride = int(self.feat_stride[idx])
        sub_anchors = generate_anchors(stride=stride,
                                       sizes=self.scales * stride,
                                       aspect_ratios=self.ratios)
        scores, bbox_deltas = cls_prob[idx], bbox_pred[idx]
        # 1. generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = scores.shape[-3:-1]
        # enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = self.num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        bbox_deltas = bbox_deltas.reshape((-1, 4))
        scores = scores.reshape((-1, 1))
        if self.individual_proposals:
            if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
                order = np.argsort(-scores.squeeze())
            else:
                # avoid a full sort: partition out the top-N, then sort them
                inds = np.argpartition(-scores.squeeze(),
                                       pre_nms_topN)[:pre_nms_topN]
                order = np.argsort(-scores[inds].squeeze())
                order = inds[order]
            bbox_deltas = bbox_deltas[order, :]
            anchors = anchors[order, :]
            scores = scores[order]
        # convert anchors into proposals via bbox transformations
        proposals = bbox_transform(anchors, bbox_deltas)
        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        if self.individual_proposals:
            keep = self.nms_func(
                np.hstack((proposals, scores)).astype(np.float32))
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        proposal_list.append(proposals)
        score_list.append(scores)
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    batch_inds = np.ones((proposals.shape[0], 1),
                         dtype=np.float32) * self.batch_idx
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
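# A small standalone sketch (toy data) of the argpartition trick used above:
# np.argpartition finds the top-N indices in O(n) without sorting everything;
# only those N are then sorted, which matters when pre_nms_topN << len(scores).
import numpy as np

scores = np.random.rand(100000)
top_n = 6000
inds = np.argpartition(-scores, top_n)[:top_n]   # unordered top-N
order = inds[np.argsort(-scores[inds])]          # top-N, highest first
assert scores[order[0]] == scores.max()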
from rpn.template import get_template

im_width = 768
im_height = 448
MAX_TEMPLATE_SIZE = im_height
colors = [[0, 255, 255], [255, 85, 0], [255, 170, 0], [255, 255, 0],
          [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85],
          [0, 255, 170], [255, 0, 0], [0, 170, 255], [0, 85, 255],
          [0, 0, 255], [85, 0, 255], [255, 0, 0], [255, 85, 0],
          [255, 170, 0], [255, 255, 0], [0, 0, 255], [85, 0, 255],
          [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0]]
templates = get_template(min_size=64, max_size=im_height, num_templates=5)
det_raw_anchors = G.generate_anchors(cfg.BASIC_SIZE, cfg.RATIOS, cfg.SCALES)
track_raw_anchors = G.generate_anchors(cfg.TRACK_BASIC_SIZE,
                                       cfg.TRACK_RATIOS, cfg.TRACK_SCALES)
K = len(cfg.RATIOS) * len(cfg.SCALES)
TK = len(cfg.TRACK_RATIOS) * len(cfg.TRACK_SCALES)
rpn_conv_size = cfg.RPN_CONV_SIZE
out_size = (im_width // 8, im_height // 8)


def add_new_targets(ids, boxes, new_ids, new_boxes):
    combined_ids = ids.copy()
    combined_boxes = boxes.copy()
    num_instances = len(ids)
    num_new_instances = new_boxes.shape[0]
    index = 0
def _load_imagenet3d_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the imagenet3d format.
    """
    if self._image_set == 'test' or self._image_set == 'test_1' \
            or self._image_set == 'test_2':
        lines = []
    else:
        filename = os.path.join(self._imagenet3d_path, 'Labels',
                                index + '.txt')
        lines = []
        with open(filename) as f:
            for line in f:
                lines.append(line)
    num_objs = len(lines)
    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    # azimuth, elevation, in-plane rotation
    viewpoints = np.zeros((num_objs, 3), dtype=np.float32)
    viewpoints_flipped = np.zeros((num_objs, 3), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    for ix, line in enumerate(lines):
        words = line.split()
        assert len(words) == 5 or len(words) == 8, \
            'Wrong label format: {}'.format(index)
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[1:5]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
        if len(words) == 8:
            viewpoints[ix, :] = [float(n) for n in words[5:8]]
            # flip the viewpoint
            viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
            viewpoints_flipped[ix, 1] = viewpoints[ix, 1]   # elevation
            viewpoints_flipped[ix, 2] = -viewpoints[ix, 2]  # in-plane rotation
        else:
            viewpoints[ix, :] = np.inf
            viewpoints_flipped[ix, :] = np.inf
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    viewindexes_azimuth = np.zeros((num_objs, self.num_classes),
                                   dtype=np.float32)
    viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes),
                                           dtype=np.float32)
    viewindexes_elevation = np.zeros((num_objs, self.num_classes),
                                     dtype=np.float32)
    viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes),
                                             dtype=np.float32)
    viewindexes_rotation = np.zeros((num_objs, self.num_classes),
                                    dtype=np.float32)
    viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes),
                                            dtype=np.float32)
    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
    viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
        viewindexes_azimuth_flipped)
    viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
    viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
        viewindexes_elevation_flipped)
    viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
    viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
        viewindexes_rotation_flipped)
    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))
            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)
            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                          boxes_all.astype(np.float))
            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = cfg.TRAIN.RPN_ASPECTS
            scales = cfg.TRAIN.RPN_SCALES
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]
            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            # gt boxes
            gt_boxes = boxes * scale
            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))
            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                          gt_boxes.astype(np.float))
            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])
    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_viewpoints': viewpoints,
        'gt_viewpoints_flipped': viewpoints_flipped,
        'gt_viewindexes_azimuth': viewindexes_azimuth,
        'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
        'gt_viewindexes_elevation': viewindexes_elevation,
        'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
        'gt_viewindexes_rotation': viewindexes_rotation,
        'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
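# A worked standalone sketch of the heatmap-size arithmetic above. The
# round/floor chain appears to model a stride-4 conv followed by two stride-2
# poolings in the Caffe backbone this code assumes; the net effect is roughly
# a stride-16 feature map.
import numpy as np

def heatmap_dim(pixels, scale=1.0):
    d = np.round((pixels * scale - 1) / 4.0 + 1)   # stride-4 stage
    d = np.floor((d - 1) / 2.0 + 1 + 0.5)          # stride-2 pooling
    d = np.floor((d - 1) / 2.0 + 1 + 0.5)          # stride-2 pooling
    return int(d)

print(heatmap_dim(600))   # 600 -> 151 -> 76 -> 39, close to 600 / 16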
def __init__(self):
    super(ProposalLayer, self).__init__()
    self.feat_stride = cfg.FEAT_STRIDE[0]
    # generate_anchors returns a numpy array; convert to a tensor so that
    # .size(0) below is valid (matching the other PyTorch layers here)
    self.anchors = torch.from_numpy(generate_anchors()).float()
    self.num_anchors = self.anchors.size(0)
import rpn.generate_anchors as G

im_width = 768
im_height = 448
colors = [[0, 255, 255], [255, 85, 0], [255, 170, 0], [255, 255, 0],
          [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85],
          [0, 255, 170], [255, 0, 0], [0, 170, 255], [0, 85, 255],
          [0, 0, 255], [85, 0, 255], [255, 0, 0], [255, 85, 0],
          [255, 170, 0], [255, 255, 0], [0, 0, 255], [85, 0, 255],
          [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0]]
K = len(cfg.RATIOS) * len(cfg.SCALES)
bound = (im_width, im_height)
out_size = (im_width // 8, im_height // 8)
det_raw_anchors = G.generate_anchors(cfg.BASIC_SIZE, cfg.RATIOS, cfg.SCALES)
dummy_search_box = np.array([[0, 0, im_width - 1, im_height - 1]])
det_anchors = G.gen_region_anchors(det_raw_anchors, dummy_search_box, bound,
                                   K=K, size=out_size)[0]


def main(dataset_obj, model=None):
    loader = DataLoader(dataset_obj)
    # video_path = './result.avi'
    # writer = cv2.VideoWriter(video_path,
    #                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
    #                          30.0, (im_width, im_height))
    roidb = {'anchors': det_anchors, 'bound': bound}
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32),
                           feature_stride=16):
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        blobs_out = net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height
    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        # There are 2 types of bbox targets:
        # 1. anchors whose overlap with a gt is at least RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]
        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]
    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
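# A quick standalone check of the running-moments identity used above:
# Var[x] = E[x^2] - E[x]^2, so accumulating sums and squared sums per target
# dimension is enough to recover the population std without storing targets.
import numpy as np

targets = np.random.randn(1000, 4)
means = targets.sum(axis=0) / len(targets)
stds = np.sqrt((targets ** 2).sum(axis=0) / len(targets) - means ** 2)
assert np.allclose(stds, targets.std(axis=0))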
ratios = cfg.RATIOS
scales = cfg.SCALES
stride = 8
cfg.STRIDE = stride
cfg.PHASE = 'TEST'
cfg.TEST.RPN_POST_NMS_TOP_N = 100
cfg.NUM_CLASSES = len(CLASSES)
cfg.TEST.IMS_PER_BATCH = 1
cfg.TEST.NMS_THRESH = 0.5
cfg.TEST.RPN_NMS_THRESH = 0.7
# cfg.TEST.RPN_POST_NMS_TOP_N = 300
K = len(ratios) * len(scales)
raw_anchors = G.generate_anchors(basic_size, ratios, scales)
bound = (im_width, im_height)
out_size = (im_width // stride, im_height // stride)
dummy_search_box = np.array([0, 0, bound[0], bound[1]]).reshape(1, -1)
anchors = G.gen_region_anchors(raw_anchors, dummy_search_box, bound, K=K,
                               size=out_size)[0]
print(anchors.shape)
img_files = [
    'img00337.jpg',
    'img00832.jpg',
    'img00995.jpg',
    'img01879.jpg',
def _load_pascal_annotation(self, index):
    """
    Load image and bounding boxes info from XML file in the PASCAL VOC format.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
    # print 'Loading: {}'.format(filename)

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())
    objs = data.getElementsByTagName('object')
    num_objs = len(objs)
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        name = str(get_data_from_tag(obj, "name")).lower().strip()
        if name in self._classes:
            cls = self._class_to_ind[name]
        else:
            cls = 0
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))
            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)
            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                          boxes_all.astype(np.float))
            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]
            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            # gt boxes
            gt_boxes = boxes * scale
            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))
            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                          gt_boxes.astype(np.float))
            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])
    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def predict(self, inputs):
    _anchors = generate_anchors(scales=np.array(self._anchor_scales))
    _num_anchors = _anchors.shape[0]
    print("_num_anchors", _anchors.shape)
    pre_nms_topN = cfg.FRCNN.RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg.FRCNN.RPN_POST_NMS_TOP_N
    nms_thresh = cfg.FRCNN.RPN_NMS_THRESH
    min_size = cfg.FRCNN.RPN_MIN_SIZE
    print("nms_thresh", nms_thresh)
    # the first _num_anchors channels are bg probs, the rest are fg probs
    scores = inputs[0][:, :, :, _num_anchors:]
    bbox_deltas = inputs[1]
    # anchors
    height, width = scores.shape[-3:-1]
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    print("anchors", anchors.shape)
    # box
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    # scores
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    boxdecode = proposals
    # note: the scale entry is 0 here, so the min-size filter below is a no-op
    im_info = np.array([height, width, 0])
    print("proposals after transform:", proposals.shape)
    proposals = clip_boxes(proposals, im_info[:2])
    print("proposals after clip:", proposals.shape)
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    print("proposals after min-size filter:", proposals.shape)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    print("proposals after pre-NMS top-N:", proposals.shape)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    print("proposals after NMS:", proposals.shape)
    return proposals, boxdecode, anchors
def _load_kitti_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the KITTI format.
    """
    if self._image_set == 'test':
        lines = []
    else:
        filename = os.path.join(self._data_path, 'training', 'label_2',
                                index + '.txt')
        lines = []
        with open(filename) as f:
            for line in f:
                line = line.replace('Van', 'Car')
                words = line.split()
                cls = words[0]
                truncation = float(words[1])
                occlusion = int(words[2])
                height = float(words[7]) - float(words[5])
                if cls in self._class_to_ind and truncation < 0.5 \
                        and occlusion < 3 and height > 25:
                    lines.append(line)
    num_objs = len(lines)
    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[4:8]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))
            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)
            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                          boxes_all.astype(np.float))
            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]
            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            # gt boxes
            gt_boxes = boxes * scale
            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))
            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                          gt_boxes.astype(np.float))
            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])
    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
num_anchors = 20
# anchors_person = gen_anchors(imdb.roidb, 10, [1])
# anchors_cyclist = gen_anchors(imdb.roidb, 10, [2])
# anchors_car = gen_anchors(imdb.roidb, 60, [3])
# anchors = np.vstack((anchors_person, anchors_cyclist, anchors_car))
anchors = gen_anchors(imdb.roidb, num_anchors, [1, 2, 3])

from rpn.generate_anchors import generate_anchors

# anchor_scales = np.exp(np.linspace(np.log(2), np.log(11), 3))
# anchor_ratios = np.exp(np.linspace(np.log(0.3), np.log(2), 3))
anchor_scales = (2, 4, 8, 16, 32)
anchor_ratios = (0.5, 1, 2.0)
anchors_ = generate_anchors(scales=np.array(anchor_scales),
                            ratios=np.array(anchor_ratios))

# Draw anchors
fig = plt.figure(1, figsize=(15, 10))
axes = [fig.add_subplot(2, 1, ii + 1) for ii in range(2)]
clrs = sns.color_palette("Set2", 100)
axes[0].set_xlim(-200, 200)
axes[0].set_ylim(-200, 200)
axes[1].set_xlim(-200, 200)
axes[1].set_ylim(-200, 200)
for aa, clr in zip(anchors, clrs):
    # the source was cut off mid-call; the height and edge-color arguments
    # below are inferred from context
    axes[0].add_patch(
        plt.Rectangle((aa[0], aa[1]), aa[2] - aa[0], aa[3] - aa[1],
                      fill=False, edgecolor=clr))
# Load dataset
imdb = kitti('train', '2012')
roidb = imdb.roidb
# im_scale = float(576) / float(375)
im_scale = 1.0
feat_stride = 16
# feature map size
height, width = (int(375. * im_scale / feat_stride),
                 int(1242. * im_scale / feat_stride))
# Load anchors
anchor_setting = 'kitti_scale5_ratio4_imscale1.0'
scales = np.array(range(1, 10, 2))
ratios = np.asarray([0.5, 1.0, 2., 2.5])
anchors = generate_anchors(scales=scales, ratios=ratios)
# anchor_setting = 'kitti-data-driven'
# anchors = imdb.get_anchors()
anchors = anchors * im_scale
A = anchors.shape[0]
# 1. Generate proposals from bbox deltas and shifted anchors
shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
if __name__ == '__main__':
    cfg_from_file('experiments/cfgs/faster_rcnn_end2end_kitti.yml')
    # Load dataset
    from datasets.kitti import kitti
    imdb = kitti('train', '2012')
    roidb = imdb.roidb
    im_scale = float(576) / float(375)
    # Load anchors
    from rpn.generate_anchors import generate_anchors
    anchors = generate_anchors(scales=np.array(range(1, 10)),
                               ratios=[0.5, 1., 1.5, 2., 2.5, 3.])
    anchors = anchors * im_scale
    num_anchors = anchors.shape[0]
    # height, width = (375, 1242)
    height, width = (int(375 * im_scale / 16), int(1242 * im_scale / 16))
    feat_stride = 16
    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4), then reshape to (K*A, 4)
def _load_pascal3d_voxel_exemplar_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the pascal
    subcategory exemplar format.
    """
    if self._image_set == 'val':
        return self._load_pascal_annotation(index)
    filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                            index + '.txt')
    assert os.path.exists(filename), \
        'Path does not exist: {}'.format(filename)
    # the annotation file contains flipped objects
    lines = []
    lines_flipped = []
    with open(filename) as f:
        for line in f:
            words = line.split()
            subcls = int(words[1])
            is_flip = int(words[2])
            if subcls != -1:
                if is_flip == 0:
                    lines.append(line)
                else:
                    lines_flipped.append(line)
    num_objs = len(lines)
    # store information of flipped objects
    assert num_objs == len(lines_flipped), \
        'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    for ix, line in enumerate(lines_flipped):
        words = line.split()
        subcls = int(words[1])
        gt_subclasses_flipped[ix] = subcls
    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        # Make pixel indexes 0-based
        boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]
    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))
            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)
            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                          boxes_all.astype(np.float))
            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2 ** np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]
            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            # gt boxes
            gt_boxes = boxes * scale
            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))
            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                          gt_boxes.astype(np.float))
            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])
    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def predict(self, inputs):
    _anchors = generate_anchors(scales=np.array(self._anchor_scales))
    _num_anchors = _anchors.shape[0]
    print("_num_anchors", _anchors.shape)
    pre_nms_topN = 6000
    post_nms_topN = 100
    nms_thresh = 0.699999988079  # float32 representation of 0.7
    min_size = 16
    print("nms_thresh", nms_thresh)
    # the first _num_anchors channels are bg probs, the rest are fg probs
    scores = inputs[0][:, :, :, _num_anchors:]
    bbox_deltas = inputs[1]
    # anchors
    height, width = scores.shape[-3:-1]
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    print("anchors", anchors.shape)
    print("anchors[0]", anchors[0])
    # box
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    # scores
    scores = scores.reshape((-1, 1))
    # swap the first two delta columns, e.g.
    # [0.846, -0.117, -2.104, -1.784] -> [-0.117, 0.846, -2.104, -1.784]
    bbox_deltas = bbox_deltas[:, (1, 0, 2, 3)]
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    boxdecode = proposals
    # note: the scale entry is 0 here, so the min-size filter below is a no-op
    im_info = np.array([height, width, 0])
    print("proposals after transform:", proposals.shape, proposals[0])
    proposals = clip_boxes(proposals, im_info[:2])
    print("proposals after clip:", proposals.shape, proposals[0])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    print("proposals after min-size filter:", proposals.shape)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    print("proposals after pre-NMS top-N:", proposals.shape)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    print("proposals after NMS:", proposals.shape)
    return proposals, boxdecode, anchors
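# For reference, a minimal standalone sketch of the standard Faster R-CNN
# inverse transform that the predict() methods above rely on
# (bbox_transform_inv itself is imported from the project; this version
# assumes the usual (dx, dy, dw, dh) parameterization around each anchor's
# center and size):
import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x          # shift the center
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths              # scale the size
    pred_h = np.exp(dh) * heights
    pred = np.zeros_like(deltas)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w    # back to corner form
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred

# zero deltas reproduce the anchor, up to the +-0.5 offset that comes from
# the "+1" width convention
anchors_demo = np.array([[0., 0., 15., 15.]])
print(bbox_transform_inv_sketch(anchors_demo, np.zeros((1, 4))))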
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #     generate A anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    if DEBUG:
        print('score map size: {}'.format(scores.shape))
    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])
    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
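# The snippets above call an external nms(dets, thresh). For reference, a
# minimal pure-NumPy equivalent of the classic greedy NMS (a sketch, assuming
# dets rows are [x1, y1, x2, y2, score] as in the np.hstack calls above):
import numpy as np

def nms_numpy(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)                      # keep the best remaining box
        # intersection of box i with all lower-scored boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes that overlap the kept box too much
        order = order[1:][iou <= thresh]
    return keep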
def __init__(self, feat_stride, anchor_scales, phase):
    self._feat_stride = feat_stride
    self._anchors = generate_anchors(scales=np.array(anchor_scales))
    self._num_anchors = self._anchors.shape[0]
    self.phase = phase
def __init__(self, im_width, im_height, batch_size=8):
    self.data_dir = op.join(cfg.DATA_DIR, 'Insight-MVT_Annotation_Train')
    self.anno_dir = op.join(cfg.DATA_DIR, 'DETRAC-Train-Annotations-XML')
    self.stride = cfg.STRIDE
    self.basic_size = cfg.BASIC_SIZE
    self.ratios = cfg.RATIOS
    self.scales = cfg.SCALES
    self.track_basic_size = cfg.TRACK_BASIC_SIZE
    self.track_ratios = cfg.TRACK_RATIOS
    self.track_scales = cfg.TRACK_SCALES
    self.K = len(self.ratios) * len(self.scales)
    self.TK = len(self.track_ratios) * len(self.track_scales)
    self.rpn_conv_size = cfg.RPN_CONV_SIZE
    self.img_dirs = sorted(os.listdir(self.data_dir))
    self.anno_files = sorted(os.listdir(self.anno_dir))
    for ext_seq in EXTRA_SEQS:
        ext_anno = '{}.xml'.format(ext_seq)
        # assert ext_anno in self.anno_files, '{} not exists'.format(ext_anno)
        self.anno_files.remove(ext_anno)
        self.img_dirs.remove(ext_seq)
    self.index = 0
    self.vis_dir = './vis_vid'
    self.vis_index = 0
    self.margin_gain = 0.2
    self.im_w = im_width
    self.im_h = im_height
    self.bound = (im_width, im_height)
    self.out_size = (im_width // self.stride, im_height // self.stride)
    self.batch_size = batch_size
    self.roi_size = cfg.DET_ROI_SIZE - cfg.TEMP_ROI_SIZE + 1
    self.num_sequences = len(self.anno_files)
    self.num_images = 0
    self.num_visualize = 100
    self.permute_inds = np.random.permutation(np.arange(self.num_sequences))
    self.max_interval = 4 if cfg.PHASE == 'TRAIN' else 1
    self.iter_stop = False
    self.enum_sequences()
    self.templates = get_template(min_size=cfg.TEMP_MIN_SIZE,
                                  max_size=cfg.TEMP_MAX_SIZE,
                                  num_templates=cfg.TEMP_NUM)
    self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                          self.scales)
    dummy_search_box = np.array([[0, 0, self.im_w - 1, self.im_h - 1]])
    self.det_anchors = G.gen_region_anchors(self.raw_anchors,
                                            dummy_search_box, self.bound,
                                            K=self.K, size=self.out_size)[0]
    self.track_raw_anchors = G.generate_anchors(self.track_basic_size,
                                                self.track_ratios,
                                                self.track_scales)