def forward(self):
    """Gather anchor-overlap and regression-offset statistics for one video.

    Pulls the next validation video from ``self.dataset`` and slides a
    window of ``self._depth`` frames over it. For every window the
    ground-truth box is the mean of columns 1..4 of ``gt_bboxes`` over the
    window, divided by 16.

    Returns:
        A tuple ``(r1, r2, is_last)``:
        r1: list with one entry per window, the best overlap any anchor
            achieves with the window's ground-truth box.
        r2: list with one entry per window, the column-wise maximum of the
            absolute ``bbox_transform`` output over the positive anchors.
        is_last: passed through unchanged from
            ``self.dataset.next_val_video()``.
    """
    [clips, gt_bboxes, gt_label, vid_name, is_last] \
        = self.dataset.next_val_video()
    num_frames = clips.shape[0]

    # ``np.float`` was removed from NumPy; ``np.float64`` is the type it
    # aliased. The cast does not depend on the window, so do it once.
    anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)

    r1 = []
    r2 = []
    for i in range(num_frames - self._depth + 1):
        # NOTE(review): 16 is presumably the feature-map stride - confirm.
        curr_gt = np.mean(gt_bboxes[i:i + self._depth, 1:5], axis=0) / 16
        curr_gt = np.expand_dims(curr_gt, axis=0)

        overlaps = bbox_overlaps(
            anchors,
            np.ascontiguousarray(curr_gt, dtype=np.float64))
        max_overlaps = overlaps.max(axis=1)
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps.max(axis=0)

        # Positive anchors: overlap above 0.6, plus the best-matching anchor
        # so that there is always at least one positive.
        positive = max_overlaps > 0.6
        positive[gt_argmax_overlaps] = True

        diff = bbox_transform(self.anchors[positive], curr_gt)
        r1.append(gt_max_overlaps)
        r2.append(np.abs(diff).max(axis=0))

    return r1, r2, is_last
def _compute_targets(ex_rois, gt_rois):
    """Return float32 box-regression targets for paired example/ground-truth ROIs.

    Args:
        ex_rois: array with 4 columns, one row per example box.
        gt_rois: array with 5 columns and the same number of rows as
            ``ex_rois``; only the first four columns are passed to
            ``bbox_transform`` (the fifth is presumably a class label -
            TODO confirm).

    Returns:
        The output of ``bbox_transform`` cast to ``np.float32``.
    """
    num_examples = ex_rois.shape[0]
    num_gt = gt_rois.shape[0]
    assert num_examples == num_gt
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    gt_coords = gt_rois[:, :4]
    targets = bbox_transform(ex_rois, gt_coords)
    return targets.astype(np.float32, copy=False)
def _map(label, target, gt_bbox, l, n):
    """Scatter per-sample box offsets into class-specific column groups.

    Args:
        label: sequence of per-sample labels; ``label[i] - 1`` is used as
            the zero-based class slot of sample ``i``.
        target: boxes passed as the first argument to ``bbox_transform``.
        gt_bbox: boxes passed as the second argument to ``bbox_transform``.
        l: number of class slots; each slot is 4 columns wide.
        n: number of rows in the returned arrays.

    Returns:
        A tuple ``(r_diff, mask)`` of arrays shaped ``(n, l * 4)``. Row
        ``i`` of ``r_diff`` holds ``diff[i]`` in the 4 columns of its class
        slot and zeros elsewhere; ``mask`` holds ones in those same columns.
    """
    diff = bbox_transform(target, gt_bbox)
    r_diff = np.zeros((n, l * 4))
    mask = np.zeros((n, l * 4))
    # ``xrange`` only exists on Python 2; ``enumerate`` works on both.
    for i, class_label in enumerate(label):
        start = int(class_label - 1) * 4
        r_diff[i, start:start + 4] = diff[i]
        mask[i, start:start + 4] = 1
    return r_diff, mask
def _compute_target(self, ex_rois, gt_rois):
    """Return normalized box-regression targets for paired ROIs.

    Both inputs must expose ``size()`` and have 4 columns and equal row
    counts. The ``bbox_transform`` output is shifted by
    ``self.BBOX_NORMALIZE_MEANS`` and scaled by
    ``self.BBOX_NORMALIZE_STDS``, each expanded to the targets' shape.
    """
    assert ex_rois.size(0) == gt_rois.size(0)
    assert ex_rois.size(1) == 4
    assert gt_rois.size(1) == 4

    raw_targets = bbox_transform(ex_rois, gt_rois)
    means = self.BBOX_NORMALIZE_MEANS.expand_as(raw_targets)
    centered = raw_targets - means
    stds = self.BBOX_NORMALIZE_STDS.expand_as(raw_targets)
    return centered / stds
def compute_targets(self, ex_rois, gt_rois, query_label):
    """Build a float32 ``[label | regression targets]`` table for the given ROIs.

    Args:
        ex_rois: array with 4 columns, one row per example box.
        gt_rois: array with 4 columns, the matching ground-truth boxes.
        query_label: 1-D array of labels, prepended as the first column.

    Returns:
        ``np.float32`` array whose first column is ``query_label`` and whose
        remaining columns are the ``bbox_transform`` targets, normalized by
        the configured means/stds when
        ``cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED`` is truthy.
    """
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optional normalization with precomputed statistics.
        means = np.array(cfg.BBOX_NORMALIZE_MEANS)
        centered = targets - means
        stds = np.array(cfg.BBOX_NORMALIZE_STDS)
        targets = centered / stds

    label_column = query_label[:, np.newaxis]
    stacked = np.hstack((label_column, targets))
    return stacked.astype(np.float32, copy=False)
def forward(self, bottom, top):
    """Write a clip batch plus per-anchor labels, offsets and masks to ``top``.

    Args:
        bottom: unused.
        top: output blobs; ``top[0]`` receives the clips, ``top[1]`` the
            anchor labels, ``top[2]`` the regression targets and ``top[3]``
            the regression mask, all cast to ``np.float32``.
    """
    [clips, labels, tmp_bboxes, box_idx] \
        = self.dataset.next_batch(self._batch_size, self._depth)
    batch_clip = clips.transpose((0, 4, 1, 2, 3))

    dim0 = self._anchor_dims[0]
    dim1 = self._anchor_dims[1]
    dim2 = self._anchor_dims[2]
    num_cells = dim0 * dim1 * dim2
    num_rows = self._batch_size * self._depth

    # NOTE(review): these buffers have batch_size * depth rows, but the loop
    # below only writes rows 0..depth-1 and only reads tmp_bboxes[0]. Any
    # remaining rows stay uninitialised - confirm batch_size is always 1.
    batch_labels = np.empty((num_rows, 1, num_cells))
    batch_diff = np.empty((num_rows, 4, num_cells))
    batch_mask = np.empty((num_rows, 4, num_cells))

    # ``np.float`` was removed from NumPy; ``np.float64`` is the type it
    # aliased. Neither the cast nor the box slice depends on the frame.
    anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)
    box = tmp_bboxes[0, :, :]

    for i in range(self._depth):
        # NOTE(review): 16 is presumably the feature-map stride - confirm.
        gt_bboxes = np.expand_dims((box[i] / 16), axis=0)
        overlaps = bbox_overlaps(
            anchors,
            np.ascontiguousarray(gt_bboxes, dtype=np.float64))
        max_overlaps = overlaps.max(axis=1)
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        positive = max_overlaps > 0.6

        # -1 = unassigned, 0 = overlap below 0.5, 1 = overlap above 0.6 or
        # the best-matching anchor.
        curr_labels = np.ones(num_cells) * (-1)
        curr_labels[self.valid_idx[max_overlaps < 0.5]] = 0
        curr_labels[self.valid_idx[positive]] = 1
        curr_labels[self.valid_idx[gt_argmax_overlaps]] = 1
        batch_labels[i, 0] = curr_labels.reshape(
            (dim1, dim2, dim0)).transpose((2, 0, 1)).reshape(-1)

        # NOTE(review): offsets are only filled for overlap > 0.6; the
        # best-matching anchor is labelled positive above but keeps a zero
        # offset here unless it also exceeds 0.6.
        curr_diff = np.zeros((num_cells, 4))
        curr_diff[self.valid_idx[positive]] \
            = bbox_transform(self.anchors[positive], gt_bboxes)
        batch_diff[i] = curr_diff.reshape(
            (dim1, dim2, dim0, 4)).transpose((3, 2, 0, 1)).reshape((4, -1))

        # NOTE(review): curr_mask is a view of batch_labels[i], so the next
        # line also rewrites every -1 label in batch_labels to 0. Kept as-is
        # because the emitted labels depend on it; confirm it is intended.
        curr_mask = batch_labels[i]
        curr_mask[curr_mask < 1] = 0
        batch_mask[i] = np.repeat(curr_mask, 4, axis=0)

    top[0].data[...] = batch_clip.astype(np.float32, copy=False)
    top[1].data[...] = batch_labels.astype(np.float32, copy=False)
    top[2].data[...] = batch_diff.astype(np.float32, copy=False)
    top[3].data[...] = batch_mask.astype(np.float32, copy=False)
def forward(self, bottom, top):
    """Sample foreground/background anchors per frame and write them to ``top``.

    For each of the ``self._depth`` frames, up to 4 positive anchors and an
    equal number of background anchors are drawn with ``np.random.choice``.

    Args:
        bottom: unused.
        top: output blobs. ``top[0]`` receives the clips; ``top[1]`` the
            sampled anchors prefixed with the frame index; ``top[2]`` their
            labels; ``top[3]`` the regression targets (zero for background
            rows); ``top[4]`` the regression mask; ``top[5]`` the sampled
            anchors prefixed with 0. ``top[1]`` to ``top[5]`` are reshaped
            to fit before being written.
    """
    [clips, labels, tmp_bboxes, box_idx] \
        = self.dataset.next_batch(self._batch_size, self._depth)
    batch_clip = clips.transpose((0, 4, 1, 2, 3))

    # Per-frame pieces are collected and concatenated once after the loop.
    # The empty seed keeps the (0, k) shape when no frame contributes.
    tois_parts = [np.empty((0, 5))]
    label_parts = [np.empty((0, 1))]
    diff_parts = [np.empty((0, 4))]
    mask_parts = [np.empty((0, 4))]
    toi2_parts = [np.empty((0, 5))]

    # ``np.float`` was removed from NumPy; ``np.float64`` is the type it
    # aliased. Neither the cast nor the box slice depends on the frame.
    anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)
    box = tmp_bboxes[0, :, :]

    for i in range(self._depth):
        # NOTE(review): 16 is presumably the feature-map stride - confirm.
        gt_bboxes = np.expand_dims((box[i] / 16), axis=0)
        overlaps = bbox_overlaps(
            anchors,
            np.ascontiguousarray(gt_bboxes, dtype=np.float64))
        max_overlaps = overlaps.max(axis=1)
        gt_argmax_overlaps = overlaps.argmax(axis=0)

        # -1 = ignored, 0 = overlap below 0.4, 1 = overlap of at least 0.6
        # or the best-matching anchor.
        curr_labels = np.ones(self.anchors.shape[0]) * (-1)
        curr_labels[max_overlaps < 0.4] = 0
        curr_labels[max_overlaps >= 0.6] = 1
        curr_labels[gt_argmax_overlaps] = 1

        fg_inds = np.where(curr_labels > 0)[0]
        num_fg = len(fg_inds)
        if num_fg > 4:
            # np.random.choice samples with replacement by default.
            fg_inds = np.random.choice(fg_inds, size=(4))
            num_fg = 4
        bg_inds = np.where(curr_labels == 0)[0]
        bg_inds = np.random.choice(bg_inds, size=(num_fg))

        curr_inds = np.concatenate((fg_inds, bg_inds))
        num_samples = 2 * num_fg
        sampled_anchors = self.anchors[curr_inds]

        curr_i = np.ones((num_samples, 1)) * i
        curr_tois = np.concatenate((curr_i, sampled_anchors), axis=1)
        curr_toi2 = np.concatenate(
            (np.zeros((num_samples, 1)), sampled_anchors), axis=1)
        curr_l = np.expand_dims(curr_labels[curr_inds], axis=1)

        # Foreground rows come first; background rows keep a zero offset.
        curr_diff = np.zeros((num_samples, 4))
        curr_diff[0:num_fg] = bbox_transform(self.anchors[fg_inds], gt_bboxes)
        curr_mask = np.repeat(curr_l, 4, axis=1)

        tois_parts.append(curr_tois)
        label_parts.append(curr_l)
        diff_parts.append(curr_diff)
        mask_parts.append(curr_mask)
        toi2_parts.append(curr_toi2)

    batch_tois = np.concatenate(tois_parts, axis=0)
    batch_label = np.concatenate(label_parts, axis=0)
    batch_diff = np.concatenate(diff_parts, axis=0)
    batch_mask = np.concatenate(mask_parts, axis=0)
    batch_toi2 = np.concatenate(toi2_parts, axis=0)

    top[1].reshape(*batch_tois.shape)
    top[2].reshape(*batch_label.shape)
    top[3].reshape(*batch_diff.shape)
    top[4].reshape(*batch_mask.shape)
    top[5].reshape(*batch_toi2.shape)

    top[0].data[...] = batch_clip.astype(np.float32, copy=False)
    top[1].data[...] = batch_tois.astype(np.float32, copy=False)
    top[2].data[...] = batch_label.astype(np.float32, copy=False)
    top[3].data[...] = batch_diff.astype(np.float32, copy=False)
    top[4].data[...] = batch_mask.astype(np.float32, copy=False)
    top[5].data[...] = batch_toi2.astype(np.float32, copy=False)
def _data_generator(self, batch_size):
    """Yield batches of region crops with class labels and box deltas, forever.

    Each batch covers ``batch_size`` consecutive annotations; the
    annotations are reshuffled whenever the cursor wraps back to 0. For
    every image, the selective-search proposals that pass the filters and
    then all ground-truth boxes are cropped and resized to ``im_size``.

    Args:
        batch_size: number of images consumed per yielded batch.

    Yields:
        A tuple ``(total_img_data, total_labels, total_deltas)``, each
        concatenated along axis 0 over all images of the batch. Row ``k`` of
        the labels and deltas describes crop ``k`` of the image data.
    """
    i = 0
    n = self.sample_nums
    while True:
        total_img_data = []
        total_labels = []
        total_deltas = []
        for _ in range(batch_size):
            if i == 0:
                self._random_shuffle()
            annotation = self._annotations[i]
            image_path, gt_boxes = self._parse_annotation(annotation)
            img = cv2.imread(image_path)
            # img.shape is (height, width, channels).
            height, width, _ = img.shape
            img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)
            # BGR -> RGB, then scale to [0, 1].
            img = img[:, :, (2, 1, 0)]
            img = img.astype(np.float32)
            img = img / 255
            # Rescale the ground-truth boxes to the resized image.
            gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
            gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

            # NOTE(review): each region['rect'] is treated as
            # (x1, y1, x2, y2) below; confirm against the selective_search
            # implementation in use.
            _, regions = selective_search(img, scale=200, sigma=0.9,
                                          min_size=50)
            proposals = np.asarray(
                [list(region['rect']) for region in regions])

            selected_imgs = []
            # ``seen`` de-duplicates proposals. ``kept_rects`` records them
            # in the order their crops are appended, because set iteration
            # order is not guaranteed to match and would pair labels and
            # deltas with the wrong crops.
            seen = set()
            kept_rects = []
            for r in proposals:
                x1, y1, x2, y2 = (int(round(v)) for v in r)
                rect = (x1, y1, x2, y2)
                if rect in seen:
                    continue
                # Drop very small boxes.
                if (x2 - x1) * (y2 - y1) < 220:
                    continue
                crop_img = img[y1:y2, x1:x2, :]
                # Resize the crop to the network input size.
                crop_img = cv2.resize(crop_img, im_size,
                                      interpolation=cv2.INTER_CUBIC)
                selected_imgs.append(crop_img)
                seen.add(rect)
                kept_rects.append(list(rect))
            # reshape keeps a (0, 4) shape so vstack also works when no
            # proposal survived the filters.
            rects = np.asarray(kept_rects).reshape(-1, 4)

            # Add the ground-truth boxes as extra samples.
            for gt_box in gt_boxes[:, 0:4]:
                x1, y1, x2, y2 = (int(round(v)) for v in gt_box)
                crop_img = img[y1:y2, x1:x2, :]
                crop_img = cv2.resize(crop_img, im_size,
                                      interpolation=cv2.INTER_CUBIC)
                selected_imgs.append(crop_img)
            rects = np.vstack((rects, gt_boxes[:, 0:4]))

            # IoU of every sample against every ground-truth box.
            overlaps = bbox_overlaps(rects, gt_boxes)
            # Index of the best-matching ground-truth box per sample.
            argmax_overlaps = np.argmax(overlaps, axis=1)
            max_overlaps = np.max(overlaps, axis=1)

            # Class 0 unless the best IoU exceeds the threshold.
            keep = np.where(max_overlaps > threshold)[0]
            labels = np.zeros(len(argmax_overlaps))
            labels[keep] = gt_boxes[argmax_overlaps[keep], 4]

            # Regression targets towards the best-matching ground truth.
            deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

            total_deltas.append(deltas)
            total_labels.append(labels)
            total_img_data.append(selected_imgs)
            i = (i + 1) % n

        total_img_data = np.concatenate(total_img_data, axis=0)
        total_labels = np.concatenate(total_labels, axis=0)
        total_deltas = np.concatenate(total_deltas, axis=0)
        yield total_img_data, total_labels, total_deltas

# Example usage (kept commented out):
# voc_data = VocData('~/segment_data', 2007, 'train', './data/voc_classes.txt')
# g = voc_data.data_generator_wrapper()
# x, y, z = next(g)
# print(x.shape)
# print(y.shape)
# print(z.shape)
def _transform_regions(self, regions, gt_boxes):
    """Return ``bbox_transform(regions, gt_boxes)`` unchanged."""
    return bbox_transform(regions, gt_boxes)
def _compute_targets(self, ex_rois, gt_rois):
    """Return float32 box-regression targets for paired example/ground-truth ROIs.

    ``ex_rois`` must have 4 columns and ``gt_rois`` 5 columns with the same
    number of rows; only the first four ``gt_rois`` columns are passed to
    ``bbox_transform``.
    """
    num_examples = ex_rois.shape[0]
    num_gt = gt_rois.shape[0]
    assert num_examples == num_gt
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    gt_coords = gt_rois[:, :4]
    targets = bbox_transform(ex_rois, gt_coords)
    return targets.astype(np.float32, copy=False)
def _data_generator(self, batch_size, is_svm):
    """Yield batches of region crops with class labels and box deltas, forever.

    Each batch covers ``batch_size`` consecutive annotations; the
    annotations are reshuffled whenever the cursor wraps back to 0. For
    every image, the selective-search proposals that pass the filters and
    then the ground-truth boxes that can be cropped are resized to
    ``im_size``.

    Args:
        batch_size: number of images consumed per yielded batch.
        is_svm: selects the labelling rule. When truthy, ``cfg.THRESHOLD``
            is used and labels take values -1 / 0 / class; otherwise
            ``cfg.FINE_TUNE_THRESHOLD`` is used and labels are 0 / class.

    Yields:
        A tuple ``(total_img_data, total_labels, total_deltas)``, each
        concatenated along axis 0 over all images of the batch. Row ``k`` of
        the labels and deltas describes crop ``k`` of the image data.
    """
    i = 0
    n = self.samples_num
    while True:
        total_img_data = []
        total_labels = []
        total_deltas = []
        for _ in range(batch_size):
            if i == 0:
                self._random_shuffle()
            annotation = self._annotations[i]
            image_path, gt_boxes = self._parse_annotation(annotation)
            img = cv2.imread(image_path)
            # img.shape is (height, width, channels).
            height, width, _ = img.shape
            img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)
            # BGR -> RGB, then scale to [0, 1].
            img = img[:, :, (2, 1, 0)]
            img = img.astype(np.float32)
            img = img / 255.
            # Rescale the ground-truth boxes to the resized image.
            gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
            gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

            # NOTE(review): each region['rect'] is treated as
            # (x1, y1, x2, y2) below; confirm against the selective_search
            # implementation in use.
            _, regions = selective_search(img, scale=200, sigma=0.9,
                                          min_size=50)
            proposals = np.asarray(
                [list(region['rect']) for region in regions])

            selected_imgs = []
            # ``seen`` de-duplicates proposals. ``rects`` records them in
            # the order their crops are appended, because set iteration
            # order is not guaranteed to match and would pair labels and
            # deltas with the wrong crops.
            seen = set()
            rects = []
            for r in proposals:
                x1, y1, x2, y2 = (int(round(v)) for v in r)
                rect = (x1, y1, x2, y2)
                if rect in seen:
                    continue
                # Drop very small boxes.
                if (x2 - x1) * (y2 - y1) < 220:
                    continue
                crop_img = img[y1:y2, x1:x2, :]
                # Resize the crop to the network input size.
                crop_img = cv2.resize(crop_img, im_size,
                                      interpolation=cv2.INTER_CUBIC)
                selected_imgs.append(crop_img)
                seen.add(rect)
                rects.append(list(rect))

            # Add the ground-truth boxes as extra samples.
            for idx in range(len(gt_boxes)):
                x1, y1, x2, y2 = (int(round(v)) for v in gt_boxes[idx, 0:4])
                crop_img = img[y1:y2, x1:x2, :]
                try:
                    crop_img = cv2.resize(crop_img, im_size,
                                          interpolation=cv2.INTER_CUBIC)
                except cv2.error:
                    # Skip boxes whose crop cannot be resized (for example a
                    # degenerate, empty crop).
                    continue
                selected_imgs.append(crop_img)
                rects.append(gt_boxes[idx, 0:4])
            rects = np.asarray(rects)

            # IoU of every sample against every ground-truth box.
            overlaps = bbox_overlaps(rects, gt_boxes)
            # Index of the best-matching ground-truth box per sample.
            argmax_overlaps = np.argmax(overlaps, axis=1)
            max_overlaps = np.max(overlaps, axis=1)

            # SVM training and fine-tuning use different IoU thresholds.
            threshold = cfg.THRESHOLD if is_svm else cfg.FINE_TUNE_THRESHOLD
            labels = np.empty(len(argmax_overlaps))
            if is_svm:
                # The original comments say the IoU range is restricted to
                # keep the SVM training set small. Start with -1 everywhere.
                labels.fill(-1)
                # NOTE(review): samples with IoU *above* the threshold are
                # marked background (0); the original comment calls these
                # "hard negatives". Confirm the comparison direction.
                bg_ids = np.where(max_overlaps > threshold)[0]
                labels[bg_ids] = 0
                # Samples with IoU above 0.7 take the class of their
                # best-matching ground-truth box.
                fg_ids = np.where(max_overlaps > 0.7)
                labels[fg_ids] = gt_boxes[argmax_overlaps[fg_ids], 4]
            else:
                # Background unless the best IoU reaches the threshold.
                labels.fill(0)
                keep = np.where(max_overlaps >= threshold)[0]
                labels[keep] = gt_boxes[argmax_overlaps[keep], 4]

            # Regression targets towards the best-matching ground truth.
            deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

            total_deltas.append(deltas)
            total_labels.append(labels)
            total_img_data.append(selected_imgs)
            i = (i + 1) % n

        total_img_data = np.concatenate(total_img_data, axis=0)
        total_labels = np.concatenate(total_labels, axis=0)
        total_deltas = np.concatenate(total_deltas, axis=0)
        yield total_img_data, total_labels, total_deltas