def vis_images(self, prev, curr, gt_bb, pred_bb, prefix='train'):
    """Plot (previous, current) image pairs with ground-truth and
    predicted bounding boxes on the visdom server.

    @prev: batch of previous-frame tensors (B, C, H, W), normalized
    @curr: batch of current-frame tensors (B, C, H, W), normalized
    @gt_bb: ground-truth bounding boxes, one per batch element
    @pred_bb: predicted bounding boxes, one per batch element
    @prefix: suffix for the visdom environment name ('train', ...)
    """

    def unnormalize(image, mean, std):
        # CHW -> HWC, then undo the mean/std normalization
        image = np.transpose(image, (1, 2, 0)) * std + mean
        image = image.astype(np.float32)
        return image

    # Loop-invariant constants hoisted out of the per-sample loop.
    # NOTE(review): these look like Caffe-style BGR channel means —
    # confirm against the data loader's normalization.
    _mean = np.array([104, 117, 123])
    _std = np.ones_like(_mean)

    for i in range(prev.shape[0]):
        prev_img = unnormalize(prev[i].cpu().detach().numpy(), _mean, _std)
        curr_img = unnormalize(curr[i].cpu().detach().numpy(), _mean, _std)

        # Ground truth drawn in white
        gt_bb_i = BoundingBox(*gt_bb[i].cpu().detach().numpy().tolist())
        gt_bb_i.unscale(curr_img)
        curr_img = draw.bbox(curr_img, gt_bb_i, color=(255, 255, 255))

        # Prediction drawn in the helper's default color
        pred_bb_i = BoundingBox(
            *pred_bb[i].cpu().detach().numpy().tolist())
        pred_bb_i.unscale(curr_img)
        curr_img = draw.bbox(curr_img, pred_bb_i)

        # Stack the pair as (2, H, W, C), then channels-first for visdom
        out = np.concatenate(
            (prev_img[np.newaxis, ...], curr_img[np.newaxis, ...]), axis=0)
        out = np.transpose(out, [0, 3, 1, 2])
        self._viz.plot_images_np(out, title='sample_{}'.format(i),
                                 env='goturn_{}'.format(prefix))
def reset(self, bbox_curr, bbox_prev, img_curr, img_prev):
    """Prepare the padded target (search context) from the previous frame.

    @bbox_curr: bounding box in the current frame
    @bbox_prev: bounding box in the previous frame
    @img_curr: current frame
    @img_prev: previous frame
    """
    target_pad, pad_image_location, _, _ = cropPadImage(
        bbox_prev, img_prev, dbg=self._dbg, viz=self._viz)

    # Cache everything the sample-making methods need later.
    self.img_curr_ = img_curr
    self.bbox_curr_gt_ = bbox_curr
    self.bbox_prev_gt_ = bbox_prev
    self.target_pad_ = target_pad  # crop kContextFactor * bbox_curr copied

    if self._dbg:
        dbg_env = self._env + '_targetpad'
        overlay = draw.bbox(img_prev, bbox_prev, color=(0, 0, 255))
        overlay = draw.bbox(overlay, pad_image_location)
        self._viz.plot_image_opencv(overlay, 'target_region', env=dbg_env)
        self._viz.plot_image_opencv(target_pad, 'cropped_target_region',
                                    env=dbg_env)
        del overlay
def make_training_sample_BBShift_(self, bbParams, dbg=False):
    """Generate one augmented training sample by randomly shifting and
    scaling the current ground-truth box, then cropping a padded search
    region around the perturbed box.

    @bbParams: augmentation parameters (lamda_shift, lamda_scale,
               min_scale, max_scale)
    @dbg: when True, plot intermediate images on the visdom server

    Returns (rand_search_region, target_pad, bbox_gt_scaled), where
    bbox_gt_scaled is the ground truth re-centered into the search
    region and scaled to its dimensions.
    """
    # Fix: the original fetched self.bbox_curr_gt_ twice; once suffices.
    bbox_curr_gt = self.bbox_curr_gt_

    # Randomly perturb the GT box (simulates inter-frame target motion)
    bbox_curr_shift = BoundingBox(0, 0, 0, 0)
    bbox_curr_shift = bbox_curr_gt.shift(
        self.img_curr_, bbParams.lamda_scale, bbParams.lamda_shift,
        bbParams.min_scale, bbParams.max_scale, True, bbox_curr_shift)

    # Crop a padded search region around the shifted box
    rand_search_region, rand_search_location, edge_spacing_x, edge_spacing_y = cropPadImage(
        bbox_curr_shift, self.img_curr_, dbg=self._dbg, viz=self._viz)

    # Express the (unshifted) ground truth in search-region coordinates
    bbox_gt_recentered = BoundingBox(0, 0, 0, 0)
    bbox_gt_recentered = bbox_curr_gt.recenter(
        rand_search_location, edge_spacing_x, edge_spacing_y,
        bbox_gt_recentered)

    if dbg:
        env = self._env + '_make_training_sample_bbshift'
        viz = self._viz
        curr_img_bbox = draw.bbox(self.img_curr_, bbox_curr_gt)
        recentered_img = draw.bbox(rand_search_region, bbox_gt_recentered)
        viz.plot_image_opencv(curr_img_bbox, 'curr shifted bbox', env=env)
        viz.plot_image_opencv(recentered_img, 'recentered shifted bbox',
                              env=env)

    bbox_gt_recentered.scale(rand_search_region)
    bbox_gt_scaled = bbox_gt_recentered
    return rand_search_region, self.target_pad_, bbox_gt_scaled
def visualize(self, image, target, bbox, idx):
    """Plot one prepared (target, search image + bbox) pair on visdom.

    The sample generator prepares the image and its target (with
    bounding box); this helper visualizes them. Start the server with
    `python -m visdom.server` and open http://localhost:8097 in a
    browser to view the images.
    """
    if image_io._is_pil_image(image):
        image = np.asarray(image)
    if image_io._is_pil_image(target):
        target = np.asarray(target)

    target = cv2.resize(target, (227, 227))
    target = cv2.cvtColor(target, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (227, 227))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # NOTE(review): this mutates the caller's bbox in place
    # (unscale + int truncation) — confirm callers do not reuse it.
    bbox.unscale(image)
    bbox.x1 = int(bbox.x1)
    bbox.x2 = int(bbox.x2)
    bbox.y1 = int(bbox.y1)
    bbox.y2 = int(bbox.y2)
    image_bb = draw.bbox(image, bbox)

    # (2, H, W, C) stack, then channels-first for visdom
    stacked = np.stack((target, image_bb), axis=0)
    stacked = np.transpose(stacked, [0, 3, 1, 2])
    self._viz.plot_images_np(stacked, title='sample_{}'.format(idx),
                             env=self._env + '_train')
def __getitem__(self, idx):
    """Return (image_prev, bbox_prev, image_curr, bbox_curr) for the
    image pair at index @idx.

    @idx: index into the precomputed list of ALOV image pairs
    """
    prev_imgpath, bbox_prev, curr_imgpath, bbox_curr = \
        self._alov_imgpairs[idx]

    # Load both frames as uint8 arrays
    image_prev = np.asarray(image_io.load(prev_imgpath), dtype=np.uint8)
    image_curr = np.asarray(image_io.load(curr_imgpath), dtype=np.uint8)

    if self._dbg:
        annotated_prev = draw.bbox(image_prev, bbox_prev)
        annotated_curr = draw.bbox(image_curr, bbox_curr)
        self._viz.plot_image_opencv(annotated_prev, 'prev', env=self._env)
        self._viz.plot_image_opencv(annotated_curr, 'current',
                                    env=self._env)
        del annotated_prev
        del annotated_curr

    return image_prev, bbox_prev, image_curr, bbox_curr
def make_true_sample(self):
    """Build the unaugmented (search_region, target, gt_box) triple.

    The previous-frame bounding box defines the region of the current
    frame to search in; the current-frame ground truth is re-centered
    into that region and scaled to its dimensions.
    """
    prior_box = self.bbox_prev_gt_
    target_pad = self.target_pad_

    # The previous-frame bbox tells us where in the current frame to
    # crop the search region.
    crop_out = cropPadImage(prior_box, self.img_curr_, self._dbg,
                            self._viz)
    curr_search_region, curr_search_location, edge_spacing_x, \
        edge_spacing_y = crop_out

    # Express the current GT box in search-region coordinates
    bbox_curr_gt = self.bbox_curr_gt_
    recentered = BoundingBox(0, 0, 0, 0)
    recentered = bbox_curr_gt.recenter(
        curr_search_location, edge_spacing_x, edge_spacing_y, recentered)

    if self._dbg:
        dbg_env = self._env + '_make_true_sample'
        search_dbg = draw.bbox(self.img_curr_, curr_search_location)
        search_dbg = draw.bbox(search_dbg, bbox_curr_gt,
                               color=(255, 255, 0))
        self._viz.plot_image_opencv(search_dbg, 'search_region',
                                    env=dbg_env)
        recentered_img = draw.bbox(curr_search_region, recentered,
                                   color=(255, 255, 0))
        self._viz.plot_image_opencv(recentered_img,
                                    'cropped_search_region', env=dbg_env)
        del recentered_img
        del search_dbg

    recentered.scale(curr_search_region)
    return curr_search_region, target_pad, recentered