# Assumed module-level imports for the snippets in this file; BoundingBox,
# cropPadImage, calculate_box, video, and frame are project-local helpers.
import glob
import os

import numpy as np
import vot  # VOT toolkit wrapper (provides vot.Rectangle)


def get_videos(self):
    """Collect the frame paths and ground-truth boxes for every
    sub-folder of the VOT directory.

    :returns: dict mapping each sub-folder name to
        [list of frame paths, list of BoundingBox annotations]
    """
    logger = self.logger
    vot_folder = self.vot_folder
    sub_vot_dirs = self.find_subfolders(vot_folder)
    for vot_sub_dir in sub_vot_dirs:
        video_path = glob.glob(os.path.join(vot_folder, vot_sub_dir, '*.jpg'))
        objVid = video(video_path)
        list_of_frames = sorted(video_path)
        if not list_of_frames:
            logger.error('vot folders should contain only .jpg images')
        objVid.all_frames = list_of_frames

        bbox_gt_file = os.path.join(vot_folder, vot_sub_dir, 'groundtruth.txt')
        with open(bbox_gt_file, 'r') as f:
            for i, line in enumerate(f):
                # Each line holds the four corners of a (possibly rotated)
                # rectangle: ax,ay,bx,by,cx,cy,dx,dy. Convert to an
                # axis-aligned box.
                co_ords = [float(co_ord) for co_ord in line.strip().split(',')]
                ax, ay, bx, by, cx, cy, dx, dy = co_ords
                x1 = min(ax, bx, cx, dx) - 1
                y1 = min(ay, by, cy, dy) - 1
                x2 = max(ax, bx, cx, dx) - 1
                y2 = max(ay, by, cy, dy) - 1
                bbox = BoundingBox(x1, y1, x2, y2)
                bbox.frame_num = i
                objVid.annotations.append(bbox)
        self.videos[vot_sub_dir] = [objVid.all_frames, objVid.annotations]
    return self.videos
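
# A minimal worked example of the polygon-to-axis-aligned conversion above
# (a sketch; `polygon_line_to_bbox` is illustrative, not part of the loader;
# the -1 presumably shifts 1-based annotation coordinates to 0-based pixels).
def polygon_line_to_bbox(line):
    ax, ay, bx, by, cx, cy, dx, dy = [float(v) for v in line.strip().split(',')]
    return (min(ax, bx, cx, dx) - 1, min(ay, by, cy, dy) - 1,
            max(ax, bx, cx, dx) - 1, max(ay, by, cy, dy) - 1)

# e.g. polygon_line_to_bbox('10,10,30,10,30,20,10,20') -> (9.0, 9.0, 29.0, 19.0)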
def make_training_example_BBShift_(self, bbParams, visualize_example=False):
    """Generate one training example by randomly shifting and scaling the
    current ground-truth box, then cropping the search region around the
    shifted box.

    :returns: (random search region, padded target, scaled ground-truth box)
    """
    bbox_curr_gt = self.bbox_curr_gt_
    bbox_curr_shift = BoundingBox(0, 0, 0, 0)
    bbox_curr_shift = bbox_curr_gt.shift(
        self.img_curr_, bbParams.lamda_scale, bbParams.lamda_shift,
        bbParams.min_scale, bbParams.max_scale, True, bbox_curr_shift)
    (rand_search_region, rand_search_location,
     edge_spacing_x, edge_spacing_y) = cropPadImage(bbox_curr_shift,
                                                    self.img_curr_)

    bbox_curr_gt = self.bbox_curr_gt_
    bbox_gt_recentered = BoundingBox(0, 0, 0, 0)
    bbox_gt_recentered = bbox_curr_gt.recenter(
        rand_search_location, edge_spacing_x, edge_spacing_y,
        bbox_gt_recentered)
    bbox_gt_recentered.scale(rand_search_region)
    bbox_gt_scaled = bbox_gt_recentered
    return rand_search_region, self.target_pad_, bbox_gt_scaled
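
# A sketch of the recenter/scale transforms assumed above, mirroring GOTURN's
# conventions. The functions and the scale factor of 10 are assumptions (the
# factor is consistent with the search-region center [5., 5.] used in the
# tracker's "trick method" below), not confirmed APIs of this codebase.
def recenter_box(bbox, search_location, edge_x, edge_y):
    """Shift a box from image coordinates into the padded search crop."""
    return (bbox[0] - search_location[0] + edge_x,
            bbox[1] - search_location[1] + edge_y,
            bbox[2] - search_location[0] + edge_x,
            bbox[3] - search_location[1] + edge_y)

def scale_box(bbox, crop_w, crop_h, k_scale=10.0):
    """Normalize crop coordinates into [0, k_scale]."""
    return (bbox[0] / crop_w * k_scale, bbox[1] / crop_h * k_scale,
            bbox[2] / crop_w * k_scale, bbox[3] / crop_h * k_scale)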
def init(self, image_curr, init_bbox, tracknet, sess):
    """Initialize the tracker on the first frame: store the ground-truth
    box and precompute the target's pool5 features.

    tracknet and sess are passed explicitly since the body runs the
    network on the target crop.
    """
    left = float(init_bbox[0])
    top = float(init_bbox[1])
    right = float(init_bbox[2])
    bottom = float(init_bbox[3])
    bbox_gt = BoundingBox(left, top, right, bottom)

    self.image_prev = image_curr
    self.bbox_prev_tight = bbox_gt
    self.bbox_curr_prior_tight = bbox_gt
    self.DeltaBox = np.array([0., 0.])
    self.lambdaBox = 0.3
    self.prevBoxeffect = 0
    self.occlusion_flag = 0

    target_pad, _, _, _ = cropPadImage(self.bbox_prev_tight, self.image_prev)
    # image, BGR (training type)
    target_pad_resize = self.preprocess(target_pad)

    # jaehyuk: check Hanning window. Suppress the borders of the 227x227
    # target crop with a separable 2-D Hann window.
    hann_1d = np.expand_dims(np.hanning(227), axis=0)
    hann_2d = np.transpose(hann_1d) * hann_1d
    hann_2d = np.expand_dims(hann_2d, axis=2)
    target_pad_resize = target_pad_resize * hann_2d

    target_pad_expdim = np.expand_dims(target_pad_resize, axis=0)
    self.target_pool5 = sess.run(
        [tracknet.target_pool5],
        feed_dict={tracknet.target: target_pad_expdim})
    self.target_pool5 = np.resize(self.target_pool5, [1, 6, 6, 256])
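
# A self-contained sketch of the 2-D Hann window built above (only numpy is
# assumed; 227 matches the network's input crop size).
import numpy as np

def hann_window_2d(size=227):
    """Outer product of two 1-D Hann windows, with a trailing channel axis
    so it broadcasts over an HxWx3 image crop."""
    hann_1d = np.hanning(size)            # shape (size,)
    hann_2d = np.outer(hann_1d, hann_1d)  # shape (size, size)
    return hann_2d[:, :, np.newaxis]      # shape (size, size, 1)

# Usage: crop = crop * hann_window_2d(227)
# This darkens the crop borders, emphasizing the (assumed centered) target.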
def load_annotation_file(self, alov_sub_folder, annotation_file):
    video_path = os.path.join(
        self.alov_folder, alov_sub_folder,
        annotation_file.split('/')[-1].split('.')[0])
    objVideo = video(video_path)
    all_frames = glob.glob(os.path.join(video_path, '*.jpg'))
    objVideo.all_frames = sorted(all_frames)
    with open(annotation_file, 'r') as f:
        data = f.read().rstrip().split('\n')
        for bb in data:
            # ALOV annotations: a frame number followed by the four corners
            # of a (possibly rotated) rectangle.
            frame_num, ax, ay, bx, by, cx, cy, dx, dy = bb.split()
            frame_num = int(frame_num)
            ax, ay, bx, by = float(ax), float(ay), float(bx), float(by)
            cx, cy, dx, dy = float(cx), float(cy), float(dx), float(dy)
            x1 = min(ax, bx, cx, dx) - 1
            y1 = min(ay, by, cy, dy) - 1
            x2 = max(ax, bx, cx, dx) - 1
            y2 = max(ay, by, cy, dy) - 1
            bbox = BoundingBox(x1, y1, x2, y2)
            objFrame = frame(frame_num - 1, bbox)
            objVideo.annotations.append(objFrame)
    video_name = video_path.split('/')[-1]
    self.alov_videos[video_name] = objVideo
    if alov_sub_folder not in self.category:
        self.category[alov_sub_folder] = []
    self.category[alov_sub_folder].append(self.alov_videos[video_name])
def computeCropPadImageLocation(bbox_tight, image):
    """Compute where the padded search crop lies inside the original image,
    clipping the region to the image boundaries.

    :returns: BoundingBox of the crop location in image coordinates
    """
    # Center of the bounding box
    bbox_center_x = bbox_tight.get_center_x()
    bbox_center_y = bbox_tight.get_center_y()

    image_height = image.shape[0]
    image_width = image.shape[1]

    # Padded output width and height
    output_width = bbox_tight.compute_output_width()
    output_height = bbox_tight.compute_output_height()

    roi_left = max(0.0, bbox_center_x - (output_width / 2.))
    roi_bottom = max(0.0, bbox_center_y - (output_height / 2.))

    # Padded roi width
    left_half = min(output_width / 2., bbox_center_x)
    right_half = min(output_width / 2., image_width - bbox_center_x)
    roi_width = max(1.0, left_half + right_half)

    # Padded roi height
    top_half = min(output_height / 2., bbox_center_y)
    bottom_half = min(output_height / 2., image_height - bbox_center_y)
    roi_height = max(1.0, top_half + bottom_half)

    # Padded image location in the original image
    objPadImageLocation = BoundingBox(roi_left, roi_bottom,
                                      roi_left + roi_width,
                                      roi_bottom + roi_height)
    return objPadImageLocation
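
# A numeric sketch of the clipping above (hypothetical numbers, no
# BoundingBox dependency): a 100x80 padded crop requested around a target
# centered at (10, 40) in a 120x90 image gets clipped at the left border.
cx, cy = 10., 40.
out_w, out_h = 100., 80.
img_w, img_h = 120., 90.
roi_left = max(0., cx - out_w / 2.)  # -> 0.0 (clipped at the border)
roi_top = max(0., cy - out_h / 2.)   # -> 0.0
roi_w = max(1., min(out_w / 2., cx) + min(out_w / 2., img_w - cx))  # -> 60.0
roi_h = max(1., min(out_h / 2., cy) + min(out_h / 2., img_h - cy))  # -> 80.0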
def load_annotation_file(self, annotation_file):
    video_path = os.path.join(
        self.video_folder,
        annotation_file.split('/')[-1].split('.')[0])
    objVideo = video(video_path)
    all_frames = glob.glob(os.path.join(video_path, '*.jpg'))
    objVideo.all_frames = sorted(all_frames)
    with open(annotation_file, 'r') as f:
        data = f.read().rstrip().split('\n')
        for frame_num, bb in enumerate(data):
            # Annotations are stored as x,y,w,h; convert to corner form.
            x, y, w, h = [int(v) for v in bb.split(',')]
            bbox = BoundingBox(x, y, x + w, y + h)
            objFrame = frame(frame_num, bbox)
            objVideo.annotations.append(objFrame)
    self.videos.append(objVideo)
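
# Illustrative helpers (not part of the loader) for the x,y,w,h <-> corner
# conversions used above and in the vot.Rectangle output at the end of
# this file.
def xywh_to_corners(x, y, w, h):
    """(x, y, w, h) -> (x1, y1, x2, y2)"""
    return x, y, x + w, y + h

def corners_to_xywh(x1, y1, x2, y2):
    """(x1, y1, x2, y2) -> (x, y, w, h)"""
    return x1, y1, x2 - x1, y2 - y1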
def make_true_example(self):
    """Generate the unshifted training example: the search region is
    cropped around the previous frame's ground-truth box.

    :returns: (search region, padded target, recentered ground-truth box)
    """
    curr_prior_tight = self.bbox_prev_gt_
    target_pad = self.target_pad_
    (curr_search_region, curr_search_location,
     edge_spacing_x, edge_spacing_y) = cropPadImage(curr_prior_tight,
                                                    self.img_curr_)

    bbox_curr_gt = self.bbox_curr_gt_
    bbox_curr_gt_recentered = BoundingBox(0, 0, 0, 0)
    bbox_curr_gt_recentered = bbox_curr_gt.recenter(
        curr_search_location, edge_spacing_x, edge_spacing_y,
        bbox_curr_gt_recentered)
    bbox_curr_gt_recentered.scale(curr_search_region)
    return curr_search_region, target_pad, bbox_curr_gt_recentered
def init(self, image_curr, region):
    """Initialize the tracker on the first frame from a VOT region."""
    # Clip the initial region to the frame bounds.
    left = max(region.x, 0)
    top = max(region.y, 0)
    right = min(region.x + region.width, image_curr.shape[1] - 1)
    bottom = min(region.y + region.height, image_curr.shape[0] - 1)
    bbox_gt = BoundingBox(left, top, right, bottom)

    self.image_prev = image_curr
    self.bbox_prev_tight = bbox_gt
    self.bbox_curr_prior_tight = bbox_gt
    self.DeltaBox = np.array([0., 0.])
    self.lambdaBox = 0.3
    self.prevBoxeffect = 0
    self.occlusion_flag = 0
def track(self, image_curr, tracknet, sess):
    """Run one tracking step: crop the target and search regions, regress
    a new box with the network, and map it back to image coordinates.

    :returns: BoundingBox estimate, or False if no box was found
    """
    target_pad, _, _, _ = cropPadImage(self.bbox_prev_tight, self.image_prev)
    (cur_search_region, search_location,
     edge_spacing_x, edge_spacing_y) = cropPadImage(
         self.bbox_curr_prior_tight, image_curr)

    # image, BGR (training type)
    cur_search_region_resize = self.preprocess(cur_search_region)
    target_pad_resize = self.preprocess(target_pad)
    cur_search_region_expdim = np.expand_dims(cur_search_region_resize, axis=0)
    target_pad_expdim = np.expand_dims(target_pad_resize, axis=0)

    fc8 = sess.run(
        [tracknet.fc8],
        feed_dict={
            tracknet.image: cur_search_region_expdim,
            tracknet.target: target_pad_expdim
        })
    bbox_estimate = calculate_box(fc8)

    # This box is the NMS result. TODO: check all boxes.
    if len(bbox_estimate) != 0:
        bbox_estimate = BoundingBox(bbox_estimate[0][0], bbox_estimate[0][1],
                                    bbox_estimate[0][2], bbox_estimate[0][3])

        # In-place correction of the box back to image coordinates
        bbox_estimate.unscale(cur_search_region)
        bbox_estimate.uncenter(image_curr, search_location, edge_spacing_x,
                               edge_spacing_y)

        self.image_prev = image_curr
        self.bbox_prev_tight = bbox_estimate
        self.bbox_curr_prior_tight = bbox_estimate
    else:
        bbox_estimate = False
    return bbox_estimate
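
# A minimal usage sketch of the tracking loop (hypothetical: `tracker` is an
# instance of the class above, already initialized on the first frame, and
# `frames` is a list of BGR images; the session/network setup is assumed to
# exist elsewhere).
def run_tracker(tracker, tracknet, sess, frames, first_bbox):
    results = [first_bbox]
    for image_curr in frames[1:]:
        bbox = tracker.track(image_curr, tracknet, sess)
        if bbox is False:         # no detection: reuse the last estimate
            bbox = results[-1]
        results.append(bbox)
    return results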
def track(self, image_curr, tracknet, velocity, sess):
    """Run one tracking step with objectness scoring; when no object is
    detected, the previous prior box is reused.

    :returns: vot.Rectangle for the current frame
    """
    target_pad, _, _, _ = cropPadImage(self.bbox_prev_tight, self.image_prev)
    (cur_search_region, search_location,
     edge_spacing_x, edge_spacing_y) = cropPadImage(
         self.bbox_curr_prior_tight, image_curr)

    # image, BGR (training type)
    cur_search_region_resize = self.preprocess(cur_search_region)
    target_pad_resize = self.preprocess(target_pad)
    cur_search_region_expdim = np.expand_dims(cur_search_region_resize, axis=0)
    target_pad_expdim = np.expand_dims(target_pad_resize, axis=0)

    re_fc4_image, fc4_adj = sess.run(
        [tracknet.re_fc4_image, tracknet.fc4_adj],
        feed_dict={
            tracknet.image: cur_search_region_expdim,
            tracknet.target: target_pad_expdim
        })
    bbox_estimate, object_bool, objectness = calculate_box(
        re_fc4_image, fc4_adj)
    print('objectness_s is: ', objectness)

    ########### original method ############
    # This box is the NMS result. TODO: check all boxes.
    if len(bbox_estimate) != 0:
        bbox_estimate = BoundingBox(bbox_estimate[0][0], bbox_estimate[0][1],
                                    bbox_estimate[0][2], bbox_estimate[0][3])

        # In-place correction of the box back to image coordinates
        bbox_estimate.unscale(cur_search_region)
        bbox_estimate.uncenter(image_curr, search_location, edge_spacing_x,
                               edge_spacing_y)
        # self.image_prev = image_curr
        # self.bbox_prev_tight = bbox_estimate
        self.bbox_curr_prior_tight = bbox_estimate
    else:
        # No detection: keep the previous prior box.
        bbox_estimate = self.bbox_curr_prior_tight
    ########### original method ############

    ############ trick method ############
    # if object_bool:
    #     current_box_wh = np.array(
    #         [(bbox_estimate[0][2] - bbox_estimate[0][0]),
    #          (bbox_estimate[0][3] - bbox_estimate[0][1])], dtype=np.float32)
    #     prev_box_wh = np.array([5., 5.], dtype=np.float32)
    #     bbox_estimate = BoundingBox(bbox_estimate[0][0], bbox_estimate[0][1],
    #                                 bbox_estimate[0][2], bbox_estimate[0][3])
    #
    #     # relative distance of the box center from the search-region
    #     # center [5., 5.] (still in scaled search-region coordinates)
    #     relative_current_box = np.array(
    #         [(bbox_estimate.x2 + bbox_estimate.x1) / 2,
    #          (bbox_estimate.y2 + bbox_estimate.y1) / 2], dtype=np.float32)
    #     relative_distance = np.linalg.norm(
    #         relative_current_box - np.array([5., 5.]))
    #
    #     # In-place correction of the bounding box
    #     bbox_estimate.unscale(cur_search_region)
    #     bbox_estimate.uncenter(image_curr, search_location,
    #                            edge_spacing_x, edge_spacing_y)
    #
    #     # center points of the current and previous boxes, image coordinates
    #     current_box = np.array(
    #         [(bbox_estimate.x2 + bbox_estimate.x1) / 2,
    #          (bbox_estimate.y2 + bbox_estimate.y1) / 2], dtype=np.float32)
    #     prev_box = np.array(
    #         [(self.bbox_curr_prior_tight.x2 + self.bbox_curr_prior_tight.x1) / 2,
    #          (self.bbox_curr_prior_tight.y2 + self.bbox_curr_prior_tight.y1) / 2],
    #         dtype=np.float32)
    #
    #     if relative_distance < 2:
    #         # Smooth the center displacement with an exponential moving
    #         # average.
    #         self.DeltaBox = (self.lambdaBox * (current_box - prev_box)
    #                          + (1 - self.lambdaBox) * self.DeltaBox)
    #         self.image_prev = image_curr
    #         self.bbox_prev_tight = bbox_estimate
    #         self.bbox_curr_prior_tight = bbox_estimate
    #         print(self.DeltaBox)
    #     else:
    #         # Box jumped too far: keep the previous image and box, and
    #         # drift the prior by the smoothed velocity.
    #         self.bbox_curr_prior_tight = BoundingBox(
    #             self.bbox_curr_prior_tight.x1 + self.DeltaBox[0],
    #             self.bbox_curr_prior_tight.y1 + self.DeltaBox[1],
    #             self.bbox_curr_prior_tight.x2 + self.DeltaBox[0],
    #             self.bbox_curr_prior_tight.y2 + self.DeltaBox[1])
    #         bbox_estimate = self.bbox_curr_prior_tight
    #         print('distance is {:>3}'.format(relative_distance))
    #         print(self.DeltaBox)
    # else:
    #     # Occlusion detected: keep the previous image and box, and drift
    #     # the prior by the smoothed velocity.
    #     self.bbox_curr_prior_tight = BoundingBox(
    #         self.bbox_curr_prior_tight.x1 + self.DeltaBox[0],
    #         self.bbox_curr_prior_tight.y1 + self.DeltaBox[1],
    #         self.bbox_curr_prior_tight.x2 + self.DeltaBox[0],
    #         self.bbox_curr_prior_tight.y2 + self.DeltaBox[1])
    #     bbox_estimate = self.bbox_curr_prior_tight
    #     print('occlusion is detected')
    #     print(self.DeltaBox)
    ############ trick method ############

    left_x = bbox_estimate.x1
    left_y = bbox_estimate.y1
    width = bbox_estimate.x2 - bbox_estimate.x1
    height = bbox_estimate.y2 - bbox_estimate.y1
    return vot.Rectangle(left_x, left_y, width, height)
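
# A standalone sketch of the box-velocity smoothing used in the commented
# "trick method" above: an exponential moving average of the center
# displacement, used to drift the prior box during suspected occlusion.
# (`update_delta` is illustrative; lambda_box=0.3 mirrors self.lambdaBox.)
import numpy as np

def update_delta(delta, prev_center, curr_center, lambda_box=0.3):
    """delta <- lambda * (curr - prev) + (1 - lambda) * delta"""
    return (lambda_box * (np.asarray(curr_center) - np.asarray(prev_center))
            + (1 - lambda_box) * np.asarray(delta))

# e.g. update_delta([0., 0.], [50., 50.], [60., 50.]) -> array([3., 0.])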
def __init__(self):
    """Container for per-video display state: a bounding box, the frame
    image paths, and the display dimensions."""
    self.bbox = BoundingBox(0, 0, 0, 0)
    self.image_path = []
    self.disp_width = 0
    self.disp_height = 0