示例#1
0
    def get_videos(self):
        """Load frame paths and ground-truth boxes for every VOT sequence.

        For each sub folder reported by ``self.find_subfolders`` the ``*.jpg``
        files are globbed and sorted, and ``groundtruth.txt`` is parsed one
        line per frame. Every line must hold exactly eight comma-separated
        numbers (four corner points); they are reduced to their axis-aligned
        extent and every coordinate is shifted by -1.

        :returns: ``self.videos``, in which each sub folder name maps to
            ``[sorted frame paths, list of BoundingBox annotations]``

        """
        log = self.logger
        root = self.vot_folder
        for seq_name in self.find_subfolders(root):
            seq_dir = os.path.join(root, seq_name)
            jpg_paths = glob.glob(os.path.join(seq_dir, '*.jpg'))
            sequence = video(jpg_paths)

            frames = sorted(jpg_paths)
            if not frames:
                # Only reported; the ground-truth file is still parsed below.
                log.error('vot folders should contain only .jpg images')
            sequence.all_frames = frames

            gt_path = os.path.join(seq_dir, 'groundtruth.txt')
            with open(gt_path, 'r') as gt_file:
                for frame_idx, row in enumerate(gt_file):
                    values = [float(tok) for tok in row.strip().split(',')]
                    ax, ay, bx, by, cx, cy, dx, dy = values

                    # Axis-aligned extent of the four corners. The -1 is
                    # presumably a 1-based -> 0-based conversion (TODO confirm).
                    left = min(ax, min(bx, min(cx, dx))) - 1
                    top = min(ay, min(by, min(cy, dy))) - 1
                    right = max(ax, max(bx, max(cx, dx))) - 1
                    bottom = max(ay, max(by, max(cy, dy))) - 1

                    box = BoundingBox(left, top, right, bottom)
                    box.frame_num = frame_idx
                    sequence.annotations.append(box)

            self.videos[seq_name] = [sequence.all_frames,
                                     sequence.annotations]
        return self.videos
    def make_training_example_BBShift_(self,
                                       bbParams,
                                       visualize_example=False):
        """Build one training example around a shifted copy of the box.

        The current ground-truth box is passed through ``shift`` with the
        settings found in ``bbParams``; the current image is cropped around
        the shifted box with ``cropPadImage``, and the ground truth is then
        re-expressed relative to that crop via ``recenter`` and ``scale``.

        :param bbParams: object providing ``lamda_scale``, ``lamda_shift``,
            ``min_scale`` and ``max_scale``
        :param visualize_example: accepted but not used by this method
        :returns: tuple ``(search region crop, self.target_pad_,
            recentered and scaled ground-truth box)``

        """
        # Shifted version of the ground truth; it decides where to crop.
        shifted_box = self.bbox_curr_gt_.shift(
            self.img_curr_, bbParams.lamda_scale, bbParams.lamda_shift,
            bbParams.min_scale, bbParams.max_scale, True,
            BoundingBox(0, 0, 0, 0))
        search_region, search_location, pad_x, pad_y = cropPadImage(
            shifted_box, self.img_curr_)

        # Express the (unshifted) ground truth in the crop's coordinates.
        gt_in_crop = self.bbox_curr_gt_.recenter(
            search_location, pad_x, pad_y, BoundingBox(0, 0, 0, 0))
        gt_in_crop.scale(search_region)

        return search_region, self.target_pad_, gt_in_crop
    def init(self, image_curr, init_bbox):
        """Initialise tracker state from the first frame of a video.

        Stores the first frame and its box as both the previous and the prior
        state, resets the motion bookkeeping attributes, and caches the
        ``target_pool5`` activations of the Hann-windowed target crop in
        ``self.target_pool5``.

        :param image_curr: first frame of the video
        :param init_bbox: indexable whose first four entries are used as
            left, top, right and bottom

        """
        x1, y1, x2, y2 = (float(init_bbox[k]) for k in range(4))
        first_box = BoundingBox(x1, y1, x2, y2)

        self.image_prev = image_curr
        self.bbox_prev_tight = self.bbox_curr_prior_tight = first_box
        self.DeltaBox = np.array([0., 0.])
        self.lambdaBox = 0.3
        self.prevBoxeffect = 0
        self.occlusion_flag = 0

        target_crop, _, _, _ = cropPadImage(self.bbox_prev_tight,
                                            self.image_prev)

        # image, BGR(training type)
        target_input = self.preprocess(target_crop)

        # 227x227x1 Hann window (outer product of the 1-D window with itself).
        # Assumes self.preprocess yields a 227x227xC array -- TODO confirm.
        hann = np.hanning(227)
        window = (hann[:, np.newaxis] * hann[np.newaxis, :])[:, :, np.newaxis]
        target_input = target_input * window

        batch = np.expand_dims(target_input, axis=0)
        # NOTE(review): `sess` and `tracknet` are not parameters of this
        # method; presumably they are module-level names -- verify.
        self.target_pool5 = sess.run(
            [tracknet.target_pool5],
            feed_dict={tracknet.target: batch})
        self.target_pool5 = np.resize(self.target_pool5, [1, 6, 6, 256])
示例#4
0
    def load_annotation_file(self, alov_sub_folder, annotation_file):
        """Parse one ALOV annotation file and register the video it describes.

        The video folder is ``self.alov_folder/alov_sub_folder/<stem>``, where
        ``<stem>`` is the annotation file name up to its first dot. Each
        annotation line must contain nine whitespace-separated fields: a frame
        number followed by four corner points. The corners are reduced to
        their axis-aligned extent, shifted by -1, and stored with
        ``frame_num - 1``.

        :param alov_sub_folder: category folder name; also the key used in
            ``self.category``
        :param annotation_file: path of the annotation file to read
        :returns: None; ``self.alov_videos`` and ``self.category`` are updated

        """
        # NOTE(review): names are extracted by splitting on '/', so this
        # presumably expects POSIX-style paths -- confirm.
        video_path = os.path.join(
            self.alov_folder, alov_sub_folder,
            annotation_file.split('/')[-1].split('.')[0])

        clip = video(video_path)
        clip.all_frames = sorted(glob.glob(os.path.join(video_path, '*.jpg')))

        with open(annotation_file, 'r') as handle:
            for record in handle.read().rstrip().split('\n'):
                frame_tok, ax, ay, bx, by, cx, cy, dx, dy = record.split()
                frame_num = int(frame_tok)
                ax, ay, bx, by, cx, cy, dx, dy = map(
                    float, (ax, ay, bx, by, cx, cy, dx, dy))

                left = min(ax, min(bx, min(cx, dx))) - 1
                top = min(ay, min(by, min(cy, dy))) - 1
                right = max(ax, max(bx, max(cx, dx))) - 1
                bottom = max(ay, max(by, max(cy, dy))) - 1

                clip.annotations.append(
                    frame(frame_num - 1, BoundingBox(left, top, right, bottom)))

        video_name = video_path.split('/')[-1]
        self.alov_videos[video_name] = clip
        # Group the video under its category, creating the list on first use.
        self.category.setdefault(alov_sub_folder, []).append(
            self.alov_videos[video_name])
示例#5
0
def computeCropPadImageLocation(bbox_tight, image):
    """Compute the padded crop rectangle around a box, clipped to the image.

    The rectangle is centred on ``bbox_tight`` with the size given by its
    ``compute_output_width`` / ``compute_output_height``; any part that would
    fall outside the image is cut off, and width and height are at least 1.0.

    :param bbox_tight: box providing ``get_center_x``, ``get_center_y``,
        ``compute_output_width`` and ``compute_output_height``
    :param image: object whose ``shape[0]`` and ``shape[1]`` are the image
        height and width
    :returns: BoundingBox of the crop in original-image coordinates

    """
    center_x = bbox_tight.get_center_x()
    center_y = bbox_tight.get_center_y()

    img_h, img_w = image.shape[0], image.shape[1]

    # Half extents of the padded output region.
    half_w = bbox_tight.compute_output_width() / 2.
    half_h = bbox_tight.compute_output_height() / 2.

    # Upper-left corner, clamped so it never leaves the image.
    left = max(0.0, center_x - half_w)
    top = max(0.0, center_y - half_h)

    # On each side of the centre, take the half extent or the distance to the
    # image border, whichever is smaller.
    width = max(1.0, min(half_w, center_x) + min(half_w, img_w - center_x))
    height = max(1.0, min(half_h, center_y) + min(half_h, img_h - center_y))

    return BoundingBox(left, top, left + width, top + height)
示例#6
0
    def load_annotation_file(self, annotation_file):
        """Parse one annotation file and append its video to ``self.videos``.

        The video folder is ``self.video_folder/<stem>``, where ``<stem>`` is
        the annotation file name up to its first dot. Each line must hold four
        comma-separated integers ``x,y,w,h``; they are converted to corner
        form ``(x, y, x + w, y + h)``. Frame numbers are the zero-based line
        indices.

        :param annotation_file: path of the annotation file to read
        :returns: None; the parsed video is appended to ``self.videos``

        """
        video_path = os.path.join(
            self.video_folder, annotation_file.split('/')[-1].split('.')[0])

        clip = video(video_path)
        clip.all_frames = sorted(glob.glob(os.path.join(video_path, '*.jpg')))

        with open(annotation_file, 'r') as handle:
            rows = handle.read().rstrip().split('\n')
            for frame_num, row in enumerate(rows):
                x_tok, y_tok, w_tok, h_tok = row.split(',')
                left, top = int(x_tok), int(y_tok)
                box_w, box_h = int(w_tok), int(h_tok)

                box = BoundingBox(left, top, left + box_w, top + box_h)
                clip.annotations.append(frame(frame_num, box))

        self.videos.append(clip)
    def make_true_example(self):
        """Build the unperturbed training example for the current frame pair.

        The current image is cropped around the previous frame's ground-truth
        box with ``cropPadImage``, and the current ground truth is re-expressed
        relative to that crop via ``recenter`` and ``scale``.

        :returns: tuple ``(search region crop, self.target_pad_,
            recentered and scaled current ground-truth box)``

        """
        prior_box = self.bbox_prev_gt_
        target = self.target_pad_
        search_region, search_location, pad_x, pad_y = cropPadImage(
            prior_box, self.img_curr_)

        # Express the current ground truth in the crop's coordinates.
        gt_in_crop = self.bbox_curr_gt_.recenter(
            search_location, pad_x, pad_y, BoundingBox(0, 0, 0, 0))
        gt_in_crop.scale(search_region)

        return search_region, target, gt_in_crop
示例#8
0
 def init(self, image_curr, region):
     """Initialise tracker state from the first frame of a video.

     The requested region is clipped to the bounds of ``image_curr`` and
     stored as both the previous and the prior box; the motion bookkeeping
     attributes are reset.

     :param image_curr: first frame; ``shape[0]`` and ``shape[1]`` are used
         as its height and width
     :param region: object with ``x``, ``y``, ``width`` and ``height``
     :returns: None
     """
     # Use the frame that was passed in. The previous code read an undefined
     # name ``image`` here, which either raised NameError or silently picked
     # up an unrelated global.
     frame_height, frame_width = image_curr.shape[0], image_curr.shape[1]

     left = max(region.x, 0)
     top = max(region.y, 0)
     right = min(region.x + region.width, frame_width - 1)
     bottom = min(region.y + region.height, frame_height - 1)
     bbox_gt = BoundingBox(left, top, right, bottom)

     self.image_prev = image_curr
     self.bbox_prev_tight = bbox_gt
     self.bbox_curr_prior_tight = bbox_gt
     self.DeltaBox = np.array([0., 0.])
     self.lambdaBox = 0.3
     self.prevBoxeffect = 0
     self.occlusion_flag = 0
示例#9
0
    def track(self, image_curr, tracknet, sess):
        """Estimate the target box in ``image_curr`` and update tracker state.

        The target is cropped from the previous frame and the search region
        from the current frame, both are preprocessed and fed to
        ``tracknet.fc8``, and the first box returned by ``calculate_box`` is
        mapped back to image coordinates.

        :param image_curr: current frame
        :param tracknet: network object exposing ``image``, ``target`` and
            ``fc8``
        :param sess: session object whose ``run`` evaluates the network
        :returns: the estimated BoundingBox, or ``False`` when
            ``calculate_box`` yields no box (state is left unchanged then)

        """
        target_crop, _, _, _ = cropPadImage(self.bbox_prev_tight,
                                            self.image_prev)
        search_crop, search_location, pad_x, pad_y = cropPadImage(
            self.bbox_curr_prior_tight, image_curr)

        # image, BGR(training type)
        search_input = self.preprocess(search_crop)
        target_input = self.preprocess(target_crop)

        # Add the leading batch axis expected by the feed.
        search_batch = np.expand_dims(search_input, axis=0)
        target_batch = np.expand_dims(target_input, axis=0)

        fc8 = sess.run(
            [tracknet.fc8],
            feed_dict={
                tracknet.image: search_batch,
                tracknet.target: target_batch
            })
        boxes = calculate_box(fc8)

        # this box is NMS result, TODO, all bbox check
        if len(boxes) == 0:
            return False

        best = boxes[0]
        estimate = BoundingBox(best[0], best[1], best[2], best[3])

        # In-place conversion from crop coordinates back to image coordinates.
        estimate.unscale(search_crop)
        estimate.uncenter(image_curr, search_location, pad_x, pad_y)

        self.image_prev = image_curr
        self.bbox_prev_tight = estimate
        self.bbox_curr_prior_tight = estimate
        return estimate
示例#10
0
    def track(self, image_curr, tracknet, velocity, sess):
        """Estimate the target box in ``image_curr`` as a ``vot.Rectangle``.

        The target is cropped from ``self.image_prev`` and the search region
        from the current frame; both are preprocessed and fed to the network,
        and the first box returned by ``calculate_box`` is mapped back to
        image coordinates and stored as the new prior. ``self.image_prev`` and
        ``self.bbox_prev_tight`` are not updated by this method. When no box
        is returned, the existing prior box is reported instead.

        :param image_curr: current frame
        :param tracknet: network object exposing ``image``, ``target``,
            ``re_fc4_image`` and ``fc4_adj``
        :param velocity: accepted but not used by this method
        :param sess: session object whose ``run`` evaluates the network
        :returns: ``vot.Rectangle(left_x, left_y, width, height)``

        """
        target_crop, _, _, _ = cropPadImage(self.bbox_prev_tight,
                                            self.image_prev)
        search_crop, search_location, pad_x, pad_y = cropPadImage(
            self.bbox_curr_prior_tight, image_curr)

        # image, BGR(training type)
        search_input = self.preprocess(search_crop)
        target_input = self.preprocess(target_crop)

        # Add the leading batch axis expected by the feed.
        search_batch = np.expand_dims(search_input, axis=0)
        target_batch = np.expand_dims(target_input, axis=0)

        re_fc4_image, fc4_adj = sess.run(
            [tracknet.re_fc4_image, tracknet.fc4_adj],
            feed_dict={
                tracknet.image: search_batch,
                tracknet.target: target_batch
            })
        # The middle return value (an object/no-object flag) is unused here.
        boxes, _has_object, objectness = calculate_box(re_fc4_image, fc4_adj)

        print('objectness_s is: ', objectness)

        # this box is NMS result, TODO, all bbox check
        if len(boxes) != 0:
            best = boxes[0]
            estimate = BoundingBox(best[0], best[1], best[2], best[3])

            # In-place conversion from crop coordinates to image coordinates.
            estimate.unscale(search_crop)
            estimate.uncenter(image_curr, search_location, pad_x, pad_y)

            self.bbox_curr_prior_tight = estimate
        else:
            # No detection: keep the prior untouched and report it.
            estimate = self.bbox_curr_prior_tight

        # A disabled experimental variant ("trick method") was previously kept
        # here as commented-out code. It smoothed the box-centre displacement
        # into self.DeltaBox (weight self.lambdaBox) and, when no object was
        # reported or the detection lay far from the search-region centre,
        # advanced the prior box by self.DeltaBox instead of using the
        # detection. Removed as dead code.

        return vot.Rectangle(estimate.x1,
                             estimate.y1,
                             estimate.x2 - estimate.x1,
                             estimate.y2 - estimate.y1)
示例#11
0
 def __init__(self):
     """Create an empty record.

     ``bbox`` starts as a box built from four zeros, ``image_path`` as an
     empty list, and both display dimensions as 0.
     """
     self.bbox = BoundingBox(0, 0, 0, 0)
     self.image_path = []
     self.disp_width = self.disp_height = 0