Example #1
    def initialize(self, image, info: dict) -> dict:
        # Basic initialization: frame counter and device selection
        self.frame_num = 1
        if not self.params.has('device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'
        # Initialize network
        self.initialize_features()
        # The DiMP network
        self.net = self.params.net

        # Time initialization
        tic = time.time()

        state = info['init_bbox']

        # Get Target layer
        target_depth = get_target_depth(image, state)

        print(target_depth)
        target_layer = get_layered_image_by_depth(image, target_depth)

        # Index of the 2 m depth bin that contains the target
        self.layer_id = int(target_depth // 2000)
        print('layer id : ', self.layer_id)
        # Convert image
        # im = numpy_to_torch(image) # HxWx6 -> 6 * H * W
        im = numpy_to_torch(target_layer)

        # Get target position and size

        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Get object id
        self.object_id = info.get('object_ids', [None])[0]
        self.id_str = '' if self.object_id is None else ' {}'.format(
            self.object_id)
        # Set sizes
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        sz = self.params.image_sample_size
        sz = torch.Tensor([sz, sz] if isinstance(sz, int) else sz)
        if self.params.get('use_image_aspect_ratio', False):
            sz = self.image_sz * sz.prod().sqrt() / self.image_sz.prod().sqrt()
            stride = self.params.get('feature_stride', 32)
            sz = torch.round(sz / stride) * stride
        self.img_sample_sz = sz
        self.img_support_sz = self.img_sample_sz

        # Set search area
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        self.target_scale = math.sqrt(
            search_area) / self.img_sample_sz.prod().sqrt()

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Setup scale factors
        if not self.params.has('scale_factors'):
            self.params.scale_factors = torch.ones(1)
        elif isinstance(self.params.scale_factors, (list, tuple)):
            self.params.scale_factors = torch.Tensor(self.params.scale_factors)

        # Setup scale bounds
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        init_backbone_feat = self.generate_init_samples(im)

        # Initialize classifier
        self.init_classifier(init_backbone_feat)

        # Initialize IoUNet
        if self.params.get('use_iou_net', True):
            self.init_iou_net(init_backbone_feat)

        out = {'time': time.time() - tic}
        return out
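
Note: get_target_depth and get_layered_image_by_depth are external helpers whose definitions are not shown in these examples. Below is a minimal sketch of what get_target_depth might look like, assuming it returns a robust (median) depth in millimeters over the valid pixels inside the [x, y, w, h] box; the exact statistic is an assumption, not the original implementation.

import numpy as np

def get_target_depth(depth_image, bbox):
    # Hypothetical sketch, not the original helper: median depth (mm)
    # of the valid pixels inside the [x, y, w, h] target box.
    x, y, w, h = [int(v) for v in bbox]
    region = depth_image[y:y + h, x:x + w]
    valid = region[region > 0]  # ignore missing depth readings
    return float(np.median(valid)) if valid.size > 0 else 0.0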
Example #2
    def track(self, image, info: dict = None) -> dict:

        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Track on the depth layers around the previous one.
        # Each candidate window spans 2 m ([z, z+2) m for window index z);
        # index 10 covers everything from 10 m to the image maximum.
        max_score = 0
        flag = 'not_found'
        new_pos = [-1, -1, -1, -1]
        scale_ind = None
        backbone_feat = None
        test_x = None
        sample_pos = None
        s = None
        sample_coords = None
        sample_scales = None

        target_dist = 0
        final_layer = image
        print(np.max(image))  # max depth value in the frame (mm)

        start = max(0, self.layer_id - 1)
        end = min(11, self.layer_id + 2)
        for z_dist in range(start, end):
            if z_dist == 10:
                lower = 10000  # 10 m, in millimeters
                upper = np.max(image)
            else:
                lower = z_dist * 1000
                upper = (z_dist + 2) * 1000

            layer = image.copy()
            layer[layer > upper] = 0
            layer[layer < lower] = 0

            print(lower, upper, np.median(layer[layer > 0]))  # median depth in the window

            # Rescale the window to [0, 255]; clip so zeroed-out pixels stay at 0
            layer = np.clip((layer - lower) / (upper - lower), 0, 1)
            layer = np.asarray(layer * 255, dtype=np.uint8)
            layer = cv2.applyColorMap(layer, cv2.COLORMAP_JET)
            layer = numpy_to_torch(layer)

            # Extract backbone features
            backbone_feat_layer, sample_coords_layer, im_patches_layer = self.extract_backbone_features(
                layer, self.get_centered_sample_pos(),
                self.target_scale * self.params.scale_factors,
                self.img_sample_sz)
            # Extract classification features
            test_x_layer = self.get_classification_features(
                backbone_feat_layer)

            # Location of sample
            sample_pos_layer, sample_scales_layer = self.get_sample_location(
                sample_coords_layer)

            # Compute classification scores
            scores_raw_layer = self.classify_target(test_x_layer)

            # Localize the target
            translation_vec_layer, scale_ind_layer, s_layer, flag_layer = self.localize_target(
                scores_raw_layer, sample_pos_layer,
                sample_scales_layer)  # NOTE (Song): depth cues could be added here
            new_pos_layer = sample_pos_layer[
                scale_ind_layer, :] + translation_vec_layer

            score_map_layer = s_layer[scale_ind_layer, ...]
            max_score_layer = torch.max(score_map_layer).item()

            if flag_layer != 'not_found' and max_score_layer > max_score:
                flag = flag_layer
                max_score = max_score_layer
                new_pos = new_pos_layer
                scale_ind = scale_ind_layer
                sample_pos = sample_pos_layer
                backbone_feat = backbone_feat_layer
                test_x = test_x_layer
                sample_scales = sample_scales_layer
                s = s_layer
                target_dist = z_dist
                sample_coords = sample_coords_layer

                final_layer = layer

        # if max_score > 0.8:
        #     self.layer_id = target_dist

        print('Choose %d meter ... ' % target_dist, flag, max_score)

        # Update position and scale
        if flag != 'not_found':
            if self.params.get('use_iou_net', True):
                update_scale_flag = self.params.get(
                    'update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #

        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.get('hard_negative_learning_rate',
                                        None) if hard_negative else None

        if update_flag and self.params.get('update_classifier', False):
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            # Update the classifier model
            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

        # Set the pos of the tracker to iounet pos
        if self.params.get('use_iou_net',
                           True) and flag != 'not_found' and hasattr(
                               self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        if flag != 'not_found':
            score_map = s[scale_ind, ...]
            max_score = torch.max(score_map).item()

            # Visualize and set debug info
            self.search_area_box = torch.cat(
                (sample_coords[scale_ind,
                               [1, 0]], sample_coords[scale_ind, [3, 2]] -
                 sample_coords[scale_ind, [1, 0]] - 1))
            self.debug_info['flag' + self.id_str] = flag
            self.debug_info['max_score' + self.id_str] = max_score
            if self.visdom is not None:
                self.visdom.register(score_map, 'heatmap', 2,
                                     'Score Map' + self.id_str)
                self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
            elif self.params.debug >= 2:
                show_tensor(score_map,
                            5,
                            title='Max score = {:.2f}'.format(max_score))

        else:
            max_score = 0
            final_layer = image

        # Compute output bounding box
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        if self.params.get('output_not_found_box',
                           False) and flag == 'not_found':
            output_state = [-1, -1, -1, -1]
        else:
            output_state = new_state.tolist()

            target_depth = get_target_depth(image, output_state)
            self.layer_id = int(target_depth // 2000)

        out = {
            'target_bbox': output_state,
            'confidence': max_score,
            'image': torch_to_numpy(final_layer)
        }
        return out
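
A minimal driver sketch showing how initialize and track fit together, assuming a tracker instance of the class above; the tracker construction, the paths, and the init box below are placeholders, not part of the original code.

import glob
import cv2

# Hypothetical driver loop; paths and the init box are placeholders.
frame_paths = sorted(glob.glob('sequence/depth/*.png'))
frames = [cv2.imread(p, -1) for p in frame_paths]  # 16-bit depth frames, mm
tracker.initialize(frames[0], {'init_bbox': [100, 80, 40, 60]})  # [x, y, w, h]
for frame in frames[1:]:
    out = tracker.track(frame)
    print(out['target_bbox'], out['confidence'])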
Example #3
    def _get_frame(self, seq_path, frame_id, bbox=None):
        '''
        Return, depending on self.dtype:
            - a colormap computed from the depth image, or
            - [depth, depth, depth]
        '''
        color_path, depth_path = self._get_frame_path(seq_path, frame_id)

        rgb = cv2.imread(color_path)
        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        dp = cv2.imread(depth_path, -1)  # IMREAD_UNCHANGED keeps 16-bit depth

        max_depth = min(np.max(dp), 10000)
        dp[dp > max_depth] = max_depth  # clamp depth to 10 m (values in mm)

        if self.dtype == 'color':
            # img = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
            img = rgb

        elif self.dtype == 'R':
            img = rgb[:, :, 0]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'G':
            img = rgb[:, :, 1]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'B':
            img = rgb[:, :, 2]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'RColormap':
            R = rgb[:, :, 0]
            R = cv2.normalize(R,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            R = np.asarray(R, dtype=np.uint8)
            img = cv2.applyColorMap(R, cv2.COLORMAP_JET)

        elif self.dtype == 'GColormap':
            G = rgb[:, :, 1]
            G = cv2.normalize(G,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            G = np.asarray(G, dtype=np.uint8)
            img = cv2.applyColorMap(G, cv2.COLORMAP_JET)

        elif self.dtype == 'BColormap':
            B = rgb[:, :, 2]
            B = cv2.normalize(B,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            B = np.asarray(B, dtype=np.uint8)
            img = cv2.applyColorMap(B, cv2.COLORMAP_JET)

        elif self.dtype == 'rgbcolormap':

            colormap = cv2.normalize(dp,
                                     None,
                                     alpha=0,
                                     beta=255,
                                     norm_type=cv2.NORM_MINMAX)
            colormap = np.asarray(colormap, dtype=np.uint8)
            colormap = cv2.applyColorMap(colormap, cv2.COLORMAP_JET)

            img = cv2.merge((rgb, colormap))

        elif self.dtype in [
                'centered_colormap', 'centered_norm_depth',
                'centered_raw_depth'
        ]:
            if bbox is None:
                print('Error !!! centered dtypes require a bbox')
                return None
            target_depth = get_target_depth(dp, bbox)
            img = get_layered_image_by_depth(dp,
                                             target_depth,
                                             dtype=self.dtype)

        elif self.dtype == 'colormap':

            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            img = cv2.applyColorMap(dp, cv2.COLORMAP_JET)

        elif self.dtype == 'colormap_norm_depth':
            '''
            Colormap + depth
            '''
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)

            colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
            r, g, b = cv2.split(colormap)
            img = cv2.merge((r, g, b, dp))

        elif self.dtype == 'colormap_raw_depth':
            raw_dp = dp
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)

            colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
            r, g, b = cv2.split(colormap)
            # img = cv2.merge((r, g, b, dp))
            img = np.stack((r, g, b, raw_dp), axis=2)

        elif self.dtype == 'raw_depth':
            # No normalization here; keep the raw depth values
            img = cv2.merge((dp, dp, dp))

        elif self.dtype == 'normalized_depth':
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            img = cv2.merge((dp, dp, dp))  # H * W * 3

        elif self.dtype == 'rgbd':
            r, g, b = cv2.split(rgb)
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            img = cv2.merge((r, g, b, dp))

        elif self.dtype == 'hha':
            hha_path = os.path.join(seq_path, 'hha')
            if not os.path.isdir(hha_path):
                os.mkdir(hha_path)

            hha_img = os.path.join(
                hha_path,
                '{:08}.png'.format(frame_id + 1))  # frames start from 1
            print(hha_img)
            if not os.path.isfile(hha_img):
                dp = dp / 1000  # millimeters -> meters
                img = getHHA(dp, dp)
                cv2.imwrite(hha_img, img)
            else:
                img = cv2.imread(hha_img)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        elif self.dtype == 'sigmoid':
            img = sigmoid(dp)

        else:
            print('no such dtype ... : %s' % self.dtype)
            img = None

        return img
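
For reference, here is a sketch of what get_layered_image_by_depth might do for the 'centered_colormap' dtype, reusing the window-then-JET-colormap pattern from Example #2; the 1 m margin and the fallback branch are assumptions, not the original code.

import cv2
import numpy as np

def get_layered_image_by_depth(depth_image, target_depth,
                               dtype='centered_colormap', margin=1000):
    # Hypothetical sketch: keep a +/- margin (mm) depth window around the
    # target depth, rescale it to [0, 255], then colorize as in track().
    lower = max(0.0, target_depth - margin)
    upper = target_depth + margin
    layer = depth_image.astype(np.float32)
    layer[(layer < lower) | (layer > upper)] = 0
    layer = np.clip((layer - lower) / (upper - lower), 0, 1)
    layer = np.asarray(layer * 255, dtype=np.uint8)
    if dtype == 'centered_colormap':
        return cv2.applyColorMap(layer, cv2.COLORMAP_JET)
    return cv2.merge((layer, layer, layer))  # depth-style dtypes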
Example #4
    def _read_image(self, image_file: str, dtype='colormap', bbox=None):

        if dtype == 'color':
            im = cv.imread(image_file)
            return cv.cvtColor(im, cv.COLOR_BGR2RGB)

        elif dtype == 'R':
            im = cv.imread(image_file)
            im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
            im = im[:, :, 0]
            im = cv.merge((im, im, im))
            return im

        elif dtype == 'G':
            im = cv.imread(image_file)
            im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
            im = im[:, :, 1]
            im = cv.merge((im, im, im))
            return im
        elif dtype == 'B':
            im = cv.imread(image_file)
            im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
            im = im[:, :, 2]
            im = cv.merge((im, im, im))
            return im

        elif dtype == 'RColormap':
            rgb = cv.imread(image_file)
            rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
            R = rgb[:, :, 0]
            R = cv.normalize(R,
                             None,
                             alpha=0,
                             beta=255,
                             norm_type=cv.NORM_MINMAX)
            R = np.asarray(R, dtype=np.uint8)
            img = cv.applyColorMap(R, cv.COLORMAP_JET)
            return img

        elif dtype == 'GColormap':
            rgb = cv.imread(image_file)
            rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
            G = rgb[:, :, 1]
            G = cv.normalize(G,
                             None,
                             alpha=0,
                             beta=255,
                             norm_type=cv.NORM_MINMAX)
            G = np.asarray(G, dtype=np.uint8)
            img = cv.applyColorMap(G, cv.COLORMAP_JET)
            return img

        elif dtype == 'BColormap':
            rgb = cv.imread(image_file)
            rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
            B = rgb[:, :, 2]
            B = cv.normalize(B,
                             None,
                             alpha=0,
                             beta=255,
                             norm_type=cv.NORM_MINMAX)
            B = np.asarray(B, dtype=np.uint8)
            img = cv.applyColorMap(B, cv.COLORMAP_JET)
            return img

        elif dtype == 'rgbcolormap':

            color_image = cv.imread(image_file['color'])
            color = cv.cvtColor(color_image, cv.COLOR_BGR2RGB)
            depth_image = cv.imread(image_file['depth'], -1)
            depth_image = cv.normalize(depth_image,
                                       None,
                                       alpha=0,
                                       beta=255,
                                       norm_type=cv.NORM_MINMAX,
                                       dtype=cv.CV_32F)
            depth_image = np.asarray(depth_image, dtype=np.uint8)
            depth_image = cv.applyColorMap(depth_image, cv.COLORMAP_JET)
            img = cv.merge((color, depth_image))  # use the RGB-converted frame, as in _get_frame
            return img

        else:
            depth_image_file = image_file

            dp = cv.imread(depth_image_file, -1)  # IMREAD_UNCHANGED
            dp[dp > 10000] = 10000  # clamp depth to 10 m (mm units)

            if dtype == 'colormap':
                img = cv.normalize(dp,
                                   None,
                                   alpha=0,
                                   beta=255,
                                   norm_type=cv.NORM_MINMAX,
                                   dtype=cv.CV_32F)
                img = np.asarray(img, dtype=np.uint8)
                img = cv.applyColorMap(img, cv.COLORMAP_JET)
            elif dtype == 'normalized_depth':
                img = cv.normalize(dp,
                                   None,
                                   alpha=0,
                                   beta=255,
                                   norm_type=cv.NORM_MINMAX,
                                   dtype=cv.CV_32F)
                img = np.asarray(img, dtype=np.uint8)
                img = cv.merge((img, img, img))

            elif dtype == 'raw_depth':
                # Keep the raw single-channel depth; no normalization
                img = dp
                # img = np.stack((img, img, img), axis=2)  # optional 3-channel variant

            elif dtype in [
                    'centered_colormap', 'centered_normalized_depth',
                    'centered_raw_depth'
            ]:
                if bbox is None:
                    print('centered colormap requires BBox !!!')
                    return None

                target_depth = get_target_depth(dp, bbox)
                img = get_layered_image_by_depth(dp, target_depth, dtype=dtype)

            elif dtype == 'hha':

                dp = dp / 1000  # millimeters -> meters
                img = getHHA(dp, dp)

            elif dtype == 'sigmoid':
                img = sigmoid(dp)

            elif dtype == 'sigmoid_depth':
                depth = dp

                sig = dp / 1000
                sig = sigmoid(sig)

                img = {}
                img['sigmoid'] = sig
                img['depth'] = depth

            elif dtype == 'colormap_depth':
                depth = dp

                colormap = cv.normalize(dp,
                                        None,
                                        alpha=0,
                                        beta=255,
                                        norm_type=cv.NORM_MINMAX,
                                        dtype=cv.CV_32F)
                colormap = np.asarray(colormap, dtype=np.uint8)
                colormap = cv.applyColorMap(colormap, cv.COLORMAP_JET)

                img = {}
                img['colormap'] = colormap
                img['depth'] = depth

            else:
                print('No such dtype !!! ')
                img = None

        return img
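
Note the calling convention this method implies: for 'rgbcolormap', image_file is a dict with 'color' and 'depth' paths, while the depth-only dtypes take a single path. Illustrative calls (the reader instance and paths are placeholders):

# Hypothetical usage; 'reader' is an instance of the class above.
rgbd = reader._read_image({'color': 'seq/color/00000001.jpg',
                           'depth': 'seq/depth/00000001.png'},
                          dtype='rgbcolormap')
cmap = reader._read_image('seq/depth/00000001.png', dtype='colormap')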
Example #5
    def _get_frames(self, seq_id, depth_threshold=None, bbox=None):
        '''
        dtype:
            - raw_depth
            - norm_depth
            - centered_raw_depth
            - centered_norm_depth
            - colormap
            - centered_colormap
        '''

        rgb_path = self.coco_set.loadImgs([
            self.coco_set.anns[self.sequence_list[seq_id]]['image_id']
        ])[0]['file_name']

        depth_path = rgb_path[:-4] + '.png'

        rgb = cv2.imread(os.path.join(self.img_path, 'color', rgb_path))
        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

        if os.path.isfile(os.path.join(self.img_path, 'segDepth', depth_path)):
            dp = cv2.imread(
                os.path.join(self.img_path, 'segDepth', depth_path), -1)
        else:
            dp = cv2.imread(os.path.join(self.img_path, 'depth', depth_path),
                            -1)

        max_depth = min(np.max(dp), 10000)
        dp[dp > max_depth] = max_depth  # clamp depth to 10 m (mm units)

        if self.dtype in [
                'centered_colormap', 'centered_raw_depth',
                'centered_norm_depth'
        ]:
            if bbox is None:
                print('Error !!! centered dtypes require a bbox')
                return None
            # bbox is repeated
            target_depth = get_target_depth(dp, bbox[0])
            img = get_layered_image_by_depth(dp,
                                             target_depth,
                                             dtype=self.dtype)

        elif self.dtype == 'R':
            img = rgb[:, :, 0]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'G':
            img = rgb[:, :, 1]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'B':
            img = rgb[:, :, 2]
            img = cv2.merge((img, img, img))

        elif self.dtype == 'RColormap':
            R = rgb[:, :, 0]
            R = cv2.normalize(R,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            R = np.asarray(R, dtype=np.uint8)
            img = cv2.applyColorMap(R, cv2.COLORMAP_JET)

        elif self.dtype == 'GColormap':
            G = rgb[:, :, 1]
            G = cv2.normalize(G,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            G = np.asarray(G, dtype=np.uint8)
            img = cv2.applyColorMap(G, cv2.COLORMAP_JET)

        elif self.dtype == 'BColormap':
            B = rgb[:, :, 2]
            B = cv2.normalize(B,
                              None,
                              alpha=0,
                              beta=255,
                              norm_type=cv2.NORM_MINMAX)
            B = np.asarray(B, dtype=np.uint8)
            img = cv2.applyColorMap(B, cv2.COLORMAP_JET)

        elif self.dtype == 'colormap':
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            img = cv2.applyColorMap(dp, cv2.COLORMAP_JET)

        elif self.dtype == 'colormap_norm_depth':
            '''
            Colormap + depth
            '''
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
            r, g, b = cv2.split(colormap)
            img = cv2.merge((r, g, b, dp))

        elif self.dtype == 'colormap_raw_depth':
            raw_dp = dp
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)

            colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
            r, g, b = cv2.split(colormap)
            # img = cv2.merge((r, g, b, dp))
            img = np.stack((r, g, b, raw_dp), axis=2)

        elif self.dtype == 'normalized_depth':
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            img = cv2.merge((dp, dp, dp))  # H * W * 3

        elif self.dtype == 'raw_depth':
            img = np.stack((dp, dp, dp), axis=2)

        elif self.dtype == 'color':
            img = rgb

        elif self.dtype == 'hha':

            hha_path = os.path.join(self.img_path, 'hha')
            if not os.path.isdir(hha_path):
                os.mkdir(hha_path)

            hha_img = os.path.join(hha_path, depth_path)
            print(hha_img)
            if not os.path.isfile(hha_img):
                dp = dp / 1000  # millimeters -> meters
                img = getHHA(dp, dp)
                cv2.imwrite(hha_img, img)
            else:
                img = cv2.imread(hha_img)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        elif self.dtype == 'sigmoid':
            img = sigmoid(dp)

        elif self.dtype == 'rgbcolormap':
            dp = cv2.normalize(dp,
                               None,
                               alpha=0,
                               beta=255,
                               norm_type=cv2.NORM_MINMAX)
            dp = np.asarray(dp, dtype=np.uint8)
            colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
            img = cv2.merge((rgb, colormap))

        return img
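
All five examples repeat the same pattern: min-max normalization to uint8 followed by the JET colormap. A small helper like the one below (not part of the original code) could factor it out:

import cv2
import numpy as np

def depth_to_jet(arr):
    # Min-max normalize a single-channel array to uint8, then apply the
    # JET colormap: the pattern repeated throughout the examples above.
    norm = cv2.normalize(arr, None, alpha=0, beta=255,
                         norm_type=cv2.NORM_MINMAX)
    norm = np.asarray(norm, dtype=np.uint8)
    return cv2.applyColorMap(norm, cv2.COLORMAP_JET)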