# Exemplo n.º 1 (0)
    def process_image_rgbd_coord(self, img, hha, depth, coord, crop_size=None):
        """Normalize and pad one RGB/HHA/depth/coord sample for the network.

        Args:
            img: HWC image; a single-channel image is replicated to 3 channels.
            hha: HWC HHA-encoded depth image.
            depth: HW depth map (stays single-channel).
            coord: HWC coordinate map.
            crop_size: optional target shape passed to pad_image_to_shape.

        Returns:
            (p_img, p_hha, p_depth, p_coord, margin) in CHW layout, where
            margin is the padding added ([0, 0, 0, 0] when crop_size is None).
        """
        p_img = img
        p_hha = hha
        p_depth = depth
        p_coord = coord

        # Replicate a single-channel image to 3 channels.
        if img.shape[2] < 3:
            p_img = np.concatenate((p_img, p_img, p_img), axis=2)

        p_img = normalize(p_img, self.image_mean, self.image_std)
        # p_depth = normalize(p_depth, 0, 1)

        # BUG FIX: `margin` was unbound when crop_size is None, making the
        # unconditional return below raise NameError; default to zero margin.
        margin = np.array([0, 0, 0, 0])
        if crop_size is not None:
            # NOTE(review): margin is overwritten by each call; the caller uses
            # the last one, assuming all four inputs receive identical padding.
            p_img, margin = pad_image_to_shape(p_img, crop_size,
                                               cv2.BORDER_CONSTANT, value=0)
            p_hha, margin = pad_image_to_shape(p_hha, crop_size,
                                               cv2.BORDER_CONSTANT, value=0)
            p_depth, margin = pad_image_to_shape(p_depth, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)
            p_coord, margin = pad_image_to_shape(p_coord, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)

        # HWC -> CHW; depth gains an explicit leading channel axis.
        p_img = p_img.transpose(2, 0, 1)
        p_hha = p_hha.transpose(2, 0, 1)
        p_depth = p_depth[np.newaxis, ...]
        p_coord = p_coord.transpose(2, 0, 1)

        return p_img, p_hha, p_depth, p_coord, margin
# Exemplo n.º 2 (0)
    def scale_process_rgbd_coord(self, img, hha, depth, coord, camera_params, ori_shape, crop_size, stride_rate, device=None):
        """Evaluate one RGB-D(+coord) scale, tiling with a sliding window if needed.

        If the image's long side fits within ``crop_size``, the whole image is
        run in a single forward pass; otherwise the padded image is covered
        with square ``crop_size`` tiles stepped by ``crop_size * stride_rate``
        and the per-tile class scores are summed. The score map is cropped
        back to the unpadded region and resized to ``ori_shape`` (rows, cols).

        Returns an HWC numpy array of class scores.
        """
        new_rows, new_cols, c = img.shape
        long_size = new_cols if new_cols > new_rows else new_rows

        if long_size <= crop_size:
            # Single pass: the sample is padded up to crop_size inside
            # process_image_rgbd_coord, which also reports the padding margin.
            input_data, input_hha, input_depth, input_coord, margin = self.process_image_rgbd_coord(img, hha, depth, coord, crop_size)
            score = self.val_func_process_rgbd_coord(input_data, input_hha, input_depth, input_coord, camera_params, device)
            # Strip the padding margin from the score map.
            score = score[:, margin[0]:(score.shape[1] - margin[1]),
                    margin[2]:(score.shape[2] - margin[3])]
        else:
            stride = int(np.ceil(crop_size * stride_rate))
            # NOTE(review): `margin` is overwritten by each pad call; the final
            # crop below uses the last one, which assumes all four inputs get
            # identical padding — confirm pad_image_to_shape guarantees this.
            img_pad, margin = pad_image_to_shape(img, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)
            hha_pad, margin = pad_image_to_shape(hha, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)
            depth_pad, margin = pad_image_to_shape(depth, crop_size,
                                                  cv2.BORDER_CONSTANT, value=0)
            coord_pad, margin = pad_image_to_shape(coord, crop_size,
                                                  cv2.BORDER_CONSTANT, value=0)

            pad_rows = img_pad.shape[0]
            pad_cols = img_pad.shape[1]
            # Number of tiles needed to cover the padded image in each axis.
            r_grid = int(np.ceil((pad_rows - crop_size) / stride)) + 1
            c_grid = int(np.ceil((pad_cols - crop_size) / stride)) + 1
            data_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(
                device)
            count_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(
                device)

            for grid_yidx in range(r_grid):
                for grid_xidx in range(c_grid):
                    s_x = grid_xidx * stride
                    s_y = grid_yidx * stride
                    e_x = min(s_x + crop_size, pad_cols)
                    e_y = min(s_y + crop_size, pad_rows)
                    # Clamp the last tile to the border so it stays crop_size wide.
                    s_x = e_x - crop_size
                    s_y = e_y - crop_size
                    img_sub = img_pad[s_y:e_y, s_x: e_x, :]
                    hha_sub = hha_pad[s_y:e_y, s_x: e_x, :]
                    depth_sub = depth_pad[s_y:e_y, s_x: e_x]
                    coord_sub = coord_pad[s_y:e_y, s_x: e_x]
                    # Track how many tiles cover each pixel (used only if the
                    # disabled averaging below is re-enabled).
                    count_scale[:, s_y: e_y, s_x: e_x] += 1

                    input_data, input_hha, input_depth, input_coord, tmargin = self.process_image_rgbd_coord(img_sub, hha_sub, depth_sub, coord_sub, crop_size)
                    temp_score = self.val_func_process_rgbd_coord(input_data, input_hha, input_depth, input_coord, camera_params, device)
                    # Strip the per-tile padding margin before accumulating.
                    temp_score = temp_score[:,
                                 tmargin[0]:(temp_score.shape[1] - tmargin[1]),
                                 tmargin[2]:(temp_score.shape[2] - tmargin[3])]
                    data_scale[:, s_y: e_y, s_x: e_x] += temp_score
            # Overlapping tiles are summed; averaging by count_scale is disabled.
            score = data_scale #/ count_scale
            score = score[:, margin[0]:(score.shape[1] - margin[1]),
                    margin[2]:(score.shape[2] - margin[3])]

        score = score.permute(1, 2, 0)
        # Resize to the original resolution; cv2.resize takes (width, height).
        data_output = cv2.resize(score.cpu().numpy(),
                                 (ori_shape[1], ori_shape[0]),
                                 interpolation=cv2.INTER_LINEAR)

        return data_output
    def process_image_rgbd(self, img, disp, crop_size=None):
        """Normalize an RGB image and its disparity map and convert to CHW.

        A 2-D disparity map is normalized with (0, 1) and given a leading
        channel axis; a 3-D one is treated like an image. When crop_size is
        given, both arrays are zero-padded and the padding margin is returned
        as a third value.
        """
        from utils.img_utils import pad_image_to_shape
        from dataloader import normalize

        # A single-channel input is stacked into three identical channels.
        image = img
        if image.shape[2] < 3:
            image = np.concatenate((image, image, image), axis=2)

        image = normalize(image, self.image_mean, self.image_std)

        disp_is_2d = len(disp.shape) == 2
        if disp_is_2d:
            disparity = normalize(disp, 0, 1)
        else:
            disparity = normalize(disp, self.image_mean, self.image_std)

        if crop_size is not None:
            image, margin = pad_image_to_shape(image,
                                               crop_size,
                                               cv2.BORDER_CONSTANT,
                                               value=0)
            disparity, _ = pad_image_to_shape(disparity,
                                              crop_size,
                                              cv2.BORDER_CONSTANT,
                                              value=0)

        # HWC -> CHW; a 2-D disparity map gets a new channel axis instead.
        image = image.transpose(2, 0, 1)
        if disp_is_2d:
            disparity = disparity[np.newaxis, ...]
        else:
            disparity = disparity.transpose(2, 0, 1)

        if crop_size is not None:
            return image, disparity, margin
        return image, disparity
# Exemplo n.º 4 (0)
    def scale_rectangular_process(self, img, ori_shape, crop_height, crop_width, stride_rate, device=None):
        """Sliding-window inference with a rectangular crop_height x crop_width window.

        The per-tile score maps are summed over the padded image, the padding
        margin is stripped, and the result is resized back to ori_shape.

        Args:
            img: HWC image; must be at least crop_height x crop_width.
            ori_shape: (rows, cols) of the original image.
            crop_height: window height fed to the network.
            crop_width: window width fed to the network.
            stride_rate: step between windows as a fraction of the window size.
            device: CUDA device for the accumulator tensors.

        Returns:
            HWC numpy array of class scores at ori_shape resolution.
        """
        new_rows, new_cols, c = img.shape

        if new_cols < crop_width or new_rows < crop_height:
            print('ERROR: img is smaller than crop_size')  # fixed 'crop__size' typo
            exit(1)
        else:
            stride_h = int(np.ceil(crop_height * stride_rate))
            stride_w = int(np.ceil(crop_width * stride_rate))
            # BUG FIX: the crop shape was passed as a set literal
            # {crop_height, crop_width}; sets are unordered and collapse to a
            # single element when height == width. Use an ordered tuple.
            img_pad, margin = pad_image_to_shape(img, (crop_height, crop_width),
                                                 cv2.BORDER_CONSTANT, value=0)

            pad_rows = img_pad.shape[0]  # height
            pad_cols = img_pad.shape[1]  # width
            # Number of tiles needed to cover the padded image in each axis.
            r_grid = int(np.ceil((pad_rows - crop_height) / stride_h)) + 1
            c_grid = int(np.ceil((pad_cols - crop_width) / stride_w)) + 1
            data_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(device)
            count_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(device)

            for grid_yidx in range(r_grid):
                for grid_xidx in range(c_grid):
                    s_x = grid_xidx * stride_w
                    s_y = grid_yidx * stride_h
                    e_x = min(s_x + crop_width, pad_cols)
                    e_y = min(s_y + crop_height, pad_rows)
                    # Clamp the last tile to the border, keeping its size fixed.
                    s_x = e_x - crop_width
                    s_y = e_y - crop_height
                    img_sub = img_pad[s_y:e_y, s_x: e_x, :]
                    count_scale[:, s_y: e_y, s_x: e_x] += 1

                    # Same set->tuple fix for the per-tile crop shape.
                    input_data, tmargin = self.process_image(img_sub, (crop_height, crop_width))

                    temp_score = self.val_func_process(input_data, device)
                    # Strip the per-tile padding margin before accumulating.
                    temp_score = temp_score[:,
                                 tmargin[0]:(temp_score.shape[1] - tmargin[1]),
                                 tmargin[2]:(temp_score.shape[2] - tmargin[3])]
                    data_scale[:, s_y: e_y, s_x: e_x] += temp_score
            # Overlapping tiles are summed; averaging by count_scale is disabled.
            score = data_scale
            score = score[:, margin[0]:(score.shape[1] - margin[1]),
                    margin[2]:(score.shape[2] - margin[3])]

        score = score.permute(1, 2, 0)
        # Resize to the original resolution; cv2.resize takes (width, height).
        data_output = cv2.resize(score.cpu().numpy(),
                                 (ori_shape[1], ori_shape[0]),
                                 interpolation=cv2.INTER_LINEAR)

        return data_output
# Exemplo n.º 5 (0)
    def process_image(self, img, crop_size=None):
        """Normalize an image to the model statistics and convert it to CHW.

        When crop_size is given, the image is zero-padded to that shape and
        the padding margin is returned alongside the image.
        """
        # Stack a single-channel input into three identical channels.
        processed = img
        if processed.shape[2] < 3:
            processed = np.concatenate((processed, processed, processed), axis=2)

        processed = normalize(processed, self.image_mean, self.image_std)

        if crop_size is None:
            return processed.transpose(2, 0, 1)

        processed, margin = pad_image_to_shape(processed, crop_size,
                                               cv2.BORDER_CONSTANT, value=0)
        return processed.transpose(2, 0, 1), margin
def scale_process(val_func,
                  class_num,
                  img_scale,
                  ori_shape,
                  img_means,
                  img_std,
                  crop_size,
                  is_flip=False,
                  device=None):
    """Run sliding-window (or whole-image) inference on one image scale.

    If the long side of ``img_scale`` fits in ``crop_size``, a single forward
    pass is used; otherwise the padded image is covered with overlapping
    square crops and the per-crop class scores are summed. The score map is
    cropped back to the unpadded region and resized to ``ori_shape``
    (rows, cols). Returns an HWC numpy score map.
    """
    rows, cols, _ = img_scale.shape
    long_side = max(rows, cols)

    if long_side <= crop_size:
        # The whole (padded) image fits in one network pass.
        net_input, margin = pre_img(img_scale, img_means, img_std, crop_size)
        score = val_func_process(val_func, net_input, is_flip, device)
        # Strip the padding margin from the score map.
        score = score[:, margin[0]:(score.shape[1] - margin[1]),
                      margin[2]:(score.shape[2] - margin[3])]
    else:
        stride_rate = 2 / 3
        step = int(np.ceil(crop_size * stride_rate))

        padded, margin = pad_image_to_shape(img_scale,
                                            crop_size,
                                            cv2.BORDER_CONSTANT,
                                            value=0)

        pad_rows = padded.shape[0]
        pad_cols = padded.shape[1]
        # Number of tiles needed to cover the padded image in each axis.
        n_rows = int(np.ceil((pad_rows - crop_size) / step)) + 1
        n_cols = int(np.ceil((pad_cols - crop_size) / step)) + 1
        score_sum = torch.zeros(class_num, pad_rows, pad_cols).cuda(device)
        hit_count = torch.zeros(class_num, pad_rows, pad_cols).cuda(device)

        for row_idx in range(n_rows):
            for col_idx in range(n_cols):
                # Clamp the last tile to the border, keeping it crop_size wide.
                end_x = min(col_idx * step + crop_size, pad_cols)
                end_y = min(row_idx * step + crop_size, pad_rows)
                start_x = end_x - crop_size
                start_y = end_y - crop_size
                tile = padded[start_y:end_y, start_x:end_x, :]
                hit_count[:, start_y:end_y, start_x:end_x] += 1

                tile_input, tmargin = pre_img(tile, img_means, img_std,
                                              crop_size)
                tile_score = val_func_process(val_func, tile_input, is_flip,
                                              device)
                # Strip the per-tile padding margin before accumulating.
                tile_score = tile_score[:,
                                        tmargin[0]:(tile_score.shape[1] -
                                                    tmargin[1]),
                                        tmargin[2]:(tile_score.shape[2] -
                                                    tmargin[3])]
                score_sum[:, start_y:end_y, start_x:end_x] += tile_score
        # Overlapping tiles are summed; averaging by hit_count is disabled.
        score = score_sum
        score = score[:, margin[0]:(score.shape[1] - margin[1]),
                      margin[2]:(score.shape[2] - margin[3])]

    score = score.permute(1, 2, 0)
    # cv2.resize takes its target size as (width, height).
    target_size = (ori_shape[1], ori_shape[0])
    data_output = cv2.resize(score.cpu().numpy(),
                             target_size,
                             interpolation=cv2.INTER_LINEAR)

    return data_output