def process_image_rgbd_coord(self, img, hha, depth, coord, crop_size=None):
    """Normalize one RGB-D sample (image, HHA, depth, coord) and convert to CHW.

    Args:
        img: HxWxC image; a single-channel image is replicated to 3 channels.
        hha: HxWx3 HHA-encoded depth companion.
        depth: HxW depth map (kept unnormalized; see commented-out line below).
        coord: HxWxC coordinate map.
        crop_size: optional target shape; when given, every modality is
            zero-padded to it and the shared padding margin is returned.

    Returns:
        (p_img, p_hha, p_depth, p_coord, margin) when crop_size is given,
        otherwise (p_img, p_hha, p_depth, p_coord).
    """
    p_img = img
    p_hha = hha
    p_depth = depth
    p_coord = coord

    # Replicate a single channel to three so the per-channel mean/std
    # normalization below applies uniformly.
    if img.shape[2] < 3:
        im_b = p_img
        im_g = p_img
        im_r = p_img
        p_img = np.concatenate((im_b, im_g, im_r), axis=2)

    p_img = normalize(p_img, self.image_mean, self.image_std)
    # p_depth = normalize(p_depth, 0, 1)

    if crop_size is not None:
        # All modalities share the same spatial shape, so the margin
        # returned by any one pad call is valid for all of them.
        p_img, margin = pad_image_to_shape(p_img, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_hha, margin = pad_image_to_shape(p_hha, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_depth, margin = pad_image_to_shape(p_depth, crop_size,
                                             cv2.BORDER_CONSTANT, value=0)
        p_coord, margin = pad_image_to_shape(p_coord, crop_size,
                                             cv2.BORDER_CONSTANT, value=0)
        p_img = p_img.transpose(2, 0, 1)
        p_hha = p_hha.transpose(2, 0, 1)
        p_depth = p_depth[np.newaxis, ...]
        p_coord = p_coord.transpose(2, 0, 1)
        return p_img, p_hha, p_depth, p_coord, margin

    # BUG FIX: the original returned `margin` unconditionally, raising
    # NameError whenever crop_size was None. Return without a margin here,
    # matching the sibling process_image / process_image_rgbd convention.
    p_img = p_img.transpose(2, 0, 1)
    p_hha = p_hha.transpose(2, 0, 1)
    p_depth = p_depth[np.newaxis, ...]
    p_coord = p_coord.transpose(2, 0, 1)
    return p_img, p_hha, p_depth, p_coord
def scale_process_rgbd_coord(self, img, hha, depth, coord, camera_params, ori_shape, crop_size, stride_rate, device=None):
    # Run inference on one scaled RGB-D-coord sample. If the image fits in a
    # single crop_size window it is evaluated in one shot; otherwise a
    # sliding-window pass accumulates per-window scores over the padded image.
    # Returns an HxWxC numpy score map resized back to ori_shape.
    new_rows, new_cols, c = img.shape
    long_size = new_cols if new_cols > new_rows else new_rows
    if long_size <= crop_size:
        # Single-window path: pad to crop_size, evaluate once, then strip
        # the padding margins from the score map.
        input_data, input_hha, input_depth, input_coord, margin = self.process_image_rgbd_coord(img, hha, depth, coord, crop_size)
        score = self.val_func_process_rgbd_coord(input_data, input_hha, input_depth, input_coord, camera_params, device)
        score = score[:, margin[0]:(score.shape[1] - margin[1]), margin[2]:(score.shape[2] - margin[3])]
    else:
        # Sliding-window path over the padded image.
        stride = int(np.ceil(crop_size * stride_rate))
        # All four modalities share the same spatial shape, so the margin
        # from any of these pad calls applies to all of them.
        img_pad, margin = pad_image_to_shape(img, crop_size, cv2.BORDER_CONSTANT, value=0)
        hha_pad, margin = pad_image_to_shape(hha, crop_size, cv2.BORDER_CONSTANT, value=0)
        depth_pad, margin = pad_image_to_shape(depth, crop_size, cv2.BORDER_CONSTANT, value=0)
        coord_pad, margin = pad_image_to_shape(coord, crop_size, cv2.BORDER_CONSTANT, value=0)
        pad_rows = img_pad.shape[0]
        pad_cols = img_pad.shape[1]
        # Number of window positions along each axis (last window is
        # re-anchored below so it never runs past the padded image).
        r_grid = int(np.ceil((pad_rows - crop_size) / stride)) + 1
        c_grid = int(np.ceil((pad_cols - crop_size) / stride)) + 1
        data_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(
            device)
        count_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(
            device)
        for grid_yidx in range(r_grid):
            for grid_xidx in range(c_grid):
                s_x = grid_xidx * stride
                s_y = grid_yidx * stride
                # Clamp the window end to the padded image, then pull the
                # start back so every window is exactly crop_size square.
                e_x = min(s_x + crop_size, pad_cols)
                e_y = min(s_y + crop_size, pad_rows)
                s_x = e_x - crop_size
                s_y = e_y - crop_size
                img_sub = img_pad[s_y:e_y, s_x: e_x, :]
                hha_sub = hha_pad[s_y:e_y, s_x: e_x, :]
                depth_sub = depth_pad[s_y:e_y, s_x: e_x]
                coord_sub = coord_pad[s_y:e_y, s_x: e_x]
                count_scale[:, s_y: e_y, s_x: e_x] += 1
                input_data, input_hha, input_depth, input_coord, tmargin = self.process_image_rgbd_coord(img_sub, hha_sub, depth_sub, coord_sub, crop_size)
                temp_score = self.val_func_process_rgbd_coord(input_data, input_hha, input_depth, input_coord, camera_params, device)
                # Strip any per-window padding before accumulating.
                temp_score = temp_score[:, tmargin[0]:(temp_score.shape[1] - tmargin[1]), tmargin[2]:(temp_score.shape[2] - tmargin[3])]
                data_scale[:, s_y: e_y, s_x: e_x] += temp_score
        # NOTE(review): overlap averaging is deliberately disabled — overlapping
        # window scores are summed, not divided by count_scale.
        score = data_scale #/ count_scale
        score = score[:, margin[0]:(score.shape[1] - margin[1]), margin[2]:(score.shape[2] - margin[3])]
    # CHW -> HWC for cv2.resize, then scale back to the original image shape.
    score = score.permute(1, 2, 0)
    data_output = cv2.resize(score.cpu().numpy(), (ori_shape[1], ori_shape[0]), interpolation=cv2.INTER_LINEAR)
    return data_output
def process_image_rgbd(self, img, disp, crop_size=None):
    """Normalize an image and its disparity/HHA companion, returning CHW arrays.

    Args:
        img: HxWxC image; a single-channel image is replicated to 3 channels.
        disp: HxW single-channel disparity map, or an HxWx3 (HHA-style) map.
        crop_size: optional target shape; when given, both inputs are
            zero-padded to it and the image padding margin is returned.

    Returns:
        (p_img, p_disp, margin) when crop_size is given,
        otherwise (p_img, p_disp).
    """
    from utils.img_utils import pad_image_to_shape
    from dataloader import normalize

    p_img = img
    p_disp = disp

    # Replicate a single channel to three so the per-channel mean/std
    # normalization below applies uniformly.
    if img.shape[2] < 3:
        im_b = p_img
        im_g = p_img
        im_r = p_img
        p_img = np.concatenate((im_b, im_g, im_r), axis=2)

    p_img = normalize(p_img, self.image_mean, self.image_std)
    # Single-channel disparity is scaled as a plain [0, 1]-style map; a
    # 3-channel companion is normalized with the image statistics.
    if len(disp.shape) == 2:
        p_disp = normalize(p_disp, 0, 1)
    else:
        p_disp = normalize(p_disp, self.image_mean, self.image_std)

    margin = None
    if crop_size is not None:
        p_img, margin = pad_image_to_shape(p_img, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_disp, _ = pad_image_to_shape(p_disp, crop_size,
                                       cv2.BORDER_CONSTANT, value=0)

    # HWC -> CHW; this conversion was duplicated in both branches of the
    # original and is hoisted here once, preserving behavior.
    p_img = p_img.transpose(2, 0, 1)
    if len(disp.shape) == 2:
        p_disp = p_disp[np.newaxis, ...]
    else:
        p_disp = p_disp.transpose(2, 0, 1)

    if crop_size is not None:
        return p_img, p_disp, margin
    return p_img, p_disp
def scale_rectangular_process(self, img, ori_shape, crop_height, crop_width, stride_rate, device=None):
    """Sliding-window inference with a rectangular crop_height x crop_width window.

    Args:
        img: HxWxC image, already resized to the evaluation scale; must be at
            least crop_height x crop_width (hard exit otherwise).
        ori_shape: (rows, cols) of the original image; the score map is
            resized back to this shape.
        crop_height, crop_width: window height and width in pixels.
        stride_rate: window stride as a fraction of the window size.
        device: CUDA device for the accumulation buffers.

    Returns:
        HxWxC numpy score map resized to ori_shape.
    """
    new_rows, new_cols, c = img.shape
    long_size = new_cols if new_cols > new_rows else new_rows
    if new_cols < crop_width or new_rows < crop_height:
        print('ERROR: img is smaller than crop__size')
        exit(1)
    else:
        stride_h = int(np.ceil(crop_height * stride_rate))
        stride_w = int(np.ceil(crop_width * stride_rate))
        # BUG FIX: the original passed the SET literal {crop_height, crop_width}
        # as the target shape. Sets are unordered (height/width could swap) and
        # collapse to a single element when the two are equal; the shape must
        # be the ordered (rows, cols) tuple.
        img_pad, margin = pad_image_to_shape(img, (crop_height, crop_width),
                                             cv2.BORDER_CONSTANT, value=0)
        pad_rows = img_pad.shape[0]   # height
        pad_cols = img_pad.shape[1]   # width
        # Number of window positions along each axis (last window is
        # re-anchored below so it never runs past the padded image).
        r_grid = int(np.ceil((pad_rows - crop_height) / stride_h)) + 1
        c_grid = int(np.ceil((pad_cols - crop_width) / stride_w)) + 1
        data_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(device)
        count_scale = torch.zeros(self.class_num, pad_rows, pad_cols).cuda(device)
        for grid_yidx in range(r_grid):
            for grid_xidx in range(c_grid):
                s_x = grid_xidx * stride_w
                s_y = grid_yidx * stride_h
                # Clamp the window end to the padded image, then pull the
                # start back so every window is exactly the crop shape.
                e_x = min(s_x + crop_width, pad_cols)
                e_y = min(s_y + crop_height, pad_rows)
                s_x = e_x - crop_width
                s_y = e_y - crop_height
                img_sub = img_pad[s_y:e_y, s_x: e_x, :]
                count_scale[:, s_y: e_y, s_x: e_x] += 1
                # Same set -> tuple fix as above.
                input_data, tmargin = self.process_image(img_sub, (crop_height, crop_width))
                temp_score = self.val_func_process(input_data, device)
                # Strip any per-window padding before accumulating.
                temp_score = temp_score[:, tmargin[0]:(temp_score.shape[1] - tmargin[1]),
                                        tmargin[2]:(temp_score.shape[2] - tmargin[3])]
                data_scale[:, s_y: e_y, s_x: e_x] += temp_score
        # NOTE(review): overlap averaging (data_scale / count_scale) is
        # deliberately disabled — overlapping window scores are summed.
        score = data_scale
        score = score[:, margin[0]:(score.shape[1] - margin[1]),
                      margin[2]:(score.shape[2] - margin[3])]
        # CHW -> HWC for cv2.resize, then scale back to the original shape.
        score = score.permute(1, 2, 0)
        data_output = cv2.resize(score.cpu().numpy(),
                                 (ori_shape[1], ori_shape[0]),
                                 interpolation=cv2.INTER_LINEAR)
        return data_output
def process_image(self, img, crop_size=None):
    """Normalize an image and convert it to CHW layout.

    A single-channel input is replicated to three channels before the
    per-channel normalization. When crop_size is given the image is
    zero-padded to that shape and the padding margin is returned as well.
    """
    prepared = img
    if prepared.shape[2] < 3:
        # Stack the lone channel three times so per-channel stats apply.
        prepared = np.concatenate((prepared, prepared, prepared), axis=2)

    prepared = normalize(prepared, self.image_mean, self.image_std)

    if crop_size is None:
        return prepared.transpose(2, 0, 1)

    prepared, pad_margin = pad_image_to_shape(prepared, crop_size,
                                              cv2.BORDER_CONSTANT, value=0)
    return prepared.transpose(2, 0, 1), pad_margin
def scale_process(val_func, class_num, img_scale, ori_shape, img_means, img_std,
                  crop_size, is_flip=False, device=None, stride_rate=2 / 3):
    """Run (optionally sliding-window) inference on one scaled image.

    Args:
        val_func: model callable forwarded to val_func_process.
        class_num: number of classes (channel count of the score map).
        img_scale: HxWxC image already resized to the evaluation scale.
        ori_shape: (rows, cols) of the original image; the score map is
            resized back to this shape.
        img_means, img_std: per-channel normalization statistics for pre_img.
        crop_size: square window size used for padding / sliding windows.
        is_flip: forwarded to val_func_process for flip augmentation.
        device: CUDA device for the accumulation buffers.
        stride_rate: window stride as a fraction of crop_size. Defaults to
            2/3, the value previously hard-coded; parameterized for
            consistency with the sibling scale_* helpers.

    Returns:
        HxWxC numpy score map resized to ori_shape.
    """
    new_rows, new_cols, c = img_scale.shape
    long_size = new_cols if new_cols > new_rows else new_rows

    if long_size <= crop_size:
        # Image fits in a single window: pad, evaluate once, strip padding.
        input_data, margin = pre_img(img_scale, img_means, img_std, crop_size)
        score = val_func_process(val_func, input_data, is_flip, device)
        score = score[:, margin[0]:(score.shape[1] - margin[1]),
                      margin[2]:(score.shape[2] - margin[3])]
    else:
        stride = int(np.ceil(crop_size * stride_rate))
        img_pad, margin = pad_image_to_shape(img_scale, crop_size,
                                             cv2.BORDER_CONSTANT, value=0)
        pad_rows = img_pad.shape[0]
        pad_cols = img_pad.shape[1]
        # Number of window positions along each axis (last window is
        # re-anchored below so it never runs past the padded image).
        r_grid = int(np.ceil((pad_rows - crop_size) / stride)) + 1
        c_grid = int(np.ceil((pad_cols - crop_size) / stride)) + 1
        data_scale = torch.zeros(class_num, pad_rows, pad_cols).cuda(device)
        count_scale = torch.zeros(class_num, pad_rows, pad_cols).cuda(device)
        for grid_yidx in range(r_grid):
            for grid_xidx in range(c_grid):
                s_x = grid_xidx * stride
                s_y = grid_yidx * stride
                # Clamp the window end to the padded image, then pull the
                # start back so every window is exactly crop_size square.
                e_x = min(s_x + crop_size, pad_cols)
                e_y = min(s_y + crop_size, pad_rows)
                s_x = e_x - crop_size
                s_y = e_y - crop_size
                img_sub = img_pad[s_y:e_y, s_x:e_x, :]
                count_scale[:, s_y:e_y, s_x:e_x] += 1
                input_data, tmargin = pre_img(img_sub, img_means, img_std, crop_size)
                temp_score = val_func_process(val_func, input_data, is_flip, device)
                # Strip any per-window padding before accumulating.
                temp_score = temp_score[:, tmargin[0]:(temp_score.shape[1] - tmargin[1]),
                                        tmargin[2]:(temp_score.shape[2] - tmargin[3])]
                data_scale[:, s_y:e_y, s_x:e_x] += temp_score
        # NOTE(review): overlap averaging (data_scale / count_scale) is
        # deliberately disabled — overlapping window scores are summed.
        score = data_scale
        score = score[:, margin[0]:(score.shape[1] - margin[1]),
                      margin[2]:(score.shape[2] - margin[3])]

    # CHW -> HWC for cv2.resize, then scale back to the original shape.
    score = score.permute(1, 2, 0)
    data_output = cv2.resize(score.cpu().numpy(), (ori_shape[1], ori_shape[0]),
                             interpolation=cv2.INTER_LINEAR)
    return data_output