def op_script(data: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    return kornia.normalize(data, mean, std)

data = torch.ones(2, 3, 1, 1).to(device)
data += 2
mean = torch.tensor([0.5, 1.0, 2.0]).repeat(2, 1).to(device)
std = torch.tensor([2.0, 2.0, 2.0]).repeat(2, 1).to(device)

actual = op_script(data, mean, std)
expected = kornia.normalize(data, mean, std)
assert_allclose(actual, expected)
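# Hedged sketch: the op_script wrapper above reads like a TorchScript
# compatibility check. Assuming kornia.normalize is scriptable, the wrapper
# would typically be compiled and compared against the eager result like so:
scripted_op = torch.jit.script(op_script)
assert_allclose(scripted_op(data, mean, std), expected)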
def forward(self, input: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]:
    '''
    Forward pass of the model
    :param input: (torch.Tensor) Input tensor of shape (batch size, channels, height, width)
    :return: (List[torch.Tensor]) List of intermediate features in ascending order w.r.t. the VGG layer
        depth, or the final output tensor if self.return_output is set
    '''
    # Convert grayscale to RGB if needed
    if input.shape[1] == 1:
        output = input.repeat_interleave(3, dim=1)
    else:
        output = input
    # Normalize input with the ImageNet statistics expected by the pretrained VGG16
    output = kornia.normalize(output,
                              mean=torch.tensor([0.485, 0.456, 0.406], device=output.device),
                              std=torch.tensor([0.229, 0.224, 0.225], device=output.device))
    # Init list for features
    features = []
    # Feature path: collect the activation after each max-pooling stage
    for layer in self.vgg16.features:
        output = layer(output)
        if isinstance(layer, nn.MaxPool2d):
            features.append(output)
    # Average pool operation
    output = self.vgg16.avgpool(output)
    # Flatten tensor
    output = output.flatten(start_dim=1)
    # Classification path: collect the two fully-connected activations
    for index, layer in enumerate(self.vgg16.classifier):
        output = layer(output)
        if index == 3 or index == 6:
            features.append(output)
    if self.return_output:
        return output
    return features
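# Hedged usage sketch for the forward() above; `extractor` is an assumed
# instance of the surrounding VGG16 wrapper with return_output=False. A
# single-channel batch is accepted and converted to RGB internally.
grays = torch.rand(4, 1, 224, 224)
features = extractor(grays)
for feature in features:
    print(feature.shape)  # five pooled conv maps, then two FC activations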
def test_single_value(self, device):
    # prepare input data
    mean = torch.tensor(2).to(device)
    std = torch.tensor(3).to(device)
    data = torch.ones(2, 3, 256, 313).to(device)

    # expected output
    expected = (data - mean) / std

    assert_allclose(kornia.normalize(data, mean, std), expected)
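# Hedged sketch of the scalar broadcast exercised by the test above: a 0-dim
# mean/std applies the same affine transform to every channel, so the call
# below is elementwise equivalent to (data - 2) / 3.
data = torch.full((2, 3, 4, 4), 5.0, device=device)
out = kornia.normalize(data, torch.tensor(2.0, device=device), torch.tensor(3.0, device=device))
assert torch.allclose(out, torch.ones_like(data))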
def _get_transformed_images(images, hflip):
    images_transformed = images
    if hflip:
        images_transformed = K.hflip(images_transformed)
    # Normalize from [0, 1] to [-1, 1]
    images_transformed = K.normalize(images_transformed, 0.5, 0.5)
    return images_transformed
def _get_transformed_frames(frames, hflip):
    frames_transformed = frames
    if hflip:
        frames_transformed = K.hflip(frames_transformed)
    # Normalize from [0, 1] to [-1, 1]
    frames_transformed = K.normalize(frames_transformed, 0.5, 0.5)
    # Permute from (T, C, H, W) to (C, T, H, W)
    frames_transformed = frames_transformed.permute(1, 0, 2, 3)
    return frames_transformed
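# Hedged usage sketch covering both helpers above (assumption: frames arrive
# as a (T, C, H, W) clip with values in [0, 1]).
frames = torch.rand(16, 3, 64, 64)
clip = _get_transformed_frames(frames, hflip=True)
assert clip.shape == (3, 16, 64, 64)  # channels-first (C, T, H, W) after the permute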
def closure():
    nonlocal global_step
    global_step += 1
    if torch.is_grad_enabled():
        optimizer.zero_grad()

    # Render the model and standardize the grayscale rendering
    _, _, model_images = flame()
    model_grays = rgb_to_grayscale(model_images)
    model_grays = normalize(model_grays, model_grays.mean(), model_grays.std())
    model_heatmaps = estimator(model_grays)[-1]

    # loss_landmarks = 50 * criterion_landmarks(get_target_landmarks(target_landmarks), original_landmarks)
    # loss = loss_landmarks
    # log('loss_landmarks', loss_landmarks.item(), global_step)

    loss_simple = criterion_simple(model_heatmaps, heatmaps)
    loss = loss_simple
    log('loss_simple', loss_simple.item(), global_step)

    model_heatmaps_gray = get_heatmap_gray(model_heatmaps)
    if criterion_gp is not None:
        loss_gp = arg.loss_gp_lambda * criterion_gp(model_heatmaps_gray, heatmaps_gray) + 100
        loss = loss + loss_gp
        log('loss_gp', loss_gp.item(), global_step)
    log('loss', loss.item(), global_step)

    if loss.requires_grad:
        loss.backward(retain_graph=True)

    log_img('original',
            derescale_0_1(heatmaps_gray[0].unsqueeze(0), 0, 255).detach().to(dtype=torch.uint8),
            global_step)
    log_img('rendered',
            derescale_0_1(model_images, 0, 255)[0].detach().to(dtype=torch.uint8),
            global_step)
    log_img('target',
            derescale_0_1(model_heatmaps_gray[0].unsqueeze(0), 0, 255).detach().to(dtype=torch.uint8),
            global_step)
    return loss
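# Hedged usage sketch: a closure that re-evaluates the loss like the one above
# is the calling convention of torch.optim.LBFGS (assumption: `flame` is an
# nn.Module whose parameters are being fitted; lr and step count are illustrative).
optimizer = torch.optim.LBFGS(flame.parameters(), lr=0.1)
for _ in range(100):
    optimizer.step(closure)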
def preprocess(self, images):
    """
    Preprocess images

    Args:
        images: (N, 3, H, W), Input images

    Returns:
        x: (N, 3, H, W), Preprocessed images
    """
    x = images
    if self.pretrained:
        # Create a mask for padded (NaN) pixels
        mask = torch.isnan(x)
        # Match ResNet pretrained preprocessing
        x = kornia.normalize(x, mean=self.norm_mean, std=self.norm_std)
        # Zero out padded pixels
        x[mask] = 0
    return x
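# Hedged standalone sketch of the mask-then-normalize pattern above
# (norm_mean/norm_std values are illustrative ImageNet statistics).
norm_mean = torch.tensor([0.485, 0.456, 0.406])
norm_std = torch.tensor([0.229, 0.224, 0.225])
x = torch.rand(1, 3, 8, 8)
x[..., :2] = float('nan')  # pretend the two left-most columns are padding
mask = torch.isnan(x)      # remember where the padding lives
x = kornia.normalize(x, mean=norm_mean, std=norm_std)
x[mask] = 0                # padded pixels come out as exact zeros
assert not torch.isnan(x).any()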
def __getitem__(self, item):
    dataset_route, dataset, split, type, annotation, crop_size, RGB, sigma, trans_ratio, rotate_limit, \
        scale_ratio_up, scale_ratio_down, scale_horizontal, scale_vertical = \
        self.arg.dataset_route, self.dataset, self.split, self.type, \
        self.list[item], self.arg.crop_size, self.arg.RGB, self.arg.sigma, \
        self.arg.trans_ratio, self.arg.rotate_limit, self.arg.scale_ratio_up, self.arg.scale_ratio_down, \
        self.arg.scale_horizontal, self.arg.scale_vertical

    pic_orig = cv2.imread(dataset_route[dataset] + annotation[-1])
    coord_x = list(map(float, annotation[:2 * kp_num[dataset]:2]))
    coord_y = list(map(float, annotation[1:2 * kp_num[dataset]:2]))
    bbox = np.array(list(map(int, annotation[-7:-3])))

    translation, trans_dir, rotation, scaling, scaling_horizontal, scaling_vertical, flip, gaussian_blur = \
        get_random_transform_param(type, bbox, trans_ratio, rotate_limit, scale_ratio_up, scale_ratio_down,
                                   scale_horizontal, scale_vertical, flip=False, gaussian=False)

    # Expand the bounding box by the sampled scaling factors
    horizontal_add = (bbox[2] - bbox[0]) * (1 - scaling)
    vertical_add = (bbox[3] - bbox[1]) * (1 - scaling)
    bbox = np.float32([bbox[0] - horizontal_add, bbox[1] - vertical_add,
                       bbox[2] + horizontal_add, bbox[3] + vertical_add])
    horizontal_add = (bbox[2] - bbox[0]) * scaling_horizontal
    vertical_add = (bbox[3] - bbox[1]) * scaling_vertical
    bbox = np.float32([bbox[0] - horizontal_add, bbox[1] - vertical_add,
                       bbox[2] + horizontal_add, bbox[3] + vertical_add])

    # Three corner correspondences define the crop-and-translate affine transform
    position_before = np.float32([
        [int(bbox[0]) + pow(-1, trans_dir + 1) * translation,
         int(bbox[1]) + pow(-1, trans_dir // 2 + 1) * translation],
        [int(bbox[0]) + pow(-1, trans_dir + 1) * translation,
         int(bbox[3]) + pow(-1, trans_dir // 2 + 1) * translation],
        [int(bbox[2]) + pow(-1, trans_dir + 1) * translation,
         int(bbox[3]) + pow(-1, trans_dir // 2 + 1) * translation]])
    position_after = np.float32([[0, 0], [0, crop_size - 1], [crop_size - 1, crop_size - 1]])
    crop_matrix = cv2.getAffineTransform(position_before, position_after)
    # crop_matrix = np.vstack([crop_matrix, [0, 0, 1]])
    pic_affine_orig = cv2.warpAffine(pic_orig, crop_matrix, (crop_size, crop_size),
                                     borderMode=cv2.BORDER_REPLICATE)

    # width_height = (bbox[2] - bbox[0], bbox[3] - bbox[1])
    width_height = (crop_size, crop_size)
    affine_matrix = get_affine_matrix(width_height, rotation, scaling)
    # affine_matrix = np.vstack([affine_matrix, [0, 0, 1]])
    # affine_matrix = np.matmul(crop_matrix, affine_matrix)
    # TODO one transform
    pic_affine_orig = cv2.warpAffine(pic_affine_orig, affine_matrix, (crop_size, crop_size),
                                     borderMode=cv2.BORDER_REPLICATE)
    pic_affine_orig = further_transform(pic_affine_orig, bbox, flip, gaussian_blur) \
        if type in ['train'] else pic_affine_orig
    # show_img(pic_affine_orig, wait=0, keep=False)

    pic_affine_orig = bgr_to_rgb(image_to_tensor(pic_affine_orig))
    pic_affine_orig_norm = normalize(pic_affine_orig, torch.from_numpy(self.mean_color),
                                     torch.from_numpy(self.std_color))
    if not RGB:
        pic_affine = convert_img_to_gray(pic_affine_orig)
        pic_affine = normalize(pic_affine, self.mean_gray, self.std_gray)
    else:
        pic_affine = pic_affine_orig_norm

    coord_x_cropped, coord_y_cropped = get_cropped_coords(dataset, crop_matrix, coord_x, coord_y,
                                                          crop_size, flip=flip)
    gt_coords_xy = get_gt_coords(dataset, affine_matrix, coord_x_cropped, coord_y_cropped)

    return pic_affine, pic_affine_orig_norm, gt_coords_xy
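# Hedged usage sketch: a dataset exposing the __getitem__ above plugs straight
# into a DataLoader (assumption: `dataset` is an instance of the surrounding
# class; batch size and worker count are illustrative).
from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
pic_affine, pic_affine_orig_norm, gt_coords_xy = next(iter(loader))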