def __getitem__(self, idx):
    """Return a frame and the optical flow from it to a neighbouring frame.

    The neighbour is the next frame for ``idx == 0``, the previous frame
    for the last index, and otherwise the previous or next frame chosen
    at random with equal probability.

    Returns:
        A pair ``(img_to_tensor(frame), img_to_tensor(flow / self.scale))``.
    """
    names = self.image_file_names
    frame_path = str(names[idx])

    if idx == 0:
        neighbour_idx = idx + 1
    elif idx == len(names) - 1:
        neighbour_idx = idx - 1
    else:
        # Interior frame: coin flip between the two temporal neighbours.
        coin = random.uniform(0, 1)
        neighbour_idx = idx - 1 if coin <= 0.5 else idx + 1
    neighbour_path = str(names[neighbour_idx])

    frame = cv2.imread(frame_path)
    neighbour = cv2.imread(neighbour_path)

    ## The position of a flow vector is based on the grid of the first image
    flow = optical_flow_estimate(frame, neighbour)

    if self.to_augment:
        # The flow is passed as "mask" so it receives the same transform
        # as the image.
        augmented = self.transform(image=frame, mask=flow)
        frame = augmented["image"]
        flow = augmented["mask"]

    return img_to_tensor(frame), img_to_tensor(flow / self.scale)
def __getitem__(self, idx):
    """Return a 3-channel grayscale version of an image and the image itself.

    The loaded image serves as the target; the input is its grayscale
    conversion replicated across three channels.

    Returns:
        A pair ``(img_to_tensor(gray_3ch), img_to_tensor(original))``.
    """
    path = str(self.image_file_names[idx])
    target = load_image(path)

    gray = cv2.cvtColor(target, cv2.COLOR_RGB2GRAY)
    # Add a trailing channel axis and replicate it three times.
    source = np.repeat(gray[..., np.newaxis], 3, axis=-1)

    if self.to_augment:
        source, target = self.transform(source, target)

    return img_to_tensor(source), img_to_tensor(target)
def __getitem__(self, idx):
    """Return one view of a stereo pair and the flow between the two views.

    The right-frame path is derived from the left one by replacing
    ``"left_frames"`` with ``"right_frames"``. With probability ``self.p``
    the left view is returned with ``optical_flow_estimate(right, left)``;
    otherwise the right view is returned with
    ``optical_flow_estimate(left, right)``.

    Returns:
        A pair ``(img_to_tensor(view), img_to_tensor(flow / self.scale))``.
    """
    left_path = str(self.image_file_names[idx])
    right_path = left_path.replace("left_frames", "right_frames")

    left_view = cv2.imread(left_path)
    right_view = cv2.imread(right_path)

    # NOTE(review): the returned view is the *second* argument of
    # optical_flow_estimate here; confirm this matches the helper's
    # reference-grid convention.
    if random.uniform(0, 1) <= self.p:
        flow = optical_flow_estimate(right_view, left_view)
        view = left_view
    else:
        flow = optical_flow_estimate(left_view, right_view)
        view = right_view

    if self.to_augment:
        augmented = self.transform(image=view, mask=flow)
        view, flow = augmented["image"], augmented["mask"]

    return img_to_tensor(view), img_to_tensor(flow / self.scale)
def __getitem__(self, idx):
    """Load, transform and return one image sample.

    Returns:
        In ``'train'`` mode, ``(image_tensor, mask_tensor)`` where the mask
        gets a leading axis and is cast to float. In any other mode,
        ``(image_tensor, file_name_as_str)``.
    """
    path = self.file_names[idx]
    raw_image = load_image(path)
    raw_mask = load_mask(path)

    # The mask is loaded and transformed in every mode, not only in training.
    augmented = self.transform(image=raw_image, mask=raw_mask)
    image, mask = augmented["image"], augmented["mask"]

    if self.mode != 'train':
        return img_to_tensor(image), str(path)

    mask_tensor = torch.from_numpy(np.expand_dims(mask, 0)).float()
    return img_to_tensor(image), mask_tensor
def __getitem__(self, idx):
    """Load, transform and return one image sample with a one-hot mask.

    Returns:
        In ``'train'`` mode, ``(image_tensor, one_hot_mask_long, file_name)``.
        In any other mode, ``(image_tensor, file_name)``.
    """
    path = self.file_names[idx]
    raw_image = load_image(path)
    raw_mask = load_mask(path)

    augmented = self.transform(image=raw_image, mask=raw_mask)
    image = augmented["image"]
    # One-hot encoding runs in every mode, even when the mask is not returned.
    one_hot = make_one_hot(augmented["mask"])

    name = str(path)
    if self.mode != 'train':
        return img_to_tensor(image), name
    return img_to_tensor(image), torch.from_numpy(one_hot).long(), name
def __getitem__(self, idx):
    """Return an image stacked with its extra channels, plus mask or name.

    The first ``self.num_channels`` channels come from
    ``self.load_image_extra``; the remaining ``self.extra.shape[3]``
    channels come from ``self.extra``.

    Returns:
        ``(image_tensor, mask_tensor)`` when ``self.mode != 'test'``,
        otherwise ``(image_tensor, image_file_name)``.
    """
    image_id = self.local_ids[idx]
    base_channels = self.num_channels
    total_channels = base_channels + self.extra.shape[3]

    base = self.load_image_extra(image_id)
    extra = self.extra[self.real_idx[image_id]]

    # The buffer is hard-coded to 101x101 spatial size.
    stacked = np.zeros((101, 101, total_channels), dtype=np.float32)
    stacked[..., :base_channels] = base[..., :base_channels]
    stacked[..., base_channels:] = extra

    data = {'image': stacked}
    is_test = self.mode == 'test'
    if not is_test:
        data['mask'] = self.load_mask(image_id)

    augmented = self.transform(**data)
    image_tensor = img_to_tensor(augmented['image']).reshape(
        total_channels, SIZE, SIZE)

    if is_test:
        return image_tensor, self.get_image_fname(image_id)

    mask_tensor = torch.from_numpy(augmented['mask']).reshape(
        1, SIZE, SIZE).float()
    return image_tensor, mask_tensor
def __getitem__(self, idx):
    """Return the augmented test image tensor and its image id."""
    image_id = self.image_ids[idx]
    rgb = get_image(image_id, basepath=self.basepath, rgbdir='test_rgb')
    assert rgb is not None

    transformed = self.aug(image=rgb)['image']
    return img_to_tensor(transformed), image_id
def __getitem__(self, idx):
    """Return student/teacher inputs, a labeled flag and a mask tensor.

    Indices below ``len(self.local_ids)`` are treated as labeled; any other
    index gets an all-zero ``(SIZE, SIZE)`` placeholder mask.

    Returns:
        ``((image_tensor, teacher_tensor, int(is_labeled)), mask_tensor)``.
    """
    is_labeled = idx < len(self.local_ids)
    image_id = self.get_image_id(idx)
    file_name = self.get_image_fname(image_id, is_labeled)

    data = {'image': self.load_image(file_name)}
    if is_labeled:
        data['mask'] = self.load_mask(image_id)
    else:
        data['mask'] = np.zeros((SIZE, SIZE), dtype=np.uint8)

    augmented = self.transform(**data)
    transformed = augmented['image']

    image_tensor = img_to_tensor(transformed).reshape(3, SIZE, SIZE)
    # TODO: add noise to teacher input
    teacher_tensor = img_to_tensor(transformed).reshape(3, SIZE, SIZE)

    inputs = (image_tensor, teacher_tensor, int(is_labeled))
    target = torch.from_numpy(augmented['mask']).reshape(1, SIZE, SIZE).float()
    return inputs, target
def __getitem__(self, idx):
    """Return an image tensor and its label mask as a long tensor.

    The mask path is the image path with ``"left_frames"`` replaced by
    ``"labels"``; the mask is loaded using ``self.class_color_table``.
    """
    image_path = str(self.image_file_names[idx])
    label_path = image_path.replace("left_frames", "labels")

    image = load_image(image_path)
    mask = load_mask(label_path, self.class_color_table)

    if self.to_augment:
        augmented = self.transform(image=image, mask=mask)
        image = augmented["image"]
        mask = augmented["mask"]

    return img_to_tensor(image), torch.from_numpy(mask).long()
def __getitem__(self, idx):
    """Return a frame and the optical flow from it to a temporal neighbour.

    The neighbour is normally the next frame. The previous frame is used
    instead for the last dataset index, or when the next file name carries
    frame number ``"000"`` (taken to mark the start of another sequence).
    With probability ``self.p`` both file names are switched from
    ``"left_frames"`` to ``"right_frames"``. The two frames are resized to
    ``(self.img_height, self.img_width)`` when needed, and the direction of
    the flow (and hence which frame is returned) is chosen at random.

    Returns:
        A pair ``(img_to_tensor(frame), img_to_tensor(flow / self.scale))``
        where ``frame`` is the first argument of ``optical_flow_estimate``.

    Raises:
        FileNotFoundError: if either frame cannot be read by ``cv2.imread``.
    """
    frame_name_1 = str(self.image_file_names[idx])

    if idx < self.dataset_length - 1:
        frame_name_2 = str(self.image_file_names[idx + 1])
        ## We need to handle the boundary case where names from two
        ## different sequences meet: if the *next* file is frame "000",
        ## it starts a new sequence, so pair with the previous frame.
        ext_pos = frame_name_2.find(".png")
        if frame_name_2[ext_pos - 3:ext_pos] == "000":
            frame_name_2 = str(self.image_file_names[idx - 1])
    else:
        frame_name_2 = str(self.image_file_names[idx - 1])

    # Double the amount of data for training.
    # str.replace returns a new string, so the result must be re-bound.
    if random.uniform(0, 1) <= self.p:
        frame_name_1 = frame_name_1.replace("left_frames", "right_frames")
        frame_name_2 = frame_name_2.replace("left_frames", "right_frames")

    image_1 = cv2.imread(frame_name_1)
    image_2 = cv2.imread(frame_name_2)
    if image_1 is None or image_2 is None:
        # cv2.imread signals failure by returning None rather than raising.
        missing = frame_name_1 if image_1 is None else frame_name_2
        raise FileNotFoundError(f"Could not read image: {missing}")

    # Check both dimensions of each image independently.
    expected_shape = (self.img_height, self.img_width)
    target_size = (self.img_width, self.img_height)  # cv2 uses (w, h)
    if image_1.shape[:2] != expected_shape:
        image_1 = cv2.resize(image_1, dsize=target_size)
    if image_2.shape[:2] != expected_shape:
        image_2 = cv2.resize(image_2, dsize=target_size)

    ## Change the order for calculating optical flows
    if random.uniform(0, 1) <= 0.5:
        source, target = image_1, image_2
    else:
        source, target = image_2, image_1

    flow = optical_flow_estimate(source, target)
    if self.to_augment:
        source, flow = self.transform(source, flow)
    return img_to_tensor(source), img_to_tensor(flow / self.scale)
def __getitem__(self, i):
    """Return a normalized image tensor, with a mask in train/val modes.

    In ``'train'`` and ``'val'`` modes the image and mask go through
    ``self.transforms.augment()`` when ``self.augment`` is set, otherwise
    through a 256x256 ``RandomCrop``.

    Returns:
        ``(image_tensor, mask_tensor)`` in ``'train'``/``'val'`` mode,
        otherwise just ``image_tensor``.
    """
    img = self.image_loader(self.img_files[i])

    if self.mode not in ['train', 'val']:
        # No mask in other modes: only convert and normalize the image.
        tensor = img_to_tensor(img)
        return self.transforms.normalize(tensor)

    mask = self.mask_loader(self.mask_files[i])
    sample = {"image": img, "mask": mask}

    if self.augment:
        pipeline = self.transforms.augment()
    else:
        pipeline = RandomCrop(256, 256, p=1)
    augmented = pipeline(**sample)
    img, mask = augmented["image"], augmented["mask"]

    tensor = self.transforms.normalize(img_to_tensor(img))
    mask_tensor = torch.from_numpy(np.expand_dims(mask, 0)).float()
    return tensor, mask_tensor
def __getitem__(self, idx):
    """Return image tensor, binary mask, raw label mask and the image id.

    The mask file is located from the last two ``'_'``-separated parts of
    the image id.

    Returns:
        ``(image_tensor, mask_, label_, imageid)`` where ``mask_`` is the
        augmented mask thresholded at ``> 0`` and ``label_`` is the
        augmented mask itself; both get a leading axis and are cast to float.
    """
    imageid = self.image_ids[idx]

    rgb = get_image(imageid, basepath=self.basepath, rgbdir='train_rgb')
    assert rgb is not None

    locid = '_'.join(imageid.split('_')[-2:])
    raw_mask = cv2.imread(f'{self.basepath}/masks/mask_{locid}.tif',
                          cv2.IMREAD_GRAYSCALE)
    assert raw_mask is not None

    augmented = self.aug(image=rgb, mask=raw_mask)
    aug_mask = augmented['mask']

    binary = (aug_mask > 0).astype(np.uint8)
    mask_ = torch.from_numpy(np.expand_dims(binary, 0)).float()
    label_ = torch.from_numpy(np.expand_dims(aug_mask, 0)).float()

    return (img_to_tensor(augmented['image']), mask_, label_, imageid)
def __getitem__(self, idx):
    """Return one sample; the result depends on ``self.mode``.

    Both ``random`` and ``numpy.random`` are reseeded on every call with
    values drawn from ``random.randint(0, 666)``.

    Returns:
        ``self.test(image, file_name)`` in ``'test'`` mode;
        ``(image_tensor_with_depth_channels, mask_tensor)`` in ``'train'``
        mode; ``None`` for any other mode.
    """
    random.seed(random.randint(0, 666))
    np.random.seed(random.randint(0, 666))

    path = self.file_names[idx]
    image = load_image(path, self.mode)

    if self.mode == 'test':
        return self.test(image, path)

    mask = load_mask(path)
    augmented = self.transform(image=image, mask=mask)
    image, mask = augmented["image"], augmented["mask"]

    if self.mode != 'train':
        # NOTE(review): modes other than 'train'/'test' yield None —
        # confirm this is intended.
        return None

    image_tensor = add_depth_channels(img_to_tensor(image))
    mask_tensor = torch.from_numpy(np.expand_dims(mask, 0)).float()
    return image_tensor, mask_tensor
def __getitem__(self, idx):
    """Return an image tensor plus its mask, or its file name in test mode.

    Returns:
        ``(image_tensor, mask_tensor)`` when ``self.mode != 'test'``,
        otherwise ``(image_tensor, image_file_name)``.
    """
    image_id = self.local_ids[idx]
    is_test = self.mode == 'test'

    data = {'image': self.load_image_extra(image_id)}
    if not is_test:
        data['mask'] = self.load_mask(image_id)

    augmented = self.transform(**data)
    image_tensor = img_to_tensor(augmented['image']).reshape(
        self.num_channels, SIZE, SIZE)

    if is_test:
        return image_tensor, self.get_image_fname(image_id)

    mask_tensor = torch.from_numpy(augmented['mask']).reshape(
        1, SIZE, SIZE).float()
    return image_tensor, mask_tensor
def mask_overlay(image, mask, color=(0, 255, 0)):
    """Blend a single-channel mask onto an image for visualization.

    Args:
        image: 3-channel image to draw on; it is not modified.
        mask: 2-D array whose non-zero entries mark the region to
            highlight (presumably 0/1 valued — confirm with callers).
        color: per-channel colour the mask is multiplied by.

    Returns:
        A copy of ``image`` where the masked pixels are replaced by a
        50/50 blend of the coloured mask and the image.
    """
    colored = np.dstack((mask, mask, mask)) * np.array(color)
    colored = colored.astype(np.uint8)
    blended = cv2.addWeighted(colored, 0.5, image, 0.5, 0.)

    overlaid = image.copy()
    # Select pixels where any channel of the coloured mask is non-zero, so
    # colours without a green component (e.g. pure red) are drawn as well.
    covered = colored.any(axis=2)
    overlaid[covered] = blended[covered]
    return overlaid


# Demo: run a binary UNet11 model on one frame and show the prediction.
model_path = 'data/models/unet11_binary_20/model_0.pt'
model = get_model(model_path, model_type='UNet11', problem_type='binary')

img_file_name = 'data/cropped_train/instrument_dataset_3/images/frame004.jpg'
gt_file_name = 'data/cropped_train/instrument_dataset_3/binary_masks/frame004.png'

image = load_image(img_file_name)
gt = cv2.imread(gt_file_name, 0) > 0
imshow(image)

with torch.no_grad():
    input_image = torch.unsqueeze(
        img_to_tensor(img_transform(p=1)(image=image)['image']).cuda(), dim=0)
    mask = model(input_image)

# First sample, first channel of the model output.
mask_array = mask.data[0].cpu().numpy()[0]
imshow(mask_array > 0)
imshow(mask_overlay(image, (mask_array > 0).astype(np.uint8)))
def test(self, image, img_file_name):
    """Transform a test image and return it with its file name.

    Returns:
        ``(add_depth_channels(image_tensor), str(img_file_name))``.
    """
    transformed = self.transform(image=image)["image"]
    tensor = add_depth_channels(img_to_tensor(transformed))
    return tensor, str(img_file_name)
import random
from pathlib import Path

import cv2
import numpy as np
import torch
from albumentations.torch.functional import img_to_tensor

# Scratch script: sample 512 random pixels from one frame and compute the
# pairwise ratio of inner products to norm products between them.
image_path = Path(
    "G:/Johns Hopkins University/Challenge/davinci_surgical_video/video_1")
image = cv2.imread(str(image_path / "frame_00000.png"))

tensor = img_to_tensor(image)
print(tensor.shape)

# Flatten the spatial dimensions: one column per pixel, one row per channel.
vector = tensor.view(3, -1)

## Spatially random sampling (512 samples)
pixel_count = vector.shape[1]
random_locations = np.array(random.sample(range(pixel_count), 512))
location_index = torch.from_numpy(random_locations).long()
sample = torch.index_select(vector, 1, location_index)
print(sample)

# Per-pixel L2 norm over the channel axis.
sample_norm = torch.norm(sample, p=2, dim=0, keepdim=True)
print(sample_norm.shape)

# epsilon is added to numerator and denominator alike.
epsilon = 1.0e-15
inner_products = torch.mm(sample.permute(1, 0), sample)
norm_products = torch.mm(sample_norm.permute(1, 0), sample_norm)
sample_kernel_theta = (epsilon + inner_products) / (epsilon + norm_products)
print(sample_kernel_theta)