def test_pksampler(self):
    """Check PKSampler input validation and the p/k guarantees per batch."""
    p, k = 16, 4

    # A p larger than the number of available classes must be rejected.
    small_set = FakeData(size=100, num_classes=10, image_size=(3, 1, 1))
    small_targets = [label.item() for _, label in small_set]
    self.assertRaises(AssertionError, PKSampler, small_targets, p, k)

    # With enough classes, every batch must contain exactly p classes
    # and exactly k samples per class.
    trans = transforms.Compose([
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float),
    ])
    dataset = FakeData(size=1000, num_classes=100,
                       image_size=(3, 1, 1), transform=trans)
    targets = [label.item() for _, label in dataset]
    sampler = PKSampler(targets, p, k)
    loader = DataLoader(dataset, batch_size=p * k, sampler=sampler)

    for _, labels in loader:
        counts = defaultdict(int)
        for label in labels.tolist():
            counts[label] += 1
        # Exactly p distinct classes per batch...
        self.assertEqual(len(counts), p)
        # ...and exactly k samples from each of them.
        for count in counts.values():
            self.assertEqual(count, k)
def __init__(self, args):
    """Set up the RAFT optical-flow feature extractor from CLI arguments.

    Args:
        args: parsed argument namespace; must provide ``feature_type``,
            ``batch_size``, ``extraction_fps``, ``resize_to_smaller_edge``,
            ``side_size``, ``show_pred``, ``keep_tmp_files``,
            ``on_extraction``, ``tmp_path`` and ``output_path``.
    """
    super(ExtractRAFT, self).__init__()
    self.feature_type = args.feature_type
    self.path_list = form_list_from_user_input(args)
    self.model_path = RAFT_MODEL_PATH
    self.batch_size = args.batch_size
    self.extraction_fps = args.extraction_fps
    self.resize_to_smaller_edge = args.resize_to_smaller_edge
    self.side_size = args.side_size
    if self.side_size is not None:
        # Resize frames (by smaller or larger edge) before float conversion.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            ResizeImproved(self.side_size, self.resize_to_smaller_edge),
            transforms.PILToTensor(),
            ToFloat(),
        ])
    else:
        # No resize requested: straight frame-to-float-tensor conversion.
        self.transforms = transforms.Compose([ToTensorWithoutScaling()])
    self.show_pred = args.show_pred
    self.keep_tmp_files = args.keep_tmp_files
    # FIX: extraction_fps was redundantly assigned a second time here;
    # the duplicate assignment has been removed (value is identical).
    self.on_extraction = args.on_extraction
    self.tmp_path = os.path.join(args.tmp_path, self.feature_type)
    self.output_path = os.path.join(args.output_path, self.feature_type)
    self.progress = tqdm(total=len(self.path_list))
def _get_reflect_imgs(self, force_regen: bool = False) -> torch.Tensor:
    r"""Get reflect images with shape ``(candidate_num, C, H, W)``.

    Will generate tar file containing reflect images
    if it doesn't exist or ``force_regen == True``.

    Args:
        force_regen (bool): Whether to force regenerating tar file.
            Defaults to ``False``.

    Returns:
        torch.Tensor: Reflect images with shape ``(N, C, H, W)``.

    Raises:
        RuntimeError: If not enough images can be generated even after
            regenerating the tar file.
    """
    tar_path = os.path.join(self.voc_root, 'reflect.tar')
    if force_regen or not os.path.isfile(tar_path):
        gen_reflect_imgs(tar_path, self.voc_root,
                         num_attack=self.candidate_num)
    transform = transforms.Compose([
        # BUGFIX: ``data_shape[-2:]`` is already a [H, W] sequence; the
        # original wrapped it in another list, passing an invalid size
        # to Resize.
        transforms.Resize(self.dataset.data_shape[-2:]),
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float)])
    # FIX: use a context manager so the archive is closed even when an
    # exception is raised or we return early via the recursive retry.
    with tarfile.open(tar_path, mode='r') as tf:
        images = torch.stack(
            [transform(Image.open(tf.extractfile(member), mode='r'))
             for member in tf.getmembers()])
    if len(images) >= self.candidate_num:
        images = images[:self.candidate_num]
    elif not force_regen:
        # Archive on disk is short/stale: regenerate once and retry.
        return self._get_reflect_imgs(force_regen=True)
    else:
        raise RuntimeError('Can not generate enough images')
    return images.to(device=env['device'])
def main(args):
    """Train and evaluate an embedding network with triplet loss."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    n_labels = args.labels_per_batch
    n_samples = args.samples_per_label
    batch_size = n_labels * n_samples

    model = EmbeddingNet()
    if args.resume:
        model.load_state_dict(torch.load(args.resume))
    model.to(device)

    criterion = TripletMarginLoss(margin=args.margin)
    optimizer = Adam(model.parameters(), lr=args.lr)

    transform = transforms.Compose([
        transforms.Lambda(lambda image: image.convert("RGB")),
        transforms.Resize((224, 224)),
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float),
    ])

    # Using FMNIST to demonstrate embedding learning using triplet loss. This
    # dataset can be replaced with any classification dataset.
    train_dataset = FashionMNIST(args.dataset_dir, train=True,
                                 transform=transform, download=True)
    test_dataset = FashionMNIST(args.dataset_dir, train=False,
                                transform=transform, download=True)

    # targets[i] is the label of the i-th dataset element. PKSampler needs
    # this list to sample from exactly `labels_per_batch` classes per batch;
    # datasets such as ImageFolder expose the same format via `targets`.
    targets = train_dataset.targets.tolist()
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              sampler=PKSampler(targets, n_labels, n_samples),
                              num_workers=args.workers)
    test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size,
                             shuffle=False, num_workers=args.workers)

    for epoch in range(1, args.epochs + 1):
        print("Training...")
        train_epoch(model, optimizer, criterion, train_loader,
                    device, epoch, args.print_freq)

        print("Evaluating...")
        evaluate(model, test_loader, device)

        print("Saving...")
        save(model, epoch, args.save_dir, "ckpt.pth")
def get_transform_bit(mode: str, data_shape: list[int]) -> transforms.Compose:
    """Return the BiT-style preprocessing pipeline for *mode*.

    Crop sizes depend on whether the input area is below 96x96
    (the rule the original variable called ``hyperrule``).
    """
    small_input = data_shape[-2] * data_shape[-1] < 96 * 96
    precrop, crop = (160, 128) if small_input else (512, 480)
    if mode != 'train':
        # Eval: deterministic resize straight to the crop size.
        return transforms.Compose([
            transforms.Resize((crop, crop)),
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float)])
    # Train: resize larger, then random crop + horizontal flip.
    return transforms.Compose([
        transforms.Resize((precrop, precrop)),
        transforms.RandomCrop((crop, crop)),
        transforms.RandomHorizontalFlip(),
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float)])
def get_transform(self, mode: str) -> transforms.Compose:
    """Return the dataset transform for *mode*.

    Training delegates to the parent class; any other mode gets a fixed
    32x32 resize followed by float-tensor conversion.
    """
    if mode == 'train':
        return super().get_transform(mode=mode)
    return transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float)])
def __getitem__(self, idx):
    """Return (image tensor, class-index mask, raw RGB mask tensor) for *idx*.

    The RGB mask is decoded to per-pixel class indices via ``rgb_to_mask``
    with the ``id2code`` color table.
    """
    img_loc = os.path.join(self.img_pth, self.total_imgs[idx])
    image = Image.open(img_loc).convert("RGB")
    mask_loc = os.path.join(self.mask_pth, self.total_masks[idx])
    mask = Image.open(mask_loc).convert("RGB")
    out_image, rgb_mask = self.transform(image), self.transform(mask)
    # FIX: single transforms need no Compose wrapper.
    out_image = transforms.ToTensor()(out_image)
    rgb_mask = transforms.PILToTensor()(rgb_mask)
    # FIX: PILToTensor already yields a (C, H, W) torch tensor, so the
    # tensor -> numpy -> tensor round-trip was redundant; rgb_to_mask
    # receives the same (H, W, C) values as before. The trailing no-op
    # ``.permute(0, 1, 2)`` on the returned mask is also dropped.
    out_mask = rgb_to_mask(rgb_mask.permute(1, 2, 0), id2code)
    return out_image, out_mask, rgb_mask
def __init__(self, data_path, dataset, train=True):
    """Load image-name lists and build the training transform.

    Args:
        data_path: root directory of the dataset files.
        dataset: dataset identifier passed through to ``load_image_names``.
        train (bool): whether to load the training split. Defaults to True.
    """
    self.data_path = data_path
    # BUGFIX: the ``train`` parameter was ignored — the call hard-coded
    # ``train=True``, so requesting the eval split had no effect.
    self.comps, self.real, self.masks = load_image_names(
        data_path, dataset, train=train)
    process = [
        transforms.ToPILImage(),
        transforms.Resize([256, 256]),
        transforms.RandomHorizontalFlip(),
        transforms.PILToTensor()
    ]
    self.transform_train = transforms.Compose(process)
def get_transform_imagenet(mode: str, use_tuple: bool = False,
                           auto_augment: bool = False) -> transforms.Compose:
    """Return the ImageNet preprocessing pipeline for *mode*.

    ``use_tuple`` passes sizes as (H, W) tuples instead of single ints;
    ``auto_augment`` inserts the IMAGENET AutoAugment policy in training.
    """
    if mode != 'train':
        # TODO: torchvision.prototypes.transforms._presets.ImageClassificationEval
        return transforms.Compose([
            transforms.Resize((256, 256) if use_tuple else 256),
            transforms.CenterCrop((224, 224) if use_tuple else 224),
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float)])
    steps = [
        transforms.RandomResizedCrop((224, 224) if use_tuple else 224),
        transforms.RandomHorizontalFlip(),
        # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), # noqa
    ]
    if auto_augment:
        steps.append(
            transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET))
    steps += [transforms.PILToTensor(),
              transforms.ConvertImageDtype(torch.float)]
    return transforms.Compose(steps)
def get_transform_cifar(
        mode: str, auto_augment: bool = False,
        cutout: bool = False, cutout_length: int = None,
        data_shape: list[int] = None) -> transforms.Compose:
    """Return the CIFAR train/eval transform.

    Args:
        mode (str): ``'train'`` enables augmentation; any other value
            returns plain float-tensor conversion.
        auto_augment (bool): add the CIFAR10 AutoAugment policy.
        cutout (bool): append Cutout with ``cutout_length``.
        cutout_length (int): Cutout patch size; defaults to half the width.
        data_shape (list[int]): [C, H, W]; defaults to ``[3, 32, 32]``.

    Returns:
        torchvision.transforms.Compose: The transform sequence.
    """
    # FIX: avoid a mutable list as a default argument (shared across calls);
    # fall back to the previous default when the caller omits it.
    if data_shape is None:
        data_shape = [3, 32, 32]
    if mode != 'train':
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float)])
    cutout_length = cutout_length or data_shape[-1] // 2
    transform_list = [
        transforms.RandomCrop(data_shape[-2:], padding=data_shape[-1] // 8),
        transforms.RandomHorizontalFlip(),
    ]
    if auto_augment:
        transform_list.append(
            transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10))
    transform_list.append(transforms.PILToTensor())
    transform_list.append(transforms.ConvertImageDtype(torch.float))
    if cutout:
        transform_list.append(Cutout(cutout_length))
    return transforms.Compose(transform_list)
def get_standard_data_augmentation():
    """Standard data augmentation used on my experiments."""
    steps = [
        # bob image layout -> matplotlib (H, W, C) layout, as uint8.
        lambda x: bob.io.image.to_matplotlib(x),
        lambda x: x.astype("uint8"),
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=(-3, 3)),
        transforms.RandomAutocontrast(p=0.1),
        transforms.PILToTensor(),
        # Map uint8 [0, 255] roughly onto [-1, 1).
        lambda x: (x - 127.5) / 128.0,
    ]
    return transforms.Compose(steps)
def get_transform(self, mode: str,
                  normalize: bool = None) -> transforms.Compose:
    r"""Get dataset transform based on :attr:`self.transform`.

    * ``None | 'none'`` (:any:`torchvision.transforms.PILToTensor`
      and :any:`torchvision.transforms.ConvertImageDtype`)
    * ``'bit'`` (transform used in BiT network)
    * ``'pytorch'`` (pytorch transform used in ImageNet training).

    Args:
        mode (str): The dataset mode (e.g., ``'train' | 'valid'``).
        normalize (bool | None): Whether to use
            :any:`torchvision.transforms.Normalize` in dataset transform.
            Defaults to ``self.normalize``.

    Returns:
        torchvision.transforms.Compose: The transform sequence.
    """
    if normalize is None:
        normalize = self.normalize
    # BiT ships its own complete pipeline; it returns before the
    # Normalize step below is ever appended.
    if self.transform == 'bit':
        return get_transform_bit(mode, self.data_shape)
    if self.data_shape == [3, 224, 224]:
        transform = get_transform_imagenet(
            mode, use_tuple=self.transform != 'pytorch',
            auto_augment=self.auto_augment)
    elif self.transform != 'none' and self.data_shape in (
            [3, 16, 16], [3, 32, 32]):
        transform = get_transform_cifar(
            mode, auto_augment=self.auto_augment,
            cutout=self.cutout, cutout_length=self.cutout_length,
            data_shape=self.data_shape)
    else:
        transform = transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float)])
    if normalize and self.norm_par is not None:
        transform.transforms.append(
            transforms.Normalize(mean=self.norm_par['mean'],
                                 std=self.norm_par['std']))
    return transform
def __init__(self,
             data_name,
             data_dir,
             train,
             crop_long_edge=False,
             resize_size=None,
             random_flip=False,
             normalize=True,
             hdf5_path=None,
             load_data_in_memory=False):
    """Image dataset wrapper that assembles its transform from the flags."""
    super(Dataset_, self).__init__()
    self.data_name = data_name
    self.data_dir = data_dir
    self.train = train
    self.random_flip = random_flip
    self.normalize = normalize
    self.hdf5_path = hdf5_path
    self.load_data_in_memory = load_data_in_memory

    pipeline = []
    if self.hdf5_path is None:
        # Raw images from disk: optional square crop then resize.
        if crop_long_edge:
            pipeline.append(CenterCropLongEdge())
        if resize_size is not None:
            pipeline.append(transforms.Resize(resize_size, Image.LANCZOS))
    else:
        # HDF5 stores arrays; convert back to PIL before the PIL ops.
        pipeline.append(transforms.ToPILImage())

    if self.random_flip:
        pipeline.append(transforms.RandomHorizontalFlip())

    if self.normalize:
        # Float tensor in [0, 1], then shifted/scaled to [-1, 1].
        pipeline.append(transforms.ToTensor())
        pipeline.append(
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]))
    else:
        # Keep raw uint8 values.
        pipeline.append(transforms.PILToTensor())

    self.trsf_list = pipeline
    self.trsf = transforms.Compose(self.trsf_list)
    self.load_dataset()
def ok(self):
    """
    This method reads the 'selected' list view and fills the trList
    list with the corresponding dataset transforms
    """
    # Factories are lambdas so Normalize's parameters are read only when
    # that transform is actually selected (same as the original branches).
    factories = {
        'ToTensor': lambda: tr.ToTensor(),
        'PILToTensor': lambda: tr.PILToTensor(),
        'Normalize': lambda: tr.Normalize(
            self.params_2level['Normalize']['Mean']['value'],
            self.params_2level['Normalize']['Std deviation']['value']),
        'Flatten': lambda: tr.Lambda(lambda x: torch.flatten(x)),
        'ToPILImage': lambda: tr.ToPILImage(),
    }
    for idx in range(self.selected.count()):
        name = self.selected.item(idx).text()
        if name in factories:
            self.trList.append(factories[name]())
def test_sort():
    """Run SORT over three ETH-Sunnyday frames and plot the predicted boxes."""
    # img = Image.open('/Users/shuqin/Downloads/2DMOT2015/train/ETH-Sunnyday/img1/000001.jpg')
    # x = transform(img)[None]
    model = SORT()
    model.eval()

    transform = T.Compose([T.PILToTensor(),
                           T.ConvertImageDtype(torch.float)])

    dir_path = '/Users/shuqin/Downloads/2DMOT2015/train/ETH-Sunnyday/img1'
    for name in ['000001.jpg', '000002.jpg', '000003.jpg']:
        frame_path = os.path.join(dir_path, name)
        x = transform(Image.open(frame_path))[None]  # add batch dim
        bbox = model(x)
        # (1, C, H, W) tensor -> (H, W, C) numpy image for plotting.
        frame = x.cpu().numpy().squeeze().transpose(1, 2, 0)
        plot_bbox(frame, bbox)
self.log("com_loss", com_loss) #if global_step % 100 == 0: # self.logger.experiment.add_image('image', torch.cat([x, x_rec], dim=-1)[0], optimizer_idx) beta = 0.25 loss = rec_loss + emb_loss + beta * com_loss return loss def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=5e-4) scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=1, max_epochs=40) return [optimizer], [scheduler] #%% datamodule = CIFAR10DataModule('/workspace/data', batch_size=128) transform = transform_lib.Compose([transform_lib.PILToTensor()]) datamodule.train_transforms = transform datamodule.val_transforms = transform datamodule.test_transforms = transform trainer = pl.Trainer(gpus=1, max_epochs=30, progress_bar_refresh_rate=10) vqvae = VQVAE() trainer.fit(vqvae, datamodule)
def get_image(path2image):
    """Load an image file.

    Returns a (1, C, H, W) float tensor scaled to [0, 1] together with
    the original PIL image.
    """
    pil_img = Image.open(path2image)
    tensor = transforms.PILToTensor()(pil_img)
    batched = tensor.unsqueeze(0) / 255  # add batch dim, scale uint8 range
    return batched, pil_img
def prepare_data(image, annotation):
    """Convert a (PIL image, VOC annotation) pair into YOLO-style inputs.

    Returns:
        tuple: ((C, H, W) float tensor scaled to [0, 1], (names, bboxes)).
    """
    # BUGFIX: the body referenced the undefined name ``annotations``
    # instead of the ``annotation`` parameter (NameError at call time).
    image, names, bboxes = voc_to_yolo(image, annotation)
    return transforms.PILToTensor()(image)/255., (names, bboxes)
# NOTE(review): the first four lines are the tail of a forward method whose
# signature lies outside this chunk.
    x = self.conv(x)
    x = x.mean([2, 3])  # global average pool over H and W
    x = self.fc(x)
    return nn.Sigmoid()(x)

model = MinusChecker()
model.load_state_dict(torch.load('./checkpoints_mlt/minus_checker.pth'))

# Load consecutively numbered crops until the first one that fails to open.
i = 0
images = []
while True:
    try:
        img = Image.open('./data/cropped/' + str(i) + '.png').convert('RGB')
        images.append(img)
    # NOTE(review): bare except also hides non-IO errors; FileNotFoundError
    # is presumably what is meant here — confirm.
    except:
        break
    i += 1

# Write one line per crop: '1' if class-1 probability exceeds 0.5, else '-1'.
with open('./data/res/minuses.txt', 'w+') as f:
    for img in images:
        x = transforms.PILToTensor()(img).unsqueeze(0) / 255
        res = model(x)[:, 1]
        if res[0] > 0.5:
            f.write('1')
        else:
            f.write('-1')
        f.write('\n')
# In[ ]: class CustomCrop: def __call__(self, sample): shape = sample.shape min_dimension = min(shape[1], shape[2]) center_crop = transforms.CenterCrop(min_dimension) sample = center_crop(sample) return sample # In[ ]: primary_transforms = transforms.Compose([ transforms.PILToTensor(), CustomCrop(), transforms.Resize((WIDTH, HEIGHT)) ]) # In[ ]: @timeit def get_images(transforms, n, log_step): images = torch.empty((n, CHANNELS, WIDTH, HEIGHT), dtype=torch.uint8) for i in range(n): if (i + 1) % log_step == 0: print(f'Images loaded: {i + 1}/{n}') image = Image.open(IMAGES_DIR + f'{i}.jpg') images[i] = transforms(image).detach().clone()
def cv_wait_key_and_destroy_all(delay: int = 0, quit_key: str = 'q') -> bool: key = cv.waitKey(delay) & 0xff cv.destroyAllWindows() return key == ord(quit_key) def cv_to_pil_img(img: np.ndarray) -> Image: img = cv.cvtColor(img, cv.COLOR_BGR2RGB) return Image.fromarray(img) def pil_to_cv_img(img: Image) -> np.ndarray: return cv.cvtColor(np.array(img), cv.COLOR_RGB2BGR) _pil_to_tensor_transform = transforms.PILToTensor() def pil_to_tensor(img: ImageT) -> torch.Tensor: return _pil_to_tensor_transform(img).float() / 255.0 def assure_int_bbox(bbox: np.ndarray) -> np.ndarray: if issubclass(bbox.dtype.type, numbers.Integral): return bbox else: return bbox.round().astype(np.int) def rand_uniform(a: float, b: float,
def getpoint(self, data):
    """Converts Tensors or PIL Images to numpy arrays (float32 for images)."""
    # BUGFIX: ``type(data) == PIL.Image.Image`` is False for the subclass
    # instances returned by Image.open (e.g. PngImageFile), so real images
    # fell through to ``data.numpy()`` and raised AttributeError;
    # isinstance handles subclasses correctly.
    if isinstance(data, PIL.Image.Image):
        return transforms.PILToTensor()(data).numpy().astype(np.float32)
    else:
        return data.numpy()
class CustomCrop:
    """Center-crop a channel-first tensor to a square of its smaller side."""

    def __call__(self, sample):
        side = min(sample.shape[1], sample.shape[2])
        return transforms.CenterCrop(side)(sample)


# In[ ]:

# PIL -> uint8 tensor -> square center crop -> fixed WIDTHxHEIGHT resize.
primary_transforms = transforms.Compose([
    transforms.PILToTensor(),
    CustomCrop(),
    transforms.Resize((WIDTH, HEIGHT)),
])


# In[ ]:

@timeit
def get_images(transforms, n, log_step):
    """Load images 0..n-1 from IMAGES_DIR into one preallocated uint8 tensor."""
    images = torch.empty((n, CHANNELS, WIDTH, HEIGHT), dtype=torch.uint8)
    for i in range(n):
        if (i + 1) % log_step == 0:
            print(f'Images loaded: {i + 1}/{n}')
        images[i] = transforms(Image.open(IMAGES_DIR + f'{i}.jpg')).detach().clone()
    print('Images were loaded from the disk.')
    return images
# NOTE(review): the first two lines are the tail of a validation branch whose
# condition lies outside this chunk. ``SystemExit(0)`` only CONSTRUCTS the
# exception without raising it, so the script does not actually exit here —
# presumably ``raise SystemExit(0)`` / ``sys.exit(0)`` was intended; confirm
# against the full file.
print('grid size must be a multiple of 2')
SystemExit(0)

with open(args.config) as f:
    config = json.load(f)

device = torch.device("cuda:0" if (
    torch.cuda.is_available() and config['ngpu'] > 0) else "cpu")
# device = 'cpu'

# Restore the trained generator weights from the checkpoint.
generator = Generator128(config).to(device)
checkpoint = torch.load(args.model_path)
generator.load_state_dict(checkpoint['gen_state_dict'])

transImg = transforms.ToPILImage()
transPIL = transforms.PILToTensor()

# Total images needed to fill grid_size x grid_size tiles, n_tiles times.
n_images = args.grid_size * args.grid_size * args.n_tiles
batch_size = 128
# Always run at least one batch, even for small requests.
if int(n_images / batch_size) < 1:
    n_batch = 1
else:
    n_batch = int(n_images / batch_size)

n_fig = 1
for n in range(n_batch):
    # Sample latent noise and generate a batch on the chosen device.
    fixed_noise = torch.randn(batch_size, config['nz'], 1, 1, device=device)
    results = generator(fixed_noise).detach().cpu()
    # NOTE(review): the loop body continues past this chunk.
    inds = [i for i in range(batch_size)]
def __init__(self, data_dir):
    """Store *data_dir*, prepare a PIL-to-uint8-tensor transform, load data."""
    super(Dataset_, self).__init__()
    # PILToTensor keeps raw uint8 values (no scaling to [0, 1]).
    self.trsf = transforms.PILToTensor()
    self.data_dir = data_dir
    self.load_dataset()
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

BATCH_SIZE = 64

# CIFAR-10 training images as raw uint8 tensors (no scaling/normalization).
train_dataset = CIFAR10(
    "/workspace/data",
    train=True,
    transform=transforms.Compose([transforms.PILToTensor()]),
)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)

model = ...  # placeholder — plug the actual model in here
# optimizer = torch.optimizer.Adam()

for epoch in range(10):
    for batch in train_dataloader:
        pass