def build_kaggle_dataset(base_config):
    """Build train/val/test DataLoaders for the EyePACS (Kaggle) fundus dataset.

    Args:
        base_config: dict with keys 'size' (crop size), 'mean'/'std'
            (normalization stats), 'batch_size' and 'n_threads'.

    Returns:
        Tuple of (train_dataloader, val_dataloader, test_dataloader).
        The train loader is class-balanced via a WeightedRandomSampler;
        val is shuffled, test keeps dataset order.
    """
    data_dir = '/data2/chenpj/FundusTool/data/fundus/EyePac/'
    train_path = os.path.join(data_dir, 'train')
    test_path = os.path.join(data_dir, 'test')
    val_path = os.path.join(data_dir, 'val')

    # Heavy geometric augmentation for training; plain resize elsewhere.
    train_preprocess = transforms.Compose([
        transforms.RandomResizedCrop(
            size=base_config['size'],
            scale=(1 / 1.15, 1.15),
            ratio=(0.7561, 1.3225)),
        transforms.RandomAffine(
            degrees=(-180, 180),
            # translate is a fraction of image size: allow +/- 40 px.
            translate=(40 / base_config['size'], 40 / base_config['size']),
            scale=None,
            shear=None),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(base_config['mean'], base_config['std']),
        # KrizhevskyColorAugmentation(sigma=0.5)
    ])
    test_preprocess = transforms.Compose([
        transforms.Resize((base_config['size'], base_config['size'])),
        transforms.ToTensor(),
        transforms.Normalize(base_config['mean'], base_config['std'])
    ])

    # Compile Dataset
    train_dataset = datasets.ImageFolder(train_path, train_preprocess)
    test_dataset = datasets.ImageFolder(test_path, test_preprocess)
    val_dataset = datasets.ImageFolder(val_path, test_preprocess)

    weights, weights_per_class = make_weights_for_balanced_classes(
        train_dataset.imgs, len(train_dataset.classes))
    print('Use sample weights')
    # weights= torch.DoubleTensor(weights)

    # BUG FIX: the original passed replacement=False. With replacement=False
    # and num_samples == len(weights) every index is drawn exactly once per
    # epoch, so the per-sample weights cannot oversample minority classes and
    # the "balanced" sampling is a no-op permutation. Oversampling requires
    # replacement=True (see torch.utils.data.WeightedRandomSampler docs).
    weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler(
        weights, len(weights), replacement=True)

    print('[Train]: ', len(train_dataset))
    print('[Val]: ', len(val_dataset))
    print('[Test]: ', len(test_dataset))

    # sampler and shuffle are mutually exclusive, hence no shuffle for train.
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=base_config['batch_size'],
                                  sampler=weighted_sampler,
                                  num_workers=base_config['n_threads'])
    val_dataloader = DataLoader(val_dataset,
                                batch_size=base_config['batch_size'],
                                shuffle=True,
                                num_workers=base_config['n_threads'])
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=base_config['batch_size'],
                                 shuffle=False,
                                 num_workers=base_config['n_threads'])
    return train_dataloader, val_dataloader, test_dataloader
def train(self, train_dataset, test_dataset, model):
    """Two-stage training: metric learning with triplet loss, then a classifier.

    Stage 1 optimizes ``model.feature_extractor`` with an online triplet loss
    for ``self.epochs_triplet`` epochs; stage 2 optimizes ``model.classifier``
    with cross-entropy for ``self.epochs_classifier`` epochs, evaluating on the
    test loader after each classifier epoch.

    Args:
        train_dataset: dataset with a ``.targets`` attribute (class labels),
            used to build class-balancing sample weights.
        test_dataset: held-out dataset, iterated in natural order.
        model: module exposing ``feature_extractor`` and ``classifier``
            sub-modules, each trained by its own optimizer.
    """
    # Class-balanced sampling so every batch sees minority classes too.
    weights = make_weights_for_balanced_classes(train_dataset.targets)
    sampler = WeightedRandomSampler(weights, len(weights))
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=self.batch_size,
        sampler=sampler,
        num_workers=8,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=self.batch_size,
        num_workers=8,
    )
    criterion_triplet = OnlineTripleLoss(
        margin=self.triplet_margin,
        sampling_strategy=self.triplet_sampling_strategy,
    )
    criterion_classifier = CrossEntropyLoss()
    # Separate optimizers: stage 1 only touches the feature extractor,
    # stage 2 only the classifier head.
    optimizer_triplet = Adam(
        params=model.feature_extractor.parameters(),
        lr=self.learning_rate_triplet,
    )
    optimizer_classifier = Adam(
        params=model.classifier.parameters(),
        lr=self.learning_rate_classify,
    )
    print("Training with Triplet loss")
    for i in range(self.epochs_triplet):
        self._train_epoch_triplet(
            model,
            train_dataloader,
            optimizer_triplet,
            criterion_triplet,
            i + 1,
        )
        # Dump a UMAP projection of the embeddings after every epoch.
        save_embedding_umap(model, train_dataloader, test_dataloader,
                            self.exp_folder, i + 1)
    print("Training the classifier")
    for i in range(self.epochs_classifier):
        self._train_epoch_classify(
            model,
            train_dataloader,
            optimizer_classifier,
            criterion_classifier,
            i + 1,
        )
        self._test_epoch_(model, test_dataloader, criterion_classifier, i + 1)
def get_loaders():
    """Build the training loader (class-balanced) plus both validation loaders.

    The training domain is selected by the ``DATA`` global ('svhn' picks SVHN,
    anything else picks MNIST); validation loaders are always built for both.

    Returns:
        (train_loader, val_svhn_loader, val_mnist_loader)
    """
    train_svhn, train_mnist = get_datasets(is_training=True)
    val_svhn, val_mnist = get_datasets(is_training=False)

    if DATA == 'svhn':
        chosen_train = train_svhn
    else:
        chosen_train = train_mnist

    # Counter class imbalance with a weighted sampler over the training set.
    sample_weights = make_weights_for_balanced_classes(
        chosen_train, num_classes=10)
    balanced_sampler = WeightedRandomSampler(
        sample_weights, len(sample_weights))

    train_loader = DataLoader(
        chosen_train, BATCH_SIZE,
        sampler=balanced_sampler, pin_memory=True, drop_last=True)
    val_svhn_loader = DataLoader(
        val_svhn, BATCH_SIZE, shuffle=False, drop_last=False)
    val_mnist_loader = DataLoader(
        val_mnist, BATCH_SIZE, shuffle=False, drop_last=False)

    return train_loader, val_svhn_loader, val_mnist_loader
[ # refer to https://pytorch.org/docs/stable/torchvision/transforms.html for more build-in online data augmentation transforms.Resize([ int(128 * INPUT_SIZE[0] / 112), int(128 * INPUT_SIZE[0] / 112) ]), # smaller side resized transforms.RandomCrop([INPUT_SIZE[0], INPUT_SIZE[1]]), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=RGB_MEAN, std=RGB_STD), ]) dataset_train = datasets.ImageFolder( os.path.join(DATA_ROOT, 'celeba_aligned'), train_transform) # create a weighted random sampler to process imbalanced data weights = make_weights_for_balanced_classes(dataset_train.imgs, len(dataset_train.classes)) weights = torch.DoubleTensor(weights) sampler = torch.utils.data.sampler.WeightedRandomSampler( weights, len(weights)) train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, sampler=sampler, pin_memory=PIN_MEMORY, num_workers=NUM_WORKERS, drop_last=DROP_LAST) NUM_CLASS = len(train_loader.dataset.classes) print("Number of Training Classes: {}".format(NUM_CLASS)) BACKBONE = ResNet_50(INPUT_SIZE)
def main():
    """Entry point: train a ResNet-50 face classifier on a VGGFace2-style tree.

    Parses CLI args, builds a class-balanced train loader and a sequential
    val loader, optionally loads/resumes a checkpoint, then delegates the
    loop to ``train.Trainer``.

    NOTE(review): this function is Python 2 code (``print`` statements,
    ``dataiter.next()``, ``transforms.Scale``); it will not run under
    Python 3 as written — confirm the intended interpreter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='resnet50_vggface')
    parser.add_argument('-c', '--config', type=int, default=1,
                        choices=configurations.keys())
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/vggface2')
    parser.add_argument('-m', '--model_path', default=None,
                        help='Initialize from pre-trained model')
    parser.add_argument('--resume', help='Checkpoint path')
    parser.add_argument('--bottleneck', action='store_true', default=False,
                        help='Add a 512-dim bottleneck layer with L2 normalization')
    args = parser.parse_args()
    # gpu = args.gpu

    cfg = configurations[args.config]
    out = get_log_dir(args.exp_name, args.config, cfg, verbose=False)
    resume = args.resume

    # os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()

    # Fixed seeds for reproducibility.
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    # Images should be arranged like this:
    #   data_root/
    #       class_1/....jpg..
    #       class_2/....jpg..
    #       ......./....jpg..
    data_root = args.dataset_path
    kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

    RGB_MEAN = [ 0.485, 0.456, 0.406 ]
    RGB_STD = [ 0.229, 0.224, 0.225 ]

    # Data transforms
    # http://pytorch.org/docs/master/torchvision/transforms.html
    train_transform = transforms.Compose([
        transforms.Scale(256),  # smaller side resized
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean = RGB_MEAN, std = RGB_STD),
    ])
    val_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean = RGB_MEAN, std = RGB_STD),
    ])

    # Data loaders - using PyTorch built-in objects
    #   loader = DataLoaderClass(DatasetClass)
    #   * `DataLoaderClass` is PyTorch provided torch.utils.data.DataLoader
    #   * `DatasetClass` loads samples from a dataset; can be a standard class
    #     provided by PyTorch (datasets.ImageFolder) or a custom-made class.
    #  - More info: http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder
    traindir = osp.join(data_root, 'train')
    dataset_train = datasets.ImageFolder(traindir, train_transform)

    # For unbalanced dataset we create a weighted sampler
    #  * Balanced class sampling:
    #    https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    weights = utils.make_weights_for_balanced_classes(
        dataset_train.imgs, len(dataset_train.classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=cfg['batch_size'],
        sampler = sampler, **kwargs)

    valdir = osp.join(data_root, 'val-crop')
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, val_transform),
        batch_size=cfg['batch_size'], shuffle=False, **kwargs)

    # print 'dataset classes:' + str(train_loader.dataset.classes)
    num_class = len(train_loader.dataset.classes)
    print 'Number of classes: %d' % num_class

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    model = torchvision.models.resnet50(pretrained=False)

    if type(model.fc) == torch.nn.modules.linear.Linear:
        # Check if final fc layer sizes match num_class
        if not model.fc.weight.size()[0] == num_class:
            # Replace last layer
            print model.fc
            model.fc = torch.nn.Linear(2048, num_class)
            print model.fc
        else:
            pass
    else:
        pass

    if args.model_path:
        # If existing model is to be loaded from a file
        checkpoint = torch.load(args.model_path)
        if checkpoint['arch'] == 'DataParallel':
            # if we trained and saved our model using DataParallel
            model = torch.nn.DataParallel(model,
                                          device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
            model.load_state_dict(checkpoint['model_state_dict'])
            # get network module from inside its DataParallel wrapper
            model = model.module
        else:
            model.load_state_dict(checkpoint['model_state_dict'])

    # Optionally add a "bottleneck + L2-norm" layer after GAP-layer
    # TODO -- loading a bottleneck model might be a problem .... do some unit-tests
    # NOTE(review): BatchNorm2d over a Linear output looks suspect —
    # a 2-D (N, C) tensor usually pairs with BatchNorm1d; confirm intent.
    if args.bottleneck:
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(nn.BatchNorm2d(512))
        layers.append(torch.nn.ReLU(inplace=True))
        layers.append(models.NormFeat())  # L2-normalization layer
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)

    # TODO - config options for DataParallel and device_ids
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
    if cuda:
        model.cuda()

    start_epoch = 0
    start_iteration = 0

    # Loss - cross entropy between predicted scores (unnormalized) and class labels (integers)
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if resume:
        # Resume training from last saved checkpoint
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
    else:
        pass

    # -----------------------------------------------------------------------------
    # 3. Optimizer
    # -----------------------------------------------------------------------------
    # Parameters with p.requires_grad=False are not updated during training.
    # This can be specified when defining the nn.Modules during model creation
    params = filter(lambda p: p.requires_grad, model.parameters())

    if 'optim' in cfg.keys():
        if cfg['optim'].lower()=='sgd':
            optim = torch.optim.SGD(params, lr=cfg['lr'],
                                    momentum=cfg['momentum'],
                                    weight_decay=cfg['weight_decay'])
        elif cfg['optim'].lower()=='adam':
            optim = torch.optim.Adam(params, lr=cfg['lr'],
                                     weight_decay=cfg['weight_decay'])
        else:
            raise NotImplementedError('Optimizers: SGD or Adam')
    else:
        # Default optimizer when the config does not name one.
        optim = torch.optim.SGD(params, lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])

    if resume:
        # Restore optimizer state (momentum buffers etc.) from the checkpoint.
        optim.load_state_dict(checkpoint['optim_state_dict'])

    # -----------------------------------------------------------------------------
    # [optional] Sanity-check: forward pass with a single batch
    # -----------------------------------------------------------------------------
    DEBUG = False
    if DEBUG:
        # model = model.cpu()
        dataiter = iter(val_loader)
        img, label = dataiter.next()
        print 'Labels: ' + str(label.size())  # batchSize x num_class
        print 'Input: ' + str(img.size())  # batchSize x 3 x 224 x 224

        im = img.squeeze().numpy()
        im = im[0,:,:,:]  # get first image in the batch
        im = im.transpose((1,2,0))  # permute to 224x224x3
        im = im * [ 0.229, 0.224, 0.225 ]  # unnormalize
        im = im + [ 0.485, 0.456, 0.406 ]
        im[im<0] = 0
        f = plt.figure()
        plt.imshow(im)
        plt.savefig('sanity-check-im.jpg')  # save transformed image in current folder

        inputs = Variable(img)
        if cuda:
            inputs = inputs.cuda()
        model.eval()
        outputs = model(inputs)
        print 'Network output: ' + str(outputs.size())
        model.train()
    else:
        pass

    # -----------------------------------------------------------------------------
    # 4. Training
    # -----------------------------------------------------------------------------
    trainer = train.Trainer(
        cuda=cuda,
        model=model,
        criterion=criterion,
        optimizer=optim,
        init_lr=cfg['lr'],
        lr_decay_epoch = cfg['lr_decay_epoch'],
        train_loader=train_loader,
        val_loader=val_loader,
        out=out,
        max_iter=cfg['max_iteration'],
        interval_validate=cfg.get('interval_validate', len(train_loader)),
    )
    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    trainer.train()
def train_and_evaluate():
    """Train a classifier with associative domain adaptation (SVHN <-> MNIST).

    The source domain (labeled) is chosen by the ``SOURCE_DATA`` global; the
    other dataset is the unlabeled target. Each step combines cross-entropy on
    source labels with walker/visit association losses between source and
    target embeddings, the latter phased in after ``DELAY`` iterations.
    Evaluates on both validation sets every epoch, then saves weights and logs.
    """
    svhn, mnist = get_datasets(is_training=True)
    source_dataset = svhn if SOURCE_DATA == 'svhn' else mnist
    target_dataset = mnist if SOURCE_DATA == 'svhn' else svhn

    # Class-balanced sampling on the labeled source domain only.
    weights = make_weights_for_balanced_classes(source_dataset, num_classes=10)
    sampler = WeightedRandomSampler(weights, len(weights))
    source_loader = DataLoader(source_dataset, BATCH_SIZE, sampler=sampler,
                               pin_memory=True, drop_last=True)
    target_loader = DataLoader(target_dataset, BATCH_SIZE, shuffle=True,
                               pin_memory=True, drop_last=True)

    val_svhn, val_mnist = get_datasets(is_training=False)
    val_svhn_loader = DataLoader(val_svhn, BATCH_SIZE,
                                 shuffle=False, drop_last=False)
    val_mnist_loader = DataLoader(val_mnist, BATCH_SIZE,
                                  shuffle=False, drop_last=False)
    print('\nsource dataset is', SOURCE_DATA, '\n')

    # zip() over both loaders stops at the shorter one, hence the min().
    num_steps_per_epoch = math.floor(min(len(svhn), len(mnist)) / BATCH_SIZE)

    embedder = Network(image_size=(32, 32),
                       embedding_dim=EMBEDDING_DIM).to(DEVICE)
    classifier = nn.Linear(EMBEDDING_DIM, 10).to(DEVICE)
    model = nn.Sequential(embedder, classifier)
    model.train()

    optimizer = optim.Adam(lr=1e-3, params=model.parameters(),
                           weight_decay=1e-3)
    # Cosine decay only runs after the association losses kick in (see DELAY).
    scheduler = CosineAnnealingLR(optimizer,
                                  T_max=num_steps_per_epoch * NUM_EPOCHS - DELAY,
                                  eta_min=1e-6)
    cross_entropy = nn.CrossEntropyLoss()
    association = WalkerVisitLosses()

    text = 'e:{0:2d}, i:{1:3d}, classification loss: {2:.3f}, ' +\
        'walker loss: {3:.3f}, visit loss: {4:.4f}, ' +\
        'total loss: {5:.3f}, lr: {6:.6f}'
    logs, val_logs = [], []
    i = 0  # iteration
    for e in range(NUM_EPOCHS):
        model.train()
        for (x_source, y_source), (x_target, _) in zip(source_loader, target_loader):
            x_source = x_source.to(DEVICE)
            x_target = x_target.to(DEVICE)
            y_source = y_source.to(DEVICE)

            # Embed source and target in one forward pass, then split back.
            x = torch.cat([x_source, x_target], dim=0)
            embeddings = embedder(x)
            a, b = torch.split(embeddings, BATCH_SIZE, dim=0)

            logits = classifier(a)
            usual_loss = cross_entropy(logits, y_source)
            walker_loss, visit_loss = association(a, b, y_source)

            if i > DELAY:
                # Ramp the association losses in linearly over GROWTH_STEPS.
                growth = torch.clamp(
                    torch.tensor((i - DELAY) / GROWTH_STEPS).to(DEVICE),
                    0.0, 1.0)
                loss = usual_loss + growth * (BETA1 * walker_loss + BETA2 * visit_loss)
            else:
                loss = usual_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i > DELAY:
                scheduler.step()
            # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch
            # in favor of get_last_lr() — confirm the pinned torch version.
            lr = scheduler.get_lr()[0]

            log = (e, i, usual_loss.item(),
                   walker_loss.item(), visit_loss.item(),
                   loss.item(), lr)
            print(text.format(*log))
            logs.append(log)
            i += 1

        # Per-epoch evaluation on both domains.
        result1 = evaluate(model, cross_entropy, val_svhn_loader, DEVICE)
        result2 = evaluate(model, cross_entropy, val_mnist_loader, DEVICE)
        print('\nsvhn loss {0:.3f} and accuracy {1:.3f}'.format(*result1))
        print('mnist loss {0:.3f} and accuracy {1:.3f}\n'.format(*result2))
        val_logs.append((i, ) + result1 + result2)

    torch.save(model.state_dict(), SAVE_PATH)
    write_logs(logs, val_logs, LOGS_PATH)
def main():
    """Entry point: train a ResNet-50 face classifier on a VGGFace2-style tree.

    Near-duplicate of the earlier ``main`` in this file (reformatted copy):
    parses CLI args, builds a class-balanced train loader and a sequential
    val loader, optionally loads/resumes a checkpoint, and delegates the
    training loop to ``train.Trainer``.

    NOTE(review): this function is Python 2 code (``print`` statements,
    ``dataiter.next()``, ``transforms.Scale``); it will not run under
    Python 3 as written — confirm the intended interpreter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='resnet50_vggface')
    parser.add_argument('-c', '--config', type=int, default=1,
                        choices=configurations.keys())
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/vggface2')
    parser.add_argument('-m', '--model_path', default=None,
                        help='Initialize from pre-trained model')
    parser.add_argument('--resume', help='Checkpoint path')
    parser.add_argument(
        '--bottleneck', action='store_true', default=False,
        help='Add a 512-dim bottleneck layer with L2 normalization')
    args = parser.parse_args()
    # gpu = args.gpu

    cfg = configurations[args.config]
    out = get_log_dir(args.exp_name, args.config, cfg, verbose=False)
    resume = args.resume

    # os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()

    # Fixed seeds for reproducibility.
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    # Images should be arranged like this:
    #   data_root/
    #       class_1/....jpg..
    #       class_2/....jpg..
    #       ......./....jpg..
    data_root = args.dataset_path
    kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]

    # Data transforms
    # http://pytorch.org/docs/master/torchvision/transforms.html
    train_transform = transforms.Compose([
        transforms.Scale(256),  # smaller side resized
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])
    val_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Data loaders - using PyTorch built-in objects
    #   loader = DataLoaderClass(DatasetClass)
    #   * `DataLoaderClass` is PyTorch provided torch.utils.data.DataLoader
    #   * `DatasetClass` loads samples from a dataset; can be a standard class
    #     provided by PyTorch (datasets.ImageFolder) or a custom-made class.
    #  - More info: http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder
    traindir = osp.join(data_root, 'train')
    dataset_train = datasets.ImageFolder(traindir, train_transform)

    # For unbalanced dataset we create a weighted sampler
    #  * Balanced class sampling:
    #    https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    weights = utils.make_weights_for_balanced_classes(
        dataset_train.imgs, len(dataset_train.classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        weights, len(weights))

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=cfg['batch_size'],
                                               sampler=sampler,
                                               **kwargs)

    valdir = osp.join(data_root, 'val-crop')
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir, val_transform),
        batch_size=cfg['batch_size'],
        shuffle=False,
        **kwargs)

    # print 'dataset classes:' + str(train_loader.dataset.classes)
    num_class = len(train_loader.dataset.classes)
    print 'Number of classes: %d' % num_class

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    model = torchvision.models.resnet50(pretrained=False)

    if type(model.fc) == torch.nn.modules.linear.Linear:
        # Check if final fc layer sizes match num_class
        if not model.fc.weight.size()[0] == num_class:
            # Replace last layer
            print model.fc
            model.fc = torch.nn.Linear(2048, num_class)
            print model.fc
        else:
            pass
    else:
        pass

    if args.model_path:
        # If existing model is to be loaded from a file
        checkpoint = torch.load(args.model_path)
        if checkpoint['arch'] == 'DataParallel':
            # if we trained and saved our model using DataParallel
            model = torch.nn.DataParallel(model,
                                          device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
            model.load_state_dict(checkpoint['model_state_dict'])
            # get network module from inside its DataParallel wrapper
            model = model.module
        else:
            model.load_state_dict(checkpoint['model_state_dict'])

    # Optionally add a "bottleneck + L2-norm" layer after GAP-layer
    # TODO -- loading a bottleneck model might be a problem .... do some unit-tests
    # NOTE(review): BatchNorm2d over a Linear output looks suspect —
    # a 2-D (N, C) tensor usually pairs with BatchNorm1d; confirm intent.
    if args.bottleneck:
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(nn.BatchNorm2d(512))
        layers.append(torch.nn.ReLU(inplace=True))
        layers.append(models.NormFeat())  # L2-normalization layer
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)

    # TODO - config options for DataParallel and device_ids
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
    if cuda:
        model.cuda()

    start_epoch = 0
    start_iteration = 0

    # Loss - cross entropy between predicted scores (unnormalized) and class labels (integers)
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if resume:
        # Resume training from last saved checkpoint
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
    else:
        pass

    # -----------------------------------------------------------------------------
    # 3. Optimizer
    # -----------------------------------------------------------------------------
    # Parameters with p.requires_grad=False are not updated during training.
    # This can be specified when defining the nn.Modules during model creation
    params = filter(lambda p: p.requires_grad, model.parameters())

    if 'optim' in cfg.keys():
        if cfg['optim'].lower() == 'sgd':
            optim = torch.optim.SGD(params,
                                    lr=cfg['lr'],
                                    momentum=cfg['momentum'],
                                    weight_decay=cfg['weight_decay'])
        elif cfg['optim'].lower() == 'adam':
            optim = torch.optim.Adam(params,
                                     lr=cfg['lr'],
                                     weight_decay=cfg['weight_decay'])
        else:
            raise NotImplementedError('Optimizers: SGD or Adam')
    else:
        # Default optimizer when the config does not name one.
        optim = torch.optim.SGD(params,
                                lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])

    if resume:
        # Restore optimizer state (momentum buffers etc.) from the checkpoint.
        optim.load_state_dict(checkpoint['optim_state_dict'])

    # -----------------------------------------------------------------------------
    # [optional] Sanity-check: forward pass with a single batch
    # -----------------------------------------------------------------------------
    DEBUG = False
    if DEBUG:
        # model = model.cpu()
        dataiter = iter(val_loader)
        img, label = dataiter.next()
        print 'Labels: ' + str(label.size())  # batchSize x num_class
        print 'Input: ' + str(img.size())  # batchSize x 3 x 224 x 224

        im = img.squeeze().numpy()
        im = im[0, :, :, :]  # get first image in the batch
        im = im.transpose((1, 2, 0))  # permute to 224x224x3
        im = im * [0.229, 0.224, 0.225]  # unnormalize
        im = im + [0.485, 0.456, 0.406]
        im[im < 0] = 0
        f = plt.figure()
        plt.imshow(im)
        plt.savefig(
            'sanity-check-im.jpg')  # save transformed image in current folder

        inputs = Variable(img)
        if cuda:
            inputs = inputs.cuda()
        model.eval()
        outputs = model(inputs)
        print 'Network output: ' + str(outputs.size())
        model.train()
    else:
        pass

    # -----------------------------------------------------------------------------
    # 4. Training
    # -----------------------------------------------------------------------------
    trainer = train.Trainer(
        cuda=cuda,
        model=model,
        criterion=criterion,
        optimizer=optim,
        init_lr=cfg['lr'],
        lr_decay_epoch=cfg['lr_decay_epoch'],
        train_loader=train_loader,
        val_loader=val_loader,
        out=out,
        max_iter=cfg['max_iteration'],
        interval_validate=cfg.get('interval_validate', len(train_loader)),
    )
    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    trainer.train()
subdir_and_files = [ tarinfo for tarinfo in tar.getmembers() if tarinfo.name.startswith(tar_name + '/' + fold_lst[fold_num]) ] tar.extractall(members=subdir_and_files, path=tar_extract_path) # Forming the dataset and dataloader image_datasets = { x: datasets.ImageFolder( os.path.join(tar_extract_path, tar_name, fold_lst[fold_num], x), data_transforms[x]) for x in ['train', 'val'] } weights_dict = { x: make_weights_for_balanced_classes(image_datasets[x].imgs, len(image_datasets[x].classes)) for x in ['train', 'val'] } sampler_dict = { x: torch.utils.data.sampler.WeightedRandomSampler( torch.DoubleTensor(weights_dict[x]), len(torch.DoubleTensor(weights_dict[x]))) for x in ['train', 'val'] } dataloaders_dict_sampler = { x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=False, sampler=sampler_dict[x],
transforms.Compose([ transforms.Resize(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize, ])) testset = ImageFolder( 'data/gender_images/test', transforms.Compose([ transforms.Resize(224), transforms.ToTensor(), normalize, ])) weights = make_weights_for_balanced_classes(trainset.imgs, len(trainset.classes)) sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights)) trainloader = DataLoader(trainset, batch_size=16, sampler=sampler) testloader = DataLoader(testset, batch_size=1, shuffle=True) e = enumerate(testloader) def plotIm(): _, (inputs, targets) = next(e) print(targets[0]) inp = inputs[0].numpy().transpose((1, 2, 0)) plt.imshow(inp) plt.show()