def __init__(self, options, path):
    """Prepare the BCNN network, loss, solver, scheduler, and CUB-200 data.

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size').
        path, dict: Useful paths ('cub200' dataset root).
    """
    print('Prepare the network and data.')
    self._options = options
    self._path = path
    # Network: bilinear CNN replicated across available GPUs.
    self._net = torch.nn.DataParallel(BCNN()).cuda()
    print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: the four bilinear weight tensors plus the classifier head.
    trainable = [self._net.module.w1, self._net.module.w2,
                 self._net.module.w3, self._net.module.w4]
    trainable += list(self._net.module.fc.parameters())
    self._solver = torch.optim.SGD(
        trainable,
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    # Drop the LR by 10x when the monitored metric plateaus for 5 epochs.
    self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self._solver, mode='max', factor=0.1, patience=5, verbose=True,
        threshold=1e-4)
    # ImageNet statistics expected by the pretrained backbone.
    normalize = torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=True,
        num_workers=16, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=16, shuffle=False, num_workers=16,
        pin_memory=True)
def __init__(self, options, path):
    """Prepare the HBP network, criterion, solver, and CUB-200 data.

    Training is deliberately restricted to a fixed 12-image subset of the
    training set (overfit-one-batch sanity check).

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size').
        path, dict: Useful paths ('cub200' dataset root).
    """
    self._options = options
    self._path = path
    # Network.
    self._net = torch.nn.DataParallel(HBP()).cuda()
    print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: optimize only parameters that require gradients (frozen
    # backbone parameters are skipped).
    param_to_optim = [param for param in self._net.parameters()
                      if param.requires_grad]
    self._solver = torch.optim.SGD(
        param_to_optim,
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    milestones = [40, 60, 80, 100]
    self._scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self._solver, milestones=milestones, gamma=0.25)
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    # Train on one fixed batch: 12 hand-picked training indices.
    batch_indices = [536, 54, 4400, 1769, 1518, 1287,
                     5554, 4919, 2547, 2249, 5757, 589]
    subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        batch_indices)
    # shuffle must stay False: shuffling is mutually exclusive with a
    # custom sampler.
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=False,
        sampler=subset_sampler, num_workers=4, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=1, shuffle=False, num_workers=4,
        pin_memory=True)
def __init__(self, options, path):
    """Prepare a pretrained HBP network for full fine-tuning on CUB-200.

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size').
        path, dict: Useful paths ('cub200' root, 'model' checkpoint).
    """
    print('Prepare the network and data.')
    self._options = options
    self._path = path
    # Network: restore the weights saved by the previous training stage.
    self._net = torch.nn.DataParallel(HBP()).cuda()
    print(self._net)
    self._net.load_state_dict(torch.load(self._path['model']))
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver over every parameter (full fine-tuning).
    param_to_optim = list(self._net.parameters())
    self._solver = torch.optim.SGD(
        param_to_optim,
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    # Single LR drop late in training.
    milestones = [100]
    self._scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self._solver, milestones=milestones, gamma=0.25)
    normalize = torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=True,
        num_workers=4, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=16, shuffle=False, num_workers=4,
        pin_memory=True)
def __init__(self, paths):
    """Prepare the backbone network and CUB-200 data for feature extraction.

    Args:
        paths, dict<str, str>: Useful paths ('cub200' dataset root).
    """
    print('Prepare the network and data.')
    # Configurations.
    self._paths = paths
    # Network: ResNet-50 trunk with the avgpool/fc head stripped off.
    # (An earlier revision used a VGG-16 feature extractor here.)
    # NOTE(review): the network is NOT moved to GPU here — presumably
    # the caller handles device placement; confirm.
    backbone = torchvision.models.resnet50(pretrained=True)
    self._net = torch.nn.Sequential(*list(backbone.children())[:-2])
    print('resnet50!----finished!')
    print(self._net)
    # Data. Images are warped straight to 448x448 (no crop).
    normalize = torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=(448, 448)),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=(448, 448)),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_data = cub200.CUB200(root=self._paths['cub200'], train=True,
                               transform=train_transforms, download=False)
    test_data = cub200.CUB200(root=self._paths['cub200'], train=False,
                              transform=test_transforms, download=False)
    # Single-process, single-image loading keeps extraction deterministic.
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=1, shuffle=False, num_workers=0,
        pin_memory=False)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=1, shuffle=False, num_workers=0,
        pin_memory=False)
def getStat(self):
    """Compute and print the per-channel mean and std of the training set."""
    print('Compute mean and variance for training data.')
    # NOTE(review): the dataset root is looked up with the key
    # cub200.coarse_class rather than the usual 'cub200' string — this
    # looks deliberate (marked "change" in the original), but verify the
    # key exists in self._path.
    train_data = cub200.CUB200(root=self._path[cub200.coarse_class],
                               train=True,
                               transform=torchvision.transforms.ToTensor(),
                               download=True)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1,
                                               shuffle=False, num_workers=4,
                                               pin_memory=True)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    # batch_size is 1, so each iteration contributes exactly one image's
    # per-channel statistics; dividing by len(train_data) yields the
    # average of the per-image means/stds.
    for image, _ in train_loader:
        for channel in range(3):
            mean[channel] += image[:, channel, :, :].mean()
            std[channel] += image[:, channel, :, :].std()
    mean.div_(len(train_data))
    std.div_(len(train_data))
    print(mean)
    print(std)
def getStat(self):
    """Compute and print the per-channel mean and std of the training set."""
    print('Compute mean and variance for training data.')
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               transform=torchvision.transforms.ToTensor(),
                               download=True)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1,
                                               shuffle=False, num_workers=4,
                                               pin_memory=True)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    # batch_size is 1: each iteration adds one image's per-channel stats,
    # so dividing by the dataset size averages the per-image values.
    for image, _ in train_loader:
        for channel in range(3):
            mean[channel] += image[:, channel, :, :].mean()
            std[channel] += image[:, channel, :, :].std()
    mean.div_(len(train_data))
    std.div_(len(train_data))
    print("mean")
    print(mean)
    print("standard deviation")
    print(std)
def __init__(self, options, path):
    """Prepare the HBP network, Adam solver, scheduler, and CUB-200 data.

    Training samples are restricted to a fixed 12-image subset
    (overfit-one-batch sanity check). A longer 100-index list existed in
    an earlier revision; recover it from version control if needed.

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size', 'test_batch_size').
        path, dict: Useful paths ('cub200' dataset root).
    """
    self._options = options
    self._path = path
    # Network.
    self._net = torch.nn.DataParallel(HBP()).cuda()
    print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: optimize only parameters that require gradients (frozen
    # backbone parameters are skipped).
    param_to_optim = [param for param in self._net.parameters()
                      if param.requires_grad]
    # Adam replaces the original SGD(momentum=0.9) variant.
    self._solver = torch.optim.Adam(
        param_to_optim,
        lr=self._options['base_lr'],
        weight_decay=self._options['weight_decay'])
    milestones = [40, 60, 80, 100]
    self._scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self._solver, milestones=milestones, gamma=0.25)
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    # Fixed 12 training indices for the one-batch overfitting run.
    batch_indices = [536, 54, 4400, 1769, 1518, 1287,
                     5554, 4919, 2547, 2249, 5757, 589]
    subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        batch_indices)
    # shuffle must be False when a custom sampler is supplied.
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=False,
        sampler=subset_sampler, num_workers=4, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=self._options['test_batch_size'],
        shuffle=False, num_workers=4, pin_memory=True)
def __init__(self, options, path, ckpt_basename='vgg_16'):
    """Prepare the network, criterion, solver, and data.

    Trains only the classifier head of a BCNN with frozen features,
    optionally resuming from a checkpoint on disk.

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size', 'lr_scheduler').
        path, dict: Useful paths ('cub200' root, optional 'ckpt_path').
        ckpt_basename, str: Base name used for checkpoint files.
    """
    print('Prepare the network and data.')
    self._options = options
    self._path = path
    self.ckpt_basename = ckpt_basename
    # Network: only the convolutional feature extractor is data-parallel.
    self._net = BCNN(freeze_features=True)
    self._net.features = torch.nn.DataParallel(self._net.features)
    self._net.cuda()
    # Resume from a checkpoint when one is configured and present.
    if 'ckpt_path' in self._path:
        if os.path.exists(self._path['ckpt_path']):
            print('Continue from', self._path['ckpt_path'])
            self._net.load_state_dict(torch.load(self._path['ckpt_path']))
        else:
            print('Ckpt {} not found!'.format(self._path['ckpt_path']))
    print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: only the classifier head is optimized.
    self._solver = torch.optim.SGD(
        self._net.fc.parameters(),
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    scheduler_kind = self._options['lr_scheduler']
    if scheduler_kind == 'reduce_on_plateau':
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=5, verbose=True,
            threshold=1e-4, min_lr=1e-6)
    elif scheduler_kind == 'fixed':
        # Constant LR: multiply by 1.0 every epoch.
        self._scheduler = torch.optim.lr_scheduler.LambdaLR(
            self._solver, lambda epoch: 1.0)
    else:
        raise ValueError('Unknown scheduler:', self._options['lr_scheduler'])
    # Imagenet normalization
    normalize = torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=True,
        num_workers=4, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=16, shuffle=False, num_workers=4,
        pin_memory=True)
def __init__(self, options, path):
    """Prepare the HBP network, Adam solver, scheduler, and CUB-200 data.

    Args:
        options, dict: Hyperparameters ('base_lr', 'weight_decay',
            'batch_size', 'test_batch_size').
        path, dict: Useful paths ('cub200' dataset root).
    """
    self._options = options
    self._path = path
    # Network.
    self._net = torch.nn.DataParallel(HBP()).cuda()
    print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: optimize only parameters that require gradients (frozen
    # backbone parameters are skipped).
    param_to_optim = [param for param in self._net.parameters()
                      if param.requires_grad]
    # Adam replaces the original SGD(momentum=0.9) variant.
    self._solver = torch.optim.Adam(
        param_to_optim,
        lr=self._options['base_lr'],
        weight_decay=self._options['weight_decay'])
    milestones = [40, 60, 80, 100]
    self._scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self._solver, milestones=milestones, gamma=0.25)
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    train_data = cub200.CUB200(root=self._path['cub200'], train=True,
                               download=True, transform=train_transforms)
    test_data = cub200.CUB200(root=self._path['cub200'], train=False,
                              download=True, transform=test_transforms)
    # shuffle was deliberately set to False for the one-batch experiment.
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=False,
        num_workers=4, pin_memory=True)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=self._options['test_batch_size'],
        shuffle=False, num_workers=4, pin_memory=True)
def __init__(self, options, paths):
    """Prepare the network, criterion, optimizer, and data.

    When paths['pretrained'] is set, the whole BCNN is fine-tuned from the
    given weights on raw images; otherwise the model is trained on
    precomputed CUB200ReLU features.

    Args:
        options, dict<str, float/int>: Hyperparameters.
        paths, dict<str, str>: Useful paths.
    """
    print('Prepare the network and data.')
    # Configurations.
    self._options = options
    self._paths = paths
    finetune = self._paths['pretrained'] is not None
    # Network.
    if finetune:
        self._net = torch.nn.DataParallel(
            model.BCNN(num_classes=200, is_all=True)).cuda()
        # strict=False: the checkpoint may lack or rename some keys.
        self._net.load_state_dict(torch.load(self._paths['pretrained']),
                                  strict=False)
    else:
        self._net = torch.nn.DataParallel(
            model.BCNN(num_classes=200, is_all=False)).cuda()
    print(self._net)
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Optimizer.
    self._optimizer = torch.optim.SGD(
        self._net.parameters(),
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self._optimizer, mode='max', factor=0.1, patience=8, verbose=True,
        threshold=1e-4)
    # Data.
    if finetune:
        normalize = torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        train_transforms = torchvision.transforms.Compose([
            torchvision.transforms.RandomResizedCrop(size=448,
                                                     scale=(0.8, 1.0)),
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            normalize,
        ])
        test_transforms = torchvision.transforms.Compose([
            torchvision.transforms.Resize(size=(448, 448)),
            torchvision.transforms.ToTensor(),
            normalize,
        ])
        train_data = cub200.CUB200(root=self._paths['cub200'], train=True,
                                   transform=train_transforms,
                                   download=True)
        test_data = cub200.CUB200(root=self._paths['cub200'], train=False,
                                  transform=test_transforms, download=True)
    else:
        # Precomputed ReLU features — no image transforms required.
        train_data = cub200.CUB200ReLU(root=self._paths['cub200'],
                                       train=True)
        test_data = cub200.CUB200ReLU(root=self._paths['cub200'],
                                      train=False)
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=True,
        num_workers=4, pin_memory=False)
    # Feature vectors are tiny, so a much larger eval batch fits in memory.
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=(64 if finetune else 4096),
        shuffle=False, num_workers=4, pin_memory=False)
def main():
    """Extract BCNN outputs for the CUB-200 train/test splits and save them.

    Runs the network in eval mode over both splits and writes the stacked
    scores and labels to .npz files.

    Fixes over the previous version:
      * `y.cuda(async=True)` — `async` is a reserved keyword since
        Python 3.7, so this was a SyntaxError; the `async` kwarg was also
        renamed `non_blocking` in PyTorch. The GPU round-trip for labels
        was pointless anyway and is removed.
      * Accumulating with `np.append(None, ...)` injected a spurious
        leading None and produced object-dtype arrays; batches are now
        collected in lists and concatenated once.
      * Inference is wrapped in `torch.no_grad()` to skip autograd
        bookkeeping (the deprecated `torch.autograd.Variable` is dropped).
    """
    _net = torch.nn.DataParallel(BCNN()).cuda()
    # Train and test share the same deterministic preprocessing.
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=224),
        torchvision.transforms.CenterCrop(size=224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    train_data = cub200.CUB200(root=path, train=True, download=True,
                               transform=transforms)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=8,
                                               shuffle=False, num_workers=2,
                                               pin_memory=True)
    test_data = cub200.CUB200(root=path, train=False, download=True,
                              transform=transforms)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=8,
                                              shuffle=False, num_workers=2,
                                              pin_memory=True)
    _net.train(False)

    def _extract(loader):
        """Forward every batch through the net; return (scores, labels)."""
        scores, labels = [], []
        with torch.no_grad():
            for X, y in tqdm(loader):
                X = X.cuda(non_blocking=True)
                scores.append(_net(X).cpu().numpy())
                labels.append(y.numpy())
        return np.concatenate(scores), np.concatenate(labels)

    X_train, y_train = _extract(train_loader)
    np.savez('VGG-D_wo_ft_train_pytorch.npz', X_train, y_train)
    X_test, y_test = _extract(test_loader)
    np.savez('VGG-D_wo_ft_test_pytorch.npz', X_test, y_test)
def __init__(self, options, path):
    """Prepare the network, criterion, solver, and data for DDP training.

    Args:
        options, dict: Hyperparameters ('dataset', 'target', 'local_rank',
            'base_lr', 'weight_decay', 'batch_size').
        path, dict: Useful paths ('dataset' root, 'model' checkpoint).
    """
    print('Prepare the network and data.')
    self._options = options
    self._path = path
    # Network.
    dataset_name = self._options['dataset']
    if dataset_name == 'cub200':
        num_classes = 200
    elif dataset_name == 'aircraft':
        num_classes = 100
    else:
        raise NotImplementedError("Dataset " + self._options['dataset'] +
                                  " is not implemented.")
    # 'fc' stage trains the head on ImageNet-pretrained features; the
    # 'all' stage fine-tunes everything starting from the saved model.
    self._net = BCNN(num_classes=num_classes,
                     pretrained=options['target'] == 'fc')
    # Load the model from disk.
    if options['target'] == 'all':
        self._net.load_state_dict(torch.load(self._path['model']))
    self._net = torch.nn.parallel.DistributedDataParallel(
        self._net.cuda(),
        device_ids=[self._options['local_rank']],
        output_device=self._options['local_rank'])
    if dist.get_rank() == 0:
        print(self._net)
    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()
    # Solver: base LR is scaled linearly with the world size.
    self._solver = torch.optim.SGD(
        self._net.module.trainable_params,
        lr=self._options['base_lr'] * dist.get_world_size(),
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self._solver, mode='max', factor=0.1, patience=3, verbose=True,
        threshold=1e-4)
    normalize = torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),  # Let smaller edge match.
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    if dataset_name == 'cub200':
        train_data = cub200.CUB200(root=self._path['dataset'], train=True,
                                   download=True,
                                   transform=train_transforms)
        test_data = cub200.CUB200(root=self._path['dataset'], train=False,
                                  download=True, transform=test_transforms)
    elif dataset_name == 'aircraft':
        train_data = aircraft.Aircraft(root=self._path['dataset'],
                                       train=True, download=True,
                                       transform=train_transforms)
        test_data = aircraft.Aircraft(root=self._path['dataset'],
                                      train=False, download=True,
                                      transform=test_transforms)
    else:
        raise NotImplementedError("Dataset " + self._options['dataset'] +
                                  " is not implemented.")
    # Partition dataset among workers using DistributedSampler.
    train_sampler = distributed.DistributedSampler(
        train_data, num_replicas=dist.get_world_size(),
        rank=dist.get_rank())
    test_sampler = distributed.DistributedSampler(
        test_data, num_replicas=dist.get_world_size(),
        rank=dist.get_rank())
    # shuffle stays False: the samplers own the ordering.
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options['batch_size'], shuffle=False,
        num_workers=4, pin_memory=True, sampler=train_sampler)
    self._test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=self._options['batch_size'], shuffle=False,
        num_workers=4, pin_memory=True, sampler=test_sampler)
def __init__(self, options, paths):
    """Model preparation: network, logging, optimizer, data, and visdom."""
    # Configurations.
    self._options = options
    self._paths = paths
    # Prepare model and loss function.
    net = model.Model(architecture=self._options.arch,
                      input_size=self._options.input_size,
                      num_classes=options.num_classes)
    self._model = torch.nn.DataParallel(net).cuda()
    print(self._model)
    torchsummary.summary(self._model.module,
                         input_size=(3, self._options.input_size,
                                     self._options.input_size))
    # Display configurations and options.
    print('PyTorch %s, CUDA %s, cuDNN %s, GPU %s' %
          (torch.__version__, torch.version.cuda,
           torch.backends.cudnn.version(),
           torch.cuda.get_device_name(0)))
    print(self._options)
    print('Code version %s' % __version__)
    print('-' * 80)
    # Prepare optimizer and optimizer scheduler.
    self._loss_function = torch.nn.CrossEntropyLoss().cuda()
    self._optimizer = torch.optim.SGD(
        self._model.parameters(),
        lr=self._options.lr,
        momentum=0.9,
        weight_decay=self._options.weight_decay)
    self._scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self._optimizer, milestones=[60, 115], gamma=0.1)
    # Prepare dataset. Resize larger for the 448 input so crops keep a
    # similar relative margin.
    resize_size = 512 if self._options.input_size == 448 else 256
    normalize = torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((resize_size, resize_size)),
        torchvision.transforms.RandomCrop(
            (self._options.input_size, self._options.input_size)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    val_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((resize_size, resize_size)),
        torchvision.transforms.CenterCrop(
            (self._options.input_size, self._options.input_size)),
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_data = cub200.CUB200(root=self._paths.data, train=True,
                               transform=train_transform)
    val_data = cub200.CUB200(root=self._paths.data, train=False,
                             transform=val_transform)
    print('Dataset size: train=%d, val=%d' % (len(train_data),
                                              len(val_data)))
    self._train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self._options.bs, shuffle=True,
        num_workers=16, pin_memory=True)
    self._val_loader = torch.utils.data.DataLoader(
        val_data, batch_size=self._options.bs, shuffle=False,
        num_workers=16, pin_memory=True)
    # Visualization setup.
    self._visdom = visdom.Visdom(env='Term %d' % self._options.term,
                                 use_incoming_socket=False)
    assert self._visdom.check_connection()
    self._visdom.close()