def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    type_ = config.get('type')
    args.cuda = torch.cuda.is_available()
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)

    # ######################################################################
    # Data
    kwargs = {'num_workers': 0, 'pin_memory': True} if args.cuda else {}
    if args.dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=args.dataset,
            batch_size=args.batch_size, trainsize=args.trainsize,
            seed=args.seed, **kwargs)
    elif args.dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=args.testOnly,
            batch_size=args.batch_size, trainsize=args.trainsize,
            seed=args.seed, distributed=False, **kwargs)

    # ######################################################################
    # Build the network based on the type parameter. θ are the optimal
    # hyperparameters found by cross validation:
    # (lr, momentum, weight decay, init std).
    if type_ == 'ref':
        θ = (0.1, 0.9, 1e-4, 1)
    elif type_ in nets or type_ in nets1 or type_ in nets2:
        # All three architecture families share the same cross-validated
        # settings.
        θ = (0.5, 0.85, 1e-4, 1)
    else:
        raise ValueError('Unknown type')
    lr, mom, wd, std = θ
    # If the parameters were provided as an option, use them
    lr = config.get('lr', lr)
    mom = config.get('mom', mom)
    wd = config.get('wd', wd)
    std = config.get('std', std)

    self.model = MixedNet(args.dataset, type_, wd=wd)
    self.model.init(std)
    if args.cuda:
        self.model.cuda()

    # ######################################################################
    # Build the optimizer
    try:
        params = self.model.param_groups()
    except AttributeError:
        params = self.model.parameters()
    # Don't use the optimizer's weight decay; that is applied later in the
    # loss function.
    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', params, init_lr=lr, steps=args.steps, wd=0,
        gamma=args.gamma, momentum=mom, max_epochs=args.epochs)
    self.args = args
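
# The optimizer above is deliberately built with wd=0: weight decay is
# applied manually in the loss function rather than by SGD. A minimal sketch
# of what that could look like (`loss_with_weight_decay` is an illustrative
# helper, not this repo's actual loss, and the real code may decay each
# parameter group differently):
import torch.nn.functional as F

def loss_with_weight_decay(output, target, model, wd):
    """Cross-entropy plus an explicit L2 penalty on the weights."""
    ce = F.cross_entropy(output, target)
    # Explicit L2 term, replacing the optimizer's built-in weight_decay
    l2 = sum(p.pow(2).sum() for p in model.parameters() if p.requires_grad)
    return ce + 0.5 * wd * l2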
def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    type_ = config.get('type')
    dataset = config.get('dataset', args.dataset)
    num_gpus = config.get('num_gpus', args.num_gpus)
    if hasattr(args, 'verbose'):
        self._verbose = args.verbose

    num_workers = 4
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        # Single-threaded loading keeps runs reproducible
        num_workers = 0
        if self.use_cuda:
            torch.cuda.manual_seed(args.seed)
    else:
        args.seed = random.randint(0, 10000)

    # ######################################################################
    # Data. θ are the optimal hyperparameters found by cross validation:
    # (lr, momentum, weight decay, init std).
    kwargs = {
        'num_workers': num_workers,
        'pin_memory': True
    } if self.use_cuda else {}
    if dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=dataset,
            batch_size=args.batch_size, trainsize=args.trainsize, **kwargs)
        # θ = (0.5, 0.9, 1e-4, 1.5)
        θ = (0.5, 0.85, 1e-4, 1.5)
    elif dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=False, batch_size=args.batch_size,
            trainsize=args.trainsize, distributed=False, **kwargs)
        # θ = (0.2, 0.9, 1e-4, 1.5)
        θ = (0.5, 0.85, 8e-5, 1.5)
    else:
        raise ValueError('Unknown dataset')

    # ######################################################################
    # Build the network based on the type parameter.
    lr, mom, wd, std = θ
    # If the parameters were provided as an option, use them
    lr = config.get('lr', lr)
    mom = config.get('mom', mom)
    wd = config.get('wd', wd)
    std = config.get('std', std)
    bias = config.get('bias', 1e-2)
    alpha = config.get('alpha', None)
    # drop_p = config.get('drop_p', drop_p)

    # Build the network
    self.model = ScatNet(dataset, type_, alpha, bias)
    init = lambda x: net_init(x, std)
    self.model.apply(init)

    # Split across GPUs
    if torch.cuda.device_count() > 1 and num_gpus > 1:
        self.model = nn.DataParallel(self.model)
        model = self.model.module
    else:
        model = self.model
    if self.use_cuda:
        self.model.cuda()

    # ######################################################################
    # Build the optimizer. All parameters go in a single group here; the
    # variant below splits the invariant and convolutional layers into
    # separate groups.
    params = model.parameters()
    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', params, init_lr=lr, steps=args.steps, wd=wd,
        gamma=0.2, momentum=mom, max_epochs=args.epochs)

    if self.verbose:
        print(self.model)
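
# `net_init` (applied module-by-module via `self.model.apply`) is defined
# elsewhere in the repo. A plausible minimal sketch, assuming `std` scales a
# He-style Gaussian initialisation (the actual initializer may differ):
import torch.nn as nn

def net_init_sketch(module, std=1.0):
    if isinstance(module, nn.Conv2d):
        nn.init.kaiming_normal_(module.weight, mode='fan_out')
        module.weight.data.mul_(std)  # widen/narrow the init distribution
        if module.bias is not None:
            nn.init.zeros_(module.bias)
    elif isinstance(module, nn.Linear):
        nn.init.normal_(module.weight, std=0.01 * std)
        nn.init.zeros_(module.bias)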
def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    args.cuda = torch.cuda.is_available()
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)

    # ######################################################################
    # Data
    kwargs = {'num_workers': 0, 'pin_memory': True} if args.cuda else {}
    if args.dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=args.dataset,
            batch_size=args.batch_size, trainsize=args.trainsize,
            seed=args.seed, **kwargs)
        if args.dataset == 'cifar10':
            num_classes = 10
        elif args.dataset == 'cifar100':
            num_classes = 100
    elif args.dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=args.testOnly,
            batch_size=args.batch_size, trainsize=args.trainsize,
            seed=args.seed, distributed=False, **kwargs)
        num_classes = 200

    # ######################################################################
    # Build the network. θ are the optimal hyperparameters found by cross
    # validation: (lr, momentum, weight decay, init std).
    θ = (0.05, 0.9, 5e-4, 1)
    lr, mom, wd, std = θ
    # If the parameters were provided as an option, use them
    lr = config.get('lr', lr)
    mom = config.get('mom', mom)
    wd = config.get('wd', wd)
    std = config.get('std', std)

    self.model = Wide_ResNet(args.depth, args.widen_factor, args.dropout,
                             num_classes, ref=args.ref)
    self.model.init(std)
    if args.cuda:
        self.model.cuda()
        self.model = torch.nn.DataParallel(
            self.model, device_ids=range(torch.cuda.device_count()))

    # ######################################################################
    # Build the optimizer. Unlike the MixedNet variant above, weight decay
    # is handled by the optimizer here, so wd is passed through.
    try:
        params = self.model.param_groups()
    except AttributeError:
        # DataParallel wrappers (and plain nn.Modules) fall back to the
        # standard parameter list.
        params = self.model.parameters()
    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', params, init_lr=lr, steps=args.steps, wd=wd,
        gamma=args.gamma, momentum=mom, max_epochs=args.epochs)
    self.args = args
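
# These `_setup` methods follow the ray.tune.Trainable class API (older Ray
# releases used the underscore-prefixed hooks `_setup`/`_train`/`_save`/
# `_restore`). A usage sketch, where `TrainNet` and the search space are
# illustrative only:
#
#     from ray import tune
#     tune.run(TrainNet,
#              config={'args': args,
#                      'lr': tune.grid_search([0.05, 0.1, 0.5])},
#              resources_per_trial={'cpu': 1, 'gpu': 1})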
def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    type_ = config.get('type')
    dataset = config.get('dataset', args.dataset)
    if hasattr(args, 'verbose'):
        self._verbose = args.verbose
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if self.use_cuda:
            torch.cuda.manual_seed(args.seed)

    # ######################################################################
    # Data
    kwargs = {
        'num_workers': 0,
        'pin_memory': True
    } if self.use_cuda else {}
    if dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=dataset,
            batch_size=args.batch_size, trainsize=args.trainsize,
            seed=args.seed, **kwargs)
    elif dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=False, batch_size=args.batch_size,
            trainsize=args.trainsize, seed=args.seed, distributed=False,
            **kwargs)

    # ######################################################################
    # Build the network based on the type parameter. θ are the optimal
    # hyperparameters found by cross validation.
    if type_ == 'ref':
        θ = (0.1, 0.9, 1e-4, 1)
    else:
        θ = (0.2, 0.9, 5e-5, 1.5)
        # raise ValueError('Unknown type')
    lr, mom, wd, std = θ
    # If the parameters were provided as an option, use them
    lr = config.get('lr', lr)
    mom = config.get('mom', mom)
    wd = config.get('wd', wd)
    std = config.get('std', std)
    bias = config.get('bias', 1e-2)
    # drop_p = config.get('drop_p', drop_p)

    # Build the network
    self.model = ScatNet(args.dataset, type_, bias)
    init = lambda x: net_init(x, std)
    self.model.apply(init)

    # Split across GPUs
    if torch.cuda.device_count() > 1 and args.num_gpus > 1:
        self.model = nn.DataParallel(self.model)
        model = self.model.module
    else:
        model = self.model
    if self.use_cuda:
        self.model.cuda()

    # ######################################################################
    # Build the optimizer - use separate parameter groups for the invariant
    # and convolutional layers
    default_params = list(model.fc1.parameters())
    inv_params = []
    for name, module in model.net.named_children():
        params = [p for p in module.parameters() if p.requires_grad]
        if name.startswith('inv'):
            inv_params += params
        else:
            default_params += params
    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', default_params, init_lr=lr, steps=args.steps, wd=wd,
        gamma=0.2, momentum=mom, max_epochs=args.epochs)

    if len(inv_params) > 0:
        # Get special optimizer parameters
        lr1 = config.get('lr1', lr)
        gamma1 = config.get('gamma1', 0.2)
        mom1 = config.get('mom1', mom)
        wd1 = config.get('wd1', wd)
        self.optimizer1, self.scheduler1 = optim.get_optim(
            'sgd', inv_params, init_lr=lr1, steps=args.steps, wd=wd1,
            gamma=gamma1, momentum=mom1, max_epochs=args.epochs)

    if self.verbose:
        print(self.model)
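
# When the ScatNet has invariant layers, the _setup above leaves two
# optimizer/scheduler pairs that must both be driven during training. A
# minimal sketch of one epoch (illustrative, assuming cross-entropy loss
# and epoch-based milestone schedulers; device transfer omitted):
import torch.nn.functional as F

def train_epoch_sketch(trainer, loader):
    trainer.model.train()
    for x, y in loader:
        trainer.optimizer.zero_grad()
        if hasattr(trainer, 'optimizer1'):
            trainer.optimizer1.zero_grad()
        loss = F.cross_entropy(trainer.model(x), y)
        loss.backward()
        # Both parameter groups get their gradient step
        trainer.optimizer.step()
        if hasattr(trainer, 'optimizer1'):
            trainer.optimizer1.step()
    # Learning-rate schedules advance once per epoch
    trainer.scheduler.step()
    if hasattr(trainer, 'scheduler1'):
        trainer.scheduler1.step()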
print('| Initial Learning Rate = ' + str(args.lr))
print('| Optimizer = ' + str(args.optim))
tr_writer = SummaryWriter(os.path.join(args.exp_dir, 'train'))
te_writer = SummaryWriter(os.path.join(args.exp_dir, 'test'))
elapsed_time = 0

# Get the parameters to optimize
try:
    params = net.param_groups()
except AttributeError:
    params = net.parameters()
# Don't use the optimizer's weight decay; that is applied later in the
# loss function.
optimizer, scheduler = optim.get_optim(
    args.optim, params, init_lr=args.lr, steps=args.steps, wd=0,
    gamma=args.gamma, momentum=args.momentum, max_epochs=args.epochs)

# ##############################################################################
# Train
print('\n[Phase 4] : Training')
# Get one batch of validation data for logging
# x, y = next(iter(testloader))
# if use_cuda:
#     x = x.cuda()
for epoch in range(start_epoch, start_epoch + args.epochs):
    start_time = time.time()
    # Stepping the scheduler at the start of the epoch matches the pre-1.1
    # PyTorch convention; newer releases expect scheduler.step() after
    # optimizer.step().
    scheduler.step()
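
# `optim.get_optim` is a repo-local helper; its call sites show it returns
# an (optimizer, lr-scheduler) pair. A plausible minimal sketch, assuming
# plain SGD plus a MultiStepLR schedule over the milestone epochs in
# `steps` (the real helper likely supports more optimizers and may use
# `max_epochs`, e.g. for cosine schedules):
#
#     import torch
#
#     def get_optim_sketch(name, params, init_lr, steps, wd, gamma,
#                          momentum, max_epochs):
#         if name != 'sgd':
#             raise NotImplementedError(name)
#         opt = torch.optim.SGD(params, lr=init_lr, momentum=momentum,
#                               weight_decay=wd)
#         sched = torch.optim.lr_scheduler.MultiStepLR(
#             opt, milestones=steps, gamma=gamma)
#         return opt, sched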