def run_model(config, seed=0, data_dir='./data', genotype_class='PCDARTS',
              num_epochs=20, batch_size=get('batch_size'),
              init_channels=get('init_channels'),
              train_criterion=torch.nn.CrossEntropyLoss,
              data_augmentations=None, save_model_str=None, **kwargs):
    """
    Training loop for a configurable network.

    :param config: network/hyperparameter configuration (dict)
    :param seed: random seed (int)
    :param data_dir: dataset path (str)
    :param genotype_class: name of the genotype in `genotypes` to use (str)
    :param num_epochs: number of training epochs (int)
    :param batch_size: batch size (int)
    :param init_channels: number of initial channels of the network (int)
    :param train_criterion: which loss class to use during training (torch.nn loss)
    :param data_augmentations: data augmentations to apply, such as rescaling
        (list of transformations, transforms.Compose, or None).
        If None, only ToTensor is used.
    :param save_model_str: path to store the model checkpoint (str or None)
    :return: test accuracy of the trained model
    """
    # Device setup and seeding
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = getattr(genotypes, genotype_class)
    model = Network(init_channels, train_dataset.n_classes, config['n_conv_layers'], genotype)
    model = model.cuda()
    total_model_params = sum(p.numel() for p in model.parameters())
    logging.info("total model parameters: %d", total_model_params)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # train_criterion is passed as a loss class, so instantiate it
    criterion = train_criterion()
    criterion = criterion.cuda()

    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=config['initial_lr'],
                                    momentum=config['sgd_momentum'],
                                    weight_decay=config['weight_decay'],
                                    nesterov=config['nesterov'])
    else:
        optimizer = get('opti_dict')[config['optimizer']](model.parameters(),
                                                          lr=config['initial_lr'],
                                                          weight_decay=config['weight_decay'])

    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    logging.info('Generated Network:')
    summary(model,
            (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols),
            device='cuda' if torch.cuda.is_available() else 'cpu')

    for epoch in range(num_epochs):
        lr_scheduler.step()
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = config['drop_path_prob'] * epoch / num_epochs
        train_acc, train_obj = train(train_loader, model, criterion, optimizer,
                                     grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        test_acc, test_obj = infer(test_loader, model, criterion)
        logging.info('test_acc %f', test_acc)

    if save_model_str:
        # Save the model checkpoint; it can be restored via
        # model.load_state_dict(torch.load(save_model_str))
        if os.path.exists(save_model_str):
            # avoid overwriting an existing checkpoint
            save_model_str += '_' + time.ctime()
        torch.save(model.state_dict(), save_model_str)

    return test_acc
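# Example (illustrative only): run_model() reads the hyperparameter keys shown
# below from its config dict. The values here are assumptions for a quick smoke
# test, not tuned settings from this project.
#
#   example_config = {
#       'n_conv_layers': 2,
#       'optimizer': 'sgd',
#       'initial_lr': 0.025,
#       'sgd_momentum': 0.9,
#       'weight_decay': 3e-4,
#       'nesterov': True,
#       'lr_scheduler': 'Cosine',
#       'drop_path_prob': 0.2,
#       'grad_clip_value': 5,
#   }
#   test_acc = run_model(example_config, seed=0, num_epochs=1)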
def compute(self, x, budget, config, **kwargs):
    """
    Get model with hyperparameters from config generated by get_configspace()
    """
    config = get_config_dictionary(x, config)
    print("config", config)
    if len(config) < len(x):
        # The raw vector could not be mapped onto a complete configuration,
        # so return a large loss to discard this candidate.
        return 100

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(self.seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    genotype = genotypes.PCDARTS
    model = Network(self.init_channels, self.n_classes, config['n_conv_layers'], genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=config['initial_lr'],
                                    momentum=0.9,
                                    weight_decay=config['weight_decay'],
                                    nesterov=True)
    else:
        optimizer = settings.opti_dict[config['optimizer']](model.parameters(),
                                                            lr=config['initial_lr'])

    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, int(budget))
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    # Split the training set into a training and a validation part
    indices = list(range(int(self.split * len(self.train_dataset))))
    valid_indices = list(range(int(self.split * len(self.train_dataset)), len(self.train_dataset)))
    print("Training size=", len(indices))
    training_sampler = SubsetRandomSampler(indices)
    valid_sampler = SubsetRandomSampler(valid_indices)
    train_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=training_sampler)
    valid_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=valid_sampler)

    for epoch in range(int(budget)):
        lr_scheduler.step()
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = config['drop_path_prob'] * epoch / int(budget)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

    # Hyperband always minimizes, so return the validation loss
    # (equivalently one could return the error 1 - valid_acc).
    return valid_obj
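# Illustrative sketch (assumptions, not part of this repo's API): compute() is
# written as a worker method for a Hyperband/BO-style optimizer. The surrounding
# class is expected to provide self.seed, self.init_channels, self.n_classes,
# self.split, self.train_dataset and self.batch_size; `x` is the raw parameter
# vector proposed by the optimizer and `config` the search-space description
# that get_config_dictionary() maps it onto.
#
#   worker = MyWorker(...)                                   # hypothetical class
#   loss = worker.compute(x=candidate_vector, budget=9, config=search_space)
#   # lower is better: compute() returns the validation loss, or 100 if the
#   # vector could not be mapped onto a full configuration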
# NOTE: a second create_run_ensemble() definition below (taking a
# model_description dict) overrides this one if both are kept in the same module.
def create_run_ensemble(model_state_list, n_layers, grad_clip_value=5, seed=0,
                        num_epochs=20, learning_rate=0.001,
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_dir='./data',
                        data_augmentations=None,
                        save_model_str=None):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("model states = %s", model_state_list)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = getattr(genotypes, genotype_class)

    # Pre-extract the features of every pretrained model once, so the ensemble
    # classifier can be trained on them directly.
    dataset = dict()
    dataset['trn_features'] = []
    dataset['test_features'] = []
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers, genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        # Freeze the pretrained feature extractor
        for p in model.parameters():
            p.requires_grad = False

        trn_labels = []
        trn_features = []
        if i == 0:
            # Only the first model also collects the labels
            for d, la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                trn_labels.extend(la)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d, la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                test_labels.extend(la)
                test_features.extend(o.cpu().data)
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels
        else:
            for d, la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d, la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                test_features.extend(o.cpu().data)

        # Keep one feature list per model so the ensemble can combine them
        dataset['trn_features'].append(trn_features)
        dims.append(dataset['trn_features'][i][0].size(0))
        dataset['test_features'].append(test_features)

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0],
                                    dataset['trn_features'][1],
                                    dataset['trn_features'][2],
                                    dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0],
                                     dataset['test_features'][1],
                                     dataset['test_features'][2],
                                     dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset, batch_size=64, shuffle=True)
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)

    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    model = model.cuda()
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(num_epochs):
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader, criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_feat_loader, criterion, training=False)

    if save_model_str:
        # Save the model checkpoint; it can be restored via
        # model.load_state_dict(torch.load(path))
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(model.state_dict(), os.path.join(save_model_str, time.ctime()))
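# FeaturesDataset and EnsembleModel are defined elsewhere in this repo. As an
# assumption for readers, a minimal sketch of the FeaturesDataset interface used
# above (three pre-extracted feature lists, one per base model, plus the shared
# labels) could look like this:
#
#   class FeaturesDataset(torch.utils.data.Dataset):
#       def __init__(self, feats1, feats2, feats3, labels):
#           self.feats1, self.feats2, self.feats3 = feats1, feats2, feats3
#           self.labels = labels
#
#       def __len__(self):
#           return len(self.labels)
#
#       def __getitem__(self, idx):
#           return (self.feats1[idx], self.feats2[idx], self.feats3[idx],
#                   self.labels[idx])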
def create_run_ensemble(model_description, ensemble_config, seed=get('seed'),
                        num_epochs=20, data_dir='./data',
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None,
                        save_model_str=None):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = getattr(genotypes, genotype_class)

    # Load the pretrained base models described in model_description
    trained_models = []
    for model_state in model_description.keys():
        model = Network(init_channels,
                        train_dataset.n_classes,
                        model_description[model_state]['config']['n_conv_layers'],
                        genotype)
        model.load_state_dict(torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config']['drop_path_prob']
        trained_models.append(model)

    ensemble_model = EnsembleModel(trained_models,
                                   dense_units=ensemble_config['dense_units'],
                                   out_size=train_dataset.n_classes)
    ensemble_model = ensemble_model.cuda()
    summary(ensemble_model, input_size=(1, 28, 28))  # K49 images are 1x28x28

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # Optimize the ensemble's parameters (only the classifier head is left
    # trainable in the loop below)
    if ensemble_config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(ensemble_model.parameters(),
                                    lr=ensemble_config['initial_lr'],
                                    momentum=ensemble_config['sgd_momentum'],
                                    weight_decay=ensemble_config['weight_decay'],
                                    nesterov=ensemble_config['nesterov'])
    else:
        optimizer = get('opti_dict')[ensemble_config['optimizer']](
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            weight_decay=ensemble_config['weight_decay'])

    if ensemble_config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    elif ensemble_config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    print('Started Training')
    for epoch in range(num_epochs):
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])

        # Anneal the drop-path probability of the pretrained sub-models
        for sub_model in trained_models:
            sub_model.drop_path_prob = ensemble_config['drop_path_prob'] * epoch / num_epochs

        # Freeze the base models; only the ensemble classifier is trained
        for p in ensemble_model.model_1.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_2.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_3.parameters():
            p.requires_grad = False
        for p in ensemble_model.out_classifier.parameters():
            p.requires_grad = True

        train_acc, train_obj, models_avg = ensemble_train(
            train_loader, ensemble_model, criterion, optimizer,
            grad_clip=ensemble_config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        logging.info('models_avg {}'.format(models_avg))
        lr_scheduler.step()

        test_acc, test_obj, models_avg = ensemble_infer(test_loader, ensemble_model, criterion)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the ensemble checkpoint; it can be restored via
        # ensemble_model.load_state_dict(torch.load(path))
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(ensemble_model.state_dict(),
                   os.path.join(save_model_str, 'ENSEMBLE_' + time.ctime()))
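# Example (illustrative only): the nested dicts below follow the keys that
# create_run_ensemble() reads; the paths and values are placeholders, not
# artifacts produced by this repo.
#
#   model_description = {
#       'model_1': {'model_path': './models/model_1.pt',
#                   'config': {'n_conv_layers': 2, 'drop_path_prob': 0.2}},
#       'model_2': {'model_path': './models/model_2.pt',
#                   'config': {'n_conv_layers': 3, 'drop_path_prob': 0.2}},
#       'model_3': {'model_path': './models/model_3.pt',
#                   'config': {'n_conv_layers': 2, 'drop_path_prob': 0.1}},
#   }
#   ensemble_config = {
#       'dense_units': 128,
#       'optimizer': 'sgd',
#       'initial_lr': 0.01,
#       'sgd_momentum': 0.9,
#       'weight_decay': 3e-4,
#       'nesterov': True,
#       'lr_scheduler': 'Cosine',
#       'drop_path_prob': 0.2,
#       'grad_clip_value': 5,
#   }
#   create_run_ensemble(model_description, ensemble_config, num_epochs=5,
#                       save_model_str='./ensemble_checkpoints')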