class Runner:
    def __init__(self, config):
        self.config = config

    def find_lr(self):
        from torch_lr_finder import LRFinder

        logger.info('finding the best learning rate')
        cfg = self.config

        if self.tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)
        criterion = getattr(module_loss, cfg['criterion'])()

        self.lr_finder = LRFinder(model, optimizer, criterion, device="cuda")

        lr_finder_epochs = cfg['lr_finder']['epochs']
        logger.info(f'Running LR-Test for {lr_finder_epochs} epochs')

        # my method
        self.lr_finder.range_test(self.trainer.train_loader,
                                  start_lr=1e-3,
                                  end_lr=1,
                                  num_iter=len(self.trainer.test_loader) * lr_finder_epochs,
                                  step_mode='linear')

        # leslie smith method
        # self.lr_finder.range_test(self.trainer.train_loader, val_loader=self.trainer.test_loader,
        #                           end_lr=1, num_iter=len(self.trainer.train_loader), step_mode='linear')

        # fast ai method
        # self.lr_finder.range_test(
        #     self.trainer.train_loader, end_lr=100, num_iter=len(self.trainer.train_loader))

        self.best_lr = self.lr_finder.history['lr'][
            self.lr_finder.history['loss'].index(self.lr_finder.best_loss)]

        sorted_lrs = [
            x for _, x in sorted(
                zip(self.lr_finder.history['loss'], self.lr_finder.history['lr']))
        ]
        logger.info(f'sorted lrs : {sorted_lrs[:10]}')
        logger.info(f'found the best lr : {self.best_lr}')

        logger.info('plotting lr_finder')
        plt.style.use("dark_background")
        self.lr_finder.plot()

        # reset the model and the optimizer
        self.lr_finder.reset()
        plt.show()

        del model, optimizer, criterion

    def train(self, use_bestlr=False, lr_value=None):
        # if the best lr was found, use that value instead
        if use_bestlr and self.best_lr is not None:
            logger.info(f'using max_lr : {self.best_lr}')
            logger.info(f'using min_lr : {self.best_lr/30}')
            logger.info(f'using initial_lr : {self.best_lr/20}')
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = self.best_lr / 10
                param_group['max_lr'] = self.best_lr
                param_group['min_lr'] = self.best_lr / 30
                param_group['initial_lr'] = self.best_lr / 20

        if not use_bestlr and (lr_value is not None):
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = lr_value

        self.trainer.train()
        logger.info('Finished!')

    def setup_train(self, tsai_mode=False):
        cfg = self.config
        self.tsai_mode = tsai_mode

        if tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        logger.info('Training Config')
        # display the config
        for line in pprint.pformat(cfg).split('\n'):
            logger.info(line)

        # to get consistent results, seed everything
        seed_everything(cfg['seed'])

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)

        self.transforms = get_instance(module_aug, 'augmentation', cfg)

        # get the train and test loaders
        self.data_loader = get_instance(module_data, 'data_loader', cfg, self.transforms)
        train_loader, test_loader = self.data_loader.get_loaders()

        logger.info('Getting loss function handle')
        criterion = getattr(module_loss, cfg['criterion'])()

        batch_scheduler = False
        if cfg['lr_scheduler']['type'] == 'OneCycleLR':
            logger.info('Building: torch.optim.lr_scheduler.OneCycleLR')
            max_at_epoch = cfg['lr_scheduler']['max_lr_at_epoch']
            pct_start = max_at_epoch / cfg['training']['epochs'] if max_at_epoch else 0.8
            sch_cfg = cfg['lr_scheduler']['args']
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=sch_cfg['max_lr'],
                steps_per_epoch=len(train_loader),
                pct_start=pct_start,
                epochs=cfg['training']['epochs'])
            batch_scheduler = True
        else:
            lr_scheduler = get_instance(module_scheduler, 'lr_scheduler', cfg, optimizer)

        logger.info('Initializing trainer')
        self.trainer = Trainer(model,
                               criterion,
                               optimizer,
                               cfg,
                               device,
                               train_loader,
                               test_loader,
                               lr_scheduler=lr_scheduler,
                               batch_scheduler=batch_scheduler)

    def plot_metrics(self):
        plt.style.use("dark_background")
        logger.info('Plotting Metrics...')
        plot.plot_metrics(self.trainer.train_metric, self.trainer.test_metric)
        plot.plot_lr_metric(self.trainer.lr_metric)

    def plot_gradcam(self, target_layers):
        plt.style.use("dark_background")
        logger.info('Plotting Grad-CAM...')

        # use the test images
        data, target = next(iter(self.trainer.test_loader))
        data, target = data.to(self.trainer.device), target.to(self.trainer.device)

        logger.info('Taking 5 samples')
        # get 5 images
        data = data[:5]
        target = target[:5]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean, std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)

    def print_summary(self, input_size):
        summary(self.trainer.model, input_size)

    def print_visualization(self, input_size):
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.trainer.device)
        out = self.trainer.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph

    def plot_misclassifications(self, target_layers):
        plt.style.use("dark_background")
        assert self.trainer.model is not None

        # get the data, target of only misclassified samples and do what you do for gradcam
        logger.info('getting misclassifications')

        misclassified = []
        misclassified_target = []
        misclassified_pred = []

        model, device = self.trainer.model, self.trainer.device

        # set the model to evaluation mode
        model.eval()

        # turn off gradients
        with torch.no_grad():
            for data, target in self.trainer.test_loader:
                # move them to the respective device
                data, target = data.to(device), target.to(device)
                # do inferencing
                output = model(data)
                # get the predicted output
                pred = output.argmax(dim=1, keepdim=True)

                # get the misclassified samples in this batch
                list_misclassified = (target.eq(pred.view_as(target)) == False)
                batch_misclassified = data[list_misclassified]
                batch_mis_pred = pred[list_misclassified]
                batch_mis_target = target[list_misclassified]

                misclassified.append(batch_misclassified)
                misclassified_pred.append(batch_mis_pred)
                misclassified_target.append(batch_mis_target)

        # group all the batches together
        misclassified = torch.cat(misclassified)
        misclassified_pred = torch.cat(misclassified_pred)
        misclassified_target = torch.cat(misclassified_target)

        logger.info('Taking 25 samples')
        # get 25 images
        data = misclassified[:25]
        target = misclassified_target[:25]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean, std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)
def train_fully_supervised(model, n_epochs, train_loader, val_loader, criterion, optimizer,
                           scheduler, auto_lr, save_folder, model_name, benchmark=False,
                           save_all_ep=True, save_best=False, device='cpu', num_classes=21):
    """
    A complete training loop for a fully supervised model.

    save_folder : path where the model, the loss curves, metrics, etc. are saved
    benchmark   : enable or disable torch.backends.cudnn.benchmark
    save_all_ep : if True, the model is saved at each epoch in save_folder
    scheduler   : if True, a lr scheduler is applied during training
    auto_lr     : automatic learning rate finder
    """
    torch.backends.cudnn.benchmark = benchmark

    if auto_lr:
        print('Auto finder for the learning rate')
        lr_finder = LRFinder(model, optimizer, criterion, memory_cache=False,
                             cache_dir='/tmp', device=device)
        lr_finder.range_test(train_loader, start_lr=10e-5, end_lr=10, num_iter=100)

    if scheduler:
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda x: (1 - x / (len(train_loader) * n_epochs)) ** 0.9)

    loss_test = []
    loss_train = []
    iou_train = []
    iou_test = []
    accuracy_train = []
    accuracy_test = []

    model.to(device)
    for ep in range(n_epochs):
        print("EPOCH", ep)
        model.train()
        state = step_train_supervised(model, train_loader=train_loader, criterion=criterion,
                                      optimizer=optimizer, device=device, num_classes=num_classes)
        iou = state.metrics['mean IoU']
        acc = state.metrics['accuracy']
        loss = state.metrics['CE Loss']
        loss_train.append(loss)
        iou_train.append(iou)
        accuracy_train.append(acc)
        print('TRAIN - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE', loss)
        if scheduler:
            lr_scheduler.step()

        # Eval model
        model.eval()
        with torch.no_grad():
            state = eval_model(model, val_loader, device=device, num_classes=num_classes)
            iou = state.metrics['mean IoU']
            acc = state.metrics['accuracy']
            loss = state.metrics['CE Loss']
            loss_test.append(loss)
            iou_test.append(iou)
            accuracy_test.append(acc)
            print('TEST - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE', loss)

        # Save model
        U.save_model(model, save_all_ep, save_best, save_folder, model_name,
                     ep=ep, iou=iou, iou_test=iou_test)

    U.save_curves(path=save_folder, loss_train=loss_train, iou_train=iou_train,
                  accuracy_train=accuracy_train, loss_test=loss_test, iou_test=iou_test,
                  accuracy_test=accuracy_test)
def Interpol(N, neurons, iter, fun=0, a=1, b=1):
    # Initiate the data and labels
    datasamp = datagen(N, neurons, fun, a, b, legendre)
    val_inputs, val_labels = datasamp.get_val()
    train_inputs, train_labels = datasamp.get_train()
    train_loader = DataLoader(dataset=datasamp, num_workers=0)

    class LockedCybenko(torch.nn.Module):
        # Cybenko with inner weight=1 and bias=-x[i]
        def __init__(self):
            super(LockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc1.weight.data = torch.ones(neurons).reshape(-1, 1)
            self.fc1.bias.data = -torch.linspace(-1, 1, neurons).reshape(1, -1).float()
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(False)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class SemilockedCybenko(torch.nn.Module):
        # Cybenko with inner weight=-1, one node less and free bias
        def __init__(self):
            super(SemilockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc1.weight.data = torch.ones(neurons - 1).reshape(-1, 1)
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class UnlockedCybenko(torch.nn.Module):
        # Cybenko with free inner weight or bias
        def __init__(self):
            super(UnlockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=True)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class Network(torch.nn.Module):
        # Arbitrary network
        def __init__(self):
            super(Network, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 2 * neurons, bias=True)
            self.fc3 = torch.nn.Linear(2 * neurons, 1, bias=True)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

    model = Network()
    criterion = torch.nn.MSELoss(reduction="sum")
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader, start_lr=0.001, end_lr=1.5, num_iter=1000)
    lr_finder.reset()  # to reset the model and optimizer to their initial state
    learning = lr_finder.history.get('lr')[np.argmin(lr_finder.history.get('loss'))]

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    EL2Val = []
    EL2train = []
    ELinf = []
    EL2 = []  # L2 integral between f and u_theta

    for epoch in range(iter):
        x = []
        ytrue = []
        ypred = []
        for i, (inputs, labels) in enumerate(train_loader):
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            x.append(inputs.data.numpy())
            ytrue.append(labels.data.numpy())
            ypred.append(y_pred.data.numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        def modelonx(x):
            return model(torch.tensor(x.reshape(-1, 1).tolist(),
                                      requires_grad=False)).data.numpy().reshape(1, -1)

        def L2error(x):
            return (modelonx(x) - np.array(truef(x, fun)).reshape(1, -1)) ** 2

        ELinf.append(max(abs(val_labels - model(val_inputs))))
        EL2.append(quadrature(L2error, -1, 1)[0][0])
        EL2Val.append(criterion(val_labels, model(val_inputs)))
        EL2train.append(criterion(train_labels, model(train_inputs)))
        print(f'Epoch: {epoch} L2 Error on training : {EL2train[-1]:.6e} | '
              f'L2 Error on validation : {EL2Val[-1]:.6e} | L2 on [-1,1] : {EL2[-1]:.6e}')

        if epoch % 5 == 0:
            fig, ax = pl.subplots(nrows=1, ncols=2)
            plotrange = np.linspace(a - 0.1, b + 0.1, 100)

            # Function and model plot
            ax[0].scatter(val_inputs.data.numpy(), val_labels.data.numpy(), c='red', s=15)
            ax[0].scatter(train_inputs, train_labels, s=15)
            ax[0].plot(plotrange,
                       model(torch.linspace(a - 0.1, b + 0.1, 100).reshape(-1, 1)).data.numpy(),
                       'r')
            # Code to plot the piecewise-linear function:
            # alpha = model.fc2.weight.data.numpy()[0]
            # X = -model.fc1.bias.data.numpy()[0]
            # ReLU = lambda t: np.where(t <= 0, 0, t)
            # ax[0].plot(xx, alpha[0]*ReLU(xx-X[0]) + alpha[1]*ReLU(xx-X[1]) + alpha[2]*ReLU(xx-X[2])
            #            + alpha[3]*ReLU(xx-X[3]) + alpha[4]*ReLU(xx-X[4]) + alpha[5]*ReLU(xx-X[5]))
            ax[0].plot(plotrange, truef(plotrange, fun), c='blue')
            # ax[0].plot(np.linspace(a-0.1, b+0.1, 100),
            #            np.polyval(np.polyfit(train_inputs.data.numpy().reshape(1, -1)[0],
            #                                  train_labels.data.numpy().reshape(1, -1)[0], 10),
            #                       np.linspace(a-0.1, b+0.1, 100)), c='green')
            if fun == 7:
                ax[0].plot(plotrange, maclaurin(plotrange, 50), c='green')
            ax[0].set_ylim(-0.1, 1.1)

            # Error plot
            ax[1].semilogy(range(epoch + 1), EL2Val, color='red')
            ax[1].semilogy(range(epoch + 1), EL2train, color='blue')
            # ax[1].semilogy(range(epoch+1), EL2, color='magenta')
            # ax[1].semilogy(range(epoch+1), ELinf, color='black')
            pl.show()

    return model
def run_lr_finder(
    args,
    model,
    train_loader,
    optimizer,
    criterion,
    val_loader=None,
    verbose=True,
    show=True,
    figpth=None,
    device=None,
    recommender="logmean14",
    fieldnames=None,
    outfile_path=None,
    hparams=None,
):
    if verbose:
        print("Running learning rate finder")

    if args.mix_pre_apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    lr_finder = LRFinder(model, optimizer, criterion, device=device)
    min_lr = 1e-7 if args.model == 'mlp' else 1e-10
    lr_finder.range_test(
        train_loader,
        val_loader=val_loader,
        start_lr=min_lr,
        end_lr=10,
        num_iter=200,
        diverge_th=3,
    )

    min_index = np.argmin(lr_finder.history["loss"])
    lr_at_min = lr_finder.history["lr"][min_index]
    min_loss = lr_finder.history["loss"][min_index]
    max_index = np.argmax(lr_finder.history["loss"][:min_index])
    lr_at_max = lr_finder.history["lr"][max_index]
    max_loss = lr_finder.history["loss"][max_index]

    # Outputting data to CSV at end of epoch
    if fieldnames and outfile_path:
        with open(outfile_path, mode='a') as out_file:
            writer = csv.DictWriter(out_file, fieldnames=fieldnames, lineterminator='\n')
            writer.writerow({
                'hp_idx': args.hp_idx,
                'hyperparam_set': hparams,
                'seed': args.seed,
                'lr': lr_finder.history["lr"],
                'loss': lr_finder.history["loss"]
            })

    if not show and not figpth:
        lr_steepest = None
    else:
        if verbose:
            print("Plotting learning rate finder results")
        hf = plt.figure(figsize=(15, 9))
        ax = plt.axes()
        _, lr_steepest = lr_finder.plot(skip_start=0, skip_end=3, log_lr=True, ax=ax)
        ylim = np.array([min_loss, max_loss])
        ylim += 0.1 * np.diff(ylim) * np.array([-1, 1])
        plt.ylim(ylim)
        plt.tick_params(reset=True, color=(0.2, 0.2, 0.2))
        plt.tick_params(labelsize=14)
        ax.minorticks_on()
        ax.tick_params(direction="out")

    init_loss = lr_finder.history["loss"][0]

    loss_12 = min_loss + 0.5 * (max_loss - min_loss)
    index_12 = max_index + np.argmin(
        np.abs(np.array(lr_finder.history["loss"][max_index:min_index]) - loss_12))
    lr_12 = lr_finder.history["lr"][index_12]

    loss_13 = min_loss + 1 / 3 * (max_loss - min_loss)
    index_13 = max_index + np.argmin(
        np.abs(np.array(lr_finder.history["loss"][max_index:min_index]) - loss_13))
    lr_13 = lr_finder.history["lr"][index_13]

    loss_23 = min_loss + 2 / 3 * (max_loss - min_loss)
    index_23 = max_index + np.argmin(
        np.abs(np.array(lr_finder.history["loss"][max_index:min_index]) - loss_23))
    lr_23 = lr_finder.history["lr"][index_23]

    loss_14 = min_loss + 1 / 4 * (max_loss - min_loss)
    index_14 = max_index + np.argmin(
        np.abs(np.array(lr_finder.history["loss"][max_index:min_index]) - loss_14))
    lr_14 = lr_finder.history["lr"][index_14]

    if recommender == "div10":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "min12":
        lr_recomend = np.min([lr_at_min / 10, lr_12])
    elif recommender == "min13":
        lr_recomend = np.min([lr_at_min / 10, lr_13])
    elif recommender == "min14":
        lr_recomend = np.min([lr_at_min / 10, lr_14])
    elif recommender == "logmean12":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "logmean13":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_13)]))
    elif recommender == "logmean14":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_14)]))

    if verbose:
        if lr_steepest is not None:
            print("LR at steepest grad: {:.3e} (red)".format(lr_steepest))
        print("LR at minimum loss : {:.3e}".format(lr_at_min))
        print("LR a tenth of min  : {:.3e} (orange)".format(lr_at_min / 10))
        print("LR when 1/4 up     : {:.3e} (yellow)".format(lr_14))
        print("LR when 1/3 up     : {:.3e} (blue)".format(lr_13))
        print("LR when 1/2 up     : {:.3e} (cyan)".format(lr_12))
        print("LR when 2/3 up     : {:.3e} (green)".format(lr_23))
        print("LR recommended     : {:.3e} (black)".format(lr_recomend))

    if show or figpth:
        ax.axvline(x=lr_steepest, color="red")
        ax.axvline(x=lr_at_min / 10, color="orange")
        ax.axvline(x=lr_14, color="yellow")
        ax.axvline(x=lr_13, color="blue")
        ax.axvline(x=lr_12, color="cyan")
        ax.axvline(x=lr_23, color="green")
        ax.axvline(x=lr_recomend, color="black", ls=":")

    if figpth:
        # Save figure
        os.makedirs(os.path.dirname(figpth), exist_ok=True)
        plt.savefig(figpth)
        if verbose:
            print("LR Finder results saved to {}".format(figpth))

    if show:
        plt.show()

    return lr_recomend
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

legend = []
fig = None
for wd in [0, .1, 1e-2, 1e-3, 1e-4]:
    for dp in [.1, 0.2, .3]:
        nerbert = BertForTokenClassificationCustom.from_pretrained(
            pretrained_model_name_or_path=MODEL_NAME,
            num_labels=len(labels2ind),
            hidden_dropout_prob=dp,
            attention_probs_dropout_prob=dp)

        # Prepare optimizer and schedule (linear warmup and decay)
        optimizer = get_optimizer_with_weight_decay(model=nerbert,
                                                    optimizer=OPTIMIZER,
                                                    learning_rate=LEARNING_RATE,
                                                    weight_decay=wd)

        lr_finder = LRFinder(nerbert, optimizer, nn.CrossEntropyLoss(), device='cuda')
        lr_finder.range_test(train_loader=dataloader_tr, end_lr=1, num_iter=100)
        fig = lr_finder.plot(ax=fig)
        legend.append(f"wd: {wd}")

fig.figure.legend(legend, loc='best')
fig.figure.tight_layout()
fig.figure.show()
fig.figure.savefig('lr_finder.png')
# PyTorch
import torchvision
from torchvision import transforms, datasets, models
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn

from torch_lr_finder import LRFinder

from utils.model import get_model, get_dataloaders

model = get_model()
dataloaders = get_dataloaders()

# we will be using cross entropy (softmax + negative log likelihood) as the loss function
criterion = nn.CrossEntropyLoss()

# we will be using the SGD optimizer as our optimizer
optimizer = optim.SGD(model.fc.parameters(), lr=1e-4)

lr_finder = LRFinder(model, optimizer, criterion, device='cuda')
lr_finder.range_test(dataloaders['train'], end_lr=1, num_iter=2500)
lr_finder.plot()
lr_finder.reset()
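# --- Illustrative addition, not part of the original snippet ---
# A common way to turn the range test above into a concrete number is to take the
# learning rate at which the recorded loss falls fastest (steepest negative gradient).
# A minimal sketch, assuming the `lr_finder` object above has already run
# `range_test` (its `history` dict survives `reset()`):
import numpy as np

lrs = lr_finder.history['lr']
losses = lr_finder.history['loss']
steepest_idx = np.gradient(np.array(losses)).argmin()  # index where the loss drops fastest
suggested_lr = lrs[steepest_idx]
print(f'Suggested starting lr: {suggested_lr:.2e}')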
def lr_range_test(
    model,
    dataset,
    loss_func,
    optimizer="AdamW",
    batch_size=32,
    num_iter=None,
    skip_start=10,
    skip_end=10,
    start_lr=1e-7,
    end_lr=10,
    plot=False,
):
    if num_iter is None:
        num_iter = 100 + int(np.log10(10 + len(dataset)) * 50)
    n_train = min(len(dataset), num_iter * batch_size)
    n_val = min(int(0.3 * len(dataset)), 2 * num_iter)
    log.debug("num_iter: {}, n_val: {}".format(num_iter, n_val))

    split_idx = int(0.7 * len(dataset))
    idx_train = np.random.choice(split_idx, size=n_train)
    idx_val = np.random.choice(np.arange(split_idx, len(dataset)), size=n_val)
    train_data = Subset(dataset, idx_train)
    val_data = Subset(dataset, idx_val)
    lrtest_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    lrtest_loader_val = DataLoader(val_data, batch_size=1024, shuffle=True)

    lrtest_optimizer = create_optimizer(optimizer, model.parameters(), start_lr)
    with utils.HiddenPrints():
        lr_finder = LRFinder(model, lrtest_optimizer, loss_func)
        lr_finder.range_test(
            lrtest_loader,
            val_loader=lrtest_loader_val,
            end_lr=end_lr,
            num_iter=num_iter,
            smooth_f=0.2,  # re-consider if lr-rate varies a lot
        )

    lrs = lr_finder.history["lr"]
    losses = lr_finder.history["loss"]
    if skip_end == 0:
        lrs = lrs[skip_start:]
        losses = losses[skip_start:]
    else:
        lrs = lrs[skip_start:-skip_end]
        losses = losses[skip_start:-skip_end]

    if plot:
        with utils.HiddenPrints():
            # to inspect the loss-learning rate graph
            ax, steepest_lr = lr_finder.plot()

    max_lr = None
    try:
        steep_idx = (np.gradient(np.array(losses))).argmin()
        min_idx = (np.array(losses)).argmin()
        steep_lr = lrs[steep_idx]
        min_lr = lrs[min_idx]
        max_lr = 10 ** ((np.log10(steep_lr) + 2.0 * np.log10(min_lr)) / 3.0)
        log.info("lr-range-test results: steep: {:.2E}, min: {:.2E}".format(steep_lr, min_lr))
    except ValueError:
        log.error("Failed to compute the gradients, there might not be enough points.")

    if max_lr is not None:
        log.info("learning rate range test selected lr: {:.2E}".format(max_lr))
    else:
        max_lr = 0.1
        log.error("lr range test failed. defaulting to lr: {}".format(max_lr))

    with utils.HiddenPrints():
        lr_finder.reset()  # to reset the model and optimizer to their initial state

    return max_lr
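# --- Illustrative addition, not part of the original function ---
# The selection rule above blends the steepest-descent lr and the minimum-loss lr
# in log space, weighting the minimum twice:
#     max_lr = 10 ** ((log10(steep_lr) + 2 * log10(min_lr)) / 3)
# A quick worked example with hypothetical values steep_lr=1e-3 and min_lr=1e-1:
import numpy as np

steep_lr, min_lr = 1e-3, 1e-1
max_lr = 10 ** ((np.log10(steep_lr) + 2.0 * np.log10(min_lr)) / 3.0)
print(max_lr)  # ~2.15e-2: one third of the way (in log space) from min_lr toward steep_lr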
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int((opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.dropout:
        n_classes = opt.n_classes
        if opt.pretrain_path is not None:
            n_classes = opt.n_finetune_classes
        model = replace_fc_layer(model=model,
                                 dropout_factor=opt.dropout_factor,
                                 n_classes=n_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)

    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    if opt.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy().to(opt.device)
    else:
        criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.lr_finder and not opt.no_train and not opt.no_val:
        print("Performing Learning Rate Search\nWith Leslie Smith's approach...")
        lr_finder = LRFinder(model, optimizer, criterion, device=opt.device)
        lr_finder.range_test(train_loader,
                             val_loader=val_loader,
                             start_lr=opt.learning_rate,
                             end_lr=opt.lrf_end_lr,
                             num_iter=opt.lrf_num_it,
                             step_mode=opt.lrf_mode)
        lr_finder.plot(log_lr=False)

        with (opt.result_path / 'lr_search.json').open('w') as results_file:
            json.dump(lr_finder.history, results_file, default=json_serial)

        lr_finder.reset()
        return

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            # current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer, opt.device,
                        train_logger, train_batch_logger, scheduler, opt.lr_scheduler,
                        tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer, scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion, opt.device,
                                      val_logger, tb_writer, opt.distributed)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
        elif not opt.no_train and opt.lr_scheduler == 'cosineannealing':
            scheduler.step()

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(opt.inference_subset)
        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=10, num_iter=1564, step_mode='exp')
lr_finder.plot()  # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state


def take_lr(x):
    # return the second element of an (lr, loss) pair so we can sort by it
    # print(x)
    return x[1]


a = zip(lr_finder.history['lr'], lr_finder.history['loss'])
best_lrloss = sorted(a, key=take_lr, reverse=False)[:50]

tup = zip(lr_finder.history['loss'], lr_finder.history['lr'])
sorted(tup, key=take_lr, reverse=False)[:50]


class shrink:
    def __init__(self, config):
        self.config = config

    def apply_augmentations(self):
        pass
valloader = dataloaders['val']


class CustomTrainIter(TrainDataLoaderIter):
    # My dataloader returns index, X, y
    def inputs_labels_from_batch(self, batch_data):
        return batch_data[1], batch_data[2]


class CustomValIter(ValDataLoaderIter):
    # My dataloader returns index, X, y
    def inputs_labels_from_batch(self, batch_data):
        return batch_data[1], batch_data[2]


custom_train_iter = CustomTrainIter(trainloader)
custom_val_iter = CustomValIter(valloader)
lr_finder.range_test(custom_train_iter, end_lr=10, num_iter=params.num_epochs, step_mode='exp')
# Val loader does not work
# lr_finder.range_test(custom_train_iter, val_loader=custom_val_iter, end_lr=10,
#                      num_iter=params.num_epochs, step_mode='exp')

mylrs = lr_finder.history['lr']
mylosses = lr_finder.history['loss']
min_grad_idx = np.gradient(np.array(mylosses)).argmin()
print(f'Suggested lr: {mylrs[min_grad_idx]}')

lr_metrics = {'lr': mylrs, 'loss': mylosses}
fname = os.path.join(args.model_dir, 'lr_metrics.json')
with open(fname, 'w') as f:
    f.write(json.dumps(lr_metrics))
'''

# Train
print(f'Fold {fold}')
print('-' * 10)
class Shrink:
    '''Shrinks the code and gets the output'''

    def __init__(self, in_config):
        self.config = in_config
        self.class_names = ('plane', 'car', 'bird', 'cat', 'deer',
                            'dog', 'frog', 'horse', 'ship', 'truck')
        self.mean = (0.491, 0.482, 0.446)
        self.std = (0.247, 0.243, 0.261)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = self.config['modelpath']['args']
        plt.style.use("dark_background")

    def seed_everything(self, seed: int) -> None:
        '''Seeds the code so that we get predictable outputs'''
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    def load_data(self, train_transforms, test_transforms, in_dir='./data'):
        '''Downloads the dataset and returns train and test loaders after applying the transformations'''
        trainset = datasets.CIFAR10(in_dir, train=True, download=True,
                                    transform=train_transforms())
        testset = datasets.CIFAR10(in_dir, train=False, download=True,
                                   transform=test_transforms())
        self.trainloader = torch.utils.data.DataLoader(trainset,
                                                       **self.config['train_data_loader']['args'])
        self.testloader = torch.utils.data.DataLoader(testset,
                                                      **self.config['test_data_loader']['args'])
        return self.trainloader, self.testloader

    def load_imagenet_data(self, train_transforms, test_transforms):
        '''Loads the imagenet dataset'''
        self.trainloader, self.testloader = get_imagenet_loader(
            train_transforms, test_transforms,
            self.config['train_data_loader']['args'],
            self.config['test_data_loader']['args'])

    def mean_std_dev(self):
        pass

    def show_data(self, mode='train', n=25):
        '''Plots the images on a grid plot to show the images passed via the dataloader'''
        figure = plt.figure(figsize=(20, 20))
        images = None
        labels = None
        if mode.lower() == 'train':
            images, labels = next(iter(self.trainloader))
            labels = np.array(labels)
        elif mode.lower() == 'test':
            images, labels = next(iter(self.testloader))
            labels = np.array(labels)
        images = self.denormalize(images)
        for index in range(1, n + 1):
            plt.subplot(5, 5, index)
            plt.axis('off')
            # Gets the first n images of the dataset
            plt.imshow(np.transpose(images[index], (1, 2, 0)))  # Plots the dataset
            # plt.title(self.class_names[labels[index]])

    def get_batched_data(self, in_data):
        '''Takes in the list data and outputs data, targets and preds'''
        in_imgs = []
        in_preds = []
        in_targets = []
        for index, i in enumerate(in_data):
            in_imgs.append(i[0])
            in_preds.append(i[1])
            in_targets.append(i[2])
        return torch.stack(in_imgs), torch.stack(in_preds), torch.stack(in_targets)

    def plot_gradcam(self, target_layers, images, pred, target, nimgs):
        '''Plot Grad-CAM'''
        index = 0
        in_data = None
        # model.load_state_dict(torch.load(self.model_path))
        images = images[index:nimgs].to(self.device)
        target = target[index:nimgs]
        pred = pred[index:nimgs]
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            images, target, self.model, self.device, target_layers)
        # get the denormalization function
        unorm = UnNormalize(mean=self.mean, std=self.std)
        plt_gradcam(gcam_layers=gcam_layers,
                    images=images,
                    target_labels=target,
                    predicted_labels=predicted_classes,
                    class_labels=self.class_names,
                    denormalize=unorm)

    def get_gradoutput(self, misclassified=False):
        '''Outputs a Grad-CAM visualization for a batch of images'''
        if misclassified:
            in_data = self.misclassified
        else:
            in_data = self.correct_classified
        target_layers = ["layer1", "layer2", "layer3", "layer4"]
        imgs, preds, targets = self.get_batched_data(in_data)
        self.plot_gradcam(target_layers, imgs, preds, targets, 25)

    def denormalize(self, tensor):
        '''Denormalize the data'''
        if not tensor.ndimension() == 4:
            raise TypeError('tensor should be 4D')
        mean = torch.FloatTensor(self.mean).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        std = torch.FloatTensor(self.std).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        return tensor.mul(std).add(mean)

    def get_model(self, train=True):
        self.model = get_attributes(model_arch, 'model', self.config).to(self.device)
        self.epochs = self.config['epochs']
        if train:
            # Trains the model and sends the output
            criterion = nn.CrossEntropyLoss(reduction='mean')
            optimizer = optim.SGD(self.model.parameters(), lr=0.01,
                                  momentum=0.9)  # **self.config['optimizer']['args'])
            max_at_epoch = 5
            self.best_lr = self.config['best_lr']
            pct_start_val = (max_at_epoch * len(self.trainloader)) / (self.epochs * len(self.trainloader))
            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.best_lr,
                total_steps=len(self.trainloader) * self.epochs,
                steps_per_epoch=len(self.trainloader),
                epochs=self.epochs,
                pct_start=pct_start_val,
                anneal_strategy='cos',
                div_factor=10,
                final_div_factor=10)
            self.train_acc = []
            self.train_losses = []
            self.test_acc = []
            self.test_losses = []
            self.lr_metric = []
            EPOCHS = self.epochs
            print(f'Starting Training for {EPOCHS} Epochs')
            for i in range(EPOCHS):
                lr_value = [group['lr'] for group in optimizer.param_groups][0]
                self.lr_metric.append(lr_value)
                print(f'EPOCHS : {i} Learning Rate: {lr_value}')
                model_training(self.model, self.device, self.trainloader, optimizer,
                               scheduler, self.train_acc, self.train_losses, criterion,
                               l1_loss=False)
                torch.save(self.model.state_dict(), self.model_path)
                self.misclassified, self.correct_classified = model_testing(
                    self.model, self.device, self.testloader, self.test_acc,
                    self.test_losses, criterion)
        else:
            return self.model

    def test_model(self):
        '''Loads the saved model and evaluates it on the test set'''
        test_losses = []
        test_acc = []
        model_path = 'latest_model.h5'
        self.model.load_state_dict(torch.load(model_path))
        self.misclassified, self.correct_classified = model_testing(
            self.model, self.device, self.testloader, test_acc, test_losses)
        return self.misclassified, self.correct_classified

    def findbestlr(self):
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.95,
                              weight_decay=0.0005)
        self.lr_finder = LRFinder(self.model, optimizer, criterion, device=self.device)
        self.lr_finder.range_test(self.trainloader, **self.config['range_test']['args'])
        self.lr_finder.plot()  # to inspect the loss-learning rate graph
        self.lr_finder.reset()  # to reset the model and optimizer to their initial state
        return self.lr_finder

    def model_metrics(self):
        fig, axs = plt.subplots(2, 2, figsize=(15, 10))
        axs[0, 0].plot(self.train_losses)
        axs[0, 0].set_title('Train_Losses')
        axs[0, 1].plot(self.train_acc)
        axs[0, 1].set_title('Training_Accuracy')
        axs[1, 0].plot(self.test_losses)
        axs[1, 0].set_title('Test_Losses')
        axs[1, 1].plot(self.test_acc)
        axs[1, 1].set_title('Test_Accuracy')

    def print_visualization(self, input_size):
        '''Prints a visualization graph for torch models'''
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.device)
        out = self.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph
criterion = metrics.MyLossFunc()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser, 'min', patience=5, verbose=True)

# find learning rate
if find_LR and not torch.cuda.is_available():
    if learning_rate > 0.0001:
        print("Selected initial learning rate too high.\nLearning rate changed to 0.0001")
        optimiser = torch.optim.Adam(model.parameters(), lr=0.0001)
    lr_finder = LRFinder(model, optimiser, criterion, device=device)
    lr_finder.range_test(train_loader, end_lr=200, num_iter=200)
    lr_finder.plot()  # to inspect the loss-learning rate graph
    lr_finder.reset()  # to reset the model and optimizer to their initial state
    subprocess.Popen(["kill", "-9", f"{TB_process.pid}"])
    sys.exit("Learning rate plot finished")

# computational graph
if print_comp_graph:
    for sample in train_loader:
        writer.add_graph(model, sample[0].float())
    writer.close()

# training loop
running_train_loss = 0.
running_valid_loss = 0.
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

#%%
torch.manual_seed(42)
net = MyNet(50, 40, 20, 3, 5, 0.5)
criterion = nn.BCELoss()
optim = torch.optim.Adam(net.parameters(), lr=10**-2)
# Explicitly init weights!
net.apply(init_weights)

#%%
lrf = LRFinder(net, optim, criterion)
lrf.range_test(train_loader, start_lr=0.0001, end_lr=1)
lrf.plot()
lrf.reset()

#%%
# seemingly best: Adam + cyclical LR + exp_range decay of learning rate
N_EPOCHS = 30
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optim,
    10**-4,
    10**-2,
    mode='exp_range',
    step_size_up=(xtrain.size(0) / BATCHSIZE) * 2,
    cycle_momentum=False)

history = {'train_loss': [], 'val_loss': []}
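# --- Illustrative addition, not part of the original snippet ---
# The cyclical bounds above are hard-coded to 1e-4..1e-2. One could instead derive
# them from the range test, e.g. take the lr at the steepest loss drop as the upper
# bound and a tenth of it as the lower bound. A sketch, assuming `lrf`, `optim`,
# `xtrain` and `BATCHSIZE` from the cells above; the gamma value is hypothetical:
import numpy as np

suggested = lrf.history['lr'][np.gradient(np.array(lrf.history['loss'])).argmin()]
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optim,
    base_lr=suggested / 10,  # lower bound of the cycle
    max_lr=suggested,        # upper bound of the cycle
    mode='exp_range',
    gamma=0.9999,            # per-iteration decay factor used by 'exp_range' mode
    step_size_up=(xtrain.size(0) / BATCHSIZE) * 2,
    cycle_momentum=False)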
elif model_name == 'vgg':
    trans.insert(0, torchvision.transforms.Resize((244, 244)))
    trans.insert(0, fancy_pca())
    trans.insert(0, torchvision.transforms.RandomRotation(180))
    trans.insert(0, torchvision.transforms.RandomHorizontalFlip(p=0.5))

train_dataset = torchvision.datasets.ImageFolder(
    root=data_path,
    transform=torchvision.transforms.Compose(trans)
)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=256,
    num_workers=0,
    shuffle=True
)
return train_loader


trainloader = train_loader(model_name='inception')
model = Inception.inception_v3(img_size=256)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9,
#                             weight_decay=5e-3, nesterov=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-7, weight_decay=1e-4)

lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(trainloader, end_lr=1, num_iter=100)
lr_finder.plot()  # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state
transforms = utils.build_transforms(second_stage=True)
loaders = utils.build_loaders(data_dir, transforms, batch_sizes, num_workers, second_stage=True)

model = utils.build_model(backbone, second_stage=True, num_classes=num_classes,
                          ckpt_pretrained=ckpt_pretrained).cuda()

optim = utils.build_optim(model, optimizer_params, scheduler_params, criterion_params)
criterion, optimizer, scheduler = (
    optim["criterion"],
    optim["optimizer"],
    optim["scheduler"],
)

lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(loaders["train_features_loader"], end_lr=1, num_iter=300)

fig, ax = plt.subplots()
lr_finder.plot(ax=ax)
fig.savefig(
    "lr_finder_plots/supcon_{}_{}_bs_{}_stage_{}_lr_finder.png".format(
        optimizer_params["name"], data_dir.split("/")[-1],
        batch_sizes["train_batch_size"], 'second'))
def train(model, device, train_loader, test_loader, EPOCH, FACTOR, PATIENCE,
          MOMENTUM, LEARNING_RATE):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                          nesterov=True, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                              max_lr=0.008,
                                              pct_start=5 / 24,
                                              epochs=24,
                                              steps_per_epoch=len(train_loader))
    train_losses = []
    train_acc = []
    test_losses = []
    test_acc = []

    for epoch in range(EPOCH):
        correct = 0
        processed = 0
        pbar = tqdm(train_loader)
        model.train()
        for batch_idx, (data, target) in enumerate(pbar):
            # get samples
            data, target = data.to(device), target.to(device)

            # Init
            optimizer.zero_grad()
            # In PyTorch, we need to set the gradients to zero before starting
            # backpropagation because PyTorch accumulates gradients on subsequent
            # backward passes. Because of this, at the start of the training loop
            # we zero out the gradients so the parameter update is done correctly.

            # Predict
            y_pred = model(data)

            # Calculate loss
            # regularization_loss = 0
            # for param in model.parameters():
            #     regularization_loss += torch.sum(abs(param))
            # classify_loss = criterion(y_pred, target)
            loss = F.nll_loss(y_pred, target)
            # loss = classify_loss + LAMDA * regularization_loss
            # train_losses.append(loss)

            # Backpropagation
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Update pbar-tqdm
            pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            processed += len(data)

            pbar.set_description(
                desc=f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
            # train_acc.append(100*correct/processed)

        train_losses.append(loss.item())
        train_acc.append(100 * correct / processed)

        img, true_wrong, pred_wrong, tst_acc, tst_loss = test(model, device, test_loader)
        test_losses.append(tst_loss)
        test_acc.append(tst_acc)

    lr_finder = LRFinder(model, optimizer, criterion, device)
    lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
    lr_finder.plot()  # to inspect the loss-learning rate graph
    # lr_finder.reset()

    return (train_losses, train_acc, model, img, true_wrong, pred_wrong,
            test_acc, test_losses, lr_finder)