Example #1
class Runner:
    def __init__(self, config):
        self.config = config
        self.best_lr = None  # set by find_lr(); checked in train()
        self.tsai_mode = False  # set by setup_train(); read by find_lr()

    def find_lr(self):
        from torch_lr_finder import LRFinder

        logger.info('finding the best learning rate')

        cfg = self.config

        if self.tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg,
                                 param_groups)

        criterion = getattr(module_loss, cfg['criterion'])()

        # use the device selected by setup_device above
        self.lr_finder = LRFinder(model, optimizer, criterion, device=device)

        lr_finder_epochs = cfg['lr_finder']['epochs']
        logger.info(f'Running LR-Test for {lr_finder_epochs} epochs')
        # my method
        self.lr_finder.range_test(self.trainer.train_loader,
                                  start_lr=1e-3,
                                  end_lr=1,
                                  num_iter=len(self.trainer.test_loader) *
                                  lr_finder_epochs,
                                  step_mode='linear')

        # leslie smith method
        # self.lr_finder.range_test(self.trainer.train_loader, val_loader = self.trainer.test_loader,
        # end_lr=1, num_iter=len(self.trainer.train_loader), step_mode='linear')

        # fast ai method
        # self.lr_finder.range_test(
        #     self.trainer.train_loader, end_lr=100, num_iter=len(self.trainer.train_loader))

        self.best_lr = self.lr_finder.history['lr'][
            self.lr_finder.history['loss'].index(self.lr_finder.best_loss)]

        sorted_lrs = [
            x for _, x in sorted(
                zip(self.lr_finder.history['loss'],
                    self.lr_finder.history['lr']))
        ]

        logger.info(f'sorted lrs : {sorted_lrs[:10]}')

        logger.info(f'found the best lr : {self.best_lr}')

        logger.info('plotting lr_finder')

        plt.style.use("dark_background")
        self.lr_finder.plot()

        # reset the model and the optimizer
        self.lr_finder.reset()
        plt.show()

        del model, optimizer, criterion

    def train(self, use_bestlr=False, lr_value=None):

        # if the best lr was found use that value instead
        if use_bestlr and self.best_lr is not None:
            logger.info(f'using max_lr : {self.best_lr}')
            logger.info(f'using min_lr : {self.best_lr/30}')
            logger.info(f'using initial_lr : {self.best_lr/20}')
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = self.best_lr / 10
                param_group['max_lr'] = self.best_lr
                param_group['min_lr'] = self.best_lr / 30
                param_group['initial_lr'] = self.best_lr / 20

        if not use_bestlr and (lr_value is not None):
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = lr_value

        self.trainer.train()
        logger.info('Finished!')

    def setup_train(self, tsai_mode=False):
        cfg = self.config

        self.tsai_mode = tsai_mode

        if tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        logger.info('Training Config')

        # display the config
        for line in pprint.pformat(cfg).split('\n'):
            logger.info(line)

        # to get consistent results, seed everything
        seed_everything(cfg['seed'])

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg,
                                 param_groups)

        self.transforms = get_instance(module_aug, 'augmentation', cfg)

        # get the train and test loaders
        self.data_loader = get_instance(module_data, 'data_loader', cfg,
                                        self.transforms)
        train_loader, test_loader = self.data_loader.get_loaders()

        logger.info('Getting loss function handle')
        criterion = getattr(module_loss, cfg['criterion'])()

        batch_scheduler = False
        if cfg['lr_scheduler']['type'] == 'OneCycleLR':
            logger.info('Building: torch.optim.lr_scheduler.OneCycleLR')
            max_at_epoch = cfg['lr_scheduler']['max_lr_at_epoch']
            pct_start = (max_at_epoch) / \
                cfg['training']['epochs'] if max_at_epoch else 0.8
            sch_cfg = cfg['lr_scheduler']['args']
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=sch_cfg['max_lr'],
                steps_per_epoch=len(train_loader),
                pct_start=pct_start,
                epochs=cfg['training']['epochs'])
            batch_scheduler = True
        else:
            lr_scheduler = get_instance(module_scheduler, 'lr_scheduler', cfg,
                                        optimizer)

        logger.info('Initializing trainer')
        self.trainer = Trainer(model,
                               criterion,
                               optimizer,
                               cfg,
                               device,
                               train_loader,
                               test_loader,
                               lr_scheduler=lr_scheduler,
                               batch_scheduler=batch_scheduler)

    def plot_metrics(self):
        plt.style.use("dark_background")
        logger.info('Plotting Metrics...')
        plot.plot_metrics(self.trainer.train_metric, self.trainer.test_metric)
        plot.plot_lr_metric(self.trainer.lr_metric)

    def plot_gradcam(self, target_layers):
        plt.style.use("dark_background")
        logger.info('Plotting Grad-CAM...')

        # use the test images
        data, target = next(iter(self.trainer.test_loader))
        data, target = data.to(self.trainer.device), target.to(
            self.trainer.device)

        logger.info('Taking 5 samples')
        # get 5 images
        data = data[:5]
        target = target[:5]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device,
            target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)

    def print_summary(self, input_size):
        summary(self.trainer.model, input_size)

    def print_visualization(self, input_size):
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.trainer.device)
        out = self.trainer.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph

    def plot_misclassifications(self, target_layers):
        plt.style.use("dark_background")
        assert (self.trainer.model is not None)
        # get the data and targets of only the misclassified samples, then follow the same Grad-CAM flow

        logger.info('getting misclassifications')

        misclassified = []
        misclassified_target = []
        misclassified_pred = []

        model, device = self.trainer.model, self.trainer.device

        # set the model to evaluation mode
        model.eval()

        # turn off gradients
        with torch.no_grad():
            for data, target in self.trainer.test_loader:
                # move them to respective device
                data, target = data.to(device), target.to(device)

                # do inferencing
                output = model(data)

                # get the predicted output
                pred = output.argmax(dim=1, keepdim=True)

                # get the misclassified samples in this batch
                list_misclassified = ~target.eq(pred.view_as(target))
                batch_misclassified = data[list_misclassified]
                batch_mis_pred = pred[list_misclassified]
                batch_mis_target = target[list_misclassified]

                misclassified.append(batch_misclassified)
                misclassified_pred.append(batch_mis_pred)
                misclassified_target.append(batch_mis_target)

        # group all the batches together
        misclassified = torch.cat(misclassified)
        misclassified_pred = torch.cat(misclassified_pred)
        misclassified_target = torch.cat(misclassified_target)

        logger.info('Taking 25 samples')
        # get 25 images
        data = misclassified[:25]
        target = misclassified_target[:25]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device,
            target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)
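
Example #1 relies on a config-driven `get_instance` helper that is not shown above. A minimal sketch of one plausible implementation, inferred from the call sites (`get_instance(module_arch, 'arch', cfg)` and `get_instance(module_optimizer, 'optimizer', cfg, param_groups)`); the project's real helper may differ:

def get_instance(module, name, config, *args):
    # Hypothetical helper: look up the class named in config[name]['type']
    # inside `module` and build it with the extra positional args plus the
    # keyword args stored under config[name]['args'].
    ctor = getattr(module, config[name]['type'])
    return ctor(*args, **config[name].get('args', {}))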
Example #2
def train_fully_supervised(model,n_epochs,train_loader,val_loader,criterion,optimizer,scheduler,auto_lr,\
        save_folder,model_name,benchmark=False,save_all_ep=True, save_best=False, device='cpu',num_classes=21):
    """
        A complete training of fully supervised model. 
        save_folder : Path to save the model, the courb of losses,metric...
        benchmark : enable or disable backends.cudnn 
        save_all_ep : if True, the model is saved at each epoch in save_folder
        scheduler : if True, the model will apply a lr scheduler during training
        auto_lr : Auto lr finder 
    """
    torch.backends.cudnn.benchmark = benchmark

    if auto_lr:
        print('Auto finder for the Learning rate')
        lr_finder = LRFinder(model,
                             optimizer,
                             criterion,
                             memory_cache=False,
                             cache_dir='/tmp',
                             device=device)
        lr_finder.range_test(train_loader,
                             start_lr=10e-5,
                             end_lr=10,
                             num_iter=100)

    if scheduler:
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda x: (1 - x / (len(train_loader) * n_epochs))**0.9)

    loss_test = []
    loss_train = []
    iou_train = []
    iou_test = []
    accuracy_train = []
    accuracy_test = []
    model.to(device)
    for ep in range(n_epochs):
        print("EPOCH", ep)
        model.train()
        state = step_train_supervised(model,train_loader=train_loader,criterion=criterion,\
            optimizer=optimizer,device=device,num_classes=num_classes)
        iou = state.metrics['mean IoU']
        acc = state.metrics['accuracy']
        loss = state.metrics['CE Loss']
        loss_train.append(loss)
        iou_train.append(iou)
        accuracy_train.append(acc)
        print('TRAIN - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
              loss)
        if scheduler:
            lr_scheduler.step()
        #Eval model
        model.eval()
        with torch.no_grad():
            state = eval_model(model,
                               val_loader,
                               device=device,
                               num_classes=num_classes)
            iou = state.metrics['mean IoU']
            acc = state.metrics['accuracy']
            loss = state.metrics['CE Loss']
            loss_test.append(loss)
            iou_test.append(iou)
            accuracy_test.append(acc)
            print('TEST - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
                  loss)

        ## Save model
        U.save_model(model,
                     save_all_ep,
                     save_best,
                     save_folder,
                     model_name,
                     ep=ep,
                     iou=iou,
                     iou_test=iou_test)

    U.save_curves(path=save_folder,loss_train=loss_train,iou_train=iou_train,accuracy_train=accuracy_train\
                                ,loss_test=loss_test,iou_test=iou_test,accuracy_test=accuracy_test)
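
Note that the auto_lr branch in Example #2 runs the range test but never resets the finder or feeds the result back into the optimizer. A hedged sketch of how that branch could be completed, following the pattern used in the other examples here (picking the lr at the lowest recorded loss is an assumption, and it presumes numpy is imported as np):

        # after lr_finder.range_test(...)
        best_lr = lr_finder.history['lr'][np.argmin(lr_finder.history['loss'])]
        lr_finder.reset()  # restore the model and optimizer to their pre-test state
        for param_group in optimizer.param_groups:
            param_group['lr'] = best_lr
        print('Auto LR finder selected lr = {:.2e}'.format(best_lr))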
Example #3
def Interpol(N, neurons, iter, fun=0, a=1, b=1):

    datasamp = datagen(N, neurons, fun, a, b, legendre)
    val_inputs, val_labels = datasamp.get_val()
    train_inputs, train_labels = datasamp.get_train()
    train_loader = DataLoader(dataset=datasamp,
                              num_workers=0)  # Initiate the data and labels

    class LockedCybenko(torch.nn.Module):  # Cybenko with inner weight=1 and bias=-x[i]
        def __init__(self):
            super(LockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc1.weight.data = torch.ones(neurons).reshape(-1, 1)
            self.fc1.bias.data = -torch.linspace(-1, 1, neurons).reshape(
                1, -1).float()
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(False)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class SemilockedCybenko(torch.nn.Module):  # Cybenko with inner weight=-1, one node less and free bias
        def __init__(self):
            super(SemilockedCybenko, self).__init__()
            # one node fewer in the hidden layer, so the sizes must match fc2's input
            self.fc1 = torch.nn.Linear(1, neurons - 1, bias=True)
            self.fc1.weight.data = torch.ones(neurons - 1).reshape(-1, 1)
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(True)
            self.fc2 = torch.nn.Linear(neurons - 1, 1, bias=False)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class UnlockedCybenko(torch.nn.Module):  # Cybenko with free inner weight and bias
        def __init__(self):
            super(UnlockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=True)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class Network(torch.nn.Module):  # Arbitrary network
        def __init__(self):
            super(Network, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 2 * neurons, bias=True)
            self.fc3 = torch.nn.Linear(2 * neurons, 1, bias=True)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

    model = Network()
    criterion = torch.nn.MSELoss(reduction="sum")
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader,
                         start_lr=0.001,
                         end_lr=1.5,
                         num_iter=1000)
    lr_finder.reset()  # reset the model and optimizer to their initial state
    learning = lr_finder.history.get('lr')[np.argmin(
        lr_finder.history.get('loss'))]

    # re-create the optimizer with the learning rate suggested by the range test
    optimizer = torch.optim.SGD(model.parameters(), lr=learning)

    EL2Val = []
    EL2train = []
    ELinf = []
    EL2 = []  # L2 integral between f and u_teta

    for epoch in range(iter):
        x = []
        ytrue = []
        ypred = []
        for i, (inputs, labels) in enumerate(train_loader):
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            x.append(inputs.data.numpy())
            ytrue.append(labels.data.numpy())
            ypred.append(y_pred.data.numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        def modelonx(x):
            return model(
                torch.tensor(x.reshape(-1, 1).tolist(),
                             requires_grad=False)).data.numpy().reshape(1, -1)

        def L2error(x):
            return (modelonx(x) - np.array(truef(x, fun)).reshape(1, -1))**2

        ELinf.append(max(abs(val_labels - model(val_inputs))))
        EL2.append(quadrature(L2error, -1, 1)[0][0])
        EL2Val.append(criterion(val_labels, model(val_inputs)))
        EL2train.append((criterion(train_labels, model(train_inputs))))
        print(
            f'Epoch: {epoch} L2 Error on training : {EL2train[-1]:.6e} | L2 Error on validation : {EL2Val[-1]:.6e} | L2 on [-1,1] : {EL2[-1]:.6e}'
        )

        if epoch % 5 == 0:

            fig, ax = pl.subplots(nrows=1, ncols=2)
            plotrange = np.linspace(a - 0.1, b + 0.1, 100)
            """ Function and Model Plot"""
            ax[0].scatter(val_inputs.data.numpy(),
                          val_labels.data.numpy(),
                          c='red',
                          s=15)
            ax[0].scatter(train_inputs, train_labels, s=15)
            ax[0].plot(
                plotrange,
                model(torch.linspace(a - 0.1, b + 0.1,
                                     100).reshape(-1, 1)).data.numpy(), 'r')
            """ # Code qui permet d'afficher la fonction linéaire par morceau
            alpha = model.fc2.weight.data.numpy()[0]
            X = -model.fc1.bias.data.numpy()[0]
            ReLU = lambda t : np.where(t<=0,0,t)
            ax[0].plot(xx,alpha[0]*ReLU(xx-X[0])+alpha[1]*ReLU(xx-X[1])+alpha[2]*ReLU(xx-X[2])+alpha[3]*ReLU(xx-X[3])+alpha[4]*ReLU(xx-X[4])+alpha[5]*ReLU(xx-X[5]))
            """

            ax[0].plot(plotrange, truef(plotrange, fun), c='blue')
            #ax[0].plot(np.linspace(a-0.1,b+0.1,100),np.polyval(np.polyfit(train_inputs.data.numpy().reshape(1,-1)[0],train_labels.data.numpy().reshape(1,-1)[0],10),np.linspace(a-0.1,b+0.1,100)),c='green')
            if fun == 7:
                ax[0].plot(plotrange, maclaurin(plotrange, 50), c='green')
                ax[0].set_ylim(-0.1, 1.1)
            """ Error Plot """
            ax[1].semilogy(range(epoch + 1), EL2Val, color='red')
            ax[1].semilogy(range(epoch + 1), EL2train, color='blue')
            #ax[1].semilogy(range(epoch+1),EL2,color='magenta')
            #ax[1].semilogy(range(epoch+1),ELinf,color='black')
            pl.show()

    return model
Example #4
def run_lr_finder(
    args,
    model,
    train_loader,
    optimizer,
    criterion,
    val_loader=None,
    verbose=True,
    show=True,
    figpth=None,
    device=None,
    recommender="logmean14",
    fieldnames=None,
    outfile_path=None,
    hparams=None,
):
    if verbose:
        print("Running learning rate finder")
    if args.mix_pre_apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")
    lr_finder = LRFinder(model, optimizer, criterion, device=device)
    min_lr = 1e-7 if args.model == 'mlp' else 1e-10
    lr_finder.range_test(
        train_loader,
        val_loader=val_loader,
        start_lr=min_lr,
        end_lr=10,
        num_iter=200,
        diverge_th=3,
    )
    min_index = np.argmin(lr_finder.history["loss"])
    lr_at_min = lr_finder.history["lr"][min_index]
    min_loss = lr_finder.history["loss"][min_index]
    max_index = np.argmax(lr_finder.history["loss"][:min_index])
    lr_at_max = lr_finder.history["lr"][max_index]
    max_loss = lr_finder.history["loss"][max_index]

    # Outputting data to CSV at end of epoch
    if fieldnames and outfile_path:
        with open(outfile_path, mode='a') as out_file:
            writer = csv.DictWriter(out_file,
                                    fieldnames=fieldnames,
                                    lineterminator='\n')
            writer.writerow({
                'hp_idx': args.hp_idx,
                'hyperparam_set': hparams,
                'seed': args.seed,
                'lr': lr_finder.history["lr"],
                'loss': lr_finder.history["loss"]
            })

    if not show and not figpth:
        lr_steepest = None
    else:
        if verbose:
            print("Plotting learning rate finder results")
        hf = plt.figure(figsize=(15, 9))
        ax = plt.axes()
        _, lr_steepest = lr_finder.plot(skip_start=0,
                                        skip_end=3,
                                        log_lr=True,
                                        ax=ax)
        ylim = np.array([min_loss, max_loss])
        ylim += 0.1 * np.diff(ylim) * np.array([-1, 1])
        plt.ylim(ylim)
        plt.tick_params(reset=True, color=(0.2, 0.2, 0.2))
        plt.tick_params(labelsize=14)
        ax.minorticks_on()
        ax.tick_params(direction="out")
    init_loss = lr_finder.history["loss"][0]
    loss_12 = min_loss + 0.5 * (max_loss - min_loss)
    index_12 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_12))
    lr_12 = lr_finder.history["lr"][index_12]
    loss_13 = min_loss + 1 / 3 * (max_loss - min_loss)
    index_13 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_13))
    lr_13 = lr_finder.history["lr"][index_13]
    loss_23 = min_loss + 2 / 3 * (max_loss - min_loss)
    index_23 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_23))
    lr_23 = lr_finder.history["lr"][index_23]
    loss_14 = min_loss + 1 / 4 * (max_loss - min_loss)
    index_14 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_14))
    lr_14 = lr_finder.history["lr"][index_14]
    if recommender == "div10":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "min12":
        lr_recomend = np.min([lr_at_min / 10, lr_12])
    elif recommender == "min13":
        lr_recomend = np.min([lr_at_min / 10, lr_13])
    elif recommender == "min14":
        lr_recomend = np.min([lr_at_min / 10, lr_14])
    elif recommender == "logmean12":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "logmean13":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_13)]))
    elif recommender == "logmean14":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_14)]))
    if verbose:
        if lr_steepest is not None:
            print("LR at steepest grad: {:.3e}  (red)".format(lr_steepest))
        print("LR at minimum loss : {:.3e}".format(lr_at_min))
        print("LR a tenth of min  : {:.3e}  (orange)".format(lr_at_min / 10))
        print("LR when 1/4 up     : {:.3e}  (yellow)".format(lr_14))
        print("LR when 1/3 up     : {:.3e}  (blue)".format(lr_13))
        print("LR when 1/2 up     : {:.3e}  (cyan)".format(lr_12))
        print("LR when 2/3 up     : {:.3e}  (green)".format(lr_23))
        print("LR recommended     : {:.3e}  (black)".format(lr_recomend))
    if show or figpth:
        ax.axvline(x=lr_steepest, color="red")
        ax.axvline(x=lr_at_min / 10, color="orange")
        ax.axvline(x=lr_14, color="yellow")
        ax.axvline(x=lr_13, color="blue")
        ax.axvline(x=lr_12, color="cyan")
        ax.axvline(x=lr_23, color="green")
        ax.axvline(x=lr_recomend, color="black", ls=":")
    if figpth:
        # Save figure
        os.makedirs(os.path.dirname(figpth), exist_ok=True)
        plt.savefig(figpth)
        if verbose:
            print("LR Finder results saved to {}".format(figpth))
    if show:
        plt.show()
    return lr_recomend
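
For reference, the logmean recommenders above are geometric means of the two candidate rates; a quick standalone check (plain NumPy, arbitrary values):

import numpy as np

a, b = 3e-4, 2e-3  # any two candidate learning rates
logmean = np.exp(np.mean([np.log(a), np.log(b)]))
assert np.isclose(logmean, np.sqrt(a * b))  # geometric mean of a and b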
Example #5
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

legend = []
fig = None

for wd in [0, .1, 1e-2, 1e-3, 1e-4]:
    for dp in [.1, 0.2, .3]:
        nerbert = BertForTokenClassificationCustom.from_pretrained(pretrained_model_name_or_path=MODEL_NAME,
                                                                   num_labels=len(labels2ind),
                                                                   hidden_dropout_prob=dp,
                                                                   attention_probs_dropout_prob=dp)

        # Prepare optimizer with the configured weight decay
        optimizer = get_optimizer_with_weight_decay(model=nerbert,
                                                    optimizer=OPTIMIZER,
                                                    learning_rate=LEARNING_RATE,
                                                    weight_decay=wd)

        lr_finder = LRFinder(nerbert, optimizer, nn.CrossEntropyLoss(), device='cuda')
        lr_finder.range_test(train_loader=dataloader_tr, end_lr=1, num_iter=100)
        fig = lr_finder.plot(ax=fig)
        legend.append(f"wd: {wd}")

fig.figure.legend(legend, loc='best')
fig.figure.tight_layout()
fig.figure.show()
fig.figure.savefig('lr_finder.png')
Example #6
# PyTorch
import torchvision
from torchvision import transforms, datasets, models
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn

from torch_lr_finder import LRFinder
from utils.model import get_model, get_dataloaders

model = get_model()
dataloaders = get_dataloaders()

# we will be using cross-entropy as the loss function
criterion = nn.CrossEntropyLoss()
# we will be using the SGD optimizer as our optimizer
optimizer = optim.SGD(model.fc.parameters(), lr=1e-4)
lr_finder = LRFinder(model, optimizer, criterion, device='cuda')
lr_finder.range_test(dataloaders['train'], end_lr=1, num_iter=2500)
lr_finder.plot()
lr_finder.reset()
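
Example #6 only plots the curve. If a numeric suggestion is also wanted, the steepest-gradient heuristic used elsewhere in these examples (see Examples #7 and #9) can be applied to the recorded history; a small sketch, assuming the `lr_finder` object from the snippet above and NumPy:

import numpy as np

lrs = lr_finder.history['lr']
losses = lr_finder.history['loss']
steepest_idx = np.gradient(np.array(losses)).argmin()  # index of the steepest loss drop
print('suggested lr: {:.2e}'.format(lrs[steepest_idx]))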
Example #7
def lr_range_test(
    model,
    dataset,
    loss_func,
    optimizer="AdamW",
    batch_size=32,
    num_iter=None,
    skip_start=10,
    skip_end=10,
    start_lr=1e-7,
    end_lr=10,
    plot=False,
):
    if num_iter is None:
        num_iter = 100 + int(np.log10(10 + len(dataset)) * 50)
    n_train = min(len(dataset), num_iter * batch_size)
    n_val = min(int(0.3 * len(dataset)), 2 * num_iter)
    log.debug("num_iter: {}, n_val: {}".format(num_iter, n_val))
    split_idx = int(0.7 * len(dataset))
    idx_train = np.random.choice(split_idx, size=n_train)
    idx_val = np.random.choice(np.arange(split_idx, len(dataset)), size=n_val)
    train_data = Subset(dataset, idx_train)
    val_data = Subset(dataset, idx_val)
    lrtest_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    lrtest_loader_val = DataLoader(val_data, batch_size=1024, shuffle=True)
    lrtest_optimizer = create_optimizer(optimizer, model.parameters(),
                                        start_lr)
    with utils.HiddenPrints():
        lr_finder = LRFinder(model, lrtest_optimizer, loss_func)
        lr_finder.range_test(
            lrtest_loader,
            val_loader=lrtest_loader_val,
            end_lr=end_lr,
            num_iter=num_iter,
            smooth_f=0.2,  # re-consider if lr-rate varies a lot
        )
        lrs = lr_finder.history["lr"]
        losses = lr_finder.history["loss"]
    if skip_end == 0:
        lrs = lrs[skip_start:]
        losses = losses[skip_start:]
    else:
        lrs = lrs[skip_start:-skip_end]
        losses = losses[skip_start:-skip_end]
    if plot:
        with utils.HiddenPrints():
            ax, steepest_lr = lr_finder.plot()  # to inspect the loss-learning rate graph
    max_lr = None
    try:
        steep_idx = (np.gradient(np.array(losses))).argmin()
        min_idx = (np.array(losses)).argmin()
        steep_lr = lrs[steep_idx]
        min_lr = lrs[min_idx]
        max_lr = 10**((np.log10(steep_lr) + 2.0 * np.log10(min_lr)) / 3.0)
        log.info("lr-range-test results: steep: {:.2E}, min: {:.2E}".format(
            steep_lr, min_lr))
    except ValueError:
        log.error(
            "Failed to compute the gradients, there might not be enough points."
        )
    if max_lr is not None:
        log.info("learning rate range test selected lr: {:.2E}".format(max_lr))
    else:
        max_lr = 0.1
        log.error("lr range test failed. defaulting to lr: {}".format(max_lr))
    with utils.HiddenPrints():
        lr_finder.reset()  # to reset the model and optimizer to their initial state
    return max_lr
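
The max_lr formula in Example #7 averages the steepest-descent and minimum-loss rates in log space with weights 1 and 2, i.e. it is the cube root of steep_lr * min_lr**2; a standalone check with placeholder values:

import numpy as np

steep_lr, min_lr = 1e-3, 1e-1  # placeholder values from a hypothetical range test
max_lr = 10 ** ((np.log10(steep_lr) + 2.0 * np.log10(min_lr)) / 3.0)
assert np.isclose(max_lr, (steep_lr * min_lr ** 2) ** (1 / 3))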
Example #8
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.dropout:
        n_classes = opt.n_classes
        if opt.pretrain_path is not None:
            n_classes = opt.n_finetune_classes
        model = replace_fc_layer(model=model,
                                 dropout_factor=opt.dropout_factor,
                                 n_classes=n_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)

    model = make_data_parallel(model, opt.distributed, opt.device)
    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    if opt.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy().to(opt.device)
    else:
        criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.lr_finder and not opt.no_train and not opt.no_val:
        print(
            "Performing Learning Rate Search\nWith Leslie Smith's approach...")
        lr_finder = LRFinder(model, optimizer, criterion, device=opt.device)
        lr_finder.range_test(train_loader,
                             val_loader=val_loader,
                             start_lr=opt.learning_rate,
                             end_lr=opt.lrf_end_lr,
                             num_iter=opt.lrf_num_it,
                             step_mode=opt.lrf_mode)
        lr_finder.plot(log_lr=False)
        with (opt.result_path / 'lr_search.json').open('w') as results_file:
            json.dump(lr_finder.history, results_file, default=json_serial)
        lr_finder.reset()
        return

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            #current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, train_logger, train_batch_logger,
                        scheduler, opt.lr_scheduler, tb_writer,
                        opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
        elif not opt.no_train and opt.lr_scheduler == 'cosineannealing':
            scheduler.step()

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #9
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=10, num_iter=1564, step_mode='exp')
lr_finder.plot()  # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state

def take_lr(x):
    # sort key: the second element of each pair
    return x[1]


# (lr, loss) pairs ranked by the second element, lowest first
a = zip(lr_finder.history['lr'], lr_finder.history['loss'])
best_lrloss = sorted(a, key=take_lr, reverse=False)[:50]

tup = zip(lr_finder.history['loss'], lr_finder.history['lr'])
sorted(tup, key=take_lr, reverse=False)[:50]


class shrink:
    def __init__(self, config):
        self.config = config

    def apply_augmentations(self):
        valloader = dataloaders['val']

        class CustomTrainIter(TrainDataLoaderIter):
            # My dataloader returns index, X, y
            def inputs_labels_from_batch(self, batch_data):
                return batch_data[1], batch_data[2]

        class CustomValIter(ValDataLoaderIter):
            # My dataloader returns index, X, y
            def inputs_labels_from_batch(self, batch_data):
                return batch_data[1], batch_data[2]

        custom_train_iter = CustomTrainIter(trainloader)
        custom_val_iter = CustomValIter(valloader)
        lr_finder.range_test(custom_train_iter,
                             end_lr=10,
                             num_iter=params.num_epochs,
                             step_mode='exp')
        # Val loader does not work
        #lr_finder.range_test(custom_train_iter, val_loader=custom_val_iter, end_lr=10, num_iter=params.num_epochs, step_mode='exp')
        mylrs = lr_finder.history['lr']
        mylosses = lr_finder.history['loss']
        min_grad_idx = np.gradient(np.array(mylosses)).argmin()
        print(f'Suggested lr: {mylrs[min_grad_idx]}')
        lr_metrics = {'lr': mylrs, 'loss': mylosses}
        fname = os.path.join(args.model_dir, 'lr_metrics.json')
        with open(fname, 'w') as f:
            f.write(json.dumps(lr_metrics))
        '''
        # Train
        print(f'Fold {fold}')
        print('-'*10)
        '''
Example #11
class Shrink:
    '''Wraps training, evaluation and visualization utilities for the configured model'''
    def __init__(self, in_config):
        self.config = in_config
        self.class_names = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
        self.mean = (0.491, 0.482, 0.446)
        self.std = (0.247, 0.243, 0.261)
        self.device = "cuda" if torch.cuda.is_available else "cpu"
        self.model_path = self.config['modelpath']['args']

    plt.style.use("dark_background")

    def seed_everything(self,seed: int) -> None:
        '''Seeds the Code so that we get predictable outputs'''
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)


    def load_data(self, train_transforms, test_transforms, in_dir='./data'):
        '''Downloads the dataset and returns train and testloaders after applying the Transformations'''

        trainset = datasets.CIFAR10(in_dir, train=True, download=True, transform=train_transforms())
        testset = datasets.CIFAR10(in_dir, train=False, download=True, transform=test_transforms())

        self.trainloader = torch.utils.data.DataLoader(trainset, **self.config['train_data_loader']['args'])
        self.testloader = torch.utils.data.DataLoader(testset, **self.config['test_data_loader']['args'])
        return self.trainloader, self.testloader
    
    def load_imagenet_data(self, train_transforms, test_transforms):
        '''Loads the imagenet dataset'''
        self.trainloader, self.testloader = get_imagenet_loader(
            train_transforms, test_transforms,
            self.config['train_data_loader']['args'],
            self.config['test_data_loader']['args'])

    def mean_std_dev(self):
        pass


    def show_data(self, mode='train', n=25):
        '''Plots the images on a gridplot to show the images passed via dataloader'''

        figure = plt.figure(figsize=(20,20))
        images = None
        labels = None
        
        if mode.lower() == 'train':
            images, labels = next(iter(self.trainloader))
            labels = np.array(labels)
        elif mode.lower() == 'test':
            images, labels = next(iter(self.testloader))
            labels = np.array(labels)
        
        images = self.denormalize(images)

        # Gets the first n images of the batch and plots them on a 5x5 grid
        for index in range(n):
            plt.subplot(5, 5, index + 1)
            plt.axis('off')
            plt.imshow(np.transpose(images[index], (1, 2, 0)))
            # plt.title(self.class_names[labels[index]])
        
    
    def get_batched_data(self, in_data):
        '''Takes the list of (image, pred, target) tuples and returns stacked images, preds and targets'''
        in_imgs = []
        in_preds = []
        in_targets = []

        for i in in_data:
            in_imgs.append(i[0])
            in_preds.append(i[1])
            in_targets.append(i[2])
        return torch.stack(in_imgs), torch.stack(in_preds), torch.stack(in_targets)
   

    def plot_gradcam(self, target_layers, images, pred, target, nimgs):
        '''Plots Grad-CAM output for the first nimgs images'''
        index = 0
        # model.load_state_dict(torch.load(self.model_path))

        images = images[index:nimgs].to(self.device)
        target = target[index:nimgs]
        pred = pred[index:nimgs]

        gcam_layers, predicted_probs, predicted_classes = get_gradcam(images, target, self.model, self.device, target_layers)

        # get the denormalization function
        unorm = UnNormalize(mean=self.mean, std=self.std)

        plt_gradcam(gcam_layers=gcam_layers, images=images, target_labels=target, predicted_labels=predicted_classes, class_labels=self.class_names, denormalize=unorm)
    
    def get_gradoutput(self, misclassified=False):
        '''Plots Grad-CAM output for misclassified or correctly classified samples'''
        if misclassified:
            in_data = self.misclassified
        else:
            in_data = self.correct_classified

        target_layers = ["layer1", "layer2", "layer3", "layer4"]
        imgs, preds, targets = self.get_batched_data(in_data)
        self.plot_gradcam(target_layers, imgs, preds, targets, 25)


    def denormalize(self,tensor):
        '''Denormalize the data'''
        if not tensor.ndimension() == 4:
            raise TypeError('tensor should be 4D')

        mean = torch.FloatTensor(self.mean).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        std = torch.FloatTensor(self.std).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)

        return tensor.mul(std).add(mean)
    
    def get_model(self, train=True):
        
        self.model = get_attributes(model_arch, 'model', self.config).to(self.device)
        self.epochs = self.config['epochs']
        if train:
            '''Trains the model and sends the output'''
            criterion = nn.CrossEntropyLoss(reduction='mean')
            optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)  # or **self.config['optimizer']['args']
            max_at_epoch = 5
            self.best_lr = self.config['best_lr']
            # fraction of the schedule spent ramping up: the peak lr is reached at epoch max_at_epoch
            pct_start_val = (max_at_epoch * len(self.trainloader)) / (self.epochs * len(self.trainloader))

            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.best_lr,
                total_steps = len(self.trainloader) *self.epochs,
                steps_per_epoch=len(self.trainloader),
                epochs=self.epochs,
                pct_start=pct_start_val,
                anneal_strategy='cos',
                div_factor=10,
                final_div_factor=10
                )

            self.train_acc = []
            self.train_losses = []
            self.test_acc = []
            self.test_losses = []
            self.lr_metric = []

            EPOCHS = self.epochs
            print(f'Starting Training for {EPOCHS} Epochs')

            for i in range(EPOCHS):
                lr_value = [group['lr']
                            for group in optimizer.param_groups][0]
                self.lr_metric.append(lr_value)
                print(f'EPOCHS : {i} Learning Rate: {lr_value}')
                model_training(self.model, self.device, self.trainloader, optimizer, scheduler, self.train_acc, self.train_losses, criterion, l1_loss=False)
                torch.save(self.model.state_dict(), self.model_path)
                self.misclassified, self.correct_classified = model_testing(self.model, self.device, self.testloader, self.test_acc, self.test_losses, criterion)
        else:
            return self.model
                
        
    def test_model(self):
        '''Loads the saved model weights and runs the test loop'''

        test_losses = []
        test_acc = []

        model_path = 'latest_model.h5'
        self.model.load_state_dict(torch.load(model_path))
        self.misclassified, self.correct_classified = model_testing(self.model, self.device, self.testloader, test_acc, test_losses)
        return self.misclassified, self.correct_classified

    def findbestlr(self):
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.95, weight_decay=0.0005)
        self.lr_finder = LRFinder(self.model, optimizer, criterion, device=self.device)
        self.lr_finder.range_test(self.trainloader, **self.config['range_test']['args'])
        self.lr_finder.plot() # to inspect the loss-learning rate graph
        self.lr_finder.reset() # to reset the model and optimizer to their initial state
        return self.lr_finder
    
    def model_metrics(self):
        fig, axs = plt.subplots(2,2, figsize=(15,10))
        axs[0,0].plot(self.train_losses)
        axs[0,0].set_title('Train_Losses')
        axs[0,1].plot(self.train_acc)
        axs[0,1].set_title('Training_Accuracy')
        axs[1,0].plot(self.test_losses)
        axs[1,0].set_title('Test_Losses')
        axs[1,1].plot(self.test_acc)
        axs[1,1].set_title('Test_Accuracy')

    def print_visualization(self, input_size):
        '''Prints a visualization graph for Torch models'''
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.device)
        out = self.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph
Example #12
criterion = metrics.MyLossFunc()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser,
                                                       'min',
                                                       patience=5,
                                                       verbose=True)

# find learning rate
if find_LR and not torch.cuda.is_available():
    if learning_rate > 0.0001:
        print(
            f"Selected initial learning rate too high.\nLearning rate changed to 0.0001"
        )
        optimiser = torch.optim.Adam(model.parameters(), lr=0.0001)
    lr_finder = LRFinder(model, optimiser, criterion, device=device)
    lr_finder.range_test(train_loader, end_lr=200, num_iter=200)
    lr_finder.plot()  # to inspect the loss-learning rate graph
    lr_finder.reset()  # to reset the model and optimizer to their initial state
    subprocess.Popen(["kill", "-9", f"{TB_process.pid}"])
    sys.exit("Learning rate plot finished")

# computational graph
if print_comp_graph:
    for sample in train_loader:
        writer.add_graph(model, sample[0].float())
    writer.close()

# training loop
running_train_loss = 0.
running_valid_loss = 0.
Example #13
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)


#%%
torch.manual_seed(42)
net = MyNet(50, 40, 20, 3, 5, 0.5)
criterion = nn.BCELoss()
optim = torch.optim.Adam(net.parameters(), lr=10**-2)
# Explicitly init weights!
net.apply(init_weights)

#%%
lrf = LRFinder(net, optim, criterion)
lrf.range_test(train_loader, start_lr=0.0001, end_lr=1)
lrf.plot()
lrf.reset()

#%%
# seemingly best: Adam + cyclical LR + exp_range decay of learning rate
N_EPOCHS = 30
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optim,
    10**-4,
    10**-2,
    mode='exp_range',
    step_size_up=(xtrain.size(0) / BATCHSIZE) * 2,
    cycle_momentum=False)

history = {'train_loss': [], 'val_loss': []}
Example #14
	elif model_name == 'vgg':
		trans.insert(0, torchvision.transforms.Resize((244,244)))
	trans.insert(0, fancy_pca())
	trans.insert(0, torchvision.transforms.RandomRotation(180))
	trans.insert(0, torchvision.transforms.RandomHorizontalFlip(p=0.5))

	train_dataset = torchvision.datasets.ImageFolder(
			root=data_path,
			transform=torchvision.transforms.Compose(trans)
		)

	train_loader = torch.utils.data.DataLoader(
		train_dataset,
		batch_size=256,
		num_workers=0,
		shuffle=True
	)
	return train_loader


trainloader = train_loader(model_name='inception')

model = Inception.inception_v3(img_size=256)
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum = 0.9, weight_decay = 5e-3, nesterov=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-7, weight_decay=1e-4)
lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(trainloader, end_lr=1, num_iter=100)
lr_finder.plot() # to inspect the loss-learning rate graph
lr_finder.reset() # to reset the model and optimizer to their initial state
    transforms = utils.build_transforms(second_stage=True)
    loaders = utils.build_loaders(data_dir,
                                  transforms,
                                  batch_sizes,
                                  num_workers,
                                  second_stage=True)
    model = utils.build_model(backbone,
                              second_stage=True,
                              num_classes=num_classes,
                              ckpt_pretrained=ckpt_pretrained).cuda()

    optim = utils.build_optim(model, optimizer_params, scheduler_params,
                              criterion_params)
    criterion, optimizer, scheduler = (
        optim["criterion"],
        optim["optimizer"],
        optim["scheduler"],
    )
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder.range_test(loaders["train_features_loader"],
                         end_lr=1,
                         num_iter=300)
    fig, ax = plt.subplots()
    lr_finder.plot(ax=ax)

    fig.savefig(
        "lr_finder_plots/supcon_{}_{}_bs_{}_stage_{}_lr_finder.png".format(
            optimizer_params["name"],
            data_dir.split("/")[-1], batch_sizes["train_batch_size"],
            'second'))
Example #16
def train(model, device, train_loader, test_loader, EPOCH, FACTOR, PATIENCE,
          MOMENTUM, LEARNING_RATE):
    criterion = nn.CrossEntropyLoss()
    # use the hyperparameters passed into train() and the train_loader argument
    optimizer = optim.SGD(model.parameters(),
                          lr=LEARNING_RATE,
                          momentum=MOMENTUM,
                          nesterov=True,
                          weight_decay=0.0001)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                              max_lr=0.008,
                                              pct_start=5 / 24,
                                              epochs=EPOCH,
                                              steps_per_epoch=len(train_loader))
    train_losses = []
    train_acc = []
    test_losses = []
    test_acc = []

    for epoch in range(EPOCH):
        correct = 0
        processed = 0
        pbar = tqdm(train_loader)
        model.train()
        for batch_idx, (data, target) in enumerate(pbar):
            # get samples

            data, target = data.to(device), target.to(device)
            # Init
            optimizer.zero_grad()
            # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes.
            # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.
            # Predict
            y_pred = model(data)
            # Calculate loss
            #             regularization_loss = 0
            #             for param in model.parameters():
            #                 regularization_loss += torch.sum(abs(param))

            #             classify_loss = criterion(y_pred,target)
            loss = F.nll_loss(y_pred, target)
            #loss = classify_loss + LAMDA * regularization_loss
            #             train_losses.append(loss)

            # Backpropagation
            loss.backward()
            optimizer.step()
            scheduler.step()
            # Update pbar-tqdm

            pred = y_pred.argmax(
                dim=1,
                keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            processed += len(data)
            pbar.set_description(
                desc=
                f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}'
            )
            # train_acc.append(100*correct/processed)

        train_losses.append(loss.item())
        train_acc.append(100 * correct / processed)

        img, true_wrong, pred_wrong, tst_acc, tst_loss = test(
            model, device, test_loader)
        test_losses.append(tst_loss)
        test_acc.append(tst_acc)

    lr_finder = LRFinder(model, optimizer, criterion, device)
    lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
    lr_finder.plot()  # to inspect the loss-learning rate graph
    #     lr_finder.reset()
    return train_losses, train_acc, model, img, true_wrong, pred_wrong, test_acc, test_losses, lr_finder