def run(self):
    ## init distributed
    self.cfg = init_distributed(self.cfg)
    cfg = self.cfg
    # cfg.print()

    ## parser_dict
    self.dictionary = self._parser_dict()

    ## parser_datasets
    datasets, dataloaders, data_samplers, dataset_sizes = self._parser_datasets()

    ## parser_model
    model_ft = self._parser_model()

    # Scale learning rate based on global batch size
    if cfg.SCALE_LR:
        cfg.INIT_LR = cfg.INIT_LR * float(self.batch_size) / cfg.SCALE_LR

    scaler = amp.GradScaler(enabled=True)

    if cfg.WARMUP.NAME is not None and cfg.WARMUP.ITERS:
        logger.info('Start warm-up ... ')
        self.warm_up(scaler, model_ft, dataloaders['train'], cfg)
        logger.info('finish warm-up!')

    ## parser_optimizer
    optimizer_ft = build_optimizer(cfg, model_ft)

    ## parser_lr_scheduler
    lr_scheduler_ft = build_lr_scheduler(cfg, optimizer_ft)

    if cfg.distributed:
        model_ft = DDP(model_ft, device_ids=[cfg.local_rank], output_device=cfg.local_rank)

    # Freeze
    freeze_models(model_ft)

    if self.cfg.PRETRAIN_MODEL is not None:
        if self.cfg.RESUME:
            self.start_epoch = self.ckpts.resume_checkpoint(model_ft, optimizer_ft)
        else:
            self.start_epoch = self.ckpts.load_checkpoint(self.cfg.PRETRAIN_MODEL, model_ft, optimizer_ft)

    ## vis network graph
    if self.cfg.TENSORBOARD_MODEL and False:
        self.tb_writer.add_graph(model_ft, (model_ft.dummy_input.cuda(),))

    self.steps_per_epoch = int(dataset_sizes['train'] // self.batch_size)

    best_acc = 0.0
    best_perf_rst = None
    for epoch in range(self.start_epoch + 1, self.cfg.N_MAX_EPOCHS):
        if cfg.distributed:
            dataloaders['train'].sampler.set_epoch(epoch)

        self.train_epoch(scaler, epoch, model_ft, datasets['train'], dataloaders['train'], optimizer_ft)
        lr_scheduler_ft.step()

        if self.cfg.DATASET.VAL and (not epoch % cfg.EVALUATOR.EVAL_INTERVALS or epoch == self.cfg.N_MAX_EPOCHS - 1):
            acc, perf_rst = self.val_epoch(epoch, model_ft, datasets['val'], dataloaders['val'])

            if cfg.local_rank == 0:
                # start to save best performance model after learning rate decay to 1e-6
                if best_acc < acc:
                    self.ckpts.autosave_checkpoint(model_ft, epoch, 'best', optimizer_ft)
                    best_acc = acc
                    best_perf_rst = perf_rst
                # continue

        if not epoch % cfg.N_EPOCHS_TO_SAVE_MODEL:
            if cfg.local_rank == 0:
                self.ckpts.autosave_checkpoint(model_ft, epoch, 'last', optimizer_ft)

    if best_perf_rst is not None:
        logger.info(best_perf_rst.replace("(val)", "(best)"))

    if cfg.local_rank == 0:
        self.tb_writer.close()

    if cfg.local_rank != 0:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
def run(self):
    ## init distributed
    self.cfg = init_distributed(self.cfg)
    cfg = self.cfg
    # cfg.print()

    ## parser_dict
    self.dictionary = self._parser_dict()

    ## parser_datasets
    datasets, dataloaders, data_samplers = self._parser_datasets()
    # dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}
    # class_names = datasets['train'].classes

    ## parser_model
    model_ft = self._parser_model()

    ## parser_optimizer
    # Scale learning rate based on global batch size
    # cfg.INIT_LR = cfg.INIT_LR * float(self.batch_size_all) / 256
    optimizer_ft = parser_optimizer(cfg, model_ft)

    ## parser_lr_scheduler
    lr_scheduler_ft = parser_lr_scheduler(cfg, optimizer_ft)
    '''
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = lambda x: (((1 + math.cos(x * math.pi / self.cfg.N_MAX_EPOCHS)) / 2) ** 1.0) * 0.8 + 0.2  # cosine
    lr_scheduler_ft = lr_scheduler.LambdaLR(optimizer_ft, lr_lambda=lf)
    '''

    if cfg.distributed:
        model_ft = DDP(model_ft, device_ids=[cfg.local_rank], output_device=cfg.local_rank)

    # Freeze
    freeze = ['']  # parameter names to freeze (full or partial)
    if any(freeze):
        for k, v in model_ft.named_parameters():
            if any(x in k for x in freeze):
                print('freezing %s' % k)
                v.requires_grad = False

    if self.cfg.PRETRAIN_MODEL is not None:
        if self.cfg.RESUME:
            self.start_epoch = self.ckpts.load_checkpoint(self.cfg.PRETRAIN_MODEL, model_ft, optimizer_ft, lr_scheduler_ft)
        else:
            self.ckpts.load_checkpoint(self.cfg.PRETRAIN_MODEL, model_ft)

    ## vis net graph
    if self.cfg.TENSORBOARD_MODEL and False:
        self.tb_writer.add_graph(model_ft, (model_ft.dummy_input.cuda(),))

    self.n_steps_per_epoch = int(ceil(sum(len(t) for t in datasets['train'])))

    best_acc = 0.0
    scaler = amp.GradScaler(enabled=True)
    for epoch in range(self.start_epoch + 1, self.cfg.N_MAX_EPOCHS):
        if cfg.distributed:
            dataloaders['train'].sampler.set_epoch(epoch)

        self.train_epoch(scaler, epoch, model_ft, dataloaders['train'], optimizer_ft)
        lr_scheduler_ft.step()

        if self.cfg.DATASET.VAL:
            acc = self.val_epoch(epoch, model_ft, dataloaders['val'])

            if cfg.local_rank == 0:
                # start to save best performance model after learning rate decay to 1e-6
                if best_acc < acc:
                    self.ckpts.autosave_checkpoint(model_ft, epoch, 'best', optimizer_ft, lr_scheduler_ft)
                    best_acc = acc
                # continue

        if not epoch % cfg.N_EPOCHS_TO_SAVE_MODEL:
            if cfg.local_rank == 0:
                self.ckpts.autosave_checkpoint(model_ft, epoch, 'autosave', optimizer_ft, lr_scheduler_ft)

    if cfg.local_rank == 0:
        self.tb_writer.close()

    if cfg.local_rank != 0:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
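# ---------------------------------------------------------------------------
# Minimal standalone sketch (not part of the trainer above) of the
# partial-name freezing pattern used in the second run(): any parameter whose
# name contains one of the given substrings has requires_grad turned off.
# The helper name, the toy model, and the pattern list are illustrative
# assumptions, not identifiers from this repository.
# ---------------------------------------------------------------------------
import torch.nn as nn


def freeze_by_partial_name(model, patterns):
    """Disable gradients for parameters whose names contain any pattern."""
    for name, param in model.named_parameters():
        if any(p in name for p in patterns):
            print('freezing %s' % name)
            param.requires_grad = False


if __name__ == '__main__':
    toy_model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))
    # Freeze the first Linear layer ('0.weight' and '0.bias'); the second
    # Linear layer ('2.weight', '2.bias') keeps requires_grad=True.
    freeze_by_partial_name(toy_model, ['0.'])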