# Module-level imports assumed by both methods below.
import time
import asyncio
from threading import Thread

import torch


def finetuning(self, config):
    """Fine-tunes the model with SGD and exponential LR decay, periodically
    evaluating and saving checkpoints asynchronously on a background thread.

    Note: this is a method of the surrounding trainer class, which provides
    `self.model`, `self.train_loader`, `self.test_loader`, and `self.name`;
    `my_forward_eval` and `compression_stats` are assumed to be defined in
    the enclosing scope.
    """
    all_start_time = time.time()
    my_params = filter(lambda p: p.requires_grad, self.model.parameters())
    optimizer = torch.optim.SGD(my_params, config['lr'],
                                momentum=config['momentum'], nesterov=True)
    epoch_time = AverageMeter()
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer, gamma=config['lr_decay'])
    train_info = {}
    test_info = {}

    # Run an asyncio event loop on a background thread so that checkpoint
    # saving does not block training.
    def start_loop(loop):
        asyncio.set_event_loop(loop)
        loop.run_forever()

    new_loop = asyncio.new_event_loop()
    t = Thread(target=start_loop, args=(new_loop,))
    t.start()

    for epoch in range(config['epochs']):
        start_time = time.time()
        avg_loss_ = AverageMeter()
        for x, target in self.train_loader:
            optimizer.zero_grad()
            x, target = x.cuda(), target.cuda()
            loss = self.model.loss(self.model(x), target)
            loss.backward()
            avg_loss_.update(loss.item())
            optimizer.step()
        end_time = time.time()
        training_time = end_time - all_start_time
        epoch_time.update(end_time - start_time)
        print("Epoch {0} finished in {1.val:.3f}s (avg: {1.avg:.3f}s). Training for {2}"
              .format(epoch, epoch_time, format_time(training_time)))
        print('AVG train loss {0.avg:.6f}'.format(avg_loss_))
        print("\tLR: {:.4e}".format(optimizer.param_groups[0]['lr']))
        lr_scheduler.step()

        if (epoch + 1) % config['print_freq'] == 0:
            # Evaluate on train and test sets, then schedule an async checkpoint.
            self.model.eval()
            accuracy, ave_loss = compute_acc_loss(my_forward_eval, self.train_loader)
            train_info[epoch + 1] = [ave_loss, accuracy, training_time]
            print('\ttrain loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
            accuracy, ave_loss = compute_acc_loss(my_forward_eval, self.test_loader)
            test_info[epoch + 1] = [ave_loss, accuracy, training_time]
            print('\ttest loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
            self.model.train()

            to_save = {
                'config': config,
                'optimizer_state': optimizer.state_dict(),
                'model_state': self.model.state_dict(),
                'training_time': training_time,
                'train_info': train_info,
                'test_info': test_info,
                'current_epoch': epoch,
                'compression_stats': compression_stats,
            }

            async def actual_save():
                # TODO: make saving more robust: 1) move the current file to a
                # backup, 2) save the new data, 3) delete the backup.
                torch.save(to_save, f'results/{self.name}_ft_{config["tag"]}.th')

            asyncio.run_coroutine_threadsafe(actual_save(), new_loop)

    # Stop the background loop once all pending saves have run.
    async def last_task():
        print("Async file saving has finished.")
        new_loop.stop()

    asyncio.run_coroutine_threadsafe(last_task(), new_loop)
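# `finetuning` above (and `l_step_optimization` below) rely on small project
# utilities (`AverageMeter`, `format_time`, `compute_acc_loss`) that are
# defined elsewhere. For reference, a minimal sketch of what they are assumed
# to look like, inferred only from how they are used here; these are
# hypothetical reconstructions, not the project's actual implementations.

class AverageMeter:
    """Tracks the most recent value and the running average of a series."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def format_time(seconds):
    """Formats a duration in seconds as H:MM:SS."""
    m, s = divmod(int(seconds), 60)
    h, m = divmod(m, 60)
    return f'{h:d}:{m:02d}:{s:02d}'


def compute_acc_loss(forward_func, data_loader):
    """Computes (accuracy, average loss) over a data loader, using a
    `forward_func(x, target) -> (output, loss)` callable such as
    `my_forward_eval` or the LC evaluator constructed below."""
    correct, total, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for x, target in data_loader:
            x, target = x.cuda(), target.cuda()
            out, loss = forward_func(x, target)
            loss_sum += loss.item() * target.size(0)
            correct += (out.argmax(dim=1) == target).sum().item()
            total += target.size(0)
    return correct / total, loss_sum / total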
def l_step_optimization(self, model, lc_penalty, step, config):
    """Runs the L step of the LC algorithm: SGD on the penalized loss
    `model.loss(...) + lc_penalty()` for one LC iteration."""
    all_start_time = config['all_start_time']
    lr_scheduler = None
    my_params = filter(lambda p: p.requires_grad, model.parameters())
    learning_rate = config['lr']
    if config['lr_decay_mode'] == 'after_l':
        learning_rate *= config['lr_decay'] ** step
        print(f"Current LR={learning_rate}")

    def construct_my_forward_lc_eval(lc_penalty):
        # The penalty is evaluated once and treated as a constant during
        # evaluation.
        pen = lc_penalty()

        def my_forward_lc_eval(x, target):
            out_ = model.forward(x)
            return out_, model.loss(out_, target) + pen

        return my_forward_lc_eval

    optimizer = torch.optim.SGD(my_params, learning_rate,
                                momentum=config['momentum'], nesterov=True)
    if config['lr_decay_mode'] == 'restart_on_l':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
            optimizer, gamma=config['lr_decay'])

    if 'lr_trick' in config:
        # LR trick: train the first epoch with a 10x smaller learning rate.
        lr_trick_value = 0.1
        print('LR trick in play. The first epoch is trained with an LR of {:.4e}'
              .format(config['lr'] * lr_trick_value))
        for param_group in optimizer.param_groups:
            param_group['lr'] = config['lr'] * lr_trick_value
        # TODO: revert the lr_trick after the first epoch?

    epochs_in_this_it = config['epochs'] if step > 0 else \
        config.get('first_mu_epochs', config['epochs'])
    print('Epochs in this iteration:', epochs_in_this_it)

    # Evaluate the penalized loss before training starts.
    model.eval()
    lc_evaluator = construct_my_forward_lc_eval(lc_penalty)
    accuracy, ave_loss = compute_acc_loss(lc_evaluator, self.train_loader)
    print('\ttrain loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
    accuracy, ave_loss = compute_acc_loss(lc_evaluator, self.test_loader)
    print('\ttest loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
    model.train()

    epoch_time = AverageMeter()
    rec = Recorder()
    for epoch in range(epochs_in_this_it):
        start_time = time.time()
        avg_loss_ = AverageMeter()
        for x, target in self.train_loader:
            optimizer.zero_grad()
            x, target = x.cuda(), target.cuda(non_blocking=True)
            loss = model.loss(model(x), target) + lc_penalty()
            avg_loss_.update(loss.item())
            loss.backward()
            optimizer.step()
        end_time = time.time()
        training_time = end_time - all_start_time
        epoch_time.update(end_time - start_time)
        print("LC step {0}, Epoch {1} finished in {2.val:.3f}s (avg: {2.avg:.3f}s). Training for {3}"
              .format(step, epoch, epoch_time, format_time(training_time)))
        print('AVG train loss {0.avg:.6f}'.format(avg_loss_))
        rec.record('average_loss_per_epoch', avg_loss_)

        if (epoch + 1) % config['print_freq'] == 0:
            model.eval()
            lc_evaluator = construct_my_forward_lc_eval(lc_penalty)
            accuracy, ave_loss = compute_acc_loss(lc_evaluator, self.train_loader)
            rec.record('train', [ave_loss, accuracy, training_time, step + 1, epoch + 1])
            print('\ttrain loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
            accuracy, ave_loss = compute_acc_loss(lc_evaluator, self.test_loader)
            rec.record('test', [ave_loss, accuracy, training_time, step + 1, epoch + 1])
            print('\ttest loss: {:.6f}, accuracy: {:.4f}'.format(ave_loss, accuracy))
            model.train()

        if config['lr_decay_mode'] == 'restart_on_l':
            print("\told LR: {:.4e}".format(optimizer.param_groups[0]['lr']))
            lr_scheduler.step()
            print("\tnew LR: {:.4e}".format(optimizer.param_groups[0]['lr']))
        else:
            print("\tLR: {:.4e}".format(learning_rate))

    info = {
        'train': rec.train,
        'test': rec.test,
        'average_loss_per_train_epoch': rec.average_loss_per_epoch
    }
    return info
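# `l_step_optimization` also assumes a `Recorder` utility for accumulating
# per-epoch statistics. A minimal sketch of what it is assumed to look like,
# inferred from its usage above (hypothetical, not the project's actual
# implementation): `record(name, value)` appends `value` to the list exposed
# as the attribute `name`.

class Recorder:
    """Accumulates named series of values; each name is a list attribute."""

    def __init__(self):
        self.train, self.test, self.average_loss_per_epoch = [], [], []

    def record(self, name, value):
        getattr(self, name).append(value)

# With this sketch, the returned info['train'] and info['test'] are lists of
# [loss, accuracy, training_time, step, epoch] rows, one per evaluation.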