visualizer.reset() total_steps += opt.batch_size epoch_iter += opt.batch_size model.set_input(data) model.optimize_parameters() if total_steps % opt.display_freq == 0: save_result = total_steps % opt.update_html_freq == 0 visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) if total_steps % opt.print_freq == 0: losses = model.get_current_losses() t = (time.time() - iter_start_time) / opt.batch_size visualizer.print_current_losses(epoch, epoch_iter, losses, t, t_data) if opt.display_id > 0: visualizer.plot_current_losses(epoch, float(epoch_iter) / dataset_size, opt, losses) if total_steps % opt.save_latest_freq == 0: print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) model.save_networks('latest') iter_data_time = time.time() if epoch % opt.save_epoch_freq == 0: print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps)) model.save_networks('latest') model.save_networks(epoch) print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
) # calculate loss functions, get gradients, update network weights if total_iters % opt.display_freq == 0: # display images on visdom and save images to a HTML file save_result = total_iters % opt.update_html_freq == 0 model.compute_visuals() visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) if total_iters % opt.print_freq == 0: # print training losses and save logging information to the disk losses = model.get_current_losses() t_comp = (time.time() - iter_start_time) / opt.batch_size visualizer.print_current_losses(epoch, epoch_iter, losses, t_comp, t_data) if opt.display_id > 0: visualizer.plot_current_losses( epoch, float(epoch_iter) / dataset_size, losses) if total_iters % opt.save_latest_freq == 0: # cache our latest model every <save_latest_freq> iterations print('saving the latest model (epoch %d, total_iters %d)' % (epoch, total_iters)) save_suffix = 'iter_%d' % total_iters if opt.save_by_iter else 'latest' model.save_networks(save_suffix) iter_data_time = time.time() if epoch % opt.save_epoch_freq == 0: # cache our model every <save_epoch_freq> epochs print('saving the model at the end of epoch %d, iters %d' % (epoch, total_iters)) model.save_networks('latest') model.save_networks(epoch)
def train(cfg):
    """Run the training loop configured by ``cfg``, optionally on several GPUs.

    Every process runs the optimisation steps; only the master process (as
    reported by ``du.is_master_proc``) times iterations, drives the
    visualizer, prints progress and writes checkpoints.

    Args:
        cfg: Configuration object. Fields read here: ``RNG_SEED``,
            ``NUM_GPUS``, ``epoch_count``, ``niter``, ``niter_decay``,
            ``batch_size``, ``print_freq``, ``display_freq``,
            ``update_html_freq``, ``display_id``, ``save_latest_freq``,
            ``save_by_iter``, ``save_epoch_freq`` and ``save_iter_model``.
    """
    du.init_distributed_training(cfg)

    # Seed NumPy and PyTorch from the config.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    dataset = create_dataset(cfg)
    dataset_size = len(dataset)
    print('The number of training images = %d' % dataset_size)

    model = create_model(cfg)
    model.setup(cfg)
    visualizer = Visualizer(cfg)

    is_master = du.is_master_proc(cfg.NUM_GPUS)
    final_epoch = cfg.niter + cfg.niter_decay
    # Numbered per-epoch snapshots are only written from this epoch onwards.
    first_snapshot_epoch = 500
    total_iters = 0  # total number of training samples seen so far

    for epoch in range(cfg.epoch_count, final_epoch + 1):
        if is_master:
            epoch_start_time = time.time()  # timer for the entire epoch
            iter_data_time = time.time()  # timer for data loading
        epoch_iter = 0  # samples seen in the current epoch
        shuffle_dataset(dataset, epoch)

        for data in dataset:
            if is_master:
                iter_start_time = time.time()  # timer for computation
                if total_iters % cfg.print_freq == 0:
                    # Time spent waiting for this batch: from the end of the
                    # previous iteration to the start of this one.
                    t_data = iter_start_time - iter_data_time
                visualizer.reset()

            total_iters += cfg.batch_size
            epoch_iter += cfg.batch_size
            model.set_input(data)
            model.optimize_parameters()

            if total_iters % cfg.display_freq == 0 and is_master:
                save_result = total_iters % cfg.update_html_freq == 0
                model.compute_visuals()
                visualizer.display_current_results(
                    model.get_current_visuals(), epoch, save_result)

            # The reduction is issued on every process so that all of them
            # take part in it, not only the master.
            losses = model.get_current_losses()
            if cfg.NUM_GPUS > 1:
                losses = du.all_reduce(losses)

            if total_iters % cfg.print_freq == 0 and is_master:
                t_comp = (time.time() - iter_start_time) / cfg.batch_size
                visualizer.print_current_losses(epoch, epoch_iter, losses,
                                                t_comp, t_data)
                if cfg.display_id > 0:
                    visualizer.plot_current_losses(
                        epoch, float(epoch_iter) / dataset_size, losses)

            if total_iters % cfg.save_latest_freq == 0 and is_master:
                print('saving the latest model (epoch %d, total_iters %d)' %
                      (epoch, total_iters))
                if cfg.save_by_iter:
                    save_suffix = 'iter_%d' % total_iters
                else:
                    save_suffix = 'latest'
                model.save_networks(save_suffix)

            if is_master:
                # Restart the data timer at the very end of the iteration so
                # that t_data excludes this iteration's compute and logging.
                iter_data_time = time.time()

        if epoch % cfg.save_epoch_freq == 0 and is_master:
            print('saving the model at the end of epoch %d, iters %d' %
                  (epoch, total_iters))
            model.save_networks('latest')
            if cfg.save_iter_model and epoch >= first_snapshot_epoch:
                model.save_networks(epoch)

        if is_master:
            print('End of epoch %d / %d \t Time Taken: %d sec' %
                  (epoch, final_epoch, time.time() - epoch_start_time))
        # Learning rates are updated at the end of every epoch, on all ranks.
        model.update_learning_rate()
def train_model(opt):
    """Train ``my_senet154`` for ``opt.niter`` epochs and restore the best weights.

    Each epoch runs every phase listed in the module-level ``mode`` over
    ``data_loaders[phase]``; gradients and optimizer steps are applied only in
    the ``'train'`` phase. The weights giving the highest ``'val'`` accuracy
    are remembered and loaded back into the model once all epochs finish.

    Relies on names defined outside this function: ``my_senet154``, ``mode``,
    ``data_loaders``, ``dataset_size``, ``device``, ``optimizer``,
    ``criterion``, ``Visualizer`` and ``save_net_works``.

    Args:
        opt: Options object; ``niter`` and ``save_epoch_freq`` are read here,
            and the object is also handed to the visualizer and to
            ``save_net_works``.
    """
    epochs = opt.niter
    visualizer = Visualizer(opt)
    best_model_wts = copy.deepcopy(my_senet154.state_dict())
    best_acc = 0.

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        print('-' * 10)

        loss_dic = {}
        legend = ['train' + 'epoch_acc', 'val' + 'epoch_acc']

        for phase in mode:
            running_loss = 0.
            running_corrects = 0
            if phase == 'train':
                my_senet154.train()
            else:
                my_senet154.eval()

            # One full pass over this phase's data.
            for inputs, labels in data_loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Track gradients only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    optimizer.zero_grad()
                    outputs = my_senet154(inputs)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Running statistics; the loss is weighted by batch size.
                preds = outputs.max(1)[1]
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / dataset_size[phase]
            epoch_acc = running_corrects.double() / dataset_size[phase]
            loss_dic[phase + 'epoch_acc'] = epoch_acc
            print('%s Loss: %.4f ACC: %.4f' % (phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                # Snapshot the weights (a state dict), not the module itself:
                # best_model_wts is later passed to load_state_dict().
                best_model_wts = copy.deepcopy(my_senet154.state_dict())

        visualizer.plot_current_losses(epoch, opt, loss_dic, legend)
        if epoch % opt.save_epoch_freq == 0:
            save_net_works(opt, epoch, best_model_wts)

    # Leave the model holding the best validation weights seen.
    my_senet154.load_state_dict(best_model_wts)
if total_iters % opt.display_freq == 0: # display images on visdom and save images to a HTML file save_result = total_iters % opt.update_html_freq == 0 model.compute_visuals() visualizer.display_current_results( model.get_current_visuals(), int(total_iters / opt.display_freq), opt.display_freq, save_result) if total_iters % opt.print_freq == 0: # print training losses and save logging information to the disk losses = model.get_current_losses() t_comp = (time.time() - iter_start_time) / opt.batch_size visualizer.print_current_losses(epoch, total_iters, losses, t_comp, t_data) if opt.display_id > 0: visualizer.plot_current_losses( epoch, float(total_iters) / dataset_size, losses) if total_iters % opt.print_freq == 0 and opt.model == 'moegan': # print pareto front points = [(fits[0], fits[1]) for fits in model.Fitness] path = os.path.join(opt.checkpoints_dir, opt.name, 'paretof.txt') with open(path, "w") as pffile: for x, y in points: pffile.write(str(x) + " " + str(y) + "\n") if total_iters % opt.score_freq == 0: # print generation scores and save logging information to the disk scores = model.get_current_scores() t_comp = (time.time() - iter_start_time) / opt.batch_size visualizer.print_current_scores(epoch, total_iters, scores) if opt.display_id > 0: