def handle_checkpointing(train_cnt, avg_train_loss):
    """Save, plot, or log the run depending on progress since the last event.

    Exactly one of the following is attempted, in priority order:

    1. Save a checkpoint when at least ``args.save_every`` counts have
       passed since ``info['last_save']``.
    2. Log for the first time when ``info['train_cnts']`` is empty.
    3. Plot when at least ``args.plot_every`` counts have passed since
       ``info['last_plot']``.
    4. Log when at least ``args.log_every`` counts have passed since the
       most recent entry of ``info['train_cnts']``.

    If none of the conditions hold, nothing happens.

    Relies on module-level state (``info``, ``args``, the models, ``opt``,
    ``vae_base_filepath``) and on ``handle_plot_ckpt`` / ``save_checkpoint``.

    Args:
        train_cnt: current training count used for all interval checks.
        avg_train_loss: forwarded unchanged to ``handle_plot_ckpt``.
    """
    # NOTE(review): the first argument to handle_plot_ckpt is True on the
    # save/plot paths and False on the log paths -- presumably a "do plot"
    # flag; confirm against handle_plot_ckpt's definition.

    # 1) Checkpoint save takes priority over everything else.
    cnt_since_save = train_cnt - info['last_save']
    if cnt_since_save >= args.save_every:
        print("Saving Model at cnt:%s cnt since last saved:%s" % (train_cnt, cnt_since_save))
        info['last_save'] = train_cnt
        info['save_times'].append(time.time())
        handle_plot_ckpt(True, train_cnt, avg_train_loss)
        ckpt_path = vae_base_filepath + "_%010dex.pkl" % train_cnt
        # Built after handle_plot_ckpt so the saved 'info' reflects any
        # bookkeeping that call performed.
        checkpoint = {
            'vae_state_dict': vae_model.state_dict(),
            'prior_state_dict': prior_model.state_dict(),
            'pcnn_state_dict': pcnn_decoder.state_dict(),
            'optimizer': opt.state_dict(),
            'info': info,
        }
        save_checkpoint(checkpoint, filename=ckpt_path)
        return

    # 2) Nothing has been logged yet: record an initial entry.
    if len(info['train_cnts']) == 0:
        print("Logging model: %s no previous logs" % train_cnt)
        handle_plot_ckpt(False, train_cnt, avg_train_loss)
        return

    # 3) Periodic plot.
    cnt_since_plot = train_cnt - info['last_plot']
    if cnt_since_plot >= args.plot_every:
        print("Plotting Model at cnt:%s cnt since last plotted:%s" % (train_cnt, cnt_since_plot))
        handle_plot_ckpt(True, train_cnt, avg_train_loss)
        return

    # 4) Periodic log, measured from the most recent logged count.
    cnt_since_log = train_cnt - info['train_cnts'][-1]
    if cnt_since_log >= args.log_every:
        print("Logging Model at cnt:%s cnt since last logged:%s" % (train_cnt, cnt_since_log))
        handle_plot_ckpt(False, train_cnt, avg_train_loss)
def save_vqvae(info, train_cnt, vqvae_model, opt, avg_train_losses, valid_batch):
    """Record a save event, validate, plot, and write a model checkpoint.

    Steps, in order:

    1. Update ``info['model_last_save']`` and append the current wall-clock
       time to ``info['model_save_times']``.
    2. Run ``valid_vqvae`` on ``valid_batch`` and pass its result, together
       with ``avg_train_losses``, to ``handle_plot_ckpt``.
    3. Build the checkpoint path from ``info['MODEL_MODEL_BASE_FILEDIR']``
       and ``train_cnt`` and hand the state dict to ``save_checkpoint``.

    Args:
        info: mutable run-state mapping; updated in place and stored in the
            checkpoint under ``'model_info'``.
        train_cnt: current training count; used in the filename and
            recorded as the new ``model_last_save``.
        vqvae_model: model providing ``state_dict()`` and ``embedding``.
        opt: optimizer providing ``state_dict()``.
        avg_train_losses: forwarded unchanged to ``handle_plot_ckpt``.
        valid_batch: forwarded unchanged to ``valid_vqvae``.
    """
    # Capture the previous save count BEFORE overwriting it. The original
    # computed the difference after the update, so the "cnt since last
    # saved" message always reported 0. A missing key is treated as 0
    # (i.e. counted from the start of training) rather than raising.
    cnt_since_last_save = train_cnt - info.get('model_last_save', 0)

    # Bookkeeping is updated before validation/plotting, as in the
    # original, so those helpers see the new save count in `info`.
    info['model_last_save'] = train_cnt
    info['model_save_times'].append(time.time())

    avg_valid_losses = valid_vqvae(train_cnt, vqvae_model, info, valid_batch)
    handle_plot_ckpt(train_cnt, info, avg_train_losses, avg_valid_losses)

    filename = info['MODEL_MODEL_BASE_FILEDIR'] + "_%010dex.pt" % train_cnt
    print("SAVING MODEL:%s" % filename)
    print("Saving model at cnt:%s cnt since last saved:%s" % (train_cnt, cnt_since_last_save))

    state = {
        'model_state_dict': vqvae_model.state_dict(),
        'model_optimizer': opt.state_dict(),
        'model_embedding': vqvae_model.embedding,
        'model_info': info,
    }
    save_checkpoint(state, filename=filename)