        help='the prefix of the model to save')
    parser.add_argument('--num-epochs', type=int, default=10,
                        help='the number of training epochs')
    parser.add_argument('--load-epoch', type=int,
                        help="load the model on an epoch using the model-prefix")
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='times the lr with a factor for every lr-factor-epoch epoch')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the number of epoch to factor the lr, could be .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape))
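# --- Hedged sketch (helper assumed by the example above, not part of it) --------
# get_iterator(data_shape) is not shown. In the MXNet example scripts it usually
# returns a closure that train_model.fit calls with (args, kv) to build the
# train/validation iterators. A minimal sketch along those lines, using
# mx.test_utils.get_mnist() instead of the original data pipeline and assuming
# args exposes a batch_size attribute:
def get_iterator(data_shape):
    def get_iterator_impl(args, kv):
        mnist = mx.test_utils.get_mnist()  # MNIST as normalized NumPy arrays
        train = mx.io.NDArrayIter(
            mnist['train_data'].reshape((-1,) + data_shape),
            mnist['train_label'], args.batch_size, shuffle=True)
        val = mx.io.NDArrayIter(
            mnist['test_data'].reshape((-1,) + data_shape),
            mnist['test_label'], args.batch_size)
        return (train, val)
    return get_iterator_impl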
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)

    # loading annotations into memory...
    # Done (t=0.49s)
    # creating index...
    # index created!
    vocab = build_vocab()  # len(vocab) 10003
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    # loading annotations into memory...
    # Done (t=0.47s)
    # creating index...
    # index created!
    train_data = get_iterator(
        get_coco_data(vocab, train=True),
        batch_size=20,   # batch_size=args.batch_size  # default 128
        max_length=25,   # max_length=args.max_length  # default 30
        shuffle=True,
        num_workers=args.workers)

    # loading annotations into memory...
    # Done (t=0.37s)
    # creating index...
    # index created!
    val_data = get_iterator(
        get_coco_data(vocab, train=False),
        batch_size=20,   # batch_size=args.eval_batch_size  # default 128
        max_length=25,   # max_length=args.max_length  # default 30
        shuffle=False,
        num_workers=args.workers)

    # if 'cuda' in args.type:
    #     cudnn.benchmark = True
    #     model.cuda()
    # optimizer = select_optimizer(  # args.optimizer SGD
    #     args.optimizer, params=model.parameters(), lr=args.lr)  # args.lr = 0.1 float
    optimizer = select_optimizer(  # args.optimizer SGD
        args.optimizer, params=model.parameters(), lr=args.lr)  # args.lr = 0.1 float
    regime = lambda e: {
        'lr': args.lr * (args.lr_decay ** e),
        'momentum': args.momentum,
        'weight_decay': args.weight_decay
    }
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type  # True or False
        loss = nn.CrossEntropyLoss()  # CrossEntropyLoss()
        perplexity = AverageMeter()  # <utils.AverageMeter object at 0x7fa6be6f1780>
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        # Train
        # train_perp = forward(
        #     model, train_data, training=True, optimizer=optimizer)
        for i, (imgs, (captions, lengths)) in enumerate(data):  # len(data): number of batches, 2587
            # debug shortcut: slow down and stop after a few batches
            time.sleep(1)
            if i == 3:
                break
            data_time.update(time.time() - end)
            # if use_cuda:
            #     imgs = imgs.cuda()
            #     captions = captions.cuda(async=True)
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            # perplexity.update(math.exp(err.data[0]))
            perplexity.update(math.exp(err.item()))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                logging.info(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                        epoch, i, len(data),
                        phase='TRAINING' if training else 'EVALUATING',
                        batch_time=batch_time,
                        data_time=data_time,
                        perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):  # (0, 10)
        if epoch >= args.finetune_epoch:  # args.finetune_epoch 3
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(optimizer, epoch, regime)  # optimizer none
        # Train
        train_perp = forward(model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'.format(
                         epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
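# --- Hedged sketch (helper assumed by the script above, not part of it) ---------
# The script tracks perplexity and timing with an AverageMeter utility. A minimal
# sketch in the style of the classic PyTorch examples; the actual
# utils.AverageMeter may differ:
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average of a metric."""

    def __init__(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count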
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='times the lr with a factor for every lr-factor-epoch epoch')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the number of epoch to factor the lr, could be .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
def main(args): print("Loading data") dataset = args.data.rstrip('/').split('/')[-1] if dataset in ['yahoo', 'yelp']: with_label = True else: with_label = False corpus = Corpus( args.data, max_vocab_size=args.max_vocab, max_length=args.max_length, with_label=with_label ) pad_id = corpus.word2idx[PAD_TOKEN] vocab_size = len(corpus.word2idx) print("\ttraining data size: ", len(corpus.train)) print("\tvocabulary size: ", vocab_size) print("Constructing model") print(args) device = torch.device('cpu' if args.nocuda else 'cuda') model = TopGenVAE( vocab_size, args.embed_size, args.hidden_size, args.code_size, args.num_topics, args.dropout ).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) best_loss = None train_iter = get_iterator(corpus.train, args.batch_size, True, device) valid_iter = get_iterator(corpus.valid, args.batch_size, False, device) test_iter = get_iterator(corpus.test, args.batch_size, False, device) print("\nStart training") try: for epoch in range(1, args.epochs+1): epoch_start_time = time.time() (tr_seq_loss, tr_bow_loss, tr_kld_z, tr_kld_t, tr_seq_ppl, tr_bow_ppl) = train( train_iter, model, pad_id, optimizer, epoch ) (va_seq_loss, va_bow_loss, va_kld_z, va_kld_t, va_seq_ppl, va_bow_ppl) = evaluate( valid_iter, model, pad_id ) print('-' * 90) meta = "| epoch {:2d} | time {:5.2f}s ".format(epoch, time.time()-epoch_start_time) print(meta + "| train loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) " "| train ppl {:5.2f} {:5.2f}".format( tr_seq_loss, tr_bow_loss, tr_kld_z, tr_kld_t, tr_seq_ppl, tr_bow_ppl)) print(len(meta)*' ' + "| valid loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) " "| valid ppl {:5.2f} {:5.2f}".format( va_seq_loss, va_bow_loss, va_kld_z, va_kld_t, va_seq_ppl, va_bow_ppl), flush=True) epoch_loss = va_seq_loss + va_bow_loss + va_kld_z + va_kld_t if best_loss is None or epoch_loss < best_loss: best_loss = epoch_loss with open(get_savepath(args), 'wb') as f: torch.save(model, f) except KeyboardInterrupt: print('-' * 90) print('Exiting from training early') with open(get_savepath(args), 'rb') as f: model = torch.load(f) (te_seq_loss, te_bow_loss, te_kld_z, te_kld_t, te_seq_ppl, te_bow_ppl) = evaluate(test_iter, model, pad_id) print('=' * 90) print("| End of training | test loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) " "| test ppl {:5.2f} {:5.2f}".format( te_seq_loss, te_bow_loss, te_kld_z, te_kld_t, te_seq_ppl, te_bow_ppl)) print('=' * 90)
def main():
    k = 10
    repeat = 4
    epochs = 30
    batchsize = 256
    learning_rate = 1e-4

    # dataset_object = tf.keras.datasets.mnist
    dataset_object = tf.keras.datasets.cifar10
    (x_train, y_train), (x_test, y_test) = dataset_object.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    print('x_train:', x_train.shape, x_train.min(), x_train.max())
    print('y_train:', y_train.shape, y_train.min(), y_train.max())

    train_iterator = get_iterator(x_train, y_train, batchsize=batchsize)
    x_batch, x_perturb, y_batch = next(train_iterator)
    print('xbatch', x_batch.shape, 'xperturb', x_perturb.shape, 'ybatch', y_batch.shape)

    model = ResNetModel(k=k)
    print('x_batch:', x_batch.shape)
    z = model(x_batch, head='main', verbose=True)
    for z_ in z:
        print('z:', z_.shape)
    z = model(x_batch, head='aux')
    for z_ in z:
        print('z:', z_.shape)
    model.summary()

    # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    plt.figure(figsize=(3, 3), dpi=300)
    ax = plt.gca()
    ax.set_xlim([-1, 1])
    ax.set_ylim([-1, 1])

    main_losses = []
    aux_losses = []
    for e in range(epochs):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate / (2 * (e + 1)))
        # mnist_generator = generate_mnist(x_train, y_train, batchsize=batchsize)
        if e % 2 == 0:
            trainhead = 'main'
        else:
            trainhead = 'aux'

        train_iterator = get_iterator(x_train, y_train, batchsize=batchsize, repeat=repeat)
        # NOTE: this loop variable shadows the cluster count k defined above;
        # the clash is harmless here because k is only read at model construction.
        for k, (x_batch, x_perturb, y_batch) in enumerate(train_iterator):
            # if k % 2 == 0:
            #     trainhead = 'main'
            # else:
            #     trainhead = 'aux'
            with tf.GradientTape() as tape:
                z = model(x_batch, head=trainhead)
                zp = model(x_perturb, head=trainhead)
                losses = [IID_loss(z_, zp_) for z_, zp_ in zip(z, zp)]
                loss = tf.reduce_mean(losses)

            grads = tape.gradient(loss, model.trainable_variables)
            if k % 2 == 0:
                main_losses.append(loss.numpy())
            else:
                aux_losses.append(loss.numpy())

            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if k % 100 == 0:
                # take the last head and write
                save_images(x_batch, z[0], 'clusters/{}'.format(e), n=5)
                print('e: {} k: {} loss={}'.format(e, k, loss.numpy()))
                for i in range(1):
                    zmax = tf.argmax(z[i], axis=-1).numpy()
                    zpmax = tf.argmax(zp[i], axis=-1).numpy()
                    acc = (zmax == zpmax).mean()
                    print('\tacc={}'.format(acc), np.unique(zmax), np.unique(zpmax))

        # Each epoch
        ztest = {r: [] for r in range(1)}
        ylabel = []
        # test_iterator = get_iterator(x_train, y_train, batchsize=batchsize, repeat=1)
        test_iterator = get_iterator(x_test, y_test, batchsize=batchsize, repeat=1)
        for j, (x_batch, x_perturb, y_batch) in enumerate(test_iterator):
            for i, h in enumerate(model(x_batch, head='main')):
                ztest[i].append(h)
            ylabel.append(y_batch)

        # ztest = np.concatenate(ztest, axis=0)
        ylabel = np.squeeze(np.concatenate(ylabel))
        print('ylabel', ylabel.shape)
        for r in range(1):
            ztest[r] = np.concatenate(ztest[r], axis=0)
            print('ztest', ztest[r].shape)
            convex_combo(ztest[r], ylabel, ax, 'pointcloud/{}_{}.png'.format(r, e))

    with open('losses_main.txt', 'w+') as f:
        for l in main_losses:
            f.write('{}\n'.format(l))

    with open('losses_aux.txt', 'w+') as f:
        for l in aux_losses:
            f.write('{}\n'.format(l))
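# --- Hedged sketch (loss assumed by the script above, not part of it) -----------
# IID_loss is not defined here. This is a minimal sketch of the Invariant
# Information Clustering (IIC) objective it presumably implements: maximize the
# mutual information between the soft cluster assignments of an image and its
# perturbed copy. Details (epsilon, symmetrization) may differ from the original.
def IID_loss(z, zp, eps=1e-8):
    # z, zp: (batch, k) softmax outputs for the two views
    batch = tf.cast(tf.shape(z)[0], tf.float32)
    p = tf.matmul(z, zp, transpose_a=True) / batch   # joint distribution, (k, k)
    p = (p + tf.transpose(p)) / 2.0                  # enforce symmetry
    p = tf.clip_by_value(p, eps, 1e9)
    pi = tf.reduce_sum(p, axis=1, keepdims=True)     # marginal over rows
    pj = tf.reduce_sum(p, axis=0, keepdims=True)     # marginal over columns
    # negative mutual information I(z; zp)
    return -tf.reduce_sum(p * (tf.math.log(p) - tf.math.log(pi) - tf.math.log(pj)))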
    parser.add_argument(
        '--lr-factor-epoch', type=float, default=1,
        help='the number of epoch to factor the lr, could be .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if args.pretrained != 1:
        # if the CNN is trained from scratch, fine-tune it from the first epoch
        # (and train for more epochs overall)
        args.finetune_epoch = 0

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    if args.pretrained == 1:
        logging.info("using pretrained cnn %s", args.cnn)
        cnn = resnet.__dict__[args.cnn](pretrained=True)
    else:
        logging.info("using from-scratch cnn %s", args.cnn)
        cnn = resnet.__dict__[args.cnn](pretrained=False)

    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)
    regime = lambda e: {'lr': args.lr * (args.lr_decay ** e),
                        'momentum': args.momentum,
                        'weight_decay': args.weight_decay}
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                # `async` is a reserved keyword from Python 3.7 onwards;
                # the argument was renamed to `non_blocking`
                captions = captions.cuda(non_blocking=True)
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            perplexity.update(math.exp(err.data[0]))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time,
                                 data_time=data_time,
                                 perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(optimizer, epoch, regime)
        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        if epoch % args.save_freq == 0 or epoch == args.epochs - 1:
            model.save_checkpoint(checkpoint_file % (epoch + 1))
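# --- Hedged sketch (helper assumed by the script above, not part of it) ---------
# adjust_optimizer is not defined here. Given that `regime` maps an epoch index
# to a dict of hyperparameters, a minimal plausible version simply applies that
# dict to every parameter group; the real helper may support more options:
def adjust_optimizer(optimizer, epoch, regime):
    settings = regime(epoch) if callable(regime) else regime
    for param_group in optimizer.param_groups:
        for key, value in settings.items():
            if key in param_group:
                param_group[key] = value
    return optimizer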
def main(args): print("Loading data") dataset = args.data.rstrip('/').split('/')[-1] corpus = Corpus(args.data, max_vocab_size=args.max_vocab, max_length=args.max_length) pad_id = corpus.word2idx[PAD_TOKEN] sos_id = corpus.word2idx[SOS_TOKEN] vocab_size = len(corpus.word2idx) print(args) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') torch.cuda.set_device(args.device_id) cent_name = dataset + "_centers.pt" centers = None if args.center: centers = torch.load(cent_name).to(device) # centers.requires_grad = False centers = centers.detach() model = LstmVAE(vocab_size, args.embed_dim, args.hidden_dim, args.code_dim, args.dropout, centers=centers, enc_type=args.enc_type, de_type=args.de_type, dist=args.dist, fix=args.fix, device=device).to(device) if args.flow: flow = NormalizingFlows(args.code_dim, n_flows=args.n_flows, reg=args.reg, band=args.band).to(device) model.add_flow(flow) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(args.beta, 0.999), weight_decay=args.wd) train_iter = get_iterator(corpus.train, args.batch_size, True, device) valid_iter = get_iterator(corpus.valid, args.batch_size, False, device) test_iter = get_iterator(corpus.test, args.batch_size, False, device) start_epoch = 1 if args.load is True: start_epoch, model, optimizer = load_checkpoint( model, optimizer, device, args.save_name) print("\nStart training") try: for epoch in range(start_epoch, args.epochs + 1): (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl, nll, iw_nll, sum_log_j, start, batch_time) = run(args, train_iter, model, pad_id, optimizer, epoch, train=True) if args.save: save_checkpoint(model, optimizer, epoch, args.save_name) print('-' * 90) meta = "| epoch {:2d} ".format(epoch) print( meta + "| train loss {:5.2f} ({:5.2f}) ({:5.2f}) | train ppl {:5.2f} ({:5.2f} {:5.2f}) | mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f}" "| Time {batch_time.val:5.2f} ({batch_time.avg:5.2f})\t". 
format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll, mmd_loss, mi1, mi2, sum_log_j, batch_time=batch_time)) (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl, nll, iw_nll, sum_log_j, _, _) = run(args, valid_iter, model, pad_id, optimizer, epoch, train=False) print( len(meta) * ' ' + "| valid loss {:5.2f} ({:5.2f}) ({:5.2f}) | valid ppl {:5.2f} ({:5.2f} {:5.2f})" "| mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f} \t" .format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll, mmd_loss, mi1, mi2, sum_log_j, flush=True)) if dataset in ['yahoo'] and epoch in [15, 35]: for param_group in optimizer.param_groups: param_group['lr'] *= 0.5 except KeyboardInterrupt: print('-' * 50) print('Quit training') (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl, nll, iw_nll, sum_log_j, _, _) = run(args, test_iter, model, pad_id, optimizer, epoch, train=False) print('=' * 90) print( "| Test results | test loss {:5.2f} ({:5.2f}) ({:5.2f}) | test ppl {:5.2f} ({:5.2f} {:5.2f}) | test mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f} " .format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll, mmd_loss, mi1, mi2, sum_log_j)) print('=' * 90) with open(args.test_log_name, 'a') as fd: print('=' * 90, file=fd) print( "{} | dist {} | ende {} | em {} | | kla {} | mmd {} | flow {} | center {} | n flow {} | ker {} | reg {} | band {} | t {} | mmd w {} | iw {} | gpu {} | log {} |" .format(dataset, args.dist, args.de_type, args.embed_dim, args.kla, args.mmd, args.flow, args.center, args.n_flows, args.kernel, args.reg, args.band, args.t, args.mmd_w, args.iw, args.device_id, args.test_log_name), file=fd) print('-' * 90, file=fd) print( "| Test results | test loss {:5.2f} ({:5.2f}) ({:5.2f}) | test ppl {:5.2f} ({:5.2f} {:5.2f}) | test mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f}" .format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll, mmd_loss, mi1, mi2, sum_log_j), file=fd) print('=' * 90, file=fd)
def main(args): print("Loading data") dataset = args.data.rstrip('/').split('/')[-1] if dataset in ['yahoo']: with_label = True else: with_label = False if dataset in ['yahoo']: corpus = CorpusYahoo(args.data, max_vocab_size=args.max_vocab, max_length=args.max_length, with_label=with_label) pad_id = corpus.word2idx['_PAD'] else: corpus = Corpus(args.data, max_vocab_size=args.max_vocab, max_length=args.max_length, with_label=with_label) pad_id = corpus.word2idx[PAD_TOKEN] vocab_size = len(corpus.word2idx) print("\ttraining data size: ", len(corpus.train)) print("\tvocabulary size: ", vocab_size) print("Constructing model") print(args) device = torch.device('cpu' if args.nocuda else 'cuda') torch.cuda.set_device(args.cuda) if args.diag: model = MultiNormalVAE(vocab_size, args.embed_size, args.hidden_size, args.code_size, args.dropout, batch_size=args.batch_size, decomp=args.method, copula=args.copula) else: model = LstmVAE(vocab_size, args.embed_size, args.hidden_size, args.code_size, args.dropout, batch_size=args.batch_size, decomp=args.method, copula=args.copula) if args.multi: print("Let's use", torch.cuda.device_count(), "GPUs!") model = nn.DataParallel(model) model = model.to(device) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) best_loss = None train_iter = get_iterator(corpus.train, args.batch_size, True, device) valid_iter = get_iterator(corpus.valid, args.batch_size, False, device) test_iter = get_iterator(corpus.test, args.batch_size, False, device) start_epoch = 0 if args.load is True: start_epoch, model, optimizer, losslogger = load_checkpoint( model, optimizer, device, args.save_name) tr_loggers = [] va_loggers = [] print("\nStart training") try: for epoch in range(start_epoch, args.epochs): epoch_start_time = time.time() (tr_seq_loss, tr_bow_loss, tr_kld, tr_mi, tr_tc, tr_dwkl, tr_seq_ppl, tr_bow_ppl, tr_log_copula, tr_mmd, batch_time) = train(train_iter, model, pad_id, optimizer, epoch) (va_seq_loss, va_bow_loss, va_kld, va_mi, va_tc, va_dwkl, va_seq_ppl, va_bow_ppl, va_log_copula, va_mmd) = evaluate(valid_iter, model, pad_id) tr_losslogger = { "epoch": epoch, "seq_loss": tr_seq_loss, "bow_loss": tr_bow_loss, "kld": tr_kld, "mutual info": tr_mi, "tc": tr_tc, "dwkl": tr_dwkl, "seq_ppl": tr_seq_ppl, "bow_ppl": tr_bow_ppl, "log_copula": tr_log_copula, "mmd": tr_mmd, "time": batch_time } tr_loggers.append(tr_losslogger) losslogger = { "epoch": epoch, "seq_loss": va_seq_loss, "bow_loss": va_bow_loss, "kld": va_kld, "mutual info": va_mi, "tc": va_tc, "dwkl": va_dwkl, "seq_ppl": va_seq_ppl, "bow_ppl": va_bow_ppl, "log_copula": va_log_copula, "mmd": va_mmd, "time": batch_time } va_loggers.append(losslogger) save_checkpoint(model, optimizer, losslogger, args.save_name) print('-' * 90) meta = "| epoch {:2d} | time {:5.2f}s ".format( epoch, time.time() - epoch_start_time) print( meta + "| train loss {:5.2f} {:5.2f} ({:5.2f}) " "| {:5.2f} {:5.2f} {:5.2f} " "| train ppl {:5.2f} {:5.2f} | log copula {:5.2f} | mmd {:5.2f}" "| Time {batch_time.val:5.2f} ({batch_time.avg:5.2f})\t". 
format(tr_seq_loss, tr_bow_loss, tr_kld, tr_mi, tr_tc, tr_dwkl, tr_seq_ppl, tr_bow_ppl, tr_log_copula, tr_mmd, batch_time=batch_time)) print( len(meta) * ' ' + "| valid loss {:5.2f} {:5.2f} ({:5.2f}) " "| {:5.2f} {:5.2f} {:5.2f} " "| valid ppl {:5.2f} {:5.2f} | valid log copula {:5.2f} | valid mmd {:5.2f}" "| joint NLL {:5.2f}".format( va_seq_loss, va_bow_loss, va_kld, va_mi, va_tc, va_dwkl, va_seq_ppl, va_bow_ppl, va_log_copula, va_mmd, va_seq_loss + va_kld - va_log_copula), flush=True) epoch_loss = va_seq_loss + va_kld if best_loss is None or epoch_loss < best_loss: best_loss = epoch_loss # with open(get_savepath(args), 'wb') as f: # torch.save(model, f) except KeyboardInterrupt: print('-' * 90) print('Exiting from training early') save_logger(tr_loggers, va_loggers, args.loss_name) # with open(get_savepath(args), 'rb') as f: # model = torch.load(f) (te_seq_loss, te_bow_loss, te_kld, te_mi, te_tc, te_dwkl, te_seq_ppl, te_bow_ppl, te_log_copula, te_mmd) = evaluate(test_iter, model, pad_id) print('=' * 90) print("| End of training | test loss {:5.2f} {:5.2f} ({:5.2f}) " "| {:5.2f} {:5.2f} {:5.2f} " "| test ppl {:5.2f} {:5.2f}" "| test log copula {:5.2f}" "| test mmd {:5.2f}" "| test nll {:5.2f}".format(te_seq_loss, te_bow_loss, te_kld, te_mi, te_tc, te_dwkl, te_seq_ppl, te_bow_ppl, te_log_copula, te_mmd, te_seq_loss + te_kld - te_log_copula)) print('=' * 90) te_losslogger = { "seq_loss": te_seq_loss, "bow_loss": te_bow_loss, "kld": te_kld, "seq_ppl": te_seq_ppl, "bow_ppl": te_bow_ppl, "log_copula": te_log_copula, "mmd": te_mmd, }
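# --- Hedged sketch (helper assumed by the script above, not part of it) ---------
# save_logger is not defined here. A minimal plausible version that dumps the
# per-epoch train/valid logger dicts to a JSON file named by args.loss_name;
# the real helper may use a different format (pickle, CSV, ...):
import json

def save_logger(tr_loggers, va_loggers, loss_name):
    with open(loss_name, 'w') as f:
        # default=str lets non-serializable entries (e.g. AverageMeter objects
        # stored under "time") fall back to their string representation
        json.dump({'train': tr_loggers, 'valid': va_loggers}, f,
                  indent=2, default=str)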