def main(args):
    """Write unigram perplexities of the sampled sentences to a CSV file.

    For each of the ``train`` and ``valid`` PTB splits a unigram model is
    fitted on the split's corpus and used to score three groups of samples:
    all "prime" samples together, each keyed subset of the prime samples, and
    the plain samples.  One row per score is written to
    ``results/perplexity.csv``.

    Args:
        args: parsed command-line namespace; ``data_dir``,
            ``max_sequence_length`` and ``min_occ`` are read here.
    """
    # NOTE(security): these are pickle files, and unpickling can execute
    # arbitrary code.  ``allow_pickle=True`` is required by current NumPy
    # releases to read them at all -- only point this at trusted files.
    samples = np.load('samples/sents_sample_n250.pkl', allow_pickle=True)
    samples_prime = load_prime_samples('samples/sents_sample_prime_n250.pkl')
    samples_prime_dict = np.load('samples/sents_sample_prime_n250.pkl',
                                 allow_pickle=True)

    # The context manager closes the CSV even if scoring raises half-way.
    with open('results/perplexity.csv', 'w') as f:
        f.write('data_type,sample_type,perplexity\n')

        for data_type in ['train', 'valid']:
            dataset = PTB(data_dir=args.data_dir,
                          split=data_type,
                          create_data=False,
                          max_sequence_length=args.max_sequence_length,
                          min_occ=args.min_occ)
            corpus = make_corpus(dataset)
            model = unigram(corpus)

            # compute and save perplexities
            f.write('{},all_z_prime,{:.2f}\n'.format(
                data_type, perplexity(samples_prime, model)))
            for k, prime_subset in samples_prime_dict.items():
                # NOTE(review): this row is written at full precision while
                # the other rows use two decimals; kept as in the original.
                f.write('{},{}_z_prime,{}\n'.format(
                    data_type, k, perplexity(prime_subset, model)))
            f.write('{},z,{:.2f}\n'.format(data_type,
                                           perplexity(samples, model)))
def main(args):
    """Train a SentenceVAE on the PTB train/valid splits and plot the losses.

    Every epoch runs one optimisation pass over ``train`` followed by one
    evaluation pass over ``valid``.  Per-batch and per-epoch statistics go to
    a log file under ``args.logdir`` (and to TensorBoard when
    ``args.tensorboard_logging`` is set).  After each validation pass the
    target sentences and latent codes are dumped to
    ``dumps/<ts>/valid_E<epoch>.json``; after each training pass a checkpoint
    is written to ``<args.save_model_path>/<ts>/E<epoch>.pytorch``.  Finally
    the per-epoch losses of both splits are plotted to
    ``<logdir>/<experiment_name>.png``.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``logdir``, ``data_dir``, ``create_data``,
            ``max_sequence_length``, ``min_occ``, the model hyper-parameters
            forwarded to ``SentenceVAE``, ``tensorboard_logging``,
            ``save_model_path``, ``epochs``, ``batch_size``,
            ``learning_rate``, ``anneal_function`` and ``print_every``.

    Raises:
        ValueError: if ``args.anneal_function`` is not a known schedule.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    splits = ['train', 'valid']

    # logging.basicConfig opens the log file right away and cannot create its
    # parent directory, so make sure the directory exists first.
    if args.logdir:
        os.makedirs(args.logdir, exist_ok=True)
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        filename=os.path.join(args.logdir,
                              experiment_name(args, ts) + ".log"))
    logger = logging.getLogger(__name__)

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # NOTE(review): 42000 looks like a hard-coded approximation of the number
    # of training sentences -- confirm against len(datasets['train']).
    total_step = int(args.epochs * 42000.0 / args.batch_size)

    # Schedules whose KL weight does not depend on the step.
    constant_kl_weights = {'half': 0.5, 'identity': 1, 'double': 2, 'quadra': 4}

    def kl_anneal_function(anneal_function, step):
        """Return the KL weight for schedule ``anneal_function`` at ``step``."""
        if anneal_function in constant_kl_weights:
            return constant_kl_weights[anneal_function]
        if anneal_function == 'sigmoid':
            return 1 / (1 + np.exp((0.5 * total_step - step) / 200))
        if anneal_function == 'monotonic':
            # Linear ramp over the first quarter of training, then 1.
            return min(1.0, step * 4 / total_step)
        if anneal_function == 'cyclical':
            # Four cycles; within each cycle ramp up over the first quarter.
            period = total_step / 4
            return min(1.0, 4 * (step % period) / period)
        # Falling through used to return None, which only surfaced later as
        # an opaque TypeError when the weight was multiplied with the loss.
        raise ValueError("Unknown anneal_function: %r" % (anneal_function,))

    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step):
        """Return ``(recon_loss, KL_loss, KL_weight)`` for one batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    train_loss = []
    test_loss = []
    for epoch in range(args.epochs):
        for split in splits:
            is_train = split == 'train'
            split_tag = split.upper()

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=is_train,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            num_batches = len(data_loader)

            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if is_train:
                model.train()
            else:
                model.eval()

            # Evaluation needs no autograd graph; skipping it saves memory
            # and time without changing any reported number.
            with torch.set_grad_enabled(is_train):
                for iteration, batch in enumerate(data_loader):
                    batch_size = batch['input'].size(0)
                    global_step = epoch * num_batches + iteration

                    for k, v in batch.items():
                        if torch.is_tensor(v):
                            batch[k] = to_var(v)

                    # Forward pass
                    logp, mean, logv, z = model(batch['input'],
                                                batch['length'])

                    # loss calculation
                    recon_loss, KL_loss, KL_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step)

                    if is_train:
                        loss = (recon_loss + KL_weight * KL_loss) / batch_size
                    else:
                        # report complete elbo when validation
                        loss = (recon_loss + KL_loss) / batch_size

                    # backward + optimization
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        step += 1

                    # bookkeeping
                    loss_value = loss.item()
                    recon_value = recon_loss.item() / batch_size
                    kl_value = KL_loss.item() / batch_size

                    tracker["negELBO"].append(loss_value)
                    tracker["recon_loss"].append(recon_value)
                    tracker["KL_Loss"].append(kl_value)
                    tracker["KL_Weight"].append(KL_weight)

                    if args.tensorboard_logging:
                        writer.add_scalar("%s/Negative_ELBO" % split_tag,
                                          loss_value, global_step)
                        writer.add_scalar("%s/Recon_Loss" % split_tag,
                                          recon_value, global_step)
                        writer.add_scalar("%s/KL_Loss" % split_tag,
                                          kl_value, global_step)
                        writer.add_scalar("%s/KL_Weight" % split_tag,
                                          KL_weight, global_step)

                    if (iteration % args.print_every == 0
                            or iteration + 1 == num_batches):
                        logger.info(
                            "\tStep\t%s\t%04d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f"
                            % (split_tag, iteration, num_batches - 1,
                               loss_value, recon_value, kl_value, KL_weight))
                        print(
                            "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                            % (split_tag, iteration, num_batches - 1,
                               loss_value, recon_value, kl_value, KL_weight))

                    if split == 'valid':
                        # tracker is a defaultdict(list), so both keys start
                        # out as empty lists on first access.
                        tracker['target_sents'] += idx2word(
                            batch['target'].data,
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)
                        tracker['z'].append(z.data.tolist())

            # Per-epoch means, computed once and reused by every sink below.
            mean_neg_elbo = sum(tracker['negELBO']) / len(tracker['negELBO'])
            mean_recon = (1.0 * sum(tracker['recon_loss'])
                          / len(tracker['recon_loss']))
            mean_kl = 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss'])
            mean_kl_weight = (1.0 * sum(tracker['KL_Weight'])
                              / len(tracker['KL_Weight']))

            logger.info(
                "\tEpoch\t%s\t%02d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" %
                (split_tag, epoch, args.epochs, mean_neg_elbo, mean_recon,
                 mean_kl, mean_kl_weight))
            print("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                  (split_tag, epoch, args.epochs, mean_neg_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split_tag,
                                  mean_neg_elbo, epoch)
                writer.add_scalar("%s-Epoch/recon_loss" % split_tag,
                                  mean_recon, epoch)
                writer.add_scalar("%s-Epoch/KL_Loss" % split_tag,
                                  mean_kl, epoch)
                writer.add_scalar("%s-Epoch/KL_Weight" % split_tag,
                                  mean_kl_weight, epoch)

            if is_train:
                train_loss.append(mean_neg_elbo)
            else:
                test_loss.append(mean_neg_elbo)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                dump_dir = os.path.join('dumps', ts)
                # exist_ok avoids the check-then-create race.
                os.makedirs(dump_dir, exist_ok=True)
                dump_path = os.path.join(dump_dir, 'valid_E%i.json' % epoch)
                with open(dump_path, 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if is_train:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

    if args.tensorboard_logging:
        # Flush pending events before the process exits.
        writer.close()

    # Plot the per-epoch loss of both splits.
    sns.set(style="whitegrid")
    df = pd.DataFrame()
    df["train"] = train_loss
    df["test"] = test_loss
    ax = sns.lineplot(data=df, legend=False)
    ax.set(xlabel='Epoch', ylabel='Loss')
    plt.legend(title='Split', loc='upper right', labels=['Train', 'Test'])
    plt.savefig(os.path.join(args.logdir,
                             experiment_name(args, ts) + ".png"),
                transparent=True,
                dpi=300)
from ptb import PTB # parse the command line arguments parser = NgraphArgparser(__doc__) parser.set_defaults(gen_be=False) args = parser.parse_args() # these hyperparameters are from the paper args.batch_size = 50 time_steps = 10 hidden_size = 20 gradient_clip_value = 15 # download penn treebank tree_bank_data = PTB(path=args.data_dir) ptb_data = tree_bank_data.load_data() train_set = SequentialArrayIterator(ptb_data['train'], batch_size=args.batch_size, time_steps=time_steps, total_iterations=args.num_iterations) valid_set = SequentialArrayIterator(ptb_data['valid'], batch_size=args.batch_size, time_steps=time_steps) # weight initialization init = UniformInit(low=-0.08, high=0.08) # model initialization seq1 = Sequential([
def main(args):
    """Load a trained SentenceVAE and print one encode/decode round trip.

    The vocabulary is read from ``<args.data_dir>/ptb.vocab.json``, the model
    is rebuilt with the hyper-parameters in ``args`` and restored from
    ``args.load_checkpoint``.  One batch of two validation sentences is then
    encoded, decoded again from the resulting latent codes, and both the
    inputs and the reconstructions are printed.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``data_dir``, ``load_checkpoint``, ``max_sequence_length`` and
            the model hyper-parameters forwarded to ``SentenceVAE``.

    Raises:
        FileNotFoundError: if ``args.load_checkpoint`` does not exist.
    """
    with open(os.path.join(args.data_dir, 'ptb.vocab.json'), 'r') as vocab_file:
        vocab = json.load(vocab_file)

    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(
        vocab_size=len(w2i),
        sos_idx=w2i['<sos>'],
        eos_idx=w2i['<eos>'],
        pad_idx=w2i['<pad>'],
        unk_idx=w2i['<unk>'],
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    # Map the stored tensors to the CPU so that a checkpoint written on a GPU
    # machine can also be restored on a CPU-only host; the model is moved to
    # the GPU afterwards when one is available.
    model.load_state_dict(torch.load(args.load_checkpoint, map_location='cpu'))
    print("Model loaded from %s"%(args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()

    print('-------Encode ... Decode-------')

    datasets = PTB(
        data_dir=args.data_dir,
        split="valid",
        create_data=False,
        max_sequence_length=args.max_sequence_length,
        min_occ=1
    )
    # The original passed the truthy string 'valid' as ``shuffle``, which
    # DataLoader treats as "shuffle on"; spell that out as a real boolean.
    data_loader = DataLoader(dataset=datasets,
                             batch_size=2,
                             shuffle=True,
                             num_workers=cpu_count(),
                             pin_memory=torch.cuda.is_available())

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for batch in data_loader:
            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            print("*"*10)
            print(*idx2word(batch['input'], i2w=i2w, pad_idx=w2i['<pad>']),
                  sep='\n')

            # Encode the batch, then decode again from the latent codes.
            logp, mean, logv, z = model(batch['input'], batch['length'])

            print("+"*10)
            samples, z = model.inference(z=z)
            print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

            # Only the first batch is shown.
            break
from utils import linear_anneal, log_Normal_diag, log_Normal_standard

# In[20]:

# Run on the first GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

max_len = 64
batch_size = 32
splits = ['train', 'valid', 'test']

# One Penn TreeBank (PTB) dataset object per split.
data_path = '../data'
datasets = dict(
    (name, PTB(root=data_path, split=name))
    for name in splits
)

# One data loader per split; only the training split is shuffled.
dataloaders = dict(
    (
        name,
        DataLoader(
            datasets[name],
            batch_size=batch_size,
            shuffle=(name == 'train'),
            num_workers=cpu_count(),
            pin_memory=torch.cuda.is_available(),
        ),
    )
    for name in splits
)

# Vocabulary helpers are taken from the training split.
symbols = datasets['train'].symbols
id_to_word = datasets['train'].idx_to_word

# In[3]:
def main(args):
    """Train a SentenceRNN language model on PTB with optional early stopping.

    Every epoch runs one pass over each split (``train``, ``valid`` and,
    when ``args.test`` is set, ``test``); only the ``train`` pass updates the
    weights.  A checkpoint is saved after each training pass.  When
    ``args.early_stopping`` is set, the mean validation loss is fed to an
    ``EarlyStopping`` tracker and training stops at the end of the epoch in
    which it fires.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, the model hyper-parameters forwarded to
            ``SentenceRNN``, ``tensorboard_logging``, ``logdir``,
            ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``print_every`` and ``early_stopping``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceRNN(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name_rnn(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # ``reduction='sum'`` is the supported spelling of the deprecated
    # ``size_average=False``: sum the per-token losses over the batch.
    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length):
        """Return the summed negative log-likelihood of one batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        return NLL(logp, target)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    early_stopping = EarlyStopping(history=10)
    for epoch in range(args.epochs):
        early_stopping_flag = False

        for split in splits:
            is_train = split == 'train'
            split_tag = split.upper()

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=is_train,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            num_batches = len(data_loader)

            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if is_train:
                model.train()
            else:
                model.eval()

            # Evaluation passes need no autograd graph.
            with torch.set_grad_enabled(is_train):
                for iteration, batch in enumerate(data_loader):
                    batch_size = batch['input'].size(0)

                    for k, v in batch.items():
                        if torch.is_tensor(v):
                            batch[k] = to_var(v)

                    # Forward pass
                    logp = model(batch['input'], batch['length'])

                    # loss calculation
                    NLL_loss = loss_fn(logp, batch['target'], batch['length'])
                    loss = (NLL_loss) / batch_size

                    # backward + optimization
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    # bookkeeping
                    tracker['Loss'].append(loss.item())

                    if args.tensorboard_logging:
                        writer.add_scalar("%s/NLL_Loss" % split_tag,
                                          NLL_loss.item() / batch_size,
                                          epoch * num_batches + iteration)

                    if (iteration % args.print_every == 0
                            or iteration + 1 == num_batches):
                        # NOTE(review): ``logger`` is not defined in this
                        # function; presumably a module-level logger.
                        logger.info("%s Batch %04d/%i, Loss %9.4f" %
                                    (split_tag, iteration, num_batches - 1,
                                     loss.item()))

            mean_loss = sum(tracker['Loss']) / len(tracker['Loss'])

            logger.info("%s Epoch %02d/%i, Mean Loss %9.4f" %
                        (split_tag, epoch, args.epochs, mean_loss))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/Loss" % split_tag, mean_loss,
                                  epoch)

            # Only the validation loss drives early stopping.
            if split == 'valid' and args.early_stopping:
                if early_stopping.check(mean_loss):
                    early_stopping_flag = True

            # save checkpoint
            if is_train:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

        # Checked once per epoch, after all splits have run, so the remaining
        # splits of the current epoch are still evaluated.
        if early_stopping_flag:
            print("Early stopping trigerred. Training stopped...")
            break

    if args.tensorboard_logging:
        # Flush pending events before the process exits.
        writer.close()
def main(args):
    """Train a SentenceVAE on PTB with an annealed KL term.

    Every epoch runs one pass over each split (``train``, ``valid`` and,
    when ``args.test`` is set, ``test``); only the ``train`` pass updates the
    weights.  The model constructor arguments are saved next to the
    checkpoints as ``model_params.json``.  After each validation pass the
    target sentences and latent codes are dumped to
    ``dumps/<ts>/valid_E<epoch>.json``; after each training pass a checkpoint
    is written to ``<args.save_model_path>/<ts>/E<epoch>.pytorch``.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, the model hyper-parameters forwarded to
            ``SentenceVAE``, ``tensorboard_logging``, ``logdir``,
            ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``k``, ``x0`` and
            ``print_every``.

    Raises:
        ValueError: if ``args.anneal_function`` is neither ``'logistic'``
            nor ``'linear'``.
    """
    # Timestamp used as directory name; ':' is replaced by '-' in it.
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    ts = ts.replace(':', '-')

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    # One dataset object per split.
    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    # Model constructor arguments; also persisted below.
    params = dict(vocab_size=datasets['train'].vocab_size,
                  sos_idx=datasets['train'].sos_idx,
                  eos_idx=datasets['train'].eos_idx,
                  pad_idx=datasets['train'].pad_idx,
                  unk_idx=datasets['train'].unk_idx,
                  max_sequence_length=args.max_sequence_length,
                  embedding_size=args.embedding_size,
                  rnn_type=args.rnn_type,
                  hidden_size=args.hidden_size,
                  word_dropout=args.word_dropout,
                  embedding_dropout=args.embedding_dropout,
                  latent_size=args.latent_size,
                  num_layers=args.num_layers,
                  bidirectional=args.bidirectional)

    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # Persist the constructor arguments next to the checkpoints.
    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        """Return the weight applied to the KL term at training ``step``."""
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        if anneal_function == 'linear':
            return min(1, step / x0)
        # Falling through used to return None, which only surfaced later as
        # an opaque TypeError when the weight was multiplied with the loss.
        raise ValueError("Unknown anneal_function: %r" % (anneal_function,))

    # Summed (not averaged) NLL over all non-padding target tokens.
    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx,
                           reduction='sum')

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        """Return ``(NLL_loss, KL_loss, KL_weight)`` for one batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    def mean_of(values):
        """Arithmetic mean of a list; NaN for an empty list."""
        return sum(values) / len(values) if values else float('nan')

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            is_train = split == 'train'
            split_tag = split.upper()

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=is_train,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            num_batches = len(data_loader)

            # Plain lists instead of repeatedly re-concatenated tensors:
            # appending is O(1) per batch, whereas torch.cat copied
            # everything collected so far on every batch.
            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if is_train:
                model.train()
            else:
                model.eval()

            # Evaluation passes need no autograd graph.
            with torch.set_grad_enabled(is_train):
                for iteration, batch in enumerate(data_loader):
                    batch_size = batch['input'].size(0)
                    global_step = epoch * num_batches + iteration

                    for k, v in batch.items():
                        if torch.is_tensor(v):
                            batch[k] = to_var(v)

                    # Forward pass
                    logp, mean, logv, z = model(batch['input'],
                                                batch['length'])

                    # loss calculation
                    NLL_loss, KL_loss, KL_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step, args.k, args.x0)

                    # final loss calculation
                    loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                    # backward + optimization
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        step += 1

                    # bookkeeping
                    tracker['ELBO'].append(loss.item())

                    if args.tensorboard_logging:
                        writer.add_scalar("%s/ELBO" % split_tag, loss.item(),
                                          global_step)
                        writer.add_scalar("%s/NLL Loss" % split_tag,
                                          NLL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL Loss" % split_tag,
                                          KL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL Weight" % split_tag,
                                          KL_weight, global_step)

                    if (iteration % args.print_every == 0
                            or iteration + 1 == num_batches):
                        print(
                            "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                            % (split_tag, iteration, num_batches - 1,
                               loss.item(), NLL_loss.item() / batch_size,
                               KL_loss.item() / batch_size, KL_weight))

                    if split == 'valid':
                        tracker['target_sents'] += idx2word(
                            batch['target'].data,
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)
                        # One latent vector per sentence, in batch order.
                        tracker['z'].extend(z.data.tolist())

            mean_elbo = mean_of(tracker['ELBO'])
            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split_tag, epoch, args.epochs, mean_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split_tag, mean_elbo,
                                  epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                dump_dir = os.path.join('dumps', ts)
                # exist_ok avoids the check-then-create race.
                os.makedirs(dump_dir, exist_ok=True)
                dump_path = os.path.join(dump_dir, 'valid_E%i.json' % epoch)
                with open(dump_path, 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if is_train:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

    if args.tensorboard_logging:
        # Flush pending events before the process exits.
        writer.close()
if torch.cuda.is_available(): model = model.cuda() model.eval() print('----------SAMPLES----------') for i in range(5): sample, z = model.inference() sample = sample.cpu().numpy() print(sample) print(idx2word(sample, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') datasets = OrderedDict() datasets['test'] = PTB(data_dir=args.data_dir, split='test', create_data=args.create_data, max_sequence_length=60, min_occ=args.min_occ) print('-------RECONSTRUCTION-------') sample = datasets['test'].data['300']['input'] print('sample 1: ' + idx2word(sample[1:], i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') input = torch.Tensor(sample).long() if torch.cuda.is_available(): input = input.cuda() input = input.unsqueeze(0) _, _, _, z = model(input) recon, z = model.inference(z=z) recon = recon.cpu().numpy()
def main(args):
    """Train a SentenceVAE on PTB and optionally print generated sentences.

    Every epoch runs one pass over each split (``train``, ``valid`` and,
    when ``args.test`` is set, ``test``); only the ``train`` pass updates the
    weights and uses the annealed KL weight, the other splits report the
    full negative ELBO.  After each validation pass the target sentences and
    latent codes are dumped to ``dumps/<ts>/valid_E<epoch>.json``; after each
    training pass a checkpoint is saved.  When ``args.num_samples`` is set,
    that many sentences are generated with ``model.inference`` after
    training and printed.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, the model hyper-parameters forwarded to
            ``SentenceVAE``, ``tensorboard_logging``, ``logdir``,
            ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``warmup``,
            ``print_every`` and ``num_samples``.

    Raises:
        ValueError: if ``args.anneal_function`` is neither ``'identity'``
            nor ``'linear'``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    total_steps = (len(datasets["train"]) // args.batch_size) * args.epochs

    # The value is the number of optimisation steps, not the dataset size
    # that the previous message claimed.
    print("Total training steps", total_steps)

    def kl_anneal_function(anneal_function, step):
        """Return the weight applied to the KL term at training ``step``."""
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'linear':
            if args.warmup is None:
                # Linear ramp from 0 to 1 over the whole run.
                return 1 - (total_steps - step) / total_steps
            # Linear ramp over the first ``args.warmup`` epochs, then 1.
            warmup_steps = (total_steps / args.epochs) * args.warmup
            if step < warmup_steps:
                return 1 - (warmup_steps - step) / warmup_steps
            return 1.0
        # Falling through used to return None, which only surfaced later as
        # an opaque TypeError when the weight was multiplied with the loss.
        raise ValueError("Unknown anneal_function: %r" % (anneal_function,))

    # ``reduction='sum'`` is the supported spelling of the deprecated
    # ``size_average=False``.
    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step):
        """Return ``(recon_loss, KL_loss, KL_weight)`` for one batch."""
        # cut-off unnecessary padding from target, and flatten.
        # ``.item()`` replaces ``.data[0]``, which raises on 0-dim tensors
        # in current PyTorch.
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step)

        return recon_loss, KL_loss, KL_weight

    def mean_of(values):
        """Arithmetic mean of a list; NaN for an empty list."""
        return sum(values) / len(values) if values else float('nan')

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            is_train = split == 'train'
            split_tag = split.upper()

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=is_train,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            num_batches = len(data_loader)

            # Plain lists instead of repeatedly re-concatenated tensors:
            # appending is O(1) per batch, whereas torch.cat copied
            # everything collected so far on every batch.
            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if is_train:
                model.train()
            else:
                model.eval()

            # Evaluation passes need no autograd graph.
            with torch.set_grad_enabled(is_train):
                for iteration, batch in enumerate(data_loader):
                    batch_size = batch['input'].size(0)
                    global_step = epoch * num_batches + iteration

                    for k, v in batch.items():
                        if torch.is_tensor(v):
                            batch[k] = to_var(v)

                    # Forward pass
                    logp, mean, logv, z = model(batch['input'],
                                                batch['length'])

                    # loss calculation
                    recon_loss, KL_loss, KL_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step)

                    if is_train:
                        loss = (recon_loss + KL_weight * KL_loss) / batch_size
                    else:
                        # report complete elbo when validation
                        loss = (recon_loss + KL_loss) / batch_size

                    # backward + optimization
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        step += 1

                    # bookkeeping
                    tracker['negELBO'].append(loss.item())

                    if args.tensorboard_logging:
                        # TensorBoard always gets the un-annealed ELBO.
                        neg_elbo = (recon_loss + KL_loss) / batch_size
                        writer.add_scalar("%s/Negative_ELBO" % split_tag,
                                          neg_elbo.item(), global_step)
                        writer.add_scalar("%s/Recon_Loss" % split_tag,
                                          recon_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL_Loss" % split_tag,
                                          KL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL_Weight" % split_tag,
                                          KL_weight, global_step)

                    if (iteration % args.print_every == 0
                            or iteration + 1 == num_batches):
                        # NOTE(review): ``logger`` is not defined in this
                        # function; presumably a module-level logger.
                        logger.info(
                            "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                            % (split_tag, iteration, num_batches - 1,
                               loss.item(), recon_loss.item() / batch_size,
                               KL_loss.item() / batch_size, KL_weight))

                    if split == 'valid':
                        tracker['target_sents'] += idx2word(
                            batch['target'].data,
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)
                        # One latent vector per sentence, in batch order.
                        tracker['z'].extend(z.data.tolist())

            mean_neg_elbo = mean_of(tracker['negELBO'])
            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split_tag, epoch, args.epochs, mean_neg_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split_tag,
                                  mean_neg_elbo, epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                dump_dir = os.path.join('dumps', ts)
                # exist_ok avoids the check-then-create race.
                os.makedirs(dump_dir, exist_ok=True)
                dump_path = os.path.join(dump_dir, 'valid_E%i.json' % epoch)
                with open(dump_path, 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if is_train:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

    if args.tensorboard_logging:
        # Flush pending events before the process exits.
        writer.close()

    if args.num_samples:
        torch.cuda.empty_cache()
        model.eval()
        with torch.no_grad():
            print(f"Generating {args.num_samples} samples")
            generations, _ = model.inference(n=args.num_samples)
            vocab = datasets["train"].i2w

            print(
                "Sampled latent codes from z ~ N(0, I), generated sentences:")
            for i, generation in enumerate(generations, start=1):
                # The index-to-word mapping is keyed by the string form of
                # the token id.
                sentence = [vocab[str(word.item())] for word in generation]
                print(f"{i}:", " ".join(sentence))
def main(args):
    """Train a SentenceVAE that models both a definition and its word.

    Every epoch runs one pass over each split (``train``, ``valid`` and,
    when ``args.test`` is set, ``test``); only the ``train`` pass updates the
    weights.  The loss combines the definition NLL (weight 1), the word NLL
    (logistic weight) and the KL term (annealed weight).  After each
    validation pass the target sentences and latent codes are dumped to
    ``dumps/<ts>/valid_E<epoch>.json``; after each training pass a checkpoint
    is written to ``<args.save_model_path>/<ts>/E<epoch>.pytorch``.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, the model hyper-parameters forwarded to
            ``SentenceVAE``, ``tensorboard_logging``, ``logdir``,
            ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``k``, ``x0`` and
            ``print_every``.

    Raises:
        ValueError: if ``args.anneal_function`` is neither ``'logistic'``
            nor ``'linear'``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.localtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        # The original read ``use_bert=args. False``, which is a syntax
        # error; the literal ``False`` is what the leftover text spells out.
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ,
                              use_bert=False)

    model = SentenceVAE(alphabet_size=datasets['train'].alphabet_size,
                        vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)
    print("Saving model to directory: " + save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        """Return the weight applied to the KL term at training ``step``."""
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        if anneal_function == 'linear':
            return min(1, step / x0)
        # Falling through used to return None, which only surfaced later as
        # an opaque TypeError when the weight was multiplied with the loss.
        raise ValueError("Unknown anneal_function: %r" % (anneal_function,))

    def word_weight_function(step, k, x0):
        """Return the logistic weight of the word NLL term at ``step``."""
        return float(1 / (1 + np.exp(-k * (step - x0))))

    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(def_logp, word_logp, def_target, def_length, word_target,
                word_length, mean, logv):
        """Return ``(def_NLL_loss, word_NLL_loss, KL_loss)`` for one batch."""
        # cut-off unnecessary padding from target definition, and flatten
        def_target = def_target[:, :torch.max(def_length).item()].contiguous(
        ).view(-1)
        def_logp = def_logp.view(-1, def_logp.size(2))

        # Negative Log Likelihood
        def_NLL_loss = NLL(def_logp, def_target)

        # cut off padding for words
        word_target = word_target[:, :torch.max(word_length).item(
        )].contiguous().view(-1)
        word_logp = word_logp.view(-1, word_logp.size(2))

        # Word NLL
        word_NLL_loss = NLL(word_logp, word_target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())

        return def_NLL_loss, word_NLL_loss, KL_loss

    def get_weights(anneal_function, step, k, x0):
        """Return the per-term loss weights at training ``step``."""
        # for logistic function, k = growth rate
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        word_weight = word_weight_function(step, k, x0)

        return {'def': 1, 'word': word_weight, 'kl': KL_weight}

    def mean_of(values):
        """Arithmetic mean of a list; NaN for an empty list."""
        return sum(values) / len(values) if values else float('nan')

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            is_train = split == 'train'
            split_tag = split.upper()

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=is_train,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            num_batches = len(data_loader)

            # Plain lists instead of repeatedly re-concatenated tensors:
            # appending is O(1) per batch, whereas torch.cat copied
            # everything collected so far on every batch.
            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if is_train:
                model = model.train()
            else:
                model = model.eval()

            # Evaluation passes need no autograd graph.
            with torch.set_grad_enabled(is_train):
                for iteration, batch in enumerate(data_loader):
                    batch_size = batch['input'].size(0)
                    global_step = epoch * num_batches + iteration

                    for k, v in batch.items():
                        if torch.is_tensor(v):
                            batch[k] = to_var(v)

                    # Forward pass
                    [def_logp, word_logp], mean, logv, z = model(
                        batch['input'], batch['length'],
                        batch['word_length'])

                    # loss calculation
                    def_NLL_loss, word_NLL_loss, KL_loss = loss_fn(
                        def_logp, word_logp, batch['target'],
                        batch['length'], batch['word'], batch['word_length'],
                        mean, logv)
                    weights = get_weights(args.anneal_function, step, args.k,
                                          args.x0)

                    loss = (weights['def'] * def_NLL_loss +
                            weights['word'] * word_NLL_loss +
                            weights['kl'] * KL_loss) / batch_size

                    mean_logv = torch.mean(logv)

                    # backward + optimization
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        step += 1

                    # bookkeeping
                    tracker['ELBO'].append(loss.item())

                    if args.tensorboard_logging:
                        writer.add_scalar("%s/ELBO" % split_tag, loss.item(),
                                          global_step)
                        writer.add_scalar("%s/Def NLL Loss" % split_tag,
                                          def_NLL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/Word NLL Loss" % split_tag,
                                          word_NLL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL Loss" % split_tag,
                                          KL_loss.item() / batch_size,
                                          global_step)
                        writer.add_scalar("%s/KL Weight" % split_tag,
                                          weights['kl'], global_step)
                        writer.add_scalar("%s/Word Weight" % split_tag,
                                          weights['word'], global_step)

                    if (iteration % args.print_every == 0
                            or iteration + 1 == num_batches):
                        # NOTE(review): the "KL-VAL" column shows the mean
                        # of ``logv``, not a KL value.
                        print(
                            "%s Batch %04d/%i, Loss %9.4f, Def NLL-Loss %9.4f, Word NLL-Loss %9.4f  Word-Weight %6.3f, KL-Loss %9.4f, KL-Weight %6.3f KL-VAL %9.4f"
                            % (split_tag, iteration, num_batches - 1,
                               loss.item(), def_NLL_loss.item() / batch_size,
                               word_NLL_loss.item() / batch_size,
                               weights['word'], KL_loss.item() / batch_size,
                               weights['kl'], mean_logv))

                    if split == 'valid':
                        tracker['target_sents'] += idx2word(
                            batch['target'],
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)
                        # One latent vector per sentence, in batch order.
                        tracker['z'].extend(z.data.tolist())

            mean_elbo = mean_of(tracker['ELBO'])
            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split_tag, epoch, args.epochs, mean_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split_tag, mean_elbo,
                                  epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                dump_dir = os.path.join('dumps', ts)
                # exist_ok avoids the check-then-create race.
                os.makedirs(dump_dir, exist_ok=True)
                dump_path = os.path.join(dump_dir, 'valid_E%i.json' % epoch)
                with open(dump_path, 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if is_train:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

    if args.tensorboard_logging:
        # Flush pending events before the process exits.
        writer.close()
def main(args):
    """Train a split encoder/decoder sentence VAE on the PTB splits.

    For every epoch each split in ``['train', 'valid']`` (plus ``'test'`` when
    ``args.test`` is set) is iterated once. The training split optimises
    ``(recon_loss + KL_weight * KL_loss) / batch_size``; the other splits only
    report the un-annealed negative ELBO. After each validation pass the
    target sentences and latent codes are dumped to
    ``dumps/<ts>/valid_E<epoch>.json``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, ``embedding_size``, ``rnn_type``, ``hidden_size``,
            ``word_dropout``, ``embedding_dropout``, ``latent_size``,
            ``num_layers``, ``bidirectional``, ``tensorboard_logging``,
            ``logdir``, ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``print_every``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    # Encoder and decoder are built from exactly the same hyper-parameters.
    train_set = datasets['train']
    vae_kwargs = dict(
        vocab_size=train_set.vocab_size,
        sos_idx=train_set.sos_idx,
        eos_idx=train_set.eos_idx,
        pad_idx=train_set.pad_idx,
        unk_idx=train_set.unk_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    encoderVAE = EncoderVAE(**vae_kwargs)
    decoderVAE = DecoderVAE(**vae_kwargs)

    if torch.cuda.is_available():
        encoderVAE = encoderVAE.cuda()
        decoderVAE = decoderVAE.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, totalIterations, split):
        """Return the KL weight for ``step``; always 1 outside training."""
        if split != 'train':
            return 1
        progress = float(step) / totalIterations
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'linear':
            return 1.005 * progress
        if anneal_function == 'sigmoid':
            return 1 / (1 + math.exp(-8 * progress))
        if anneal_function == 'tanh':
            return math.tanh(4 * progress)
        if anneal_function == 'linear_capped':
            return min(1.0, float(step) * 5 / totalIterations)
        if anneal_function == 'cyclic':
            # Five cycles: ramp linearly during the first half of each
            # cycle, then hold at 1. The max(1, ...) guards keep very small
            # iteration counts from dividing by zero.
            quantile = max(1, int(totalIterations / 5))
            remainder = int(step % quantile)
            midPoint = max(1, int(quantile / 2))
            if remainder > midPoint:
                return 1
            return float(remainder) / midPoint
        return 1

    # reduction='sum' is the supported spelling of size_average=False.
    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step,
                totalIterations, split):
        """Return ``(recon_loss, KL_loss, KL_weight)`` summed over the batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step,
                                       totalIterations, split)

        return recon_loss, KL_loss, KL_weight

    encoderOptimizer = torch.optim.Adam(encoderVAE.parameters(), lr=args.learning_rate)
    decoderOptimizer = torch.optim.Adam(decoderVAE.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split == 'train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            totalIterations = (int(len(datasets[split]) / args.batch_size) + 1) * args.epochs

            tracker = defaultdict(tensor)
            tag = split.upper()

            # Enable/Disable Dropout
            if split == 'train':
                encoderVAE.train()
                decoderVAE.train()
            else:
                encoderVAE.eval()
                decoderVAE.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                hidden, mean, logv, z = encoderVAE(batch['input'], batch['length'])

                # loss calculation
                logp = decoderVAE(batch['input'], batch['length'], hidden)
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, totalIterations, split)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size
                negELBO = loss

                # backward + optimization
                if split == 'train':
                    encoderOptimizer.zero_grad()
                    decoderOptimizer.zero_grad()
                    loss.backward()
                    # NOTE(review): the flattened source does not show whether
                    # the decoder step sat inside the ``else`` branch; it is
                    # assumed the decoder is frozen for the first 500 steps,
                    # otherwise the ``step < 500`` test would have no effect.
                    encoderOptimizer.step()
                    if step >= 500:
                        decoderOptimizer.step()
                    step += 1

                # bookkeepeing
                # The loss is 0-dim; reshape it to one element before cat.
                tracker['negELBO'] = torch.cat(
                    (tracker['negELBO'], negELBO.detach().view(1)))

                neg_elbo_value = negELBO.item()
                recon_per_sample = recon_loss.item() / batch_size
                kl_per_sample = KL_loss.item() / batch_size

                if args.tensorboard_logging:
                    global_step = epoch * len(data_loader) + iteration
                    writer.add_scalar("%s/Negative_ELBO" % tag, neg_elbo_value, global_step)
                    writer.add_scalar("%s/Recon_Loss" % tag, recon_per_sample, global_step)
                    writer.add_scalar("%s/KL_Loss" % tag, kl_per_sample, global_step)
                    writer.add_scalar("%s/KL_Weight" % tag, KL_weight, global_step)

                if iteration % args.print_every == 0 or iteration + 1 == len(data_loader):
                    logger.info("%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                                % (tag, iteration, len(data_loader) - 1, neg_elbo_value,
                                   recon_per_sample, kl_per_sample, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            mean_neg_elbo = torch.mean(tracker['negELBO']).item()
            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f"
                        % (tag, epoch, args.epochs, mean_neg_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % tag, mean_neg_elbo, epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'],
                        'z': tracker['z'].tolist()}
                os.makedirs(os.path.join('dumps', ts), exist_ok=True)
                with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)
def main(args):
    """Train a SentenceVAE on one PTB training set, checkpointing every epoch.

    Only a training split exists in this script. After each epoch the whole
    model is pickled with ``joblib`` to ``<save_model_path>/<ts>/E<epoch>.pytorch``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``vocab_file``, ``train_file``, ``create_data``,
            ``max_sequence_length``, ``min_occ``, ``embedding_size``,
            ``rnn_type``, ``hidden_size``, ``word_dropout``,
            ``embedding_dropout``, ``latent_size``, ``num_layers``,
            ``bidirectional``, ``tensorboard_logging``, ``logdir``,
            ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``k``, ``x0``,
            ``print_every``.

    Raises:
        ValueError: if ``args.anneal_function`` is neither ``'logistic'`` nor
            ``'linear'``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    ptb = PTB(vocab_file=args.vocab_file,
              train_file=args.train_file,
              train_with_vocab=False,
              create_data=args.create_data,
              max_sequence_length=args.max_sequence_length,
              min_occ=args.min_occ)
    datasets = PTBDataset(ptb)
    print('done preprocessing data')

    model = SentenceVAE(vocab_size=datasets.vocab_size,
                        sos_idx=datasets.sos_idx,
                        eos_idx=datasets.eos_idx,
                        pad_idx=datasets.pad_idx,
                        unk_idx=datasets.unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)
    # The preprocessing object travels with the model so it is pickled too.
    model.ptb = ptb

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        """Return the KL weight at ``step`` for the named schedule."""
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        if anneal_function == 'linear':
            return min(1, step / x0)
        # Previously fell through and returned None, which only surfaced
        # later as a TypeError when multiplied with the KL loss.
        raise ValueError("unknown anneal_function: %r" % (anneal_function,))

    # reduction='sum' is the supported spelling of size_average=False.
    NLL = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets.pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        """Return ``(NLL_loss, KL_loss, KL_weight)`` summed over the batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    model.train()
    split = 'train'
    tag = split.upper()
    for epoch in range(args.epochs):

        data_loader = DataLoader(dataset=datasets,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        tracker = defaultdict(list)

        for iteration, batch in enumerate(data_loader):

            batch_size = batch['input'].size(0)

            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            # Forward pass
            logp, mean, logv, z, encoder_last = model(batch['input'],
                                                      batch['length'])

            # loss calculation
            NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                   batch['length'], mean,
                                                   logv, args.anneal_function,
                                                   step, args.k, args.x0)

            loss = (NLL_loss + KL_weight * KL_loss) / batch_size

            # backward + optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            step += 1

            # bookkeepeing
            loss_value = loss.item()
            nll_per_sample = NLL_loss.item() / batch_size
            kl_per_sample = KL_loss.item() / batch_size
            tracker['ELBO'].append(loss_value)

            if args.tensorboard_logging:
                global_step = epoch * len(data_loader) + iteration
                writer.add_scalar("%s/ELBO" % tag, loss_value, global_step)
                writer.add_scalar("%s/NLL Loss" % tag, nll_per_sample, global_step)
                writer.add_scalar("%s/KL Loss" % tag, kl_per_sample, global_step)
                writer.add_scalar("%s/KL Weight" % tag, KL_weight, global_step)

            if iteration % args.print_every == 0 or iteration + 1 == len(
                    data_loader):
                print(
                    "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                    % (tag, iteration, len(data_loader) - 1, loss_value,
                       nll_per_sample, kl_per_sample, KL_weight))

            # The original also had an ``if split == 'valid'`` branch here;
            # ``split`` is fixed to 'train' in this script, so it could never
            # run and has been removed.

        print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
              (tag, epoch, args.epochs, np.mean(tracker['ELBO'])))

        if args.tensorboard_logging:
            writer.add_scalar("%s-Epoch/ELBO" % tag,
                              np.mean(tracker['ELBO']), epoch)

        # save checkpoint
        checkpoint_path = os.path.join(save_model_path,
                                       "E%i.pytorch" % (epoch))
        # NOTE(review): the original first wrote model.state_dict() with
        # torch.save to this path and then overwrote the very same file with
        # joblib.dump on the next line, so only the joblib pickle survived.
        # The clobbered write is dropped; the file on disk is unchanged.
        # Loading it requires unpickling, so only load trusted checkpoints.
        joblib.dump(model.cpu(), checkpoint_path)
        print("Model saved at %s" % checkpoint_path)
        # model.cpu() moved the parameters in place; move them back.
        if torch.cuda.is_available():
            model.cuda()
action='store_true', default=False, help='enables CUDA training') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() # torch.manual_seed(args.seed) device = torch.device("cuda" if args.cuda else "cpu") splits = ['train', 'valid'] datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=60) # vocab_size = datasets['train'].vocab_size sos_idx = datasets['train'].sos_idx eos_idx = datasets['train'].eos_idx pad_idx = datasets['train'].pad_idx embedding = KeyedVectors.load('model/pretrained_embedding') if args.cuda: weights = torch.FloatTensor(embedding.syn0).cuda() else: weights = torch.FloatTensor(embedding.syn0) model = SentenceVAE(weights.size(0), sos_idx, eos_idx, pad_idx, training=True).to(device)
def main(args):
    """Print, per latent dimension, the training sentences closest to a query.

    A fixed query sentence is encoded to its latent mean. Every training
    sentence is encoded the same way, and for each latent dimension the
    sentences with the smallest absolute difference to the query in that
    dimension are printed (at most 20).

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, ``save_model_path``, ``load_checkpoint``,
            ``batch_size``.

    Raises:
        KeyError: if a word of the query sentence is not in the vocabulary.
    """
    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    with open(args.data_dir + '/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    with open(os.path.join(args.save_model_path, 'model_params.json'),
              'r') as f:
        params = json.load(f)
    model = SentenceVAE(**params)

    # One device for model, checkpoint and query tensors. The original
    # called .cuda() on the query unconditionally and crashed on CPU-only
    # machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(
        torch.load(args.load_checkpoint, map_location=device))
    print("Model loaded from %s" % args.load_checkpoint)

    model = model.to(device)
    # Inference only: switch off dropout so the encodings are deterministic.
    model.eval()

    print(model)

    # NOTE(review): the flattened source does not show where the no_grad
    # block ended; no gradients are needed anywhere below, so all of it runs
    # under no_grad.
    with torch.no_grad():
        input_sent = "the n stock specialist firms on the big board floor the buyers and sellers of last resort who were criticized after the n crash once again could n't handle the selling pressure"
        tokens = input_sent.split()
        batch_input = torch.LongTensor([[w2i[tok] for tok in tokens]]).to(device)
        batch_len = torch.LongTensor([len(tokens)]).to(device)
        input_mean = model(batch_input, batch_len, output_mean=True)

        data_loader = DataLoader(dataset=datasets["train"],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        print('---------CALCULATING NEAREST SENTENCES--------')

        sim = []
        all_sentences = []
        for iteration, batch in enumerate(data_loader):
            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            all_sentences.append(batch['input'])

            # Forward pass
            mean = model(batch['input'], batch['length'], output_mean=True)
            sim.append(torch.abs(mean - input_mean))

        sim = torch.cat(sim, dim=0)
        # topk raises when k exceeds the number of rows; clamp for small sets.
        top_k = min(20, sim.size(0))
        # Negate so that topk picks the smallest distances per dimension.
        _, most_similar_per_dim = torch.topk(-sim, k=top_k, dim=0)
        most_similar_per_dim = most_similar_per_dim.transpose(0, 1)

        all_sentences = torch.cat(all_sentences, dim=0)
        for dim, i in enumerate(most_similar_per_dim):
            sentences = torch.index_select(all_sentences, dim=0, index=i)
            print(f"{dim=}")
            print(*idx2word(sentences, i2w=i2w, pad_idx=w2i['<pad>']),
                  sep='\n')
def main(args):
    """Train a SentenceVAE with an optional perplexity term on interpolations.

    Besides the annealed ELBO, once ``epoch > 10`` and
    ``args.perplexity_loss`` is set, random pairs of latent codes from each
    batch are interpolated and decoded, and the decoded sentences are scored
    with ``Semantic_Loss.get_perplexity``. The mean score, scaled by an
    annealed weight, is added to the loss. Model hyper-parameters are written
    to ``model_params.json`` next to the checkpoints.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``data_dir``, ``test``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, ``embedding_size``, ``rnn_type``, ``hidden_size``,
            ``word_dropout``, ``embedding_dropout``, ``latent_size``,
            ``num_layers``, ``bidirectional``, ``tensorboard_logging``,
            ``logdir``, ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``k``, ``x0``,
            ``perplexity_samples_per_batch``, ``perplexity_loss``,
            ``print_every``.
    """
    # Load the vocab
    with open(args.data_dir + '/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    # Initialize semantic loss
    sl = Semantic_Loss()

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        """Return the KL weight at ``step`` ('logistic' or 'linear')."""
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    def perplexity_anneal_function(anneal_function, step, k, x0):
        """Return the perplexity-term weight at ``step``."""
        if anneal_function == 'logistic':
            # Parenthesised as a logistic curve. The original
            # ``1/ 1+np.exp(...)`` evaluated to ``1 + exp(...)``, a weight
            # that is always > 1 and very large early in training.
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, (step / x0))

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0,
                batch_perplexity, perplexity_anneal_function):
        """Return ``(NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight)``."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        # Perplexity
        perp_loss = batch_perplexity
        perp_weight = perplexity_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        # Keep track of epoch loss
        epoch_loss = []

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split == 'train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            tracker = defaultdict(tensor)
            tag = split.upper()

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input_sequence'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                (logp, mean, logv, z), states = model(**batch)

                # Choose some random pairs of samples within the batch
                # to get latent representations for
                batch_index_pairs = list(itertools.combinations(np.arange(batch_size), 2))
                random.shuffle(batch_index_pairs)
                batch_index_pairs = batch_index_pairs[:args.perplexity_samples_per_batch]

                batch_perplexity = []

                # If we start the perplexity
                start_perplexity = epoch > 10

                # If we should have perplexity loss
                if start_perplexity and args.perplexity_loss:
                    # For each pair, get the intermediate representations in the latent space
                    for first, second in batch_index_pairs:

                        with torch.no_grad():
                            z1_hidden = states['z'][first].cpu()
                            z2_hidden = states['z'][second].cpu()

                        z_hidden = to_var(torch.from_numpy(
                            interpolate(start=z1_hidden, end=z2_hidden, steps=1)).float())

                        if args.rnn_type == "lstm":
                            with torch.no_grad():
                                cell_states = states['z_cell_state'].cpu().squeeze()
                                z1_cell_state = cell_states[first]
                                z2_cell_state = cell_states[second]

                            z_cell_states = to_var(torch.from_numpy(
                                interpolate(start=z1_cell_state, end=z2_cell_state, steps=1)).float())

                            samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states)
                        else:
                            samples, _ = model.inference(z=z_hidden, z_cell_state=None)

                        # Check interpolated sentences
                        interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])

                        # For each sentence, get the perplexity
                        perplexities = [sl.get_perplexity(sentence)
                                        for sentence in interpolated_sentences]
                        # Skip pairs that decoded to nothing rather than
                        # dividing by zero.
                        if perplexities:
                            batch_perplexity.append(sum(perplexities) / len(perplexities))

                    # Calculate batch perplexity. No pairs exist when the
                    # batch holds a single sentence or zero samples were
                    # requested; use 0 instead of dividing by zero.
                    if batch_perplexity:
                        avg_batch_perplexity = sum(batch_perplexity) / len(batch_perplexity)
                    else:
                        avg_batch_perplexity = 0

                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step, args.k, args.x0,
                        avg_batch_perplexity, perplexity_anneal_function)

                    loss = ((NLL_loss + KL_weight * KL_loss) / batch_size) + (perp_loss * perp_weight)

                else:  # Epochs < X, so train without perplexity
                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step, args.k, args.x0,
                        0, perplexity_anneal_function)

                    loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # Turn model back into train, since inference changed to eval
                if split == 'train':
                    model.train()
                else:
                    model.eval()

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # Add loss
                epoch_loss.append(loss.item())

                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                if args.tensorboard_logging:
                    global_step = epoch * len(data_loader) + iteration
                    writer.add_scalar("%s/ELBO" % tag, loss.item(), global_step)
                    writer.add_scalar("%s/NLL Loss" % tag, NLL_loss.item() / batch_size, global_step)
                    writer.add_scalar("%s/KL Loss" % tag, KL_loss.item() / batch_size, global_step)
                    writer.add_scalar("%s/KL Weight" % tag, KL_weight, global_step)

                if iteration % args.print_every == 0 or iteration + 1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Perp-loss %9.4f, Perp-weight %6.3f"
                          % (tag, iteration, len(data_loader) - 1, loss.item(),
                             NLL_loss.item() / batch_size, KL_loss.item() / batch_size,
                             KL_weight, perp_loss, perp_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (tag, epoch, args.epochs, tracker['ELBO'].mean()))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % tag, torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                os.makedirs(os.path.join('dumps', ts), exist_ok=True)
                with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
def main(args):
    """Train a SentenceVAE with KL annealing and optional early stopping.

    Each epoch iterates the configured splits. The training split optimises
    the annealed negative ELBO and writes a checkpoint; the validation split
    dumps target sentences and latent codes to
    ``dumps/<ts>/valid_E<epoch>.json`` and, when ``args.early_stopping`` is
    set, feeds its mean loss to ``EarlyStopping.check``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``test``, ``data_dir``, ``create_data``, ``max_sequence_length``,
            ``min_occ``, ``embedding_size``, ``rnn_type``, ``hidden_size``,
            ``word_dropout``, ``embedding_dropout``, ``latent_size``,
            ``num_layers``, ``bidirectional``, ``tensorboard_logging``,
            ``logdir``, ``save_model_path``, ``learning_rate``, ``epochs``,
            ``batch_size``, ``anneal_function``, ``x1``, ``x2``,
            ``print_every``, ``early_stopping``.

    Raises:
        ValueError: if ``args.anneal_function`` names an unknown schedule.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, x1, x2):
        """Return the KL weight at ``step`` for the named schedule."""
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'linear':
            return min(1, step / x1)
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-x2 * (step - x1))))
        if anneal_function == 'cyclic_log':
            return float(1 / (1 + np.exp(-x2 * ((step % (3 * x1)) - x1))))
        if anneal_function == 'cyclic_lin':
            return min(1, (step % (3 * x1)) / x1)
        # Previously fell through and returned None, which only surfaced
        # later as a TypeError in the loss or the log formatting.
        raise ValueError("unknown anneal_function: %r" % (anneal_function,))

    # reduction='sum' is the supported spelling of size_average=False.
    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, x1,
                x2):
        """Return ``(recon_loss, KL_loss, KL_weight)`` summed over the batch."""
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, x1, x2)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    early_stopping = EarlyStopping(history=10)
    for epoch in range(args.epochs):

        early_stopping_flag = False
        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(list)
            tag = split.upper()

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, args.x1, args.x2)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                loss_value = loss.item()
                recon_per_sample = recon_loss.item() / batch_size
                kl_per_sample = KL_loss.item() / batch_size
                tracker['negELBO'].append(loss_value)

                if args.tensorboard_logging:
                    global_step = epoch * len(data_loader) + iteration
                    writer.add_scalar("%s/Negative_ELBO" % tag, loss_value, global_step)
                    writer.add_scalar("%s/Recon_Loss" % tag, recon_per_sample, global_step)
                    writer.add_scalar("%s/KL_Loss" % tag, kl_per_sample, global_step)
                    writer.add_scalar("%s/KL_Weight" % tag, KL_weight, global_step)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (tag, iteration, len(data_loader) - 1, loss_value,
                           recon_per_sample, kl_per_sample, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'].append(z.data.tolist())

            mean_loss = sum(tracker['negELBO']) / len(tracker['negELBO'])

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (tag, epoch, args.epochs, mean_loss))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % tag, mean_loss, epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                os.makedirs(os.path.join('dumps', ts), exist_ok=True)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)
                # NOTE(review): the flattened source does not show the
                # nesting of this check; it is assumed to belong to the
                # validation split, where early stopping is normally
                # evaluated.
                if args.early_stopping and early_stopping.check(mean_loss):
                    early_stopping_flag = True

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

        if early_stopping_flag:
            print("Early stopping trigerred. Training stopped...")
            break
def _encode_e2e_rows(rows, column_of, width):
    """One-hot encode the ``name[value]`` pairs found in ``row[0]`` of each row.

    Args:
        rows: sequence of tuples whose first element is a comma-separated
            string of ``name[value]`` fragments.
        column_of: mapping ``(name, value) -> column index``.
        width: number of columns of the returned matrix.

    Returns:
        An integer array of shape ``(len(rows), width)`` with a 1 in every
        column whose ``(name, value)`` pair occurs in the row.

    Raises:
        ValueError: if a row contains a pair missing from ``column_of``, or a
            fragment without ``[``.
    """
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    encoded = np.zeros((len(rows), width), dtype=int)
    for i, row in enumerate(rows):
        for relation in row[0].split(','):
            bracket = relation.index('[')
            rel_name = relation[:bracket].strip()
            # The value runs up to (excluding) the final character, which is
            # expected to be the closing bracket.
            rel_value = relation[bracket + 1:-1].strip()
            try:
                j = column_of[(rel_name, rel_value)]
            except KeyError:
                # Keep the exception type list.index() raised originally.
                raise ValueError("unknown attribute %s[%s]" %
                                 (rel_name, rel_value)) from None
            encoded[i, j] = 1
    return encoded


def load_e2e(create_data, max_sequence_length, min_occ):
    """Load the E2E text splits together with one-hot attribute labels.

    The attribute vocabulary is collected from ``./e2e-dataset/trainset.csv``.
    Names and values are sorted so column order is stable across runs. The
    last ``len(valid)`` rows of the training CSV become the validation
    labels; ``./e2e-dataset/testset.csv`` provides the test labels.

    Args:
        create_data: forwarded to ``PTB`` / ``PTBlabel``.
        max_sequence_length: forwarded to ``PTB`` / ``PTBlabel``.
        min_occ: forwarded to ``PTB`` / ``PTBlabel``.

    Returns:
        ``(w_datasets, y_datasets)``: two ``OrderedDict`` objects keyed by
        ``'train'``, ``'valid'`` and ``'test'``. The first holds ``PTBlabel``
        datasets, the second the integer label matrices.

    Raises:
        ValueError: if a CSV row holds a malformed fragment or an attribute
            pair that does not occur in the training CSV.
    """
    import warnings

    splits = ['train', 'valid', 'test']

    # Reading in text files of E2E database
    w_datasets = OrderedDict()
    for split in splits:
        w_datasets[split] = PTB(data_dir='e2e-dataset',
                                split=split,
                                create_data=create_data,
                                max_sequence_length=max_sequence_length,
                                min_occ=min_occ)

    # Reading in attributes of E2E data
    predicate_dict = defaultdict(set)
    df = pd.read_csv('./e2e-dataset/trainset.csv', delimiter=',')
    tuples = [tuple(x) for x in df.values]
    for t in tuples:
        for r in t[0].split(','):
            r_ind1 = r.index('[')
            r_ind2 = r.index(']')
            rel = r[0:r_ind1].strip()
            rel_val = r[r_ind1 + 1:r_ind2]
            predicate_dict[rel].add(rel_val)

    # Order both keys and items in dictionary for consistency
    predicate_dict = OrderedDict(
        (rel, sorted(values)) for rel, values in sorted(predicate_dict.items()))

    # Columns are numbered in (sorted name, sorted value) order, i.e. the
    # offset of a name is the total number of values of all earlier names.
    column_of = {}
    for rel, values in predicate_dict.items():
        for rel_val in values:
            column_of[(rel, rel_val)] = len(column_of)
    width = len(column_of)

    X = _encode_e2e_rows(tuples, column_of, width)

    # Create holdoutset:
    hold_tuples = [
        tuple(x)
        for x in pd.read_csv('./e2e-dataset/testset.csv', delimiter=',').values
    ]
    X_hold = _encode_e2e_rows(hold_tuples, column_of, width)

    y_datasets = OrderedDict()
    # Same size as test set
    split_num = len(w_datasets['valid'])
    y_datasets['train'] = X[0:-split_num]
    y_datasets['valid'] = X[-split_num:]
    y_datasets['test'] = X_hold

    # The original asserts compared each length with itself and so could
    # never fail. Check text against labels instead, but only warn so that
    # runs that worked before keep working.
    for split in splits:
        n_text, n_labels = len(w_datasets[split]), len(y_datasets[split])
        if n_text != n_labels:
            warnings.warn(
                "E2E split %r has %d text rows but %d label rows" %
                (split, n_text, n_labels))

    for split in splits:
        w_datasets[split] = PTBlabel(data_dir='e2e-dataset',
                                     split=split,
                                     labels=y_datasets[split],
                                     create_data=create_data,
                                     max_sequence_length=max_sequence_length,
                                     min_occ=min_occ)

    return w_datasets, y_datasets
def main(args):
    """Train a ``SentenceRNN`` on the PTB splits and plot the loss per epoch.

    For every epoch the model is run over the 'train' split (with optimiser
    updates) and the 'valid' split (evaluation mode).  Per-batch and per-epoch
    losses go to the log file, stdout and, when ``args.tensorboard_logging``
    is set, TensorBoard.  A checkpoint is written after each training pass and
    a train/test loss curve is saved as a PNG in ``args.logdir`` at the end.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    splits = ['train', 'valid']

    log_path = os.path.join(args.logdir,
                            experiment_name_rnn(args, ts) + ".log")
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        filename=log_path)
    logger = logging.getLogger(__name__)

    datasets = OrderedDict()
    for name in splits:
        datasets[name] = PTB(data_dir=args.data_dir,
                             split=name,
                             create_data=args.create_data,
                             max_sequence_length=args.max_sequence_length,
                             min_occ=args.min_occ)

    train_set = datasets['train']
    model = SentenceRNN(vocab_size=train_set.vocab_size,
                        sos_idx=train_set.sos_idx,
                        eos_idx=train_set.eos_idx,
                        pad_idx=train_set.pad_idx,
                        unk_idx=train_set.unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)
    if torch.cuda.is_available():
        model = model.cuda()
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name_rnn(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # Summed token-level negative log-likelihood; padding is ignored.
    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length):
        # Drop the padding columns beyond the longest sequence, then flatten
        # targets and predictions to one row per token.
        longest = torch.max(length).item()
        flat_target = target[:, :longest].contiguous().view(-1)
        flat_logp = logp.view(-1, logp.size(2))
        return NLL(flat_logp, flat_target)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    step = 0
    train_loss = []
    test_loss = []
    for epoch in range(args.epochs):
        for split in splits:
            training = split == 'train'
            tag = split.upper()
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=training,
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            batch_losses = []

            # Dropout is active only while training.
            if training:
                model.train()
            else:
                model.eval()

            last_iteration = len(data_loader) - 1
            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)
                for key in batch:
                    if torch.is_tensor(batch[key]):
                        batch[key] = to_var(batch[key])

                logp = model(batch['input'], batch['length'])
                NLL_loss = loss_fn(logp, batch['target'], batch['length'])
                loss = (NLL_loss) / batch_size

                if training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                batch_losses.append(loss.item())

                if args.tensorboard_logging:
                    writer.add_scalar("%s/NLL_Loss" % tag,
                                      NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)

                if (iteration % args.print_every == 0
                        or iteration == last_iteration):
                    logger.info("\tStep\t%s\t%04d\t%i\t%9.4f" %
                                (tag, iteration, last_iteration, loss.item()))
                    print("%s Batch %04d/%i, Loss %9.4f" %
                          (tag, iteration, last_iteration, loss.item()))

            # Mean of the per-batch losses of this pass.
            mean_loss = 1.0 * sum(batch_losses) / len(batch_losses)
            logger.info("\tEpoch\t%s\t%02d\t%i\t%9.4f" %
                        (tag, epoch, args.epochs, mean_loss))
            print("%s Epoch %02d/%i, Mean Loss %9.4f" %
                  (tag, epoch, args.epochs, mean_loss))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/Loss" % tag, mean_loss, epoch)

            if training:
                train_loss.append(mean_loss)
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
            else:
                test_loss.append(mean_loss)

    # One line per split over the epochs.
    sns.set(style="whitegrid")
    curves = pd.DataFrame()
    curves["train"] = train_loss
    curves["test"] = test_loss
    axes = sns.lineplot(data=curves, legend=False)
    axes.set(xlabel='Epoch', ylabel='Loss')
    plt.legend(title='Split', loc='upper right', labels=['Train', 'Test'])
    figure_path = os.path.join(args.logdir,
                               experiment_name_rnn(args, ts) + ".png")
    plt.savefig(figure_path, transparent=True, dpi=300)
def main(args):
    """Print the mean per-batch corpus BLEU of ``model.bleu`` outputs.

    Loads the vocabulary from ``<data_dir>/ptb/ptb.vocab.json``, rebuilds a
    ``SentenceVAE`` from ``args.load_params`` / ``args.load_checkpoint`` and
    runs it over the 'test' split.  For every batch the generated sentences
    are scored with NLTK's ``corpus_bleu``; the printed value is the average
    of those batch scores over all evaluated batches.
    """
    with open(args.data_dir + '/ptb/ptb.vocab.json', 'r') as vocab_file:
        vocab = json.load(vocab_file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    params = load_model_params_from_checkpoint(args.load_params)
    model = SentenceVAE(**params)
    print(model)
    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from %s" % args.load_checkpoint)

    splits = ['test']

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    datasets = OrderedDict()
    for split in splits:
        print("creating dataset for: {}".format(split))
        datasets[split] = PTB(split=split,
                              create_data=args.create_data,
                              min_occ=args.min_occ)

    total_bleu = 0.0
    total_iterations = 0
    for split in splits:
        data_loader = DataLoader(dataset=datasets[split],
                                 batch_size=args.batch_size,
                                 shuffle=split == 'train',
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        for batch in data_loader:
            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            logp = model.bleu(batch['input'], batch['length'])
            gen_sents = idx2word(logp, i2w=i2w, pad_idx=w2i['<pad>'])
            batch_sents = idx2word(batch['input'],
                                   i2w=i2w,
                                   pad_idx=w2i['<pad>'])
            generated = [line.strip().split() for line in gen_sents]
            actual = [line.strip().split() for line in batch_sents]

            # NOTE(review): every hypothesis is scored against *all* input
            # sentences of the batch rather than only its own source
            # sentence; kept as-is, confirm this is the intended metric.
            all_actual = [actual] * len(generated)
            total_bleu += nltk.translate.bleu_score.corpus_bleu(
                all_actual, generated)
            # Count batches across all splits so the divisor matches the sum.
            total_iterations += 1

    if total_iterations == 0:
        print("No batches were evaluated; BLEU is undefined.")
        return

    bleu_score = total_bleu / total_iterations
    print(bleu_score)
def load_e2e(args):
    """Multi-hot encode the E2E training meaning representations.

    The relation/value vocabulary is collected from the first column of
    ``./e2e-dataset/trainset.csv``.  Relations keep their first-seen order and
    the values of each relation are sorted, so a given CSV always produces the
    same column layout.  When encoding a row, its first comma-separated field
    is skipped.

    Args:
        args: namespace providing ``create_data``, ``max_sequence_length`` and
            ``min_occ`` for the ``PTB`` text dataset.

    Returns:
        ``(X, X_test)``: ``X`` is a boolean matrix with one row per training
        example.  Dev-set loading is disabled in this function, so ``X_test``
        is an empty boolean matrix with the same number of columns.
    """
    splits = ['train']

    # The text dataset is still constructed (it may create the preprocessed
    # files as a side effect) even though only the labels are returned.
    w_datasets = OrderedDict()
    for split in splits:
        w_datasets[split] = PTB(data_dir='e2e-dataset',
                                split=split,
                                create_data=args.create_data,
                                max_sequence_length=args.max_sequence_length,
                                min_occ=args.min_occ)

    predicate_dict = defaultdict(set)
    df = pd.read_csv('./e2e-dataset/trainset.csv', delimiter=',')
    tuples = [tuple(x) for x in df.values]
    for t in tuples:
        for r in t[0].split(','):
            r_ind1 = r.index('[')
            r_ind2 = r.index(']')
            predicate_dict[r[0:r_ind1].strip()].add(r[r_ind1 + 1:r_ind2])

    # Freeze an explicit value order: iterating a set of strings directly
    # gives a different order on every interpreter run.
    rel_list = list(predicate_dict.keys())
    rel_values = [sorted(predicate_dict[rel]) for rel in rel_list]
    rel_lens = [len(values) for values in rel_values]
    print('kb')
    print('rl')
    print(rel_list)

    # First column of each relation's block inside the feature vector.
    offsets = [sum(rel_lens[:k]) for k in range(len(rel_lens))]
    n_columns = sum(rel_lens)

    X = np.zeros((len(tuples), n_columns), dtype=bool)
    for i, tup in enumerate(tuples):
        for relation in tup[0].split(',')[1:]:
            bracket = relation.index('[')
            rel_name = relation[0:bracket].strip()
            rel_value = relation[bracket + 1:-1].strip()
            name_ind = rel_list.index(rel_name)
            value_ind = rel_values[name_ind].index(rel_value)
            X[i, offsets[name_ind] + value_ind] = True

    # No dev examples are loaded here; keep the two-matrix return shape.
    X_test = np.zeros((0, n_columns), dtype=bool)
    return X, X_test
def main(args):
    """Train a ``SentenceVAE`` with identity, sigmoid or cyclic KL annealing.

    Every epoch runs all configured splits ('train', 'valid' and optionally
    'test').  Only the training split updates the model and uses the annealed
    KL weight; the other splits report the full negative ELBO (weight 1).
    After each validation pass the target sentences and latent codes are
    dumped to ``dumps/<ts>/valid_E<epoch>.json``; after each training pass a
    checkpoint is written below ``args.save_model_path``.

    Raises:
        ValueError: if ``args.anneal_function`` is not one of ``'identity'``,
            ``'sigmoid'`` or ``'cyclic'``.
    """
    import logging

    logger = logging.getLogger(__name__)

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)
    if torch.cuda.is_available():
        model = model.cuda()
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def sigmoid(step):
        # Logistic curve centred on step 6569.5; the two branches avoid
        # overflowing exp() for large positive or negative arguments.
        x = step - 6569.5
        if x < 0:
            a = np.exp(x)
            res = (a / (1 + a))
        else:
            res = (1 / (1 + np.exp(-x)))
        return float(res)

    def frange_cycle_linear(n_iter, start=0.0, stop=1.0, n_cycle=4,
                            ratio=0.5):
        # Per-step weights: each of the n_cycle periods ramps linearly from
        # `start` to `stop` during its first `ratio` fraction, then holds.
        L = np.ones(n_iter) * stop
        period = n_iter / n_cycle
        step = (stop - start) / (period * ratio)
        for c in range(n_cycle):
            v, i = start, 0
            while v <= stop and (int(i + c * period) < n_iter):
                L[int(i + c * period)] = v
                v += step
                i += 1
        return L

    # Number of optimiser steps over the whole run.  len() of the loader gives
    # the batch count without reading the dataset once per epoch.
    batches_per_epoch = len(
        DataLoader(dataset=datasets['train'], batch_size=args.batch_size))
    n_iter = args.epochs * batches_per_epoch
    print("Total no of iterations = " + str(n_iter))
    # The schedule divides by the period length, so skip it for an empty run.
    L = frange_cycle_linear(n_iter) if n_iter else np.ones(0)

    def kl_anneal_function(anneal_function, step):
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'sigmoid':
            return sigmoid(step)
        if anneal_function == 'cyclic':
            return float(L[step])
        raise ValueError('unknown anneal_function: %r' % (anneal_function,))

    # Summed (not averaged) token NLL, ignoring padding positions.
    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step,
                split='train'):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        if split == 'train':
            KL_weight = kl_anneal_function(anneal_function, step)
        else:
            KL_weight = 1
        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)
                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, split)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping; the scalar loss is reshaped to one element so
                # it can be concatenated onto the running tensor
                tracker['negELBO'] = torch.cat(
                    (tracker['negELBO'], loss.data.view(1)))
                global_step = epoch * len(data_loader) + iteration

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      loss.item(), global_step)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.item() / batch_size,
                                      global_step)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      global_step)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight, global_step)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), recon_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            mean_neg_elbo = torch.mean(tracker['negELBO']).item()
            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs, mean_neg_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  mean_neg_elbo, epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                dump_dir = os.path.join('dumps', ts)
                os.makedirs(dump_dir, exist_ok=True)
                with open(os.path.join(dump_dir, 'valid_E%i.json' % epoch),
                          'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)
def main(args):
    """Build E2E attribute matrices, print a sanity check and walk the data.

    The relation/value vocabulary is collected from the train and dev CSVs
    under ``./e2e-dataset``.  Relations keep their first-seen order and each
    relation's values are sorted so the column layout is the same on every
    run.  Both CSVs are multi-hot encoded (the first field of each row is
    skipped), the active column indices of up to five training rows are
    printed, and finally the loaders for the text splits are iterated once
    per epoch without doing any work on the batches.
    """
    splits = ['train', 'test']

    # Reading in text files of E2E database
    w_datasets = OrderedDict()
    for split in splits:
        w_datasets[split] = PTB(data_dir='e2e-dataset',
                                split=split,
                                create_data=args.create_data,
                                max_sequence_length=args.max_sequence_length,
                                min_occ=args.min_occ)

    # Reading in attributes of E2E data
    df = pd.read_csv('./e2e-dataset/trainset.csv', delimiter=',')
    tuples = [tuple(x) for x in df.values]
    df_dev = pd.read_csv('./e2e-dataset/devset.csv', delimiter=',')
    dev_tuples = [tuple(x) for x in df_dev.values]

    predicate_dict = defaultdict(set)
    for t in tuples + dev_tuples:
        for r in t[0].split(','):
            r_ind1 = r.index('[')
            r_ind2 = r.index(']')
            predicate_dict[r[0:r_ind1].strip()].add(r[r_ind1 + 1:r_ind2])
    print('preddict', predicate_dict)

    # Freeze an explicit value order: iterating a set of strings directly
    # gives a different order on every interpreter run.
    rel_list = list(predicate_dict.keys())
    rel_values = [sorted(predicate_dict[rel]) for rel in rel_list]
    rel_lens = [len(values) for values in rel_values]
    print('kb')
    print('rl')
    print(rel_list)

    # First column of each relation's block inside the feature vector.
    offsets = [sum(rel_lens[:k]) for k in range(len(rel_lens))]
    n_columns = sum(rel_lens)

    def encode(rows):
        # One boolean row per example, True where a (relation, value) occurs.
        matrix = np.zeros((len(rows), n_columns), dtype=bool)
        for i, row in enumerate(rows):
            for relation in row[0].split(',')[1:]:
                bracket = relation.index('[')
                rel_name = relation[0:bracket].strip()
                rel_value = relation[bracket + 1:-1].strip()
                name_ind = rel_list.index(rel_name)
                value_ind = rel_values[name_ind].index(rel_value)
                matrix[i, offsets[name_ind] + value_ind] = True
        return matrix

    X = encode(tuples)
    X_test = encode(dev_tuples)
    print('Xtest crated... shape:', X_test.shape)

    # Show the raw meaning representation next to its active columns.
    for i, tup in enumerate(tuples[:5]):
        print(tup[0])
        ind_list = [ind for ind, active in enumerate(X[i]) if active]
        print(ind_list)

    pandata = PandasDataset(csv_file='./e2e-dataset/trainset.csv')
    for epoch in range(args.epochs):
        for split in splits:
            # Loader is only constructed here, never iterated.
            pandata_loader = DataLoader(dataset=pandata,
                                        batch_size=args.batch_size,
                                        shuffle=split == 'train',
                                        num_workers=cpu_count(),
                                        pin_memory=torch.cuda.is_available())

    for epoch in range(args.epochs):
        for split in splits:
            data_loader = DataLoader(dataset=w_datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            # Walk every batch once; nothing is computed on them yet.
            for _ in data_loader:
                pass
def main(args):
    """Train a ``SentenceVAE`` with early stopping and write result reports.

    Every epoch runs 'train', 'valid' and, when ``args.test`` is set, 'test'.
    The KL weight is annealed during training and fixed to 1 elsewhere.  The
    best validation epoch is tracked; once validation has failed to improve
    three times since that best epoch, or the last epoch has finished, a
    summary is appended to ``res.txt``, the logged loss series are appended
    to ``elbo-graph.txt`` and the process terminates via ``SystemExit``.

    Side effects: appends to ``res.txt`` and ``elbo-graph.txt``, writes
    checkpoints below ``args.save_model_path`` and validation dumps below
    ``dumps/<ts>``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    # Head both report files with the experiment name.  The files are closed
    # again right away so no handle stays open during training.
    with open("res.txt", "a") as log_file:
        log_file.write(expierment_name(args, ts))
        log_file.write("\n")
    with open("elbo-graph.txt", "a") as graph_file:
        graph_file.write(expierment_name(args, ts))
        graph_file.write("\n")

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)
    if torch.cuda.is_available():
        model = model.cuda()
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)
        elif anneal_function == "softplus":
            return min(1, np.log(1 + np.exp(k * step)))
        elif anneal_function == "no":
            return 1

    # Summed (not averaged) token NLL, ignoring padding positions.
    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    val_lowest_elbo = 5000  # initial "best so far" validation value
    val_accu_epoch = 0      # validation passes without improvement
    val_min_epoch = 0       # epoch of the best validation value
    split_elbo = {name: [] for name in splits}  # per-epoch means, as [value]
    split_loss = {name: [] for name in splits}  # logged [loss, NLL, KL] rows

    for epoch in range(args.epochs):
        for split in splits:
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)
                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, args.k, args.x0)
                if split != 'train':
                    KL_weight = 1.0
                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping; the scalar loss is reshaped to one element so
                # it can be concatenated onto the running tensor
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.view(1)))
                loss_value = loss.item()
                nll_value = NLL_loss.item() / batch_size
                kl_value = KL_loss.item() / batch_size
                global_step = epoch * len(data_loader) + iteration

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss_value,
                                      global_step)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      nll_value, global_step)
                    writer.add_scalar("%s/KL Loss" % split.upper(), kl_value,
                                      global_step)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight, global_step)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss_value, nll_value, kl_value, KL_weight))
                    split_loss[split].append(
                        [loss_value, nll_value, kl_value])

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            mean_elbo = torch.mean(tracker['ELBO']).item()
            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs, mean_elbo))
            split_elbo[split].append([mean_elbo])

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), mean_elbo,
                                  epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                dump_dir = os.path.join('dumps', ts)
                os.makedirs(dump_dir, exist_ok=True)
                with open(os.path.join(dump_dir, 'valid_E%i.json' % epoch),
                          'w') as dump_file:
                    json.dump(dump, dump_file)

                # early-stopping bookkeeping
                if mean_elbo < val_lowest_elbo:
                    val_lowest_elbo = mean_elbo
                    val_accu_epoch = 0
                    val_min_epoch = epoch
                else:
                    val_accu_epoch += 1

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

        # All splits of this epoch are done (so a requested test pass has
        # run); decide whether to stop and report.
        early_stop = val_accu_epoch >= 3
        if early_stop or epoch == args.epochs - 1:
            _write_elbo_report(splits, split_elbo, split_loss, val_min_epoch,
                               with_test=bool(args.test),
                               with_epoch_elbo=early_stop and bool(args.test))
            raise SystemExit


def _loss_series(splits, rows_by_split, column):
    """Return one line holding ``column`` of every row, each followed by ','."""
    return "".join("{},".format(row[column])
                   for name in splits
                   for row in rows_by_split[name]) + "\n"


def _write_elbo_report(splits, split_elbo, split_loss, best_epoch, with_test,
                       with_epoch_elbo=False):
    """Append the best-epoch summary to res.txt and the series to elbo-graph.txt."""
    exp_str = "train_ELBO={}\n".format(split_elbo["train"][best_epoch])
    exp_str += "valid_ELBO={}\n".format(split_elbo["valid"][best_epoch])
    if with_test:
        exp_str += "test_ELBO={}\n".format(split_elbo["test"][best_epoch])
    exp_str += "==========\n"
    with open("res.txt", "a") as log_file:
        log_file.write(exp_str)
    print(exp_str)

    elbo_line = _loss_series(splits, split_loss, 0)
    if with_epoch_elbo:
        # Only the early-stop report of a run with a test split also lists
        # the per-epoch means.
        elbo_line += _loss_series(splits, split_elbo, 0)
    with open("elbo-graph.txt", "a") as graph_file:
        graph_file.write("ELBO\n")
        graph_file.write(elbo_line)
        graph_file.write("NLL\n")
        graph_file.write(_loss_series(splits, split_loss, 1))
        graph_file.write("KL\n")
        graph_file.write(_loss_series(splits, split_loss, 2))
from ptb import PTB # parse the command line arguments parser = NgraphArgparser(__doc__) parser.set_defaults(gen_be=False) args = parser.parse_args() # these hyperparameters are from the paper args.batch_size = 50 time_steps = 5 hidden_size = 10 gradient_clip_value = 15 # download penn treebank # set shift_target to be False, since it is going to predict the same sequence tree_bank_data = PTB(path=args.data_dir, shift_target=False) ptb_data = tree_bank_data.load_data() train_set = SequentialArrayIterator(ptb_data['train'], batch_size=args.batch_size, time_steps=time_steps, total_iterations=args.num_iterations, reverse_target=True, get_prev_target=True) inputs = train_set.make_placeholders() ax.Y.length = len(tree_bank_data.vocab) def expand_onehot(x): # Assign the recurrent role and property to the axis named 'time' x.axes.find_by_short_name('time')[0].add_role(ar.time)
def main(args):
    """Train a ``SentenceVAE`` on PTB and report a per-word NLL as well.

    Every epoch runs the 'train' split (with optimiser updates and an
    annealed KL weight) and the 'valid' split.  After each validation pass
    the target sentences and latent codes are dumped to
    ``./dumps/<ts>/valid_E<epoch>.json``; every tenth training epoch a
    checkpoint is written below ``<save_model_path>/VAE/<ts>``.

    Raises:
        ValueError: if ``args.anneal_function`` is neither ``'logistic'`` nor
            ``'linear'``.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    splits = ['train', 'valid']

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    model = SentenceVAE(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join('./', args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join('./', args.save_model_path, 'VAE', ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)
        # Failing here is clearer than a None weight entering the loss.
        raise ValueError('unknown anneal_function: %r' % (anneal_function,))

    # Summed (not averaged) token NLL, ignoring padding positions.
    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood, also normalised by the summed lengths
        NLL_loss = NLL(logp, target)
        NLL_w_avg = NLL_loss / torch.sum(length).float()

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        return NLL_loss, KL_loss, KL_weight, NLL_w_avg

    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = (torch.cuda.FloatTensor
              if torch.cuda.is_available() else torch.Tensor)
    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split == 'train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)
                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight, NLL_w_avg = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, args.k, args.x0)
                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping; view(1) turns the scalar into a one-element
                # tensor that stays on the same device as the loss
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.view(1)))
                loss_value = loss.item()
                nll_value = NLL_loss.item() / batch_size
                kl_value = KL_loss.item() / batch_size
                global_step = epoch * len(data_loader) + iteration

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss_value,
                                      global_step)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      nll_value, global_step)
                    writer.add_scalar("%s/KL Loss" % split.upper(), kl_value,
                                      global_step)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight, global_step)

                if (iteration % args.print_every == 0
                        or iteration + 1 == len(data_loader)):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f"
                          % (split.upper(), iteration, len(data_loader) - 1,
                             loss_value, nll_value, kl_value, KL_weight,
                             NLL_w_avg.item()))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            mean_elbo = torch.mean(tracker['ELBO']).item()
            print("%s Epoch %02d/%i, Mean ELBO %9.4f"
                  % (split.upper(), epoch, args.epochs, mean_elbo))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), mean_elbo,
                                  epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'],
                        'z': tracker['z'].tolist()}
                dump_dir = os.path.join('./dumps', ts)
                os.makedirs(dump_dir, exist_ok=True)
                with open(os.path.join(dump_dir, 'valid_E%i.json' % epoch),
                          'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint (every tenth epoch only)
            if split == 'train' and epoch % 10 == 0:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
def main(args):
    """Load a trained SentenceVAE checkpoint and print decoded sentences.

    Three groups of sentences are printed to stdout:

    1. the output of ``model.inference(n=args.num_samples)``;
    2. the output of ``model.inference`` on the latent codes returned by
       ``interpolate(start=z1, end=z2, steps=8)`` for two random vectors;
    3. an encode/decode round trip of the first batch yielded by the
       "valid" split of ``PTB``.

    Args:
        args: Parsed command-line options. Reads ``data_dir``,
            ``load_checkpoint``, ``num_samples``, ``max_sequence_length``,
            ``embedding_size``, ``rnn_type``, ``hidden_size``,
            ``word_dropout``, ``embedding_dropout``, ``latent_size``,
            ``num_layers`` and ``bidirectional``.

    Raises:
        FileNotFoundError: If ``args.load_checkpoint`` does not exist.
    """
    vocab_path = os.path.join(args.data_dir, 'ptb.vocab.json')
    with open(vocab_path, 'r') as vocab_file:
        vocab = json.load(vocab_file)

    w2i, i2w = vocab['w2i'], vocab['i2w']
    pad_idx = w2i['<pad>']

    model = SentenceVAE(
        vocab_size=len(w2i),
        sos_idx=w2i['<sos>'],
        eos_idx=w2i['<eos>'],
        pad_idx=pad_idx,
        unk_idx=w2i['<unk>'],
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
    )

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    # Always deserialize onto the CPU first: a checkpoint written from a CUDA
    # model cannot otherwise be restored on a host without a GPU. The model
    # is moved to the GPU afterwards when one is available.
    state_dict = torch.load(args.load_checkpoint, map_location='cpu')
    model.load_state_dict(state_dict)
    print("Model loaded from %s" % (args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()

    samples, _ = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=i2w, pad_idx=pad_idx), sep='\n')

    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=i2w, pad_idx=pad_idx), sep='\n')

    print('-------Encode ... Decode-------')

    # NOTE(review): min_occ is hard-coded to 3 here rather than read from
    # args; confirm it matches the value used when the vocabulary was built.
    datasets = PTB(
        data_dir=args.data_dir,
        split="valid",
        create_data=False,
        batch_size=1,
        max_sequence_length=args.max_sequence_length,
        min_occ=3,
    )

    for input_batch, _target_batch, length_batch in datasets:
        if torch.is_tensor(input_batch):
            input_batch = to_var(input_batch)
        if torch.is_tensor(length_batch):
            length_batch = to_var(length_batch)

        print("*" * 10)
        print(*idx2word(input_batch, i2w=i2w, pad_idx=pad_idx), sep='\n')

        # Encode the batch, then decode again from the latent code it produced.
        _logp, _mean, _logv, z = model(input_batch, length_batch)
        print("+" * 10)
        samples, _ = model.inference(z=z)
        print(*idx2word(samples, i2w=i2w, pad_idx=pad_idx), sep='\n')

        # Only the first batch is shown.
        break