d = DecoderRNN(embed_size, hidden_size, n_characters, 2)
vae = VAE(e, d)
optimizer = torch.optim.Adam(vae.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

if USE_CUDA:
    vae.cuda()
    criterion.cuda()

log_every = 200
save_every = 5000

job = sconce.Job('vae', {
    'hidden_size': hidden_size,
    'embed_size': embed_size,
    'learning_rate': learning_rate,
    'kld_weight': kld_weight,
    'temperature': temperature,
    'grad_clip': grad_clip,
})
job.log_every = log_every

def save():
    save_filename = 'vae.pt'
    torch.save(vae, save_filename)
    print('Saved as %s' % save_filename)

try:
    for epoch in range(n_epochs):
        input, target = random_training_set()
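        # A sketch of how the kld_weight above typically enters a VAE
        # objective (an assumption; the original loop body is not shown
        # here): reconstruction loss plus a weighted KL divergence term.
        # The unpacked names m, l, z, decoded (mean, log-variance, latent
        # sample, reconstruction) are hypothetical.
        m, l, z, decoded = vae(input)
        kld = -0.5 * torch.sum(1 + l - m.pow(2) - l.exp())
        loss = criterion(decoded, target) + kld_weight * kld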
import os
import argparse
import sconce

# Parse command line arguments
argparser = argparse.ArgumentParser()
argparser.add_argument('--n_epochs', type=int, default=200)
argparser.add_argument('--n_iters', type=int, default=200)
argparser.add_argument('--hidden_size', type=int, default=50)
argparser.add_argument('--n_layers', type=int, default=2)
argparser.add_argument('--dropout_p', type=float, default=0.1)
argparser.add_argument('--learning_rate', type=float, default=0.05)
args = argparser.parse_args()

job = sconce.Job('seq2seq-intent-parsing', vars(args))
job.log_every = args.n_iters * 10

from data import *
from model import *
from evaluate import *

# Training

def train(input_variable, target_variable):
    encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
criterion = nn.CrossEntropyLoss()

# Move models to GPU
if USE_CUDA:
    encoder.cuda()
    decoder.cuda()

import sconce
job = sconce.Job('seq2seq-translate', {
    'attn_model': attn_model,
    'n_layers': n_layers,
    'dropout': dropout,
    'hidden_size': hidden_size,
    'learning_rate': learning_rate,
    'clip': clip,
    'teacher_forcing_ratio': teacher_forcing_ratio,
    'decoder_learning_ratio': decoder_learning_ratio,
})
job.plot_every = plot_every
job.log_every = print_every

# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0  # Reset every print_every
plot_loss_total = 0  # Reset every plot_every
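# The elapsed-time tracking above feeds a time_since() helper used in the
# training loops below. One common implementation (a sketch; the original
# helper lives elsewhere in the repository) reports elapsed time and an
# estimate of time remaining, given the fraction of training completed:

import time
import math

def as_minutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def time_since(since, percent):
    now = time.time()
    s = now - since
    es = s / percent  # Estimated total time at the current rate
    rs = es - s       # Estimated time remaining
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))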
def main(args):
    p_data, q_data, train_triples, test_triples = prepare_data(
        args.post_data_tsvfile, args.qa_data_tsvfile,
        args.train_ids_file, args.test_ids_file, args.sim_ques_fname)
    pretrained_emb = load_pretrained_emb(args.word_vec_fname, p_data)

    #N = int(len(triples)*0.8)
    #train_triples = triples[:N]
    #test_triples = triples[N:]

    # Initialize models
    #p_encoder = EncoderAvgEmb(pretrained_emb)
    p_encoder = EncoderRNN(p_data.n_words, hidden_size, n_layers, dropout=dropout)
    q_encoder = EncoderRNN(q_data.n_words, hidden_size, n_layers, dropout=dropout)
    decoder = AttnDecoderRNN(attn_model, hidden_size, q_data.n_words, n_layers)

    # Initialize optimizers and criterion
    p_encoder_optimizer = optim.Adam(p_encoder.parameters(), lr=learning_rate)
    q_encoder_optimizer = optim.Adam(q_encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    criterion = nn.CrossEntropyLoss()

    # Move models to GPU
    if USE_CUDA:
        p_encoder.cuda()
        q_encoder.cuda()
        decoder.cuda()

    import sconce
    job = sconce.Job('seq2seq-translate', {
        'attn_model': attn_model,
        'n_layers': n_layers,
        'dropout': dropout,
        'hidden_size': hidden_size,
        'learning_rate': learning_rate,
        'clip': clip,
        'teacher_forcing_ratio': teacher_forcing_ratio,
        'decoder_learning_ratio': decoder_learning_ratio,
    })
    job.plot_every = plot_every
    job.log_every = print_every

    # Keep track of time elapsed and running averages
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    ecs = []
    dcs = []
    eca = 0
    dca = 0
    epoch = 0.0

    print('No. of train_triples %d' % len(train_triples))
    print('No. of test_triples %d' % len(test_triples))

    while epoch < n_epochs:
        epoch += 1

        # Get training data for this cycle
        p_input_batches, p_input_lengths, q_input_batches, q_input_lengths, target_batches, target_lengths = \
            random_batch(batch_size, p_data, q_data, train_triples)

        # Run the train function
        loss, ec, dc = train(
            p_input_batches, p_input_lengths, q_input_batches, q_input_lengths,
            target_batches, target_lengths,
            p_encoder, q_encoder, decoder,
            p_encoder_optimizer, q_encoder_optimizer, decoder_optimizer, criterion)

        # Keep track of loss
        print_loss_total += loss
        plot_loss_total += loss
        eca += ec
        dca += dc
        job.record(epoch, loss)

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (%d %d%%) %.4f' % (
                time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg)
            print(print_summary)

        if epoch % evaluate_every == 0:
            evaluate_randomly(p_data, q_data, test_triples, p_encoder, q_encoder, decoder)

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

            # TODO: Running average helper
            ecs.append(eca / plot_every)
            dcs.append(dca / plot_every)
            ecs_win = 'encoder grad (%s)' % hostname
            dcs_win = 'decoder grad (%s)' % hostname
            #vis.line(np.array(ecs), win=ecs_win, opts={'title': ecs_win})
            #vis.line(np.array(dcs), win=dcs_win, opts={'title': dcs_win})
            eca = 0
            dca = 0
def main():
    ##########################################################################
    ###### PART-I : Data Formation using scripts in data_for_modeling.py ####
    ##########################################################################

    input_lang, output_lang, pairs = prepare_data('eng', 'fra', False)

    # TRIMMING DATA:
    # Trimming is optional but reduces the data size and makes processing faster.
    # Removes words with frequency < 5.
    MIN_COUNT = 5
    input_lang.trim(MIN_COUNT)
    output_lang.trim(MIN_COUNT)

    keep_pairs = []
    for pair in pairs:
        input_sentence = pair[0]
        output_sentence = pair[1]
        keep_input = True
        keep_output = True

        for word in input_sentence.split(' '):
            if word not in input_lang.word2index:
                keep_input = False
                break

        for word in output_sentence.split(' '):
            if word not in output_lang.word2index:
                keep_output = False
                break

        # Keep the pair only if both sentences survive the vocabulary trim
        if keep_input and keep_output:
            keep_pairs.append(pair)

    print("Trimmed from %d pairs to %d, %.4f of total" % (len(pairs), len(keep_pairs), len(keep_pairs) / len(pairs)))
    pairs = keep_pairs

    ##########################################################################
    ###### PART-II : Setup Configuration for training the data           ####
    ##########################################################################

    # Configure models
    attn_model = 'dot'
    hidden_size = 1024
    n_layers = 2
    dropout = 0.1
    batch_size = 80
    # batch_size = 50

    # Configure training/optimization
    # clip = 50.0
    clip = 1.0  # Based on our paper, the gradient norm is clipped at 1
    teacher_forcing_ratio = 0.5
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_epochs = 50000
    epoch = 0
    plot_every = 20
    print_every = 100
    evaluate_every = 10000  # Check validation every 10,000 minibatches

    # Initialize models
    encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers, dropout=dropout)
    decoder = BahdanauAttnDecoderRNN(hidden_size, output_lang.n_words, n_layers, dropout_p=dropout)

    # Initialize optimizers and criterion
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    criterion = nn.CrossEntropyLoss()

    # Move models to GPU
    if USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    import sconce
    job = sconce.Job('seq2seq-translate', {
        'attn_model': attn_model,
        'n_layers': n_layers,
        'dropout': dropout,
        'hidden_size': hidden_size,
        'learning_rate': learning_rate,
        'clip': clip,
        'teacher_forcing_ratio': teacher_forcing_ratio,
        'decoder_learning_ratio': decoder_learning_ratio,
    })
    job.plot_every = plot_every
    job.log_every = print_every

    # Keep track of time elapsed and running averages
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    ##########################################################################
    ###### PART-III : Modeling                                            ####
    ##########################################################################

    ecs = []
    dcs = []
    eca = 0
    dca = 0

    while epoch < n_epochs:
        epoch += 1

        # Get training data for this cycle
        input_batches, input_lengths, target_batches, target_lengths = random_batch(batch_size)

        # Run the train function
        loss, ec, dc = train(
            input_batches, input_lengths, target_batches, target_lengths,
            encoder, decoder, encoder_optimizer, decoder_optimizer)

        # Keep track of loss
        print_loss_total += loss
        plot_loss_total += loss
        eca += ec
        dca += dc
        job.record(epoch, loss)

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (%d %d%%) %.4f' % (
                time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg)
            print(print_summary)

        if epoch % evaluate_every == 0:
            evaluate_randomly()

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

            # TODO: Running average helper
            ecs.append(eca / plot_every)
            dcs.append(dca / plot_every)
            ecs_win = 'encoder grad (%s)' % hostname
            dcs_win = 'decoder grad (%s)' % hostname
            vis.line(np.array(ecs), win=ecs_win, opts={'title': ecs_win})
            vis.line(np.array(dcs), win=dcs_win, opts={'title': dcs_win})
            eca = 0
            dca = 0
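import torch

# A sketch, not the original train() internals: after loss.backward(), the
# configured `clip` is applied with clip_grad_norm (the pre-0.4 PyTorch API,
# consistent with the USE_CUDA-era code above), and the returned total norms
# are the ec/dc values accumulated for the gradient plots.
def clip_gradient_norms(encoder, decoder, clip):
    ec = torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
    dc = torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
    return ec, dc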
from helpers import *
import sconce

# Configuration
gamma = 0.9  # Discounted reward factor
hidden_size = 50
learning_rate = 1e-4
weight_decay = 1e-5
log_every = 1000
render_every = 20000

job = sconce.Job('rl2', {
    'gamma': gamma,
    'learning_rate': learning_rate,
})
job.log_every = log_every
job.plot_every = 500

DROP_MAX = 0.3
DROP_MIN = 0.05
DROP_OVER = 200000

## The Grid World, Agent and Environment
## ==============================================

## World Parameters

### The Grid

PLANT_VALUE = -10
GOAL_VALUE = 10
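# DROP_MAX / DROP_MIN / DROP_OVER above suggest an exploration (or dropout)
# rate annealed over training. A minimal sketch of one such schedule, assuming
# a linear decay (the original schedule function is not shown):
def drop_rate(step):
    t = min(float(step) / DROP_OVER, 1.0)  # Fraction of the annealing window elapsed
    return DROP_MAX + (DROP_MIN - DROP_MAX) * t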
def __init__(self, name):
    self.job = sconce.Job(name)
    if child.type in ['phrase', 'value', 'ref']
]
output_tokens = ['EOS'] + output_tokens
print(output_tokens)

# Initialize model, optimizer, criterions
rarnn = RARNN(input_size, output_tokens, hidden_size)
optimizer = torch.optim.Adam(rarnn.parameters(), lr=learning_rate, weight_decay=weight_decay)
decoder_criterion = nn.NLLLoss()
attention_criterion = nn.MSELoss(size_average=False)

job = sconce.Job('rarnn')
job.plot_every = 20
job.log_every = 100

# Train
try:
    for i in range(n_epochs):
        walked_flat, walked_tree = walk_tree(parsed, parsed['%'], None)

        def _train(node):
            return train(walked_flat, node)

        ds = descend(walked_tree, _train)
        d = sum(ds) / len(ds)
        job.record(i, d)
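# descend() above applies _train to every node of the walked tree and
# collects the per-node losses that are then averaged into d. A sketch of
# such a helper, assuming nodes expose a .children list (the tree type here
# is hypothetical; the original descend lives elsewhere in the repository):
def descend(node, fn):
    results = [fn(node)]
    for child in getattr(node, 'children', []):
        results.extend(descend(child, fn))
    return results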