        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
    plt.savefig('base-lstm-loss')

    torch.save(encoder.state_dict(), 'encoder.pth')
    torch.save(decoder.state_dict(), 'decoder.pth')


if __name__ == "__main__":
    hidden_size = 256
    n_iters = 910000
    teacher_forcing_ratio = 0.5

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words).to(device)

    trainiters(encoder, decoder, n_iters)
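# --- Hedged sketch, not part of the original sources: the training loop above
# calls timeSince() and showPlot() without defining them here. These minimal
# versions follow the PyTorch seq2seq tutorial conventions; the exact
# implementations used in this project are an assumption.
import math
import time

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


def asMinutes(s):
    # Format a duration in seconds as "Xm Ys".
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    # Elapsed time so far plus an estimate of the time remaining,
    # given the fraction of iterations completed.
    now = time.time()
    s = now - since
    es = s / percent
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


def showPlot(points):
    # Plot the averaged losses collected every plot_every iterations.
    fig, ax = plt.subplots()
    ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))
    ax.plot(points)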
    if load_corpus:
        with open(corpus_path, 'rb') as f:
            input_lang, output_lang, pairs = pickle.load(f)
    else:
        input_lang, output_lang, pairs = corpus.read_file(
            'ENG', 'FRA', 'data/eng-fra.txt', True)
        pairs = corpus.filter_pairs(pairs)
        for pair in pairs:
            input_lang.add_sentence(pair[0])
            output_lang.add_sentence(pair[1])
        with open(corpus_path, 'wb') as f:
            pickle.dump((input_lang, output_lang, pairs), f)

    print(f'{len(pairs)} pairs, {input_lang.n_words} source words, '
          f'{output_lang.n_words} target words')

    # Build the model, then either load saved weights or initialize fresh ones
    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size, embed_size, n_layers)
    decoder = seq2seq.Decoder(hidden_size, embed_size, output_lang.n_words, n_layers)
    model = seq2seq.Seq2seq(encoder, decoder).to(device)

    if load_model:
        model.load_state_dict(torch.load(model_path))
        # train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
        #             teaching_rate, clip, model_path)
    else:
        def init_weights(m):
            for name, param in m.named_parameters():
                nn.init.uniform_(param.data, -0.08, 0.08)

        model.apply(init_weights)

    # Test or train
    if mode == 'train':
        train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
if __name__ == "__main__":
    config = argparser()

    embedding_size = config.embedding_size
    hidden_size = config.hidden_size
    teacher_forcing_ratio = config.teacher_forcing_ratio
    n_iters = config.n_iters

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    input_emb_matrix = np.load('input_emb_matrix.npy')
    output_emb_matrix = np.load('output_emb_matrix.npy')
    print('Embedding-matrix shapes: {}, {}'.format(input_emb_matrix.shape,
                                                   output_emb_matrix.shape))

    encoder = seq2seq.Encoder(input_size=input_lang.n_words,
                              embedding_size=embedding_size,
                              hidden_size=hidden_size,
                              embedding_matrix=input_emb_matrix,
                              n_layers=config.n_layers,
                              dropout_p=config.dropout_p).to(device)
    decoder = seq2seq.AttnDecoder(output_size=output_lang.n_words,
                                  embedding_size=embedding_size,
                                  hidden_size=hidden_size,
                                  embedding_matrix=output_emb_matrix,
                                  n_layers=config.n_layers,
                                  dropout_p=config.dropout_p).to(device)

    trainiters(pairs, encoder, decoder, n_iters)
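# --- Hedged sketch, not part of the original sources: one plausible way the
# input_emb_matrix.npy / output_emb_matrix.npy files loaded above could be
# produced. The pretrained-vector dict `pretrained` (e.g. loaded from GloVe)
# and the lang.word2index attribute are assumptions, not confirmed APIs.
import numpy as np


def build_embedding_matrix(lang, pretrained, embedding_size):
    # Start from small random vectors so out-of-vocabulary words still train.
    matrix = np.random.uniform(-0.05, 0.05,
                               (lang.n_words, embedding_size)).astype('float32')
    for word, idx in lang.word2index.items():
        vector = pretrained.get(word)
        if vector is not None and vector.shape[0] == embedding_size:
            matrix[idx] = vector
    return matrix


# Example usage (hypothetical vector dicts):
# np.save('input_emb_matrix.npy', build_embedding_matrix(input_lang, pretrained_en, 300))
# np.save('output_emb_matrix.npy', build_embedding_matrix(output_lang, pretrained_fr, 300))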
    print('BLEU: {:.4}'.format(sum(scores) / n))


if __name__ == "__main__":
    '''
    Evaluation is mostly the same as training, but there are no targets, so we
    simply feed the decoder's predictions back to itself at each step. Every
    time it predicts a word we add it to the output string, and if it predicts
    the EOS token we stop there.
    '''
    hidden_size = 300

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    input_emb_matrix = np.load('input_emb_matrix.npy')
    output_emb_matrix = np.load('output_emb_matrix.npy')
    print('Embedding-matrix shapes: {}, {}'.format(input_emb_matrix.shape,
                                                   output_emb_matrix.shape))

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size, input_emb_matrix).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words, output_emb_matrix).to(device)

    encoder.load_state_dict(torch.load('encoder.pth'))
    encoder.eval()
    decoder.load_state_dict(torch.load('decoder.pth'))
    decoder.eval()

    evaluateRandomly(encoder, decoder, pairs, int(len(pairs) * 0.3))
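# --- Hedged sketch, not part of the original sources: the greedy decoding loop
# described in the docstring above, roughly as evaluateRandomly might implement
# it. SOS_token, EOS_token, MAX_LENGTH, tensorFromSentence, encoder.init_hidden(),
# output_lang.index2word and the exact encoder/decoder call signatures are
# assumptions.
def greedy_decode(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence).to(device)

        # Run the encoder over the source sentence one token at a time.
        encoder_hidden = encoder.init_hidden()
        for ei in range(input_tensor.size(0)):
            _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

        # Feed each prediction back in until EOS or the length limit.
        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        decoded_words = []
        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                break
            decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.detach()
        return decoded_words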
BATCH_SIZE = 64  # try a bigger batch for faster training

train = ds_train.take(BUFFER_SIZE)  # 1.5M samples
print("Dataset sample taken")

train_dataset = train.map(s2s.tf_encode)
# train_dataset = train_dataset.shuffle(BUFFER_SIZE)  # optional
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)
print("Dataset batching done")

steps_per_epoch = BUFFER_SIZE // BATCH_SIZE
embedding_dim = 128
units = 256  # from the pointer-generator paper
EPOCHS = 6

encoder = s2s.Encoder(vocab_size, embedding_dim, units, BATCH_SIZE)
decoder = s2s.Decoder(vocab_size, embedding_dim, units, BATCH_SIZE)

# Learning rate scheduler
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.001,
    decay_steps=steps_per_epoch * (EPOCHS / 2),
    decay_rate=2,
    staircase=False)
optimizer = tf.keras.optimizers.Adam(lr_schedule)

if args.checkpoint is None:
    dt = datetime.datetime.today().strftime("%Y-%b-%d-%H-%M-%S")
    checkpoint_dir = './training_checkpoints-' + dt
else: