def run(args):
    """Dispatch to training, resumed training, or testing based on CLI args.

    Exactly one mode runs: train-from-scratch, resume-from-checkpoint, or
    test.  If none of ``args.train`` / ``args.load`` / ``args.test`` is set,
    the function silently does nothing.
    """
    # Unpack frequently-used hyper-parameters from the parsed arguments.
    # FIX: renamed the local `input` to `inp` so the builtin input() is not
    # shadowed.  `fil` (args.filter) is unpacked but never used here.
    reverse, fil, n_epoch, print_every, learning_rate, n_layers, hidden_size, \
        batch_size, beam_size, inp = \
        args.reverse, args.filter, args.epoch, args.print, args.learning_rate, \
        args.layer, args.hidden, args.batch_size, args.beam, args.input
    if args.train and not args.load:
        print("==" * 20, "train", "==" * 20)
        trainIters(args, args.train, reverse, n_epoch, learning_rate,
                   batch_size, n_layers, hidden_size, print_every)
    elif args.load:
        print("==" * 20, "load", "==" * 20)
        # Model hyper-parameters are encoded in the checkpoint filename.
        n_layers, hidden_size = parseFilename(args.load)
        trainIters(args, args.train, reverse, n_epoch, learning_rate,
                   batch_size, n_layers, hidden_size, print_every,
                   loadFilename=args.load)  # load parameters from filename
    elif args.test:
        print("==" * 20, "test", "==" * 20)
        n_layers, hidden_size = parseFilename(args.test, True)
        runTest(args, n_layers, hidden_size, reverse, args.test, beam_size,
                batch_size, inp, args.corpus)
def run(args):
    """Dispatch to training, resumed training, or testing based on CLI args.

    Does nothing if none of ``args.train`` / ``args.load`` / ``args.test``
    is set.
    """
    # Unpack all model / optimization hyper-parameters from the parsed args.
    learning_rate, lr_decay_epoch, lr_decay_ratio, n_layers, hidden_size, embed_size, \
        attr_size, attr_num, batch_size, beam_size, overall, max_length, min_length, save_dir = \
        args.learning_rate, args.lr_decay_epoch, args.lr_decay_ratio, args.layer, args.hidden_size, args.embed_size, \
        args.attr_size, args.attr_num, args.batch_size, args.beam_size, args.overall, args.max_length, args.min_length, args.save_dir
    if args.train and not args.load:
        trainIters(args.train, learning_rate, lr_decay_epoch, lr_decay_ratio,
                   batch_size, n_layers, hidden_size, embed_size, attr_size,
                   attr_num, overall, save_dir)
    elif args.load:
        # Layer count and hidden size are encoded in the checkpoint filename.
        n_layers, hidden_size = parseFilename(args.load)
        trainIters(args.train, learning_rate, lr_decay_epoch, lr_decay_ratio,
                   batch_size, n_layers, hidden_size, embed_size, attr_size,
                   attr_num, overall, save_dir, loadFilename=args.load)
    elif args.test:
        # NOTE(review): the test branch reads hyper-parameters from
        # args.review_model's filename, not args.test — presumably the review
        # model is the one whose architecture matters here; confirm.
        n_layers, hidden_size = parseFilename(args.review_model)
        runTest(args.test, n_layers, hidden_size, embed_size, attr_size,
                attr_num, overall, args.review_model, args.sketch_model,
                args.topic_model, beam_size, max_length, min_length, save_dir)
def run(args):
    """Print the argument table, then dispatch to train / resume / test."""
    tab_printer(args)
    # Unpack all model / optimization hyper-parameters from the parsed args.
    learning_rate, lr_decay_epoch, lr_decay_ratio, weight_decay, embed_size, hidden_size, \
        node_size, capsule_size, gcn_layers, gcn_filters, rnn_layers, capsule_num, batch_size, epochs = \
        args.learning_rate, args.lr_decay_epoch, args.lr_decay_ratio, args.weight_decay, args.embed_size, \
        args.hidden_size, args.node_size, args.capsule_size, args.gcn_layers, args.gcn_filters, \
        args.rnn_layers, args.capsule_num, args.batch_size, args.epochs
    if args.train:
        trainIters(args.train, learning_rate, lr_decay_epoch, lr_decay_ratio,
                   weight_decay, batch_size, rnn_layers, hidden_size,
                   embed_size, node_size, epochs, args.save_dir)
    elif args.load:
        # Same call as training but resumes from args.load_file.
        trainIters(args.load, learning_rate, lr_decay_epoch, lr_decay_ratio,
                   weight_decay, batch_size, rnn_layers, hidden_size,
                   embed_size, node_size, epochs, args.save_dir, args.load_file)
    elif args.test:
        runTest(args.test, rnn_layers, hidden_size, embed_size, node_size,
                capsule_size, gcn_layers, gcn_filters, capsule_num,
                args.aspect_model, args.review_model, args.beam_size,
                args.max_length, args.min_length, args.save_dir)
    else:
        print("mode error!")
def main():
    """Load the eng-fra parallel corpus, build vocabularies, and train.

    Reads the tab-separated corpus file, builds a ``Lang`` vocabulary per
    language, pairs the sentences, then trains an encoder/decoder for
    75000 iterations.
    """
    lang1 = "eng"
    lang2 = "fra"
    # FIX: use a context manager so the corpus file is closed even if a
    # later step raises (the original leaked the file handle).
    with open("../data/data/" + lang1 + "-" + lang2 + ".txt", encoding='utf-8') as f:
        print(f)
        lines = f.readlines()
    eng_sentences, fra_sentences = data_loaders.getSentences(lines)
    print(len(eng_sentences), len(fra_sentences))
    eng_lang = Lang(lang1)
    eng_lang.parseSentences(eng_sentences)
    fra_lang = Lang(lang2)
    fra_lang.parseSentences(fra_sentences)
    print("No of eng words: ", len(eng_lang.vocab))
    print("No of fra words: ", len(fra_lang.vocab))
    pairs = data_loaders.createPairs(eng_sentences, fra_sentences)
    print("Length of pairs: ", len(pairs))
    hidden_size = 256
    encoder1 = EncoderRNN(len(eng_lang.vocab), hidden_size).to(device)
    attn_decoder1 = DecoderRNN(len(fra_lang.vocab), hidden_size, len(fra_lang.vocab)).to(device)
    train.trainIters(encoder1, attn_decoder1, 75000, pairs, eng_lang, fra_lang, print_every=5000)
def run(args):
    """Dispatch to training, resumed training, or testing based on CLI args."""
    # Unpack hyper-parameters; `fil` (args.filter) is unpacked but unused here.
    reverse, fil, n_iteration, print_every, save_every, learning_rate, \
        n_layers, hidden_size, batch_size, beam_size, inp, dropout = \
        args.reverse, args.filter, args.iteration, args.print, args.save, args.learning_rate, \
        args.layer, args.hidden, args.batch_size, args.beam, args.input, args.dropout
    if args.train and not args.load:
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every, dropout)
    elif args.load:
        # Architecture and direction flag are encoded in the checkpoint filename.
        n_layers, hidden_size, reverse = parseFilename(args.load)
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every, dropout,
                   loadFilename=args.load)
    elif args.test:
        n_layers, hidden_size, reverse = parseFilename(args.test, True)
        runTest(n_layers, hidden_size, reverse, args.test, beam_size, inp, args.corpus)
def run(iteration=10000, learning_rate=0.01, batch_size=1, layers=1, hidden_size=200):
    """Run training with the given hyper-parameters.

    Generalized from the original hard-coded constants: the defaults
    reproduce the original behavior exactly, so ``run()`` with no
    arguments is unchanged, while callers may now override any setting.

    Args:
        iteration: number of training iterations.
        learning_rate: optimizer learning rate.
        batch_size: training batch size.
        layers: number of RNN layers.
        hidden_size: RNN hidden state dimension.
    """
    trainIters(iteration, learning_rate, batch_size, layers, hidden_size)
def run(args):
    """Dispatch to training, resumed training, or testing based on CLI args."""
    # Unpack hyper-parameters; `fil` (args.filter) is unpacked but unused here.
    reverse, fil, n_iteration, print_every, save_every, learning_rate, n_layers, hidden_size, batch_size, beam_size, inp = \
        args.reverse, args.filter, args.iteration, args.print, args.save, args.learning_rate, \
        args.layer, args.hidden, args.batch_size, args.beam, args.input
    if args.train and not args.load:
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every)
    elif args.load:
        # Architecture and direction flag are encoded in the checkpoint filename.
        n_layers, hidden_size, reverse = parseFilename(args.load)
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every,
                   loadFilename=args.load)
    elif args.test:
        n_layers, hidden_size, reverse = parseFilename(args.test, True)
        runTest(n_layers, hidden_size, reverse, args.test, beam_size, inp, args.corpus)
def run(args):
    """Dispatch to training, resumed training, testing, or loss plotting.

    Exactly one mode runs depending on which of ``args.train`` /
    ``args.load`` / ``args.test`` / ``args.loss`` is set.
    """
    # FIX: renamed the local `input` to `inp` so the builtin input() is not
    # shadowed.  `fil` (args.filter) is unpacked but never used here.
    reverse, fil, n_iteration, print_every, save_every, learning_rate, n_layers, hidden_size, batch_size, beam_size, inp = \
        args.reverse, args.filter, args.iteration, args.print, args.save, args.learning_rate, \
        args.layer, args.hidden, args.batch_size, args.beam, args.input
    if args.train and not args.load:
        trainIters(args.train, args.corpus_index, args.strip, args.pretrained_model,
                   reverse, n_iteration, learning_rate, batch_size, n_layers,
                   hidden_size, print_every, save_every)
    elif args.load:
        # Architecture and direction flag are encoded in the checkpoint filename.
        n_layers, hidden_size, reverse = parseFilename(args.load)
        trainIters(args.train, args.corpus_index, args.strip, args.pretrained_model,
                   reverse, n_iteration, learning_rate, batch_size, n_layers,
                   hidden_size, print_every, save_every, loadFilename=args.load)
    elif args.test:
        n_layers, hidden_size, reverse = parseFilename(args.test, True)
        runTest(n_layers, args.pretrained_model, hidden_size, reverse, args.test,
                beam_size, inp, args.corpus, args.diff_corpus)
    elif args.loss:
        loss_graph(args.loss, args.corpus, hidden_size)
def run(args):
    """Dispatch to training, resumed training, or testing based on CLI args."""
    # FIX: renamed the local `input` to `inp` so the builtin input() is not
    # shadowed.  `fil` (args.filter) is unpacked but never used here.
    reverse, fil, n_iteration, print_every, save_every, learning_rate, n_layers, hidden_size, batch_size, beam_size, inp = \
        args.reverse, args.filter, args.iteration, args.print, args.save, args.learning_rate, \
        args.layer, args.hidden, args.batch_size, args.beam, args.input
    if args.train and not args.load:
        print("Train new model ... ")
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every)
    elif args.load:
        print("Load existing model ... ")
        # NOTE(review): here parseFilename returns one value, while the test
        # branch below unpacks three from it — confirm parseFilename's
        # signature; this branch may be dropping n_layers/hidden_size.
        reverse = parseFilename(args.load)
        trainIters(args.train, reverse, n_iteration, learning_rate, batch_size,
                   n_layers, hidden_size, print_every, save_every,
                   loadFilename=args.load)
    elif args.test:
        print("Testing ... ")
        n_layers, hidden_size, reverse = parseFilename(args.test, True)
        runTest(n_layers, hidden_size, reverse, args.test, beam_size, inp, args.corpus)
def trainDemo(lang, dataSet, nlVocab, codeVocab, train_variables):
    """Build a code→natural-language attention seq2seq pair and train it.

    The encoder vocabulary comes from ``codeVocab`` and the decoder
    vocabulary from ``nlVocab``; training runs for 2,000,000 iterations.
    """
    print("Training...")
    # NOTE(review): HIDDDEN_SIAZE is the constant's actual (misspelled) name
    # in the `setting` module — do not "fix" the spelling here without
    # renaming it there too.
    encoder1 = EncoderRNN(codeVocab.n_words, setting.HIDDDEN_SIAZE)
    attn_decoder1 = AttnDecoderRNN(setting.HIDDDEN_SIAZE, nlVocab.n_words, 1, dropout_p=0.1)
    if setting.USE_CUDA:
        encoder1 = encoder1.cuda()
        attn_decoder1 = attn_decoder1.cuda()
    trainIters(lang, dataSet, train_variables, encoder1, attn_decoder1, 2000000, print_every=5000)
def training():
    """Construct the context and classification models and train them.

    Uses module-level data (``inputs_dict``, ``target_dict``, ``pairs``,
    ``hidden_size``, ``n_iters``, ``device``) and returns the trained
    models together with the recorded training losses.
    """
    ctx_model = Context(inputs_dict.n_words, hidden_size).to(device)
    cls_model = Classification().to(device)
    ctx_model, cls_model, losses = trainIters(
        ctx_model, cls_model, device, inputs_dict, target_dict, pairs,
        n_iters, print_every=50)
    return ctx_model, cls_model, losses
def run(args):
    """Dispatch to testing or training of the chatbot model.

    Raises:
        RuntimeError: if testing is requested without a model file, or if
            neither ``args.test`` nor ``args.train`` is set.
    """
    # Unpack hyper-parameters.  Note args.layers feeds both encoder and
    # decoder layer counts.
    learning_rate, loadFilename, datafile, decoder_n_layers, \
        encoder_n_layers, hidden_size, dropout, attn_model, \
        n_iteration, batch_size, save_every, print_every, \
        decoder_learning_ratio, clip, beam_size, inp = \
        args.learning_rate, args.model_file, args.data_file, args.layers, \
        args.layers, args.hidden_size, args.dropout, args.attn_model, args.iteration, \
        args.batch_size, args.save_every, args.print_every, \
        args.decoder_learning_ratio, args.clip, args.beam_size, args.input
    if args.test:
        if loadFilename:
            print("Starting testing model!")
            runTest(decoder_n_layers, hidden_size, False, loadFilename,
                    beam_size, inp, datafile)
        else:
            raise RuntimeError("Please assign modelFile to load")
    elif args.train:
        print("Starting Training model!")
        # NOTE(review): save_dir and corpus_name are not defined in this
        # function — presumably module-level globals; verify they exist.
        trainIters(attn_model=attn_model, hidden_size=hidden_size,
                   encoder_n_layers=encoder_n_layers,
                   decoder_n_layers=decoder_n_layers, save_dir=save_dir,
                   n_iteration=n_iteration, batch_size=batch_size,
                   learning_rate=learning_rate,
                   decoder_learning_ratio=decoder_learning_ratio,
                   print_every=print_every, save_every=save_every, clip=clip,
                   dropout=dropout, corpus_name=corpus_name, datafile=datafile,
                   modelFile=loadFilename)
    else:
        raise RuntimeError("Please specify a running mode between train and test")
def run():
    """Preprocess the eng-fra corpus, train a seq2seq model, and visualize.

    Trains for a reduced number of iterations (see comment below), then
    evaluates random samples and renders attention maps for a few fixed
    French sentences.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device :", device, "\n")

    # Preprocess data
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print("Finished Preprocessing\n")

    # Seq2Seq Model
    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    # FIX: the decoder was never moved to `device`, which fails whenever
    # CUDA is available while the encoder/inputs live on the GPU.
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

    metadata = (input_lang, output_lang, pairs)
    # Originally n_iters=75000, print_every=5000 (reduced for a quick run).
    trainIters(encoder1, attn_decoder1, metadata, n_iters=500, print_every=100)

    # Check
    evaluateRandomly(encoder1, attn_decoder1, metadata)

    # Evaluate and Visualize
    output_words, attentions = evaluate(encoder1, attn_decoder1, metadata, "je suis trop froid .")
    plt.matshow(attentions.numpy())
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata, "elle a cinq ans de moins que moi .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata, "elle est trop petit .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata, "je ne crains pas de mourir .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata, "c est un jeune directeur plein de talent .")
def main():
    """Build an eng→fra attention translator, train it, and plot one attention map."""
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    # Encoder/decoder pair with their SGD optimizers, wrapped in a Translator.
    enc = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    dec = AttnDecoderRNN(args.hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    enc_opt = optim.SGD(enc.parameters(), lr=args.lr)
    dec_opt = optim.SGD(dec.parameters(), lr=args.lr)
    model = Translator(input_lang, output_lang, enc, dec, enc_opt, dec_opt)

    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)
    evaluateRandomly(model, pairs)

    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
# If you have cuda, configure cuda to call for state in encoder_optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() for state in decoder_optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() # Run training iterations print("Starting Training!") trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, print_every, save_every, clip, corpus_name, loadFilename, checkpoint, MAX_LENGTH, teacher_forcing_ratio, hidden_size) ###################################################################### # Run Evaluation # ~~~~~~~~~~~~~~ # # To chat with your model, run the following block. # # Set dropout layers to eval mode encoder.eval() decoder.eval() # Initialize search module
if __name__ == '__main__':
    args = parse()
    if args.train:
        # Train a fresh encoder/decoder with the CLI hyper-parameters.
        hidden_size = args.hidden
        print_every = args.print
        save_every = args.save
        iteration = args.iteration
        encoder = EncoderRNN(input_lang.n_words, hidden_size, input_lang)
        attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, output_lang, dropout_p=0.1)
        trainIters(encoder, attn_decoder, iteration, print_every=print_every, save_every=save_every)
    if args.test:
        # Rebuild the architecture, restore weights from the checkpoint
        # (keys 'en'/'de'), then enter the interactive evaluation loop.
        hidden_size = args.hidden
        encoder = EncoderRNN(input_lang.n_words, hidden_size, input_lang)
        attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, output_lang, dropout_p=0.1)
        modelFile = args.model
        checkpoint = torch.load(modelFile)
        encoder.load_state_dict(checkpoint['en'])
        attn_decoder.load_state_dict(checkpoint['de'])
        evaluateInput(encoder, attn_decoder)
import random

import torch

from lang import prepareData
from model import EncoderRNN, AttnDecoderRNN
from config import Config
from train import trainIters, evaluateRandomly

# Load default parameters and configurations
config = Config()
input_lang, output_lang, pairs = prepareData('en', 'zh', config)
config.input_lang_n_words = input_lang.n_words
config.output_lang_n_words = output_lang.n_words
# FIX: `random` was used here without being imported (NameError at runtime).
print(random.choice(pairs))

encoder = EncoderRNN(config)
decoder = AttnDecoderRNN(config)
if config.use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

trainIters(encoder, decoder, pairs, input_lang, output_lang, config)

# Save model
torch.save(encoder, 'encoder.pt')
torch.save(decoder, 'decoder.pt')

# Evaluate model
evaluateRandomly(encoder, decoder, input_lang, output_lang, pairs, config)
def main(args):
    """End-to-end driver: prepare data, build encoder/decoder, train or test.

    In test-only mode the function restores checkpoints, computes BLEU on
    the dev and train loaders, and writes example translations to
    ``results/``.  Returns 0 on completion.
    """
    # Reconcile interdependent CLI options before building anything.
    if args.decoder_type == "attn":
        args.use_bi = True
    if (args.test_only == True) and (args.decode_method == "beam"):
        # Beam decoding here is implemented per-sentence only.
        args.batch_size = 1
    if args.self_attn == True:
        args.encoder_hidden_size = 300
        args.decoder_hidden_size = 300
    source_words_to_load = 1000000
    target_words_to_load = 1000000
    input_lang, output_lang, train_pairs, train_max_length = prepareData(
        "train", args.language, "en", args.data_path,
        max_len_ratio=args.max_len_ratio, char=args.char_chinese)
    input_lang_dev, output_lang_dev, dev_pairs, _ = prepareData(
        'dev', args.language, 'en', path=args.data_path, max_len_ratio=1,
        char=args.char_chinese)
    # _, _, test_pairs, _ = prepareData('test', args.language, 'en', path=args.data_path)
    if args.use_pretrain_emb:
        # Choose source embeddings by language (zh char/word vs vi).
        if args.language == "zh":
            if args.char_chinese:
                source_embedding, source_notPretrained = load_char_embd(
                    args.emb_path + "sgns.literature.char", input_lang,
                    reload=args.reload_emb)
            else:
                file_check(args.emb_path + 'chinese_ft_300.txt')
                source_embedding, source_notPretrained = load_fasttext_embd(
                    args.emb_path + 'chinese_ft_300.txt', input_lang,
                    input_lang, source_words_to_load, reload=args.reload_emb)
        else:
            file_check(args.emb_path + 'vietnamese_ft_300.txt')
            source_embedding, source_notPretrained = load_fasttext_embd(
                args.emb_path + 'vietnamese_ft_300.txt', input_lang,
                input_lang, source_words_to_load, reload=args.reload_emb)
        file_check(args.emb_path + 'english_ft_300.txt')
        target_embedding, target_notPretrained = load_fasttext_embd(
            args.emb_path + 'english_ft_300.txt', output_lang, input_lang,
            target_words_to_load, reload=args.reload_emb)
        if args.tune_pretrain_emb:
            # Mark every row trainable so pretrained vectors get fine-tuned.
            source_notPretrained[:] = 1
            target_notPretrained[:] = 1
    else:
        source_embedding = source_notPretrained = target_embedding = target_notPretrained = None  # 0000000000
        # target_embedding = target_notPretrained = None
    params = {
        'batch_size': args.batch_size,
        'shuffle': True,
        'collate_fn': vocab_collate_func,
        'num_workers': 20
    }
    params2 = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'collate_fn': vocab_collate_func,
        'num_workers': 20
    }
    train_set, dev_set = Dataset(train_pairs, input_lang, output_lang), Dataset(dev_pairs, input_lang, output_lang_dev)
    train_loader = torch.utils.data.DataLoader(train_set, **params)
    dev_loader = torch.utils.data.DataLoader(dev_set, **params2)
    print(len(train_loader), len(dev_loader))
    # Encoder: self-attention stack or (bi)RNN, per CLI flags.
    if args.self_attn:
        encoder = Encoder_SelfAttn(input_lang.n_words, EMB_DIM, args.dim_ff,
                                   args.selfattn_en_num, args.decoder_layers,
                                   args.decoder_hidden_size, source_embedding,
                                   source_notPretrained, args.device,
                                   args.attn_head).to(args.device)
    else:
        encoder = EncoderRNN(input_lang.n_words, EMB_DIM,
                             args.encoder_hidden_size, args.encoder_layers,
                             args.decoder_layers, args.decoder_hidden_size,
                             source_embedding, source_notPretrained,
                             args.rnn_type, args.use_bi, args.device, False,
                             args.attn_head).to(args.device)
    # Decoder: transformer, plain RNN, or attention RNN.
    if args.transformer:
        decoder = Decoder_SelfAttn(output_lang.n_words, EMB_DIM, args.dim_ff,
                                   args.selfattn_de_num, target_embedding,
                                   target_notPretrained, args.device,
                                   args.attn_head).to(args.device)
    elif args.decoder_type == "basic":
        decoder = DecoderRNN(output_lang.n_words, EMB_DIM,
                             args.decoder_hidden_size, args.decoder_layers,
                             target_embedding, target_notPretrained,
                             args.rnn_type, dropout_p=args.decoder_emb_dropout,
                             device=args.device).to(args.device)
    elif args.decoder_type == "attn":
        decoder = DecoderRNN_Attention(output_lang.n_words, EMB_DIM,
                                       args.decoder_hidden_size,
                                       args.decoder_layers, target_embedding,
                                       target_notPretrained, args.rnn_type,
                                       dropout_p=args.decoder_emb_dropout,
                                       device=args.device,
                                       method=args.attn_method).to(args.device)
    else:
        raise ValueError
    print(encoder, decoder)
    if not args.test_only:
        trainIters(encoder, decoder, train_loader, dev_loader,
                   input_lang, output_lang, input_lang_dev, output_lang_dev,
                   train_max_length, args.epoch, plot_every=args.plot_every,
                   print_every=args.print_every,
                   weight_decay=args.weight_decay,
                   learning_rate=args.learning_rate, device=args.device,
                   teacher_forcing_ratio=args.teacher_forcing_ratio,
                   label=args.save_model_name, use_lr_scheduler=True,
                   gamma_en=0.99, gamma_de=0.99, beam_width=args.beam_width,
                   min_len=args.min_len, n_best=args.n_best,
                   decode_method=args.decode_method,
                   save_result_path=args.save_result_path,
                   save_model=args.save_model)
    else:
        # Restore checkpoints onto CPU regardless of where they were saved.
        encoder.load_state_dict(
            torch.load('encoder' + "-" + args.save_model_name + '.ckpt',
                       map_location=lambda storage, location: storage))
        decoder.load_state_dict(
            torch.load('decoder' + "-" + args.save_model_name + '.ckpt',
                       map_location=lambda storage, location: storage))
        bleu_score, decoded_list, target_list, attn_weight = test(
            encoder, decoder, dev_loader, input_lang, output_lang, input_lang,
            output_lang_dev, args.beam_width, args.min_len, args.n_best,
            train_max_length, args.decode_method, args.device)
        print("dev bleu: ", bleu_score)
        i = 0
        with open("results/dev_examples_{}.txt".format(args.save_result_label), "w+") as f:
            f.write("bleu: {}\n".format(bleu_score))
            for (source, target, source_len, target_len) in (dev_loader):
                # NOTE(review): the comprehension variable `i` below is local
                # to the comprehension and shadows the outer counter — verify
                # the intended indexing.
                source_list = [[
                    input_lang.index2word[k.item()] for k in source[i]
                ][:source_len[i] - 1] for i in range(len(source))]
                for s in source_list:
                    f.write("S: {}\n".format(" ".join(s)))
                    f.write("T: {}\n".format(decoded_list[i]))
                    f.write("H: {}\n".format(target_list[i]))
                    f.write("\n")
                    i += 1
        # =====================================================
        bleu_score, decoded_list, target_list, attn_weight = test(
            encoder, decoder, train_loader, input_lang, output_lang,
            input_lang, output_lang, args.beam_width, args.min_len,
            args.n_best, train_max_length, args.decode_method, args.device)
        print("train bleu: ", bleu_score)
        i = 0
        with open(
                "results/train_examples_{}.txt".format(args.save_result_label),
                "w+") as f:
            f.write("bleu: {}\n".format(bleu_score))
            for (source, target, source_len, target_len) in (train_loader):
                source_list = [[
                    input_lang.index2word[k.item()] for k in source[i]
                ][:source_len[i] - 1] for i in range(len(source))]
                for s in source_list:
                    f.write("S: {}\n".format(" ".join(s)))
                    f.write("T: {}\n".format(decoded_list[i]))
                    f.write("H: {}\n".format(target_list[i]))
                    f.write("\n")
                    i += 1
    return 0
num_layers=num_layers, bidirectional=bidirectional).to(device) decoder = DecoderLSTM(hidden_size, eng.n_words, dropout, num_layers=num_layers, bidirectional=bidirectional).to(device) print( "Split done. Elements in train: %d and elements in test: %d. Starting training..." % (len(train_set), len(test_set))) best_encoder, best_decoder = trainIters(encoder, decoder, epochs, train_set, test_set, sil0, sil1, eng, lr=lr, lr_decay=lr_decay, lr_drop_epoch=lr_drop, l2_penalty=l2_penalty) print("Training done. Printing stats to file....") calculateTrainingAccuracy(best_encoder, best_decoder, test_set, eng, sil0, sil1, 'results/' + file_name + '/results.txt') print("Saving Models") torch.save(best_encoder.state_dict(), "models/" + file_name + "/encoderLSTM.pt") torch.save(best_decoder.state_dict(), "models/" + file_name + "/decoderLSTM.pt") else:
if args.cuda: encoderCNN = encoderCNN.cuda() encoderRNN = encoderRNN.cuda() decoder = decoder.cuda() """ embedding = nn.Embedding(args.embed_num, args.embed_dim) """ encoderCNN = model.EncoderCNN(args, embedding) encoderRNN = model.EncoderRNN(args) decoder = model.AttnDecoderRNN(args, embedding) train.trainIters1(args, abstracts[0][:100], titles[0][:100], encoderCNN, encoderRNN, decoder) """ """ vanillaEncoderRNN = model.VanillaEncoderRNN(args, embedding) vanillaDecoderRNN = model.VanillaDecoderRNN(args, embedding) train.trainIters(args, abstracts[0][:100], titles[0][:100], vanillaEncoderRNN, vanillaDecoderRNN) """ """ filename = "../Snapshots/2017-12-01_03-28-04/" step = '10000' """ """ vanillaEncoderRNN = torch.load(filename + 'vanillaEncoderRNN_steps' + step + '.pt') vanillaDecoderRNN = torch.load(filename + 'vanillaDecoderRNN_steps' + step + '.pt') train.evaluateRandomly(args, abstracts[0][:100], titles[0][:100], vanillaEncoderRNN, vanillaDecoderRNN, n = 3) """
def run(args):
    """Build the chatbot encoder/decoder and either train or chat-evaluate.

    Uses module-level configuration (voc, hidden_size, device, model_name,
    save_dir, corpus_name, n_iteration, etc. — presumably globals; verify).
    """
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Initialize optimizers
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if (args.train):
        loadFilename = None
        print('Building encoder and decoder ...')
        print('Building optimizers ...')
        print('Models built and ready to go!')
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()
        # Run training iterations
        print("Starting Training!")
        trainIters(model_name, voc, pairs, encoder, decoder,
                   encoder_optimizer, decoder_optimizer, embedding,
                   encoder_n_layers, decoder_n_layers, save_dir, n_iteration,
                   batch_size, print_every, save_every, clip, corpus_name,
                   loadFilename, args.name)
    if (args.evaluate):
        # Set checkpoint to load from; set to None if starting from scratch
        loadFilename = os.path.join(
            save_dir, model_name, corpus_name,
            '{}-{}_{}_{}'.format(encoder_n_layers, decoder_n_layers,
                                 hidden_size, args.name),
            '{}_checkpoint.tar'.format(checkpoint_iter))
        # Load model if a loadFilename is provided
        if loadFilename:
            # If loading on same machine the model was trained on
            # checkpoint = torch.load(loadFilename)
            # If loading a model trained on GPU to CPU
            checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
            encoder_sd = checkpoint['en']
            decoder_sd = checkpoint['de']
            encoder_optimizer_sd = checkpoint['en_opt']
            decoder_optimizer_sd = checkpoint['de_opt']
            embedding_sd = checkpoint['embedding']
            voc.__dict__ = checkpoint['voc_dict']
        if loadFilename:
            # Restore every component's state from the checkpoint.
            embedding.load_state_dict(embedding_sd)
            encoder.load_state_dict(encoder_sd)
            decoder.load_state_dict(decoder_sd)
            encoder_optimizer.load_state_dict(encoder_optimizer_sd)
            decoder_optimizer.load_state_dict(decoder_optimizer_sd)
        # Set dropout layers to eval mode
        encoder.eval()
        decoder.eval()
        # Begin chatting (uncomment and run the following line to begin)
        evaluateInput(encoder, decoder, voc, args.beam, args.name)
def main():
    """Train a tiny eng→fra attention model and visualize its attention maps."""
    # TODO CHECK THE EFFECT OF LOADING DATA HERE:
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    lang_pack = input_lang, output_lang, pairs
    print(random.choice(pairs))
    # Deliberately tiny hidden size / iteration count (smoke-test scale).
    hidden_size = 2
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    trainIters(encoder1, attn_decoder1, 100, print_every=5000, plot_every=1, lang_pack=lang_pack)

    ######################################################################
    #
    evaluateRandomly(encoder1, attn_decoder1, lang_pack=lang_pack)
    output_words, attentions = evaluate(encoder1, attn_decoder1,
                                        "je suis trop froid .", lang_pack=lang_pack)

    ######################################################################
    # For a better viewing experience we will do the extra work of adding axes
    # and labels:
    #
    def showAttention(input_sentence, output_words, attentions):
        # Render the attention matrix with source tokens on x, output on y.
        # Set up figure with colorbar
        fig = plt.figure()
        ax = fig.add_subplot(111)
        cax = ax.matshow(attentions.numpy(), cmap='bone')
        fig.colorbar(cax)
        # Set up axes
        ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'], rotation=90)
        ax.set_yticklabels([''] + output_words)
        # Show label at every tick
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
        plt.show()

    def evaluateAndShowAttention(input_sentence):
        # Translate one sentence and display its attention heat-map.
        output_words, attentions = evaluate(encoder1, attn_decoder1,
                                            input_sentence, lang_pack=lang_pack)
        print('input =', input_sentence)
        print('output =', ' '.join(output_words))
        showAttention(input_sentence, output_words, attentions)

    evaluateAndShowAttention("elle a cinq ans de moins que moi .")
    evaluateAndShowAttention("elle est trop petit .")
    evaluateAndShowAttention("je ne crains pas de mourir .")
    evaluateAndShowAttention("c est un jeune directeur plein de talent .")
from __future__ import unicode_literals, print_function, division
import torch
import numpy as np
from models import Encoder, AttnDecoder
from train import trainIters
from eval import evaluateRandomly
from utils import *

# Train a Korean paraphrasing seq2seq model with pretrained word embeddings.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

file_name = "paraphrasing data_DH.xlsx"
dictionary, pair_data = prepareData("kor", file_name)

# Pretrained 128-dim word vectors, one row per vocabulary entry.
embedtable = np.loadtxt("word_emb.txt", delimiter=" ", dtype='float32')
# Random vectors for the special tokens, plus one all-zero row for the last
# special token (presumably padding — verify against SPECIAL_TOKENS order).
special_embeddings = np.concatenate(
    (np.random.rand(len(SPECIAL_TOKENS)-1, 128).astype('float32'),
     np.zeros((1,128), dtype=np.float32)), axis=0)
# Insert the special-token rows at index 2 of the embedding table.
embedtable = np.insert(embedtable, [2], special_embeddings, axis=0)
embedtable = torch.from_numpy(embedtable).float()

encoder = Encoder(dictionary.n_tokens, 128, embedtable).to(device)
attndecoder = AttnDecoder(128, dictionary.n_tokens, embedtable, dropout_p=0.1).to(device)

trainIters(encoder, attndecoder, dictionary, pair_data, epochs=100)
evaluateRandomly(encoder, attndecoder, pair_data, dictionary, n=10)
# Build the encoder/bridge/decoder from the config file, train, and save.
bidirectional = config.getboolean("bidirectional")
trainset, source_vocab, target_vocab = get_dataset(
    types="train",
    batch_size=int(config["batch_size"]),
    shuffle=True,
    num_workers=int(config["num_workers"]),
    pin_memory=False,
    drop_last=True)
# NOTE(review): vocab sizes are padded by +2 — presumably for two special
# ids (e.g. pad/unk); confirm against get_dataset's numbering.
encoder1 = EncoderRNN(int(config["hidden_size_encoder"]),
                      len(source_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_encoder"]),
                      bidirectional=bidirectional).to(device)
# Bridge projects encoder hidden state into the decoder's hidden size.
bridge = Linear(bidirectional, int(config["hidden_size_encoder"]),
                int(config["hidden_size_decoder"])).to(device)
decoder1 = DecoderRNN(int(config["hidden_size_decoder"]),
                      len(target_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_decoder"])).to(device)
trainIters(trainset, encoder1, decoder1, bridge,
           num_epochs=int(config["num_epoch"]),
           batch_size=int(config["batch_size"]),
           print_every=10,
           device=device)
# Persist the whole modules (not just state_dicts).
torch.save(encoder1, "encoder.pt")
torch.save(decoder1, "decoder.pt")
torch.save(bridge, "bridge.pt")
# Persist the vocabulary and the pre-built batches, then train.
# FIX: the original passed bare open(...) handles to pickle.dump and never
# closed them; `with` guarantees the files are flushed and closed.
_here = os.path.dirname(os.path.realpath(__file__))
with open(_here + "/models/i2w.pickle", 'wb') as _fh:
    pickle.dump(list(i2w.items()), _fh)
batches = batchify(train, w2i, args.batch_size)
with open(_here + "/batches.pickle", 'wb') as _fh:
    pickle.dump(batches, _fh)
logging.info("Loaded data.")

# Initialize embeddings from GloVe vectors.
embed = load_glove_matrix(w2i, args.emfile)
logging.info("Initialized embeddings.")

dim = 200
encoder = Attentive_Encoder(len(w2i), dim, args.q, embed, enable_cuda)
decoder = RAN_Decoder(dim, len(w2i), embed, args.length, args.batch_size, enable_cuda)
if enable_cuda:
    encoder.cuda()
    decoder.cuda()

logging.info("Training will start shortly.")
trainIters(batches, w2i, encoder, decoder, args.epochs, args.lr, args.length,
           args.ratio, enable_cuda)
decoder = SimpleDecoderRNN(vocab_size, hidden_size).to(device) else: decoder = AttentionDecoderRNN(vocab_size, hidden_size, datatransformer.MAX_LENGTH).to(device) """ train """ loss_list = [] BLEU_list = [] best_score = 0 best_encoder_wts, best_decoder_wts = None, None for epoch in range(1, epochs + 1): loss = trainIters(decoder_type, encoder, decoder, training_tensor_list, learning_rate=0.05, max_length=datatransformer.MAX_LENGTH, teacher_forcing_ratio=0.5, device=device) print(f'epoch{epoch:>2d} loss:{loss:.4f}') predicted_list = evaluateAll(decoder_type, encoder, decoder, testing_tensor_list, max_length=datatransformer.MAX_LENGTH, device=device) # test all testing data score = 0 for i, (input, target) in enumerate(testing_input): predict = datatransformer.indices2sequence(predicted_list[i]) print(f'input: {input}')
trainIters(args.train, reverse, n_iteration, learning_rate, batch_size, n_layers, hidden_size, print_every, save_every, dropout, loadFilename=args.load) elif args.test: n_layers, hidden_size, reverse = parseFilename(args.test, True) runTest(n_layers, hidden_size, reverse, args.test, beam_size, inp, args.corpus) if __name__ == '__main__': # args = parse() # run(args) trainIters('data/touchpal_done.txt', False, n_iteration=50000, learning_rate=0.0001, batch_size=64, n_layers=1, hidden_size=512, print_every=500, save_every=1000, dropout=0.1)
# Put both halves of the seq2seq model in training mode.
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    # Restore optimizer state saved alongside the model checkpoint.
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
           decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers,
           save_dir, n_iteration, batch_size, print_every, save_every, clip,
           corpus_name, loadFilename)

#eval model
encoder.eval()
decoder.eval()

# Initialize the search module
searcher = GreedySearchDecoder(encoder, decoder)

# Start chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher, voc)
print("*************")
# Build vocabularies, train an attention translator, inspect and save it.
# NOTE(review): `genetate_lang` is the helper's actual (misspelled) name —
# do not "fix" the spelling here without renaming the definition too.
eng, fra = genetate_lang(pairs)
print("eng vocab size: ", eng.num_words)
print("fra vocab size: ", fra.num_words)

hidden_size = 256
input_lang = 'eng'
output_lang = 'fra'
encoder1 = EncoderRNN(eng.num_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, fra.num_words, dropout_p=0.1).to(device)

from train import trainIters
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

print("Evaluating randomly")
evaluateRandomly(encoder1, attn_decoder1)

# Dump both modules and their state-dict keys for inspection.
print("model description")
print("encoder model: \n\n", encoder1, '\n')
print("The state dict keys: \n\n", encoder1.state_dict().keys())
print(" ")
print("attn_decoder model: \n\n", attn_decoder1, '\n')
print("The state dict keys: \n\n", attn_decoder1.state_dict().keys())

# Save the encoder weights and trigger a (Colab `files`) download.
print("Saving checkpoints")
torch.save(encoder1.state_dict(), 'checkpoint_enc.pth')
files.download('checkpoint_enc.pth')