def model_initialization(encoder_style, decoder_style, langs, embedding_size,
                         learning_rate, use_model):
    """Build the encoder, decoder, optimizer and training routine.

    Reads the module-level ``use_cuda`` flag to decide whether the modules
    are moved to the GPU and how the optimizer checkpoint is deserialized.

    Args:
        encoder_style: Encoder selector. 'LIN', 'BiLSTM', 'BiLSTMMax',
            'HierarchicalBiLSTM' and 'HierarchicalLIN' pick the matching
            class; any other value falls back to HierarchicalEncoderRNN.
        decoder_style: 'HierarchicalRNN' selects HierarchicalDecoder together
            with Hierarchical_seq_train; any other value selects
            AttnDecoderRNN together with Plain_seq_train.
        langs: Mapping with 'rt', 're', 'rm' and 'summary' entries, each
            exposing an ``n_words`` attribute.
        embedding_size: Size handed to the embedding, encoder and decoder.
        learning_rate: Learning rate of the Adagrad optimizer.
        use_model: ``None``, or an indexable holding three paths in the
            order encoder, decoder, optimizer state.

    Returns:
        Tuple ``(encoder, decoder, loss_optimizer, train_func)``.
    """
    embedding = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                             langs['rm'].n_words, embedding_size)
    embedding.init_weights()

    # Flat encoders take positional arguments; the hierarchical ones share a
    # single keyword-argument set.
    # TODO: Set up a choice for hierarchical or not
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, embedding)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, embedding)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, embedding)
    else:
        hier_kwargs = {"hidden_size": embedding_size, "local_embed": embedding}
        if encoder_style == 'HierarchicalBiLSTM':
            encoder = HierarchicalBiLSTM(**hier_kwargs)
        elif encoder_style == 'HierarchicalLIN':
            encoder = HierarchicalLIN(**hier_kwargs)
        else:
            # Default: hierarchical encoder RNN (both global and local)
            encoder = HierarchicalEncoderRNN(**hier_kwargs)

    # The decoder choice also fixes which training function is returned.
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size,
                                      langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        for module in (embedding, encoder, decoder):
            module.cuda()

    trainable = list(encoder.parameters()) + list(decoder.parameters())
    loss_optimizer = optim.Adagrad(trainable, lr=learning_rate,
                                   lr_decay=0, weight_decay=0)

    if use_model is None:
        return encoder, decoder, loss_optimizer, train_func

    # Restore a previously saved encoder / decoder / optimizer triple.
    encoder = load_model(encoder, use_model[0])
    decoder = load_model(decoder, use_model[1])
    if use_cuda:
        optim_state = torch.load(use_model[2])
    else:
        # Keep every storage on the CPU when CUDA is not in use.
        optim_state = torch.load(use_model[2],
                                 map_location=lambda storage, loc: storage)
    loss_optimizer.load_state_dict(optim_state)

    return encoder, decoder, loss_optimizer, train_func
def generate_text(model, data_file, output):
    """Generate one summary per example and write them to ``output``.

    Relies on the module-level names ``embedding_size``, ``emb``, ``langs``
    and ``train_lang``; they are not parameters of this function.

    Args:
        model: Mapping with 'encoder_path' and 'decoder_path' checkpoint
            paths. The encoder class is chosen from substrings of the
            encoder path ('RNN', then 'LSTM', otherwise the linear encoder).
        data_file: Mapping with 'data_dir' and 'data_name'; the file
            ``<data_dir>/<data_name>.json`` must exist and hold valid JSON.
        output: Path of the text file to write, one generated line per row.

    Raises:
        ValueError: If the JSON data file decodes to ``None``.
    """
    encoder_src = model['encoder_path']
    decoder_src = model['decoder_path']

    # Choose model architecture
    if 'RNN' in encoder_src:
        encoder = EncoderRNN(embedding_size, emb)
        encoder_style = 'RNN'
    elif 'LSTM' in encoder_src:
        encoder = EncoderBiLSTM(embedding_size, emb)
        # Use the same 'BiLSTM' tag the training code uses for EncoderBiLSTM
        # (previously 'LSTM', which no other code path in this file emits).
        # NOTE(review): confirm against the tags `evaluate` checks for.
        encoder_style = 'BiLSTM'
    else:
        encoder = EncoderLIN(embedding_size, emb)
        encoder_style = 'LIN'

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    encoder = load_model(encoder, encoder_src)
    decoder = load_model(decoder, decoder_src)

    # Sanity-check that the data file is present and parseable before the
    # actual load below; the parsed content itself is not used further.
    data_path = os.path.join(data_file['data_dir'],
                             data_file['data_name'] + '.json')
    with open(data_path, encoding='utf-8') as f:
        valuation_data = json.load(f)
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if valuation_data is None:
        raise ValueError('No data found in {}'.format(data_path))

    valid_data, _ = loaddata(data_file['data_dir'], data_file['data_name'])
    data_length = len(valid_data)
    valid_data = data2index(valid_data, train_lang)
    text_generator = evaluate(encoder, decoder, valid_data,
                              train_lang['summary'], embedding_size,
                              encoder_style=encoder_style,
                              iter_time=data_length, beam_size=1,
                              verbose=False)

    print('The text generation begin\n', flush=True)
    with open(output, 'w', encoding='utf-8') as f:
        for idx, line in enumerate(text_generator):
            print('Summery generated, No{}'.format(idx + 1))
            f.write(line + '\n')
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS,
          save_model=SAVE_MODEL, encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """Train a flat encoder with an attention decoder.

    Reads the module-level ``use_cuda`` flag and ``OUTPUT_FILE`` prefix.

    Args:
        train_set: Training data handed to ``data_iter``.
        langs: Mapping with 'rt', 're', 'rm' and 'summary' entries, each
            exposing an ``n_words`` attribute.
        embedding_size: Size handed to the embedding, encoder and decoder.
        learning_rate: Learning rate of the Adagrad optimizer.
        iter_time: Number of epochs to run.
        batch_size: Batch size handed to ``data_iter``.
        get_loss: The running loss is printed and reset every ``get_loss``
            iterations.
        save_model: Encoder and decoder are saved on every epoch whose
            number is a multiple of ``save_model``.
        encoder_style: 'LIN' or 'BiLSTM'; any other value selects
            EncoderRNN.
        use_model: ``None``, or an indexable whose first two items are the
            encoder and decoder checkpoint paths.

    Returns:
        Tuple ``(encoder, decoder)``.
    """
    started_at = time.time()

    embedding = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                             langs['rm'].n_words, embedding_size)
    embedding.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, embedding)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, embedding)
    else:
        encoder = EncoderRNN(embedding_size, embedding)
    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        for module in (embedding, encoder, decoder):
            module.cuda()

    # Checkpoints are restored before the optimizer is built, so the
    # optimizer always sees the parameters of the returned modules.
    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    trainable = list(encoder.parameters()) + list(decoder.parameters())
    loss_optimizer = optim.Adagrad(trainable, lr=learning_rate,
                                   lr_decay=0, weight_decay=0)
    criterion = nn.NLLLoss()

    running_loss = 0
    iteration = 0

    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        for dt in data_iter(train_set, batch_size=batch_size):
            iteration += 1
            _, idx_data = get_batch(dt)
            rt_idx, re_idx, rm_idx, summary_idx = idx_data

            # Pad all four fields first, then wrap each one as a LongTensor.
            padded = [addpaddings(field)
                      for field in (rt_idx, re_idx, rm_idx, summary_idx)]
            batch = [Variable(torch.LongTensor(field), requires_grad=False)
                     for field in padded]
            if use_cuda:
                batch = [tensor.cuda() for tensor in batch]
            rt_var, re_var, rm_var, summary_var = batch

            # The optimizer is handed to sentenceloss; presumably the
            # parameter update happens in there (verify in sentenceloss).
            running_loss += sentenceloss(rt_var, re_var, rm_var, summary_var,
                                         encoder, decoder, loss_optimizer,
                                         criterion, embedding_size,
                                         encoder_style)

            # Report the mean loss over the last `get_loss` iterations.
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(started_at), iteration, running_loss / get_loss))
                running_loss = 0

        # NOTE(review): checkpointing is placed at epoch level; the original
        # indentation of this block was lost, confirm this is intended.
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
embedding_size = 600 langs = train_lang emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words, langs['rm'].n_words, embedding_size) emb.init_weights() encoder_src = './models/long4_encoder_2120' decoder_src = './models/long4_decoder_2120' encoder_style = None if 'RNN' == ENCODER_STYLE: encoder = EncoderRNN(embedding_size, emb) encoder_style = 'RNN' elif 'LSTM' == ENCODER_STYLE: encoder = EncoderBiLSTM(embedding_size, emb) encoder_style = 'BiLSTM' else: encoder = EncoderLIN(embedding_size, emb) encoder_style = 'LIN' decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words) encoder = load_model(encoder, encoder_src) decoder = load_model(decoder, decoder_src) valid_data, _ = loaddata(file_loc, 'valid') data_length = len(valid_data) valid_data = data2index(valid_data, train_lang) text_generator = evaluate(encoder, decoder,
def train(train_set, langs, embedding_size=EMBEDDING_SIZE, learning_rate=LR,
          batch_size=BATCH_SIZE, get_loss=GET_LOSS, grad_clip=GRAD_CLIP,
          encoder_style=ENCODER_STYLE, decoder_style=DECODER_STYLE,
          to_copy=TOCOPY, epoch_time=EPOCH_TIME, layer_depth=LAYER_DEPTH,
          max_length=MAX_LENGTH, max_sentence=MAX_SENTENCES,
          save_model=SAVE_MODEL, output_file=OUTPUT_FILE, iter_num=iterNum,
          pretrain=PRETRAIN):
    """Train a Seq2Seq model and periodically checkpoint it.

    Reads the module-level ``use_cuda`` flag.

    Args:
        train_set: Training data handed to ``data_iter``.
        langs: Mapping with 'rt', 're', 'rm' and 'summary' entries, each
            exposing an ``n_words`` attribute.
        embedding_size: Size handed to the embedding, encoder and decoder.
        learning_rate: Learning rate of the Adagrad optimizer.
        batch_size: Batch size handed to ``data_iter``.
        get_loss: The running loss is printed and reset every ``get_loss``
            iterations.
        grad_clip: Maximum gradient norm used for clipping.
        encoder_style: 'LIN', 'BiLSTM', 'BiLSTMMax', 'HierarchicalBiLSTM' or
            'HierarchicalLIN'; any other value selects HierarchicalRNN.
        decoder_style: 'HierarchicalRNN' selects HierarchicalDecoder with
            Hierarchical_seq_train; any other value selects AttnDecoderRNN
            with Plain_seq_train.
        to_copy: Forwarded to the decoder as ``copy``.
        epoch_time: Number of epochs to run.
        layer_depth: Forwarded as ``n_layers`` to the encoders and decoders
            that accept it.
        max_length: Not used by this function.
        max_sentence: Not used by this function.
        save_model: Encoder, decoder and optimizer state are saved under
            ``models/`` on every epoch whose number is a multiple of
            ``save_model``.
        output_file: File-name prefix of the saved checkpoints.
        iter_num: Iteration suffix of the pretrained checkpoint to load.
        pretrain: Name prefix of the pretrained checkpoint to load. Loading
            only happens when both ``pretrain`` and ``iter_num`` are set.

    Returns:
        Tuple ``(model.encoder, model.decoder)``.
    """
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb, n_layers=layer_depth)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPool(embedding_size, emb,
                                       n_layers=layer_depth)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {
            "hidden_size": embedding_size,
            "local_embed": emb,
            "n_layers": layer_depth,
        }
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # initialize hierarchical encoder rnn, (both global and local)
        encoder_args = {
            "hidden_size": embedding_size,
            "local_embed": emb,
            "n_layers": layer_depth,
        }
        encoder = HierarchicalRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size,
                                      langs['summary'].n_words,
                                      n_layers=layer_depth, copy=to_copy)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words,
                                 n_layers=layer_depth, copy=to_copy)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=learning_rate, lr_decay=0, weight_decay=0)

    # Load pre-train model
    use_model = None
    if pretrain is not None and iter_num is not None:
        use_model = [
            './models/' + pretrain + '_' + s + '_' + str(iter_num)
            for s in ['encoder', 'decoder', 'optim']
        ]

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        if use_cuda:
            optim_state = torch.load(use_model[2])
        else:
            # Remap storages to the CPU so an optimizer state saved on a GPU
            # can still be restored on a CPU-only run.
            optim_state = torch.load(
                use_model[2], map_location=lambda storage, loc: storage)
        loss_optimizer.load_state_dict(optim_state)
        print("Load Pretrain Model {}".format(use_model))
    else:
        print("Not use Pretrain Model")

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size,
                    langs)

    # `clip_grad_norm_` supersedes the deprecated `clip_grad_norm`; fall back
    # to the old name on torch releases that do not have the new one.
    clip_gradients = getattr(torch.nn.utils, 'clip_grad_norm_', None)
    if clip_gradients is None:
        clip_gradients = torch.nn.utils.clip_grad_norm
    # The parameter set does not change during training, so build it once.
    clip_params = (list(model.encoder.parameters())
                   + list(model.decoder.parameters()))

    total_loss = 0
    iteration = 0
    for epo in range(1, epoch_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings, if the model is hierarchical then pad
            # between sentences. If the batch_size is 1 then we don't need
            # to do sentence padding.
            if decoder_style == 'HierarchicalRNN' and batch_size != 1:
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary),
                               requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(), rm.cuda(),
                                       summary.cuda())

            # Zero the gradient
            loss_optimizer.zero_grad()
            model.train()

            # calculate loss of "a batch of input sequence"
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            clip_gradients(clip_params, grad_clip)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            # Feature-test instead of parsing torch.__version__: `.item()`
            # exists on newer releases, older ones expose `.data[0]`.
            if hasattr(loss, 'item'):
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        # NOTE(review): checkpointing is placed at epoch level; the original
        # indentation of this block was lost, confirm this is intended.
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(output_file, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(output_file, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(output_file, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS,
          save_model=SAVE_MODEL, encoder_style=ENCODER_STYLE,
          decoder_style=DECODER_STYLE, use_model=USE_MODEL):
    """Train a Seq2Seq model and periodically checkpoint it.

    Reads the module-level ``use_cuda`` flag, the ``GRAD_CLIP`` gradient
    norm limit and the ``OUTPUT_FILE`` checkpoint prefix.

    Args:
        train_set: Training data handed to ``data_iter``.
        langs: Mapping with 'rt', 're', 'rm' and 'summary' entries, each
            exposing an ``n_words`` attribute.
        embedding_size: Size handed to the embedding, encoder and decoder.
        learning_rate: Learning rate of the Adagrad optimizer.
        iter_time: Number of epochs to run.
        batch_size: Batch size handed to ``data_iter``.
        get_loss: The running loss is printed and reset every ``get_loss``
            iterations.
        save_model: Encoder, decoder and optimizer state are saved under
            ``models/`` on every epoch whose number is a multiple of
            ``save_model``.
        encoder_style: 'LIN', 'BiLSTM', 'BiLSTMMax', 'HierarchicalBiLSTM' or
            'HierarchicalLIN'; any other value selects
            HierarchicalEncoderRNN.
        decoder_style: 'HierarchicalRNN' selects HierarchicalDecoder with
            Hierarchical_seq_train; any other value selects AttnDecoderRNN
            with Plain_seq_train.
        use_model: ``None``, or an indexable holding three paths in the
            order encoder, decoder, optimizer state.

    Returns:
        Tuple ``(model.encoder, model.decoder)``.
    """
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    # TODO:: Set up a choice for hierarchical or not
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, emb)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # initialize hierarchical encoder rnn, (both global and local)
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalEncoderRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size,
                                      langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=learning_rate, lr_decay=0, weight_decay=0)

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        if use_cuda:
            optim_state = torch.load(use_model[2])
        else:
            # Remap storages to the CPU so an optimizer state saved on a GPU
            # can still be restored on a CPU-only run.
            optim_state = torch.load(
                use_model[2], map_location=lambda storage, loc: storage)
        loss_optimizer.load_state_dict(optim_state)

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size,
                    langs)

    # `clip_grad_norm_` supersedes the deprecated `clip_grad_norm`; fall back
    # to the old name on torch releases that do not have the new one.
    clip_gradients = getattr(torch.nn.utils, 'clip_grad_norm_', None)
    if clip_gradients is None:
        clip_gradients = torch.nn.utils.clip_grad_norm
    # The parameter set does not change during training, so build it once.
    clip_params = (list(model.encoder.parameters())
                   + list(model.decoder.parameters()))

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings, if the model is hierarchical then pad
            # between sentences
            if decoder_style == 'HierarchicalRNN':
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary),
                               requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(), rm.cuda(),
                                       summary.cuda())

            # Zero the gradient
            loss_optimizer.zero_grad()
            model.train()

            # calculate loss of "a batch of input sequence"
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            clip_gradients(clip_params, GRAD_CLIP)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            # Feature-test instead of parsing torch.__version__: `.item()`
            # exists on newer releases, older ones expose `.data[0]`.
            if hasattr(loss, 'item'):
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        # NOTE(review): checkpointing is placed at epoch level; the original
        # indentation of this block was lost, confirm this is intended.
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder