print_loss_total += loss plot_loss_total += loss if epoch % config.PRINT_STEP == 0: print_loss_avg = print_loss_total / config.PRINT_STEP print_loss_total = 0 print_summary = '%s (%d %d%%) %.4f' % (time_since( start, epoch / config.NUM_ITER), epoch, epoch / config.NUM_ITER * 100, print_loss_avg) print(print_summary) if epoch % config.CHECKPOINT_STEP == 0: encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth") decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth") torch.save(encoder.state_dict(), encoder_path) torch.save(decoder.state_dict(), decoder_path) """ def evaluate(sentence, max_length=MAX_LENGTH): input_index, output_index = val_dataloader.indexes_from_sentence(sentence) input_variable = Variable(torch.LongTensor(input_index)) output_variable = Variable(torch.LongTensor(output_index)) input_variable = variable_from_sentence(chinese, sentence) input_length = input_variable.size()[0] # Run through encoder encoder_hidden = encoder.init_hidden() encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden) # Create starting vectors for decoder decoder_input = Variable(torch.LongTensor([[SOS_token]])) # SOS decoder_context = Variable(torch.zeros(1, decoder.hidden_size))
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS,
          save_model=SAVE_MODEL, encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """Run the full training loop for the box-score summarization model.

    Builds the record embedding, an encoder chosen by ``encoder_style``
    ('LIN' / 'BiLSTM' / anything else -> plain RNN) and an attention
    decoder, then optimizes both jointly with Adagrad over ``iter_time``
    epochs of mini-batches drawn from ``train_set``.  Progress is printed
    every ``get_loss`` iterations and checkpoints are written every
    ``save_model`` epochs.  Returns the trained ``(encoder, decoder)``.
    """
    # Wall-clock reference for the periodic progress report.
    start_time = time.time()

    # Record embedding over the three record vocabularies (type/entity/value).
    embedding = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                             langs['rm'].n_words, embedding_size)
    embedding.init_weights()

    # Encoder choice is a lookup; any unrecognized style falls back to the RNN.
    encoder_cls = {'LIN': EncoderLIN, 'BiLSTM': EncoderBiLSTM}
    encoder = encoder_cls.get(encoder_style, EncoderRNN)(embedding_size, embedding)
    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        embedding.cuda()
        encoder.cuda()
        decoder.cuda()

    # Optionally resume from previously saved weights (encoder path, decoder path).
    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # One optimizer over the union of encoder and decoder parameters.
    loss_optimizer = optim.Adagrad(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=learning_rate, lr_decay=0, weight_decay=0)
    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epoch in range(1, iter_time + 1):
        print("Epoch #%d" % (epoch))

        for batch in data_iter(train_set, batch_size=batch_size):
            iteration += 1
            data, idx_data = get_batch(batch)
            rt, re, rm, summary = idx_data

            # Pad each field of the batch to a uniform length, then wrap as
            # non-differentiable index tensors.
            rt, re, rm, summary = (addpaddings(x) for x in (rt, re, rm, summary))
            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            # Average per-sentence loss for this batch (also steps the optimizer).
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Periodic progress report over the last `get_loss` iterations.
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start_time), iteration, total_loss / get_loss))
                total_loss = 0

        # Checkpoint every `save_model` epochs, tagged with the iteration count.
        if epoch % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
1, dropout_p=0.1) if use_cuda: encoder1 = encoder1.cuda() attn_decoder1 = attn_decoder1.cuda() logger.info('train start. ') # 训练过程,指定迭代次数,此处为迭代75000次,每5000次打印中间信息 trainIters(input_lang, output_lang, pairs, encoder1, attn_decoder1, 75000, print_every=5000) logger.info('train end. ') # 保存编码器和解码器网络状态 torch.save(encoder1.state_dict(), open('./data/%s_%s_encoder1.stat' % (input, output), 'wb')) torch.save(attn_decoder1.state_dict(), open('./data/%s_%s_attn_decoder1.stat' % (input, output), 'wb')) logger.info('stat saved.') # 保存整个网络 torch.save(encoder1, open('./data/%s_%s_encoder1.model' % (input, output), 'wb')) torch.save(attn_decoder1, open('./data/%s_%s_attn_decoder1.model' % (input, output), 'wb')) logger.info('model saved.')
max_length_eval) print( '{:s} ({:d} {:.0f}% finished) TrainLoss: {:.4f}, ValAccRetrieval: {:.1f}, ValAccGeneralize: {:.1f}' .format( timeSince(start, float(episode) / float(num_episodes)), episode, float(episode) / float(num_episodes) * 100., avg_train_loss / counter, acc_val_retrieval, acc_val_gen)) avg_train_loss = 0. counter = 0 if episode % 1000 == 0 or episode == num_episodes: state = { 'encoder_state_dict': encoder.state_dict(), 'decoder_state_dict': decoder.state_dict(), 'input_lang': input_lang, 'output_lang': output_lang, 'episodes_validation': samples_val, 'episode_type': episode_type, 'emb_size': emb_size, 'dropout': dropout_p, 'nlayers': nlayers, 'episode': episode, 'disable_memory': disable_memory, 'disable_recon_loss': disable_recon_loss, 'use_attention': use_attention, 'max_length_eval': max_length_eval, 'num_episodes': num_episodes, 'args': args }
def main(args):
    """Train a CNN encoder + attention RNN decoder image-captioning model.

    Loads the pickled vocabulary, builds an augmenting/normalizing image
    pipeline, and runs ``args.num_epochs`` epochs of teacher-forced
    training, logging every ``args.log_step`` batches and checkpointing
    both networks every ``args.save_step`` batches into ``args.model_path``.

    Args:
        args: parsed CLI namespace providing model_path, crop_size,
            vocab_path, image_dir, caption_path, batch_size, num_workers,
            embed_size, hidden_size, num_layers, learning_rate, num_epochs,
            log_step and save_step.
    """
    # Ensure the checkpoint directory exists.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Augmentation + ImageNet mean/std normalization for the CNN encoder.
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    encoder = EncoderCNN(args.embed_size)
    decoder = AttnDecoderRNN(args.embed_size, args.hidden_size,
                             len(vocab), args.num_layers)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    # Only the decoder plus the encoder's trainable head (linear + batchnorm)
    # are optimized; the CNN backbone stays frozen.
    params = (list(decoder.parameters()) + list(encoder.linear.parameters())
              + list(encoder.bn.parameters()))
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(data_loader)
    # NOTE(review): the hidden state is built once and reused for every batch —
    # assumes init_hidden() returns fresh zeros independent of batch; confirm,
    # otherwise move this inside the batch loop.
    decoder_hidden = decoder.init_hidden()
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            # BUG FIX: images were previously wrapped with volatile=True,
            # which turns off autograd and propagates through features and
            # outputs to the loss, so backward() could not train the encoder
            # head. volatile is for inference only; training inputs must
            # track gradients.
            images = cuda_variable(images)
            captions = cuda_variable(captions)
            # Flatten padded captions to the packed target sequence expected
            # by CrossEntropyLoss.
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Clear gradients on both networks before the new batch.
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(captions, decoder_hidden, features, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      % (epoch, args.num_epochs, i, total_step,
                         loss.data[0], np.exp(loss.data[0])))

            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
) # detach from history as input loss += criterion(decoder_output, target_tensor[di]) if decoder_input.item() == dataset.end_of_string_token_idx: break loss.backward() encoder_optimizer.step() decoder_optimizer.step() if i % 1000 == 0: torch.save( { "encoder": encoder.state_dict(), "decoder": decoder.state_dict(), }, f"{PATH}/{i}.pt", ) loss_total += loss.item() / target_length progress.set_description(loss_total) torch.save( { "encoder": encoder.state_dict(), "decoder": decoder.state_dict(), }, f"{PATH}/final.pt", )
loss = train( title, text, new_batch, words_padding_mask,target, embedder, encoder, decoder ,embedder_optimizer, encoder_optimizer, decoder_optimizer, criterion) print_loss_total += loss plot_loss_total += loss print_loss += loss print(print_loss ) print_loss = 0 if epoch == 0: continue if epoch%print_every == 0: print_loss_avg = print_loss_total / print_every loss_list.append(print_loss_avg) print_summary = '%s (%d %d%%) %.4f' % (time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg) print(print_summary) if epoch % plot_every == 0: plot_loss_avg = plot_loss_total / plot_every plot_losses.append(plot_loss_avg) plot_loss_total = 0 if epoch %5 == 0: torch.save({'embedder':embedder,'encoder': encoder, 'decoder': decoder}, str(epoch)+'model.pkl') if epoch %5 == 0: torch.save({'embedder':embedder.state_dict(),'encoder': encoder.state_dict(), 'decoder': decoder.state_dict()},'check/checkpoint.pkl')