Example #1
def trainIters(encoder, decoder, n_epochs, validation_pairs, lang1, lang2,
               search, title, max_length_generation, print_every, val_every,
               learning_rate):
    start = time.time()
    count, print_loss_total = 0, 0
    encoder_optimizer = torch.optim.Adadelta(encoder.parameters(),
                                             lr=learning_rate)
    decoder_optimizer = torch.optim.Adadelta(decoder.parameters(),
                                             lr=learning_rate)
    criterion = nn.NLLLoss(
        ignore_index=PAD_token)  # this ignores the padded token.

    for epoch in range(n_epochs):
        for step, (sent1s, sent1_lengths, sent2s,
                   sent2_lengths) in enumerate(train_loader):
            encoder.train()
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device)
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(
                device), sent2_lengths.to(device)

            loss = train(sent1_batch, sent1_length_batch, sent2_batch,
                         sent2_length_batch, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)

            print_loss_total += loss
            count += 1

            if (step + 1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' %
                      (timeSince(start, step / n_epochs), step,
                       step / n_epochs * 100, print_loss_avg))
                print("Memory allocated (mb): ",
                      torch.cuda.memory_allocated(device) / (1e6))

                if (step + 1) % val_every == 0:
                    with torch.no_grad():
                        bleu_score = test_model(
                            encoder,
                            decoder,
                            search,
                            validation_pairs,
                            lang2,
                            max_length=max_length_generation)
                    # returns bleu score
                    print("VALIDATION BLEU SCORE: " + str(bleu_score))
                    torch.save(encoder.state_dict(),
                               "Attention_Vish_encoder_latest")
                    torch.save(decoder.state_dict(),
                               "Attention_Vish_decoder_latest")

            del sent1s, sent1_lengths, sent2s, sent2_lengths, sent1_batch, sent2_batch, sent1_length_batch, sent2_length_batch
            gc.collect()
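
# The train() helper called by the loop above is not defined in this example.
# The following is a minimal sketch of one plausible implementation, mirroring
# the inlined teacher-forcing loop shown in Example #6 further down; SOS_token,
# device and the encoder/decoder call signatures are assumptions, not the
# original code.
def train(src_batch, src_lengths, trg_batch, trg_lengths,
          encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Encode the whole source batch at once.
    encoder_outputs, encoder_hidden = encoder(src_batch, src_lengths)

    # Start every sequence from SOS and reuse the encoder's final hidden state.
    decoder_input = torch.full((src_batch.size(0), 1), SOS_token,
                               dtype=torch.long, device=device)
    decoder_hidden = encoder_hidden

    max_trg_len = int(trg_lengths.max().item())
    loss = 0
    # Teacher forcing: always feed the gold target token as the next input.
    for t in range(max_trg_len):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden,
                                                 encoder_outputs)
        loss += criterion(decoder_output, trg_batch[:, t])
        decoder_input = trg_batch[:, t].unsqueeze(1)

    loss = loss / max_trg_len
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item()
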
def trainIters(encoder, decoder, n_epochs, pairs, validation_pairs, lang1, lang2, search, title, max_length_generation, val_every=1000, print_every=1000, plot_every=1000, learning_rate=0.0001):
    """
    lang1 is the Lang object for language 1.
    lang2 is the Lang object for language 2.
    max_length_generation is the maximum length of a generated translation.
    """
    start = time.time()
    #plot_losses, val_losses = [], []
    count, print_loss_total, plot_loss_total, val_loss_total, plot_val_loss = 0, 0, 0, 0, 0 
    encoder_optimizer = torch.optim.Adadelta(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.Adadelta(decoder.parameters(), lr=learning_rate)
    #encoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(encoder_optimizer, mode="min")
    #decoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(decoder_optimizer, mode="min")

    criterion = nn.NLLLoss(ignore_index=PAD_token) # this ignores the padded token. 
    for epoch in range(n_epochs):
        for step, (sent1s, sent1_lengths, sent2s, sent2_lengths) in enumerate(train_loader):
            encoder.train()
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device) 
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(device), sent2_lengths.to(device)
            
            loss = train(sent1_batch, sent1_length_batch, sent2_batch, sent2_length_batch, 
                         encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            
            print_loss_total += loss
            count += 1
          
            if (step+1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' % (timeSince(start, step / n_epochs),
                                                         step, step / n_epochs * 100, print_loss_avg))
                print("Memory allocated: ", torch.cuda.memory_allocated(device)/(1e6))

                if (step+1) % val_every == 0:
                    with torch.no_grad():
                        v_loss = test_model(encoder, decoder, search, validation_pairs, lang2, max_length=max_length_generation)
                    # returns bleu score
                    print("VALIDATION BLEU SCORE: "+str(v_loss))
                    #val_losses.append(v_loss.item())
                    current_time = time.strftime("%Y-%m-%d-%H-%M-%S")
                    torch.save(encoder.state_dict(), "Attention_Vish_encoder_" + current_time)
                    torch.save(decoder.state_dict(), "Attention_Vish_decoder_" + current_time)
                    #pickle.dump(val_losses, open("val_losses_1.2_2nd_train", "wb"))
                    
                           
            del sent1s, sent1_lengths, sent2s, sent2_lengths, sent1_batch, sent2_batch, sent1_length_batch, sent2_length_batch
            gc.collect() 
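
# test_model() is referenced throughout these examples but not shown. A
# simplified greedy-decoding version might look like the sketch below; the
# sentence_to_tensor helper, SOS_token, EOS_token, device, the Lang.index2word
# mapping and the use of NLTK's corpus BLEU are all assumptions. The `search`
# argument (greedy vs. beam) is ignored here for brevity.
from nltk.translate.bleu_score import corpus_bleu

def test_model(encoder, decoder, search, validation_pairs, lang2, max_length=30):
    encoder.eval()
    decoder.eval()
    hypotheses, references = [], []
    for src_sentence, trg_sentence in validation_pairs:
        # assumed helper: sentence -> LongTensor of word ids, shape (1, src_len)
        src = sentence_to_tensor(src_sentence).to(device)
        lengths = torch.tensor([src.size(1)])
        encoder_outputs, hidden = encoder(src, lengths)
        decoder_input = torch.tensor([[SOS_token]], device=device)
        tokens = []
        for _ in range(max_length):
            output, hidden = decoder(decoder_input, hidden, encoder_outputs)
            top1 = output.argmax(dim=-1).item()
            if top1 == EOS_token:
                break
            tokens.append(lang2.index2word[top1])
            decoder_input = torch.tensor([[top1]], device=device)
        hypotheses.append(tokens)
        references.append([trg_sentence.split()])
    return corpus_bleu(references, hypotheses)
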
def trainIters(encoder, decoder, n_epochs, validation_pairs, lang1, lang2, search, title, max_length_generation, print_every, val_every, learning_rate):
    start = time.time()
    count, print_loss_total = 0, 0
    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=5*learning_rate)
    criterion = nn.NLLLoss(ignore_index=PAD_token) # this ignores the padded token. 

    for epoch in range(n_epochs):
        for step, (sent1s, sent1_lengths, sent2s, sent2_lengths) in enumerate(train_loader):
            encoder.train()
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device) 
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(device), sent2_lengths.to(device)
            
            loss = train(sent1_batch, sent1_length_batch, sent2_batch, sent2_length_batch, 
                         encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            
            print_loss_total += loss
            count += 1
          
            if (step+1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' % (timeSince(start, step / n_epochs),
                                                         step, step / n_epochs * 100, print_loss_avg))
                print("Memory allocated (mb): ", torch.cuda.memory_allocated(device)/(1e6))

                if (step+1) % val_every == 0:
                    with torch.no_grad():
                        bleu_score = test_model(encoder, decoder, search, validation_pairs, lang2, max_length=max_length_generation)
                    # returns bleu score
                    print("VALIDATION BLEU SCORE: "+str(bleu_score))
                    torch.save(encoder.state_dict(), "Attention_Vish_encoder_latest")
                    torch.save(decoder.state_dict(), "Attention_Vish_decoder_latest")
                    
                           
            del sent1s, sent1_lengths, sent2s, sent2_lengths, sent1_batch, sent2_batch, sent1_length_batch, sent2_length_batch
            gc.collect() 
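
# A hypothetical invocation of the Adam variant above. The EncoderRNN /
# AttnDecoderRNN classes, the Lang objects and every hyperparameter value here
# are illustrative assumptions, not settings taken from the original notebook.
encoder = EncoderRNN(input_lang.n_words, hidden_size=256).to(device)
decoder = AttnDecoderRNN(hidden_size=256, output_size=output_lang.n_words).to(device)
trainIters(encoder, decoder,
           n_epochs=10,
           validation_pairs=val_pairs,
           lang1=input_lang,
           lang2=output_lang,
           search="greedy",
           title="attention_nmt",
           max_length_generation=30,
           print_every=100,
           val_every=1000,
           learning_rate=3e-4)
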
Example #4
def trainIters(encoder,
               decoder,
               n_epochs,
               validation_pairs,
               pairs,
               lang1,
               lang2,
               max_length,
               max_length_generation,
               title,
               print_every=5000,
               plot_every=5000,
               learning_rate=3e-4,
               search="beam"):
    """
    lang1 is the Lang object for language 1.
    lang2 is the Lang object for language 2.
    n_iters is the number of training pairs per epoch to train on.
    """

    start = time.time()
    training_pairs = pairs
    n_iters = len(pairs)
    plot_losses, val_losses = [], []
    count, print_loss_total, plot_loss_total, val_loss_total, plot_val_loss = 0, 0, 0, 0, 0
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=learning_rate)

    criterion = nn.NLLLoss(ignore_index=PAD_token)
    plot_loss = []
    val_loss = []

    for i in range(n_epochs):
        plot_loss = []
        val_loss = []
        # framing it as a categorical loss function.
        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]
            input_length = input_tensor.size(0)
            if target_tensor.size(0) < 3:
                continue
            loss_value, count = train(input_tensor, target_tensor, encoder,
                                      decoder, encoder_optimizer,
                                      decoder_optimizer, criterion, max_length,
                                      count)
            print_loss_total += loss_value
            plot_loss_total += loss_value

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' %
                      (timeSince(start, iter / n_epochs), iter,
                       iter / n_epochs * 100, print_loss_avg))
                plot_loss.append(print_loss_avg)
                plot_loss_total = 0
                with torch.no_grad():
                    v_loss = test_model(encoder,
                                        decoder,
                                        search,
                                        validation_pairs,
                                        lang2,
                                        max_length=None)
                # returns bleu score
                print("VALIDATION BLEU SCORE: " + str(v_loss))
                val_loss.append(v_loss)
                save_model(encoder, decoder, title)
        plot_losses.append(plot_loss)
        val_losses.append(val_loss)
        save_model(encoder, decoder, title)
        make_graph(encoder, decoder, val_losses, plot_losses, title)
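
# save_model() and make_graph() are used above but not defined in these
# examples. Plausible minimal versions are sketched below; the file names and
# the use of matplotlib are assumptions.
import matplotlib.pyplot as plt

def save_model(encoder, decoder, title):
    # Persist the current weights under the experiment title.
    torch.save(encoder.state_dict(), title + "_encoder.pt")
    torch.save(decoder.state_dict(), title + "_decoder.pt")

def make_graph(encoder, decoder, val_losses, plot_losses, title):
    # Flatten the per-epoch lists and plot training loss next to validation BLEU.
    train_curve = [x for epoch in plot_losses for x in epoch]
    val_curve = [x for epoch in val_losses for x in epoch]
    plt.figure()
    plt.plot(train_curve, label="train loss")
    plt.plot(val_curve, label="validation BLEU")
    plt.xlabel("logging step")
    plt.legend()
    plt.savefig(title + "_curves.png")
    plt.close()
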
def trainIters(encoder, decoder, n_epochs, pairs, validation_pairs, lang1, lang2, search, title, max_length_generation,  print_every=1000, plot_every=1000, learning_rate=0.0001):
    """
    lang1 is the Lang object for language 1.
    lang2 is the Lang object for language 2.
    max_length_generation is the maximum length of a generated translation.
    """
    start = time.time()
    plot_losses = []
    val_losses = [] 
    count = 0 
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    val_loss_total = 0
    plot_val_loss = 0
    encoder_optimizer = torch.optim.Adadelta(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.Adadelta(decoder.parameters(), lr=learning_rate)
    #encoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(encoder_optimizer, mode="min")
    #decoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(decoder_optimizer, mode="min")

    criterion = nn.NLLLoss(ignore_index=PAD_token) # this ignores the padded token. 
    plot_loss = []
    val_loss = []
    for epoch in range(n_epochs):

        plot_loss = []
        val_loss = []
        for step, (sent1s, sent1_lengths, sent2s, sent2_lengths) in enumerate(train_loader):
            encoder.train()  # put the modules in training mode (enables dropout etc.)
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device) 
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(device), sent2_lengths.to(device)
            loss, output_translations, count = train(
                sent1_batch, sent1_length_batch, encoder, decoder,
                encoder_optimizer, decoder_optimizer, sent2_batch,
                sent2_length_batch, criterion, count)
            i = 0  # look at the first output translation
            output = output_translations[i]
            translated = []
            answer = []
            for j in range(len(output)):
                token = torch.argmax(output[j][0])  # index of the most likely word
                translated.append(lang2.index2word[token.squeeze().item()])
                answer.append(lang2.index2word[sent2_batch[i][j].squeeze().item()])
            print(answer)
            print("translated prediction")
            print(translated) 
            # let's print what the decoder is actually producing here.
            # check whether there is an SOS token here as well.
            print_loss_total += loss
            plot_loss_total += loss
            # we also have to mask when it's an EOS tag.
            if (step+1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' % (timeSince(start, step / n_epochs),
                                             step, step / n_epochs * 100, print_loss_avg))
                with torch.no_grad():
                    v_loss = test_model(encoder, decoder, search, validation_pairs, lang2, max_length=max_length_generation)
                # returns bleu score
                print("VALIDATION BLEU SCORE: "+str(v_loss))
                val_loss.append(v_loss)
                plot_loss.append(print_loss_avg)
                # save it every time it hits the step now. 
                save_model(encoder, decoder, title)
                sys.stdout.flush()
                plot_loss_total = 0

        plot_losses.append(plot_loss)
        val_losses.append(val_loss)
        print("AVERAGE PLOT LOSS")
        print(np.mean(plot_loss))
        sys.stdout.flush()
        #encoder_scheduler.step(np.mean(plot_loss)) # this isnt' really doing anything. 
        #decoder_scheduler.step(np.mean(plot_loss))
        save_model(encoder, decoder, title)
        make_graph(encoder, decoder, val_losses, plot_losses, title)
    assert len(val_losses) == len(plot_losses)
    save_model(encoder, decoder, title)
    make_graph(encoder, decoder, val_losses, plot_losses, title)
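
# The train_loader that these batched variants iterate over is assumed to be a
# standard DataLoader whose collate function pads each batch and also returns
# the true lengths, roughly as sketched below. PAD_token, BATCH_SIZE and the
# tensorised train_dataset are assumptions about the surrounding code.
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def pad_collate(batch):
    # batch: list of (source_tensor, target_tensor) pairs of varying length
    srcs, trgs = zip(*batch)
    src_lengths = torch.tensor([len(s) for s in srcs])
    trg_lengths = torch.tensor([len(t) for t in trgs])
    srcs = pad_sequence(srcs, batch_first=True, padding_value=PAD_token)
    trgs = pad_sequence(trgs, batch_first=True, padding_value=PAD_token)
    return srcs, src_lengths, trgs, trg_lengths

# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
#                           shuffle=True, collate_fn=pad_collate)
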
Example #6
def trainIters(encoder,
               decoder,
               n_epochs,
               pairs,
               validation_pairs,
               lang1,
               lang2,
               search,
               title,
               max_length_generation,
               print_every=1000,
               plot_every=1000,
               learning_rate=0.0001):
    """
    lang1 is the Lang object for language 1.
    lang2 is the Lang object for language 2.
    max_length_generation is the maximum length of a generated translation.
    """
    start = time.time()
    plot_losses, val_losses = [], []
    count, print_loss_total, plot_loss_total, val_loss_total, plot_val_loss = 0, 0, 0, 0, 0
    encoder_optimizer = torch.optim.Adadelta(encoder.parameters(),
                                             lr=learning_rate)
    decoder_optimizer = torch.optim.Adadelta(decoder.parameters(),
                                             lr=learning_rate)
    #encoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(encoder_optimizer, mode="min")
    #decoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(decoder_optimizer, mode="min")

    criterion = nn.NLLLoss(
        ignore_index=PAD_token)  # this ignores the padded token.
    plot_loss = []
    val_loss = []
    for epoch in range(n_epochs):
        plot_loss = []
        val_loss = []
        for step, (sent1s, sent1_lengths, sent2s,
                   sent2_lengths) in enumerate(train_loader):
            encoder.train()
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device)
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(
                device), sent2_lengths.to(device)

            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            encoder_outputs, encoder_hidden = encoder(sent1_batch,
                                                      sent1_length_batch)
            # outputs is 32 by 72 by 256
            # encoder_hidden is 1 by 32 by 256

            decoder_input = torch.LongTensor([SOS_token] * BATCH_SIZE).view(
                -1, 1).to(device)
            decoder_hidden = encoder_hidden
            # decoder_input is 32 by 1
            # decoder_hidden is 1 by 32 by 256

            max_trg_len = max(sent2_lengths)
            loss = 0

            # Run through decoder one time step at a time using TEACHER FORCING=1.0
            for t in range(max_trg_len):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # decoder_output is 32 by vocab_size
                # sent2_batch is 32 by 46
                loss += criterion(decoder_output, sent2_batch[:, t])
                decoder_input = sent2_batch[:, t]

            loss = loss / max_trg_len.float()
            print_loss_total += loss
            count += 1
            loss.backward()

            encoder_optimizer.step()
            decoder_optimizer.step()

            if (step + 1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' %
                      (timeSince(start, step / n_epochs), step,
                       step / n_epochs * 100, print_loss_avg))
                # 42s
                #                 v_loss = test_model(encoder, decoder, search, validation_pairs, lang2, max_length=max_length_generation)
                # returns bleu score
                #                 print("VALIDATION BLEU SCORE: "+str(v_loss))
                #                 val_loss.append(v_loss)
                plot_loss.append(print_loss_avg)
                plot_loss_total = 0

    save_model(encoder, decoder, val_losses, plot_losses, title)
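
# The per-step loss in the loop above relies on NLLLoss(ignore_index=PAD_token)
# to drop padded target positions, so shorter sentences in a batch do not add
# noise to the average. A tiny self-contained check of that behaviour
# (vocabulary size 5, PAD id 0 -- both illustrative values):
import torch
import torch.nn as nn

log_probs = torch.log_softmax(torch.randn(3, 5), dim=-1)  # 3 target positions
targets = torch.tensor([2, 4, 0])                         # last position is PAD
loss_fn = nn.NLLLoss(ignore_index=0)
print(loss_fn(log_probs, targets))  # averaged over the 2 non-PAD targets only
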
def trainIters(encoder,
               decoder,
               n_epochs,
               pairs,
               validation_pairs,
               lang1,
               lang2,
               search,
               title,
               max_length_generation,
               val_every=1000,
               print_every=1000,
               plot_every=1000,
               learning_rate=0.0001):
    """
    lang1 is the Lang object for language 1.
    lang2 is the Lang object for language 2.
    max_length_generation is the maximum length of a generated translation.
    """
    start = time.time()
    #plot_losses, val_losses = [], []
    count, print_loss_total, plot_loss_total, val_loss_total, plot_val_loss = 0, 0, 0, 0, 0
    encoder_optimizer = torch.optim.Adadelta(encoder.parameters(),
                                             lr=learning_rate)
    decoder_optimizer = torch.optim.Adadelta(decoder.parameters(),
                                             lr=learning_rate)
    #encoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(encoder_optimizer, mode="min")
    #decoder_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(decoder_optimizer, mode="min")

    criterion = nn.NLLLoss(
        ignore_index=PAD_token)  # this ignores the padded token.
    for epoch in range(n_epochs):
        for step, (sent1s, sent1_lengths, sent2s,
                   sent2_lengths) in enumerate(train_loader):
            encoder.train()
            decoder.train()
            sent1_batch, sent2_batch = sent1s.to(device), sent2s.to(device)
            sent1_length_batch, sent2_length_batch = sent1_lengths.to(
                device), sent2_lengths.to(device)

            loss = train(sent1_batch, sent1_length_batch, sent2_batch,
                         sent2_length_batch, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)

            print_loss_total += loss
            count += 1

            if (step + 1) % print_every == 0:
                # let's train and plot at the same time.
                print_loss_avg = print_loss_total / count
                count = 0
                print_loss_total = 0
                print('TRAIN SCORE %s (%d %d%%) %.4f' %
                      (timeSince(start, step / n_epochs), step,
                       step / n_epochs * 100, print_loss_avg))
                print("Memory allocated: ",
                      torch.cuda.memory_allocated(device) / (1e6))

                if (step + 1) % val_every == 0:
                    with torch.no_grad():
                        v_loss = test_model(encoder,
                                            decoder,
                                            search,
                                            validation_pairs,
                                            lang2,
                                            max_length=max_length_generation)
                    # returns bleu score
                    print("VALIDATION BLEU SCORE: " + str(v_loss))
                    #val_losses.append(v_loss.item())
                    current_time = time.strftime("%Y-%m-%d-%H-%M-%S")
                    torch.save(encoder.state_dict(),
                               "Attention_Vish_encoder_" + current_time)
                    torch.save(decoder.state_dict(),
                               "Attention_Vish_decoder_" + current_time)
                    #pickle.dump(val_losses, open("val_losses_1.2_2nd_train", "wb"))

            del sent1s, sent1_lengths, sent2s, sent2_lengths, sent1_batch, sent2_batch, sent1_length_batch, sent2_length_batch
            gc.collect()
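
# timeSince() used in the progress printouts throughout these examples is not
# shown. A common definition (the one in the PyTorch seq2seq translation
# tutorial) is reproduced below as an assumption about what the original used.
import math
import time

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / percent   # estimated total time
    rs = es - s        # estimated time remaining
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))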