Example #1
def trainEpochs(encoder, decoder, encoder_optimizer, decoder_optimizer, 
        encoder_scheduler, decoder_scheduler, criterion, dataiter, args):
    n_epochs = args.n_epochs
    print_every = args.print_every
    plot_every = args.plot_every

    start = time.time()
    batch_i = 0
    n_batches = n_epochs * len(dataiter)
    plot_losses = []
    epoch_loss = 0 # Reset every epoch
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    for epoch in range(args.n_epochs):

        for input_tensor, input_lengths, target_tensor, target_lengths in dataiter:
            batch_i += 1

            loss = train(input_tensor, input_lengths, target_tensor, target_lengths, 
                encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, args)
            epoch_loss += loss
            print_loss_total += loss
            plot_loss_total += loss

            if batch_i % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, batch_i / n_batches),
                                            batch_i, batch_i / n_batches * 100, print_loss_avg))

            if batch_i % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            # for testing only: stop the epoch early after args.n_batches batches
            if args.n_batches > 0 and batch_i == args.n_batches:
                break

        # save a checkpoint once per qualifying epoch (not once per batch)
        if (epoch + 1) % args.save_every == 0:
            checkpoint = {
                'epoch': epoch,
                'encoder_state_dict': encoder.state_dict(),
                'decoder_state_dict': decoder.state_dict(),
                'encoder_optim_state': encoder_optimizer.state_dict(),
                'decoder_optim_state': decoder_optimizer.state_dict(),
            }
            torch.save(checkpoint, args.save_data_path + "/epoch{}_checkpoint.pt".format(epoch))

        encoder_scheduler.step(epoch_loss)
        decoder_scheduler.step(epoch_loss)
        epoch_loss = 0
        dataiter.reset()
        
        print("Epoch {}/{} finished".format(epoch, args.n_epochs - 1))

    showPlot(plot_losses, args)
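
Most of these examples call timeSince and showPlot without defining them. Below is a minimal sketch of what those helpers typically look like in the PyTorch seq2seq tutorial these snippets follow; treat it as an assumption, since the exact signatures vary between examples (Example #1, for instance, passes args into showPlot).

import time
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

def asMinutes(s):
    # format a duration in seconds as "Xm Ys"
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    # elapsed time plus a rough estimate of remaining time, given progress in (0, 1]
    now = time.time()
    s = now - since
    es = s / percent
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

def showPlot(points):
    # plot the averaged losses collected every plot_every steps
    plt.figure()
    fig, ax = plt.subplots()
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    plt.plot(points)
    plt.show()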
Example #2
def save_model(encoder, decoder, plot_losses, model_name):
    stamp = str(time.time())
    savepath = utils.prepare_dir(model_name, stamp)
    torch.save(encoder.state_dict(), savepath + "/%s.encoder" % stamp)
    torch.save(decoder.state_dict(), savepath + "/%s.decoder" % stamp)
    try:
        utils.showPlot(plot_losses, model_name, stamp)
    except Exception:
        # plotting can fail in headless environments; the saved weights are still valid
        pass
    print(" * model saved with time stamp: ", stamp)
Example #3
def trainIters(pairs,
               input_lang,
               output_lang,
               encoder,
               decoder,
               n_iters,
               print_every=100,
               plot_every=1000,
               learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every
    plot_loss_total = 0  # reset every plot_every

    # define criterion and optimization algorithm
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        variablesFromPair(random.choice(pairs), input_lang, output_lang)
        for i in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    # now proceed one iteration at a time
    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        # train on one example
        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (utils.timeSince(start,
                                   float(iter) / float(n_iters)), iter,
                   float(iter) / float(n_iters) * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / float(plot_every)
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    # plot the learning curve
    utils.showPlot(plot_losses)
Example #4
def trainIters(lang,
               dataSet,
               pairs,
               encoder,
               decoder,
               n_iters,
               print_every=1000,
               plot_every=100,
               learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # randomly sample the training pairs
    training_pairs = [random.choice(pairs) for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss
        # if print_loss_total / print_every <= 0.0003:
        #    break

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, float(
                iter / n_iters)), iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    torch.save(encoder,
               setting.MODEL_HOME + "/%s.%s.encoder.pkl" % (dataSet, lang))
    torch.save(decoder,
               setting.MODEL_HOME + "/%s.%s.decoder.pkl" % (dataSet, lang))

    showPlot(plot_losses)
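
Unlike the state_dict-based examples, this one pickles the whole encoder and decoder modules with torch.save, so loading is a single torch.load call as long as the model class definitions are importable. A minimal sketch under those assumptions, using the same path variables as above:

import torch

# load the pickled modules saved above; setting.MODEL_HOME, dataSet and lang
# must match the values used at save time
# (recent PyTorch versions may additionally require weights_only=False)
encoder = torch.load(setting.MODEL_HOME + "/%s.%s.encoder.pkl" % (dataSet, lang))
decoder = torch.load(setting.MODEL_HOME + "/%s.%s.decoder.pkl" % (dataSet, lang))
encoder.eval()
decoder.eval()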
Example #5
    def train(self,
              pairs,
              n_iters,
              max_length=1000,
              teacher_forcing_ratio=0.5,
              print_every=1000,
              plot_every=100,
              learning_rate=0.01):
        start = time.time()
        plot_losses = []
        print_loss_total = 0  # Reset every print_every
        plot_loss_total = 0  # Reset every plot_every

        encoder_optimizer = optim.SGD(self.encoder.parameters(),
                                      lr=learning_rate)
        decoder_optimizer = optim.SGD(self.decoder.parameters(),
                                      lr=learning_rate)
        training_pairs = [
            tensorsFromPair(self.input_lang, self.output_lang,
                            random.choice(pairs), self.device)
            for i in range(n_iters)
        ]
        criterion = nn.NLLLoss()

        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = self.step(input_tensor, target_tensor, encoder_optimizer,
                             decoder_optimizer, criterion, max_length,
                             teacher_forcing_ratio)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (timeSince(start, iter / n_iters), iter,
                       iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

        showPlot(plot_losses)
Example #6
def trainIters(encoder,
               decoder,
               n_iters,
               print_every=1000,
               plot_every=100,
               learning_rate=0.01,
               lang_pack=None):

    assert lang_pack is not None, "None shall pass"
    input_lang, output_lang, pairs = lang_pack

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensorsFromPair(random.choice(pairs), langs=[input_lang, output_lang])
        for i in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
Example #7
    def trainIters(self,
                   pairs,
                   input_lang,
                   output_lang,
                   n_iters,
                   print_every=1000,
                   plot_every=100,
                   char=False):
        start = time.time()
        plot_losses = []
        print_loss_total = 0  # Reset every print_every
        plot_loss_total = 0  # Reset every plot_every

        self.input_lang = input_lang
        self.output_lang = output_lang
        self.encoder_optimizer = optim.SGD(self.encoder.parameters(),
                                           lr=self.learning_rate)
        self.decoder_optimizer = optim.SGD(self.decoder.parameters(),
                                           lr=self.learning_rate)
        selected_pairs = [random.choice(pairs) for i in range(n_iters)]
        training_pairs = [
            self.tensorsFromPair(pair, char) for pair in selected_pairs
        ]
        self.criterion = nn.NLLLoss()

        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]
            loss = self.train(input_tensor, target_tensor)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (timeSince(start, iter / n_iters), iter,
                       iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

        showPlot(plot_losses)
Example #8
def trainIters(pairs,
               input_lang,
               output_lang,
               encoder,
               decoder,
               n_iters,
               print_every=100,
               plot_every=1000,
               learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every
    plot_loss_total = 0  # reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        variablesFromPair(random.choice(pairs), input_lang, output_lang)
        for i in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    reward = 0
    for iter in range(1, n_iters + 1):
        # print("iter", iter)
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        # train on one example
        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion, reward)
        # print("loss", loss)

        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (utils.timeSince(start,
                                   float(iter) / float(n_iters)), iter,
                   float(iter) / float(n_iters) * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / float(plot_every)
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

        # if iter % 80 == 0:
        # train_edit, pair_rand = generateRandomlytrain(encoder, decoder)
        # if train_edit > 1:
        # trainItersreinforce(pair_rand, input_lang, output_lang, encoder, decoder, 1, 100, 1000, 0.01)
        # else:
        # continue
        if iter % 100 == 0:
            train_edit = generateRandomlytrain(encoder, decoder)
            if train_edit > 2:
                reward = -(loss * 0.75)
            else:
                reward = (loss * 0.75)
            # print("loss", loss)
            # print("loss2", reward)
    # plot the learning curve
    utils.showPlot(plot_losses)
Example #9
def trainModel(n_iters=100000,
               teacher_forcing_ratio=0.,
               print_every=1000,
               plot_every=100,
               learning_rate=0.01,
               max_length=MAX_LENGTH):

    training_pairs, vocab_size, word2ix, ix2word = loadDataset()
    encoder, decoder = loadModel(vocab_size)

    print("Training the model ... ")
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every
    plot_loss_total = 0  # reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair['input']
        target_variable = training_pair['target']

        input_variable = Variable(torch.LongTensor(input_variable).view(-1, 1))
        target_variable = Variable(
            torch.LongTensor(target_variable).view(-1, 1))
        if USE_CUDA:
            input_variable = input_variable.cuda()
            target_variable = target_variable.cuda()

        # print(input_variable)  # debug only: prints the input tensor on every iteration if enabled

        loss = trainIter(input_variable,
                         target_variable,
                         encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         criterion,
                         max_length=max_length,
                         teacher_forcing_ratio=teacher_forcing_ratio)
        print_loss_total += loss
        plot_loss_total += loss

        # Keeping track of average loss and printing results on screen
        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (utils.timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))

        # Keeping track of average loss and plotting in figure
        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)

            if min(plot_losses) == plot_loss_avg:
                #we save this version of the model
                torch.save(encoder.state_dict(), "encoder.ckpt")
                torch.save(decoder.state_dict(), "decoder.ckpt")

            plot_loss_total = 0

    utils.showPlot(plot_losses)
Example #10
def train(model,
          tokenizer,
          epochs,
          batch_size,
          save_every=1000,
          plot_every=100,
          learning_rate=0.01):
    start = time.time()
    model_save_dir = os.path.join(args.save_dir, 'seq2seq_attn',
                                  datetime.now().strftime('%Y-%m-%d_%H%M'))
    tb_writer = SummaryWriter(model_save_dir)
    plot_losses = []
    save_every_total = 0  # Reset every save_every
    plot_loss_total = 0  # Reset every plot_every
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    model.apply(weight_init)

    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=batch_size)
    train_iterator = trange(int(epochs), desc="Epoch")
    best_rouge = 0.
    early_stopping_steps = 0
    global_step = 1
    n_iters = len(train_dataloader)
    logger.info('train and eval')
    model.zero_grad()
    for epoch in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):
            model.train()
            text_tensor = batch[0].to(device)
            token_type_tensor = batch[1].to(device)
            question_tensor = batch[2].to(device)

            output, attention, result = model(text_tensor,
                                              token_type_tensor,
                                              question_tensor,
                                              teacher_forcing_ratio=0.5)
            # output = [batch size,trg sen len, output dim]
            # trg = [batch size,trg sen len]

            output = output[1:].view(-1, output.shape[-1])
            question_tensor = question_tensor[:, 1:, ]
            trg = question_tensor.reshape(-1)
            # output = [(trg sent len - 1) * batch size, output dim]
            # trg = [(trg sent len - 1) * batch size]

            loss = loss_calc(output, trg)
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)

            plot_loss_total += loss.cpu().detach().item()
            optimizer.step()
            model.zero_grad()

            if global_step % save_every == 0:
                valid_loss, rouge_l = evaluate(model, tokenizer, batch_size,
                                               max_question_len)
                tb_writer.add_scalar('valid_loss', valid_loss, global_step)
                tb_writer.add_scalar('valid_rouge_l', rouge_l, global_step)
                print('%s (%d %d%%) loss: %.4f rouge_l: %.4f' %
                      (timeSince(start, step / n_iters), step,
                       step / n_iters * 100, valid_loss, rouge_l))
                if best_rouge < rouge_l:
                    logger.info('save best weight')
                    best_rouge = rouge_l

                    torch.save(
                        model.state_dict(),
                        os.path.join(model_save_dir, 'pytorch_model.bin'))
                    early_stopping_steps = 0
                else:
                    early_stopping_steps += 1
                if args.early_stopping > 0 and early_stopping_steps >= args.early_stopping:
                    break

            if global_step % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                tb_writer.add_scalar('train_loss', plot_loss_avg, global_step)
                plot_loss_total = 0
                showPlot(plot_losses)
            global_step += 1
        if args.early_stopping > 0 and early_stopping_steps >= args.early_stopping:
            break
    tb_writer.close()
    return global_step, loss
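
loss_calc is not defined in this snippet. A minimal sketch of what it is assumed to do, namely cross-entropy over the flattened decoder outputs while ignoring padding (the pad index of 0 is a hypothetical choice, not taken from the example):

import torch.nn as nn

# hypothetical loss_calc assumed by Example #10: standard cross-entropy over
# flattened logits, ignoring the padding index (assumed here to be 0)
_criterion = nn.CrossEntropyLoss(ignore_index=0)

def loss_calc(output, trg):
    # output: [(trg len - 1) * batch size, output dim]
    # trg:    [(trg len - 1) * batch size]
    return _criterion(output, trg)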