예제 #1
0
    def validation_epoch_end(self, val_step_outputs):
        """Print sample translations and, when enabled, record a BLEU score.

        Each sentence in ``config.sentences`` is translated with
        ``translate_sentence`` when ``config.USE_BPE == False`` and with
        ``translate_sentence_bpe`` otherwise; every result is printed.

        When ``config.COMPUTE_BLEU == True`` and ``self.nepochs > 0``, the
        value returned by ``computeBLEU`` for ``self.test_data`` is appended
        to ``self.bleu_scores`` and printed, and the accumulated list is
        handed to ``writeArrToCSV`` whenever ``self.nepochs % 1 == 0``.

        Args:
            val_step_outputs: Not used by this method.

        Returns:
            None.
        """
        print("Translation Sample =================")

        for source_text in config.sentences:
            # The flag is re-read for every sentence; the ``== False``
            # comparison is kept as written so non-bool settings behave
            # exactly as before.
            if config.USE_BPE == False:  # noqa: E712
                translator = translate_sentence
            else:
                translator = translate_sentence_bpe

            output_text = translator(
                self,
                source_text,
                self.german_vocab,
                self.english_vocab,
                self.deviceLegacy,
                max_length=50,
            )
            print("Output", output_text)

        # Guard clause: nothing more to do unless BLEU tracking is active.
        bleu_enabled = config.COMPUTE_BLEU == True and self.nepochs > 0  # noqa: E712
        if not bleu_enabled:
            return

        score = computeBLEU(
            self.test_data,
            self,
            self.german_vocab,
            self.english_vocab,
            self.deviceLegacy,
        )
        self.bleu_scores.append(score)
        print("BLEU score: ", score)

        # Persist the running list of scores.
        if self.nepochs % 1 == 0:
            writeArrToCSV(self.bleu_scores)
예제 #2
0
def calculate_bleu(data, src_field, trg_field, model, device, max_len=100):
    """Translate every example in ``data`` and score the results.

    For each example, the ``src`` attribute is translated with
    ``translate_sentence``; the last element of the prediction is dropped
    and the remainder is paired with the example's ``trg`` attribute.

    Args:
        data: Iterable of examples exposing ``src`` and ``trg`` attributes.
        src_field: Forwarded unchanged to ``translate_sentence``.
        trg_field: Forwarded unchanged to ``translate_sentence``.
        model: Forwarded unchanged to ``translate_sentence``.
        device: Forwarded unchanged to ``translate_sentence``.
        max_len: Forwarded unchanged to ``translate_sentence``.

    Returns:
        Whatever ``bleu_score`` returns for the collected predictions and
        single-reference target lists.
    """
    hypotheses = []
    references = []

    for example in tqdm(data):
        attributes = vars(example)
        source_tokens = attributes['src']
        target_tokens = attributes['trg']

        # translate_sentence returns a pair; only the first item is used.
        predicted, _unused = translate_sentence(source_tokens, src_field,
                                                trg_field, model, device,
                                                max_len)

        # Drop the final element, presumed to be the <eos> token.
        # NOTE(review): this is unconditional, so a real token is lost if
        # decoding stopped without emitting <eos> -- confirm against
        # translate_sentence.
        hypotheses.append(predicted[:-1])

        # bleu_score takes a list of references per hypothesis.
        references.append([target_tokens])

    return bleu_score(hypotheses, references)
    def validation_epoch_end(self, val_step_outputs):
        """Print the translation of one fixed German sample sentence.

        The sentence is passed to ``translate_sentence`` together with
        ``self.german_vocab`` and ``self.english_vocab`` and the result is
        printed.

        The device string used to be hard-coded to ``"cuda"``, which breaks
        on hosts without CUDA; it now falls back to ``"cpu"`` there. On
        CUDA hosts the value is unchanged.

        Args:
            val_step_outputs: Not used by this method.

        Returns:
            None.
        """
        # Local import so the block is self-contained regardless of how the
        # enclosing module imports torch.
        import torch

        print("Translation Sample =================")
        sentence = "ein pferd geht unter einer brücke neben einem boot."

        # Prefer CUDA when present, otherwise run the sample on the CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"

        translated_sentence = translate_sentence(self,
                                                 sentence,
                                                 self.german_vocab,
                                                 self.english_vocab,
                                                 device,
                                                 max_length=50)

        print("Output", translated_sentence)
예제 #4
0
def main():
    """Load a trained translation model and print five sample translations.

    Steps:
      1. Parse ``--model_config`` and ``--model_weights``.
      2. Seed torch (CPU and CUDA) with 42 and pick the device.
      3. Read the JSON config into a namespace and re-parse the command
         line on top of it.
      4. Build the model, load its weights and switch it to eval mode.
      5. Draw five test examples with ``np.random.randint`` and print the
         input, the ground-truth target and the model output for each.
    """
    parser = argparse.ArgumentParser(
        description='demonstration of machine translation algorithm')
    cli_options = (
        ('--model_config', './checkpoints/config.json',
         'train config for model_weights'),
        ('--model_weights', './checkpoints/en_de_final.pt',
         'path for weights of the model'),
    )
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, default=default_value, help=help_text)
    args = parser.parse_args()

    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Seed a namespace from the stored training config, then let argparse
    # parse the command line into that same namespace.
    model_args = argparse.Namespace()
    with open(args.model_config, 'rt') as config_file:
        vars(model_args).update(json.load(config_file))
    model_args = parser.parse_args(namespace=model_args)

    print('Loading models...')
    train_data, valid_data, test_data, src_lang, trg_lang = prepare_data()
    src_vocab_size = len(src_lang.vocab)
    trg_vocab_size = len(trg_lang.vocab)
    model = build_model(model_args, src_lang, trg_lang, src_vocab_size,
                        trg_vocab_size, device)
    state_dict = torch.load(args.model_weights, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    print('Evaluating 5 random sentence from test set:')
    for _ in range(5):
        examples = test_data.examples
        picked = vars(examples[np.random.randint(len(test_data))])
        source_tokens = picked['src']
        print(colored('Input sentence: \n', 'yellow'),
              ' '.join(source_tokens))

        predicted, _unused = translate_sentence(source_tokens, src_lang,
                                                trg_lang, model, device)
        # Drop the final element, presumed to be the <eos> token.
        predicted = predicted[:-1]

        print(colored('GT translation: \n', 'green'),
              ' '.join(picked['trg']))
        print(colored('Model translation: \n', 'green'), ' '.join(predicted))
예제 #5
0
def _batch_loss(inputs, targets, encoder, decoder, criterion, start_token,
                opts):
    """Return the decoder loss for one batch, averaged over target steps."""
    batch_size = inputs.size(0)

    encoder_annotations, encoder_hidden = encoder(inputs)

    # The last hidden state of the encoder becomes the first hidden state
    # of the decoder.
    decoder_hidden = encoder_hidden

    # One start token per sequence in the batch: batch_size x 1.
    start_vector = torch.ones(batch_size).long().unsqueeze(1) * start_token
    decoder_input = utils.to_var(start_vector, opts.cuda)

    seq_len = targets.size(1)  # targets is indexed as batch x seq_len

    # Teacher forcing is decided once per batch.
    use_teacher_forcing = np.random.rand() < opts.teacher_forcing_ratio

    loss = 0.0
    for step in range(seq_len):
        decoder_output, decoder_hidden, _attention_weights = decoder(
            decoder_input, decoder_hidden, encoder_annotations)

        current_target = targets[:, step]
        # Cross entropy between the decoder distribution and ground truth.
        loss += criterion(decoder_output, current_target)

        if use_teacher_forcing:
            # Condition the next step on the ground-truth token.
            decoder_input = current_target.unsqueeze(1)
        else:
            # Condition the next step on the model's own prediction.
            predicted = F.softmax(decoder_output, dim=1).data.max(1)[1]
            decoder_input = utils.to_var(predicted.unsqueeze(1), opts.cuda)

    return loss / float(seq_len)


def training_loop(train_dict, val_dict, idx_dict, encoder, decoder, criterion,
                  optimizer, opts):
    """Runs the main training loop; evaluates the model on the val set every epoch.
        * Prints training and val loss each epoch.
        * Prints qualitative translation results each epoch using TEST_SENTENCE
        * Saves an attention map for TEST_WORD_ATTN each epoch (unless
          opts.no_attention is set)
        * Checkpoints the models whenever the validation loss improves on
          the best value seen so far.

    Fixes relative to the previous version:
        * ``best_val_loss`` is now updated after each improvement; before,
          it was never updated, so a checkpoint was written on every epoch
          and the saved model was simply the most recent one.
        * The loss log file is closed via a ``with`` block, including when
          training raises.
        * The decoding loop no longer reuses the batch index name.

    Arguments:
        train_dict: The training word pairs, organized by source and target lengths.
        val_dict: The validation word pairs, organized by source and target lengths.
        idx_dict: Contains char-to-index and index-to-char mappings, and start & end token indexes.
        encoder: An encoder model to produce annotations for each step of the input sequence.
        decoder: A decoder model (with or without attention) to generate output tokens.
        criterion: Used to compute the CrossEntropyLoss for each decoder output.
        optimizer: Implements a step rule to update the parameters of the encoder and decoder.
        opts: The command-line arguments.
    """

    start_token = idx_dict['start_token']
    end_token = idx_dict['end_token']
    char_to_index = idx_dict['char_to_index']

    best_val_loss = float('inf')
    train_losses = []
    val_losses = []

    log_path = os.path.join(opts.checkpoint_path, 'loss_log.txt')
    with open(log_path, 'w') as loss_log:
        for epoch in range(opts.nepochs):

            # Multiplicative decay, applied to the first param group only.
            optimizer.param_groups[0]['lr'] *= opts.lr_decay

            epoch_losses = []

            for key in train_dict:

                input_strings, target_strings = zip(*train_dict[key])
                input_tensors = [
                    torch.LongTensor(
                        utils.string_to_index_list(s, char_to_index,
                                                   end_token))
                    for s in input_strings
                ]
                target_tensors = [
                    torch.LongTensor(
                        utils.string_to_index_list(s, char_to_index,
                                                   end_token))
                    for s in target_strings
                ]

                num_tensors = len(input_tensors)
                num_batches = int(
                    np.ceil(num_tensors / float(opts.batch_size)))

                for batch_idx in range(num_batches):

                    start = batch_idx * opts.batch_size
                    end = start + opts.batch_size

                    # The final batch of a bucket may be smaller than
                    # opts.batch_size.
                    inputs = utils.to_var(
                        torch.stack(input_tensors[start:end]), opts.cuda)
                    targets = utils.to_var(
                        torch.stack(target_tensors[start:end]), opts.cuda)

                    loss = _batch_loss(inputs, targets, encoder, decoder,
                                       criterion, start_token, opts)
                    epoch_losses.append(loss.item())

                    # Zero gradients
                    optimizer.zero_grad()

                    # Compute gradients
                    loss.backward()

                    # Update the parameters of the encoder and decoder
                    optimizer.step()

            train_loss = np.mean(epoch_losses)
            val_loss = evaluate(val_dict, encoder, decoder, idx_dict,
                                criterion, opts)

            if val_loss < best_val_loss:
                checkpoint(encoder, decoder, idx_dict, opts)
                # Remember the new best so later, worse epochs do not
                # overwrite the checkpoint.
                best_val_loss = val_loss

            if not opts.no_attention:
                # Save attention maps for the fixed word TEST_WORD_ATTN
                # throughout training
                utils.visualize_attention(
                    TEST_WORD_ATTN,
                    encoder,
                    decoder,
                    idx_dict,
                    opts,
                    save=os.path.join(
                        opts.checkpoint_path,
                        'train_attns/attn-epoch-{}.png'.format(epoch)))

            gen_string = utils.translate_sentence(TEST_SENTENCE, encoder,
                                                  decoder, idx_dict, opts)
            print(
                "Epoch: {:3d} | Train loss: {:.3f} | Val loss: {:.3f} | Gen: {:20s}"
                .format(epoch, train_loss, val_loss, gen_string))

            loss_log.write('{} {} {}\n'.format(epoch, train_loss, val_loss))
            # Flush every epoch so the log is usable while training runs.
            loss_log.flush()

            train_losses.append(train_loss)
            val_losses.append(val_loss)

            save_loss_plot(train_losses, val_losses, opts)
예제 #6
0
# Optionally restore model and optimizer state from the saved checkpoint.
if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# Pre-tokenised sample sentence, translated once before training starts.
# The result is stored but not printed here.
sentence1 = [
    'ein',
    'pferd',
    'geht',
    'unter',
    'einer',
    'brücke',
    'neben',
    'einem',
    'boot',
    '.',
]
translated_sentence = translate_sentence(
    model, sentence1, german, english, device, max_length=50)

# Run training.
train(
    model,
    device,
    load_model,
    save_model,
    german,
    english,
    train_data,
    valid_data,
    test_data,
    batch_size,
)

# BLEU is computed on slices only; running on the entire data takes a while.
# NOTE(review): both slices start at index 1, so element 0 is skipped --
# confirm this is intended.
score = bleu(train_data[1:100], model, german, english, device)
print(f"Final Train Bleu score {score * 100:.2f}")

score = bleu(test_data[1:100], model, german, english, device)
예제 #7
0
def train(model, device, load_model, save_model, german_vocab, english_vocab,
          train_data, valid_data, test_data, batch_size):
    """Train ``model``, printing a sample translation and BLEU scores each epoch.

    At the start of every epoch the function optionally saves a checkpoint,
    switches the model to eval mode, translates a fixed German sentence and
    prints BLEU on small slices of the train and test data. It then
    switches back to train mode and runs one pass over the training
    iterator with Adam, gradient-norm clipping (max norm 1) and a
    ``ReduceLROnPlateau`` scheduler stepped on the mean training loss.

    The module-level names ``learning_rate`` and ``num_epochs`` are read.

    Fixes relative to the previous version:
        * An epoch that yields no batches no longer raises
          ``ZeroDivisionError``; the scheduler step is skipped instead.
        * The source sentence is printed under a "Source" label; it used
          to be labelled "Translated".
        * The unused ``step`` counter and unused iterator names are gone.

    Args:
        model: Network called as ``model(src, trg)``; its parameters are
            optimised in place.
        device: Device the batches are moved to; also forwarded to
            ``translate_sentence`` and ``bleu``.
        load_model: When truthy, restore state from "my_checkpoint.pth.tar".
        save_model: When truthy, save a checkpoint at the start of each epoch.
        german_vocab: Forwarded to ``translate_sentence`` and ``bleu``.
        english_vocab: Provides ``stoi["<pad>"]`` for the loss; also
            forwarded to ``translate_sentence`` and ``bleu``.
        train_data: Training split.
        valid_data: Validation split (only passed to the iterator factory).
        test_data: Test split.
        batch_size: Batch size for the iterators.

    Returns:
        None.
    """
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    # Fixed sample used to eyeball translation quality every epoch.
    sentence = "ein pferd geht unter einer brücke neben einem boot."

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.1,
                                                           patience=10,
                                                           verbose=True)

    # Padding positions are excluded from the loss.
    pad_idx = english_vocab.stoi["<pad>"]
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

    # Only the training iterator is consumed below.
    train_iterator, _valid_iterator, _test_iterator = BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=batch_size,
        sort_within_batch=True,
        sort_key=lambda x: len(x.src),
        device=device,
    )

    for epoch in range(num_epochs):
        print(f"[Epoch {epoch} / {num_epochs}]")

        # The checkpoint is written before this epoch's updates, so it
        # reflects the state at the end of the previous epoch.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint)

        model.eval()

        translated_sentence = translate_sentence(model,
                                                 sentence,
                                                 german_vocab,
                                                 english_vocab,
                                                 device,
                                                 max_length=50)

        print(f"Source example sentence: \n {sentence}")
        print(f"Translated example sentence: \n {translated_sentence}")

        # Running on the entire data takes a while, so only slices are scored.
        print("here1")
        score = bleu(train_data[1:10], model, german_vocab, english_vocab,
                     device)
        print(f"Train Bleu score {score * 100:.2f}")

        print("here2")
        score = bleu(test_data[1:50], model, german_vocab, english_vocab,
                     device)
        print(f"Test Bleu score {score * 100:.2f}")

        model.train()
        losses = []

        for batch in train_iterator:
            # Get input and targets and move them to the device.
            inp_data = batch.src.to(device)
            target = batch.trg.to(device)

            # Feed the target without its last position; the loss below is
            # computed against the target without its first position.
            output = model(inp_data, target[:-1, :])

            # Per the original author: output is (trg_len, batch_size,
            # output_dim), but CrossEntropyLoss wants (N, C) logits and (N)
            # targets, so both are flattened; the start token is dropped
            # from the targets.
            output = output.reshape(-1, output.shape[2])
            target = target[1:].reshape(-1)

            optimizer.zero_grad()

            loss = criterion(output, target)
            losses.append(loss.item())

            # Back prop
            loss.backward()
            # Clip to avoid exploding gradient issues.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

            # Gradient descent step
            optimizer.step()

        # An empty iterator leaves no losses to average; skip the scheduler
        # step rather than dividing by zero.
        if losses:
            mean_loss = sum(losses) / len(losses)
            scheduler.step(mean_loss)
예제 #8
0
# Adam over all model parameters, using the module-level learning rate.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
)

# Optionally restore model and optimizer state from the saved checkpoint.
if load_model:
    load_checkpoint(
        torch.load("my_checkpoint.pth.tar"),
        model,
        optimizer,
    )

# Sample input translated once before training; the result is stored but
# not printed here.
sentence = "is man lion a stuffed A at smiling."

translated_sentence = translate_sentence(
    model,
    sentence,
    german_vocab,
    english_vocab,
    device,
    max_length=50,
)

print("===============================going for training ")

train(
    model,
    device,
    load_model,
    save_model,
    german_vocab,
    english_vocab,
    train_data,
    valid_data,
    test_data,
    batch_size,
)
# running on entire test data takes a while


              'nepochs':100, 
              'checkpoint_dir':"checkpoints", 
              'learning_rate':0.005, ## INCREASE BY AN ORDER OF MAGNITUDE
              'lr_decay':0.99,
              'batch_size':64, 
              'hidden_size':20, 
              'encoder_type': 'transformer',
              'decoder_type': 'transformer', # options: rnn / rnn_attention / transformer
              'num_transformer_layers': 3,
}
# Merge the overrides collected in args_dict into args before training.
args.update(args_dict)

# Show the effective options, then train; train(args) returns the encoder
# and decoder as a pair.
print_opts(args)
transformer_encoder, transformer_decoder = train(args)

# Translate the current TEST_SENTENCE with the freshly trained models.
# None is passed as the fourth positional argument.
translated = translate_sentence(TEST_SENTENCE, transformer_encoder, transformer_decoder, None, args)
print("source:\t\t{} \ntranslated:\t{}".format(TEST_SENTENCE, translated))

# The bare string below is an expression statement with no effect at
# runtime (it reads like notebook markdown carried into the script).
"""Try translating different sentences by changing the variable TEST_SENTENCE. Identify two distinct failure modes and briefly describe them."""

# Rebind TEST_SENTENCE to test_cases and translate again with the same models.
TEST_SENTENCE = test_cases
translated = translate_sentence(TEST_SENTENCE, transformer_encoder, transformer_decoder, None, args)
print("source:\t\t{} \ntranslated:\t{}".format(TEST_SENTENCE, translated))
# exit() raises SystemExit, so a plain script run stops here.
exit()

"""# Attention Visualizations

One of the benefits of using attention is that it allows us to gain insight into the inner workings of the model.

By visualizing the attention weights generated for the input tokens in each decoder step, we can see where the model focuses while producing each output token.
"""
# Use CUDA when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
SRC, TRG, train_data, valid_data, test_data = getData(False)

src_vocab_size = len(SRC)
trg_vocab_size = len(TRG)
SRC_PAD_IDX = SRC.stoi[SRC.pad_token]
# NOTE(review): the name says "TRG" but the index is looked up in SRC.
# This is only correct if both vocabularies give <eos> the same index --
# confirm, or switch to TRG.stoi[TRG.eos_token].
TRG_EOS_TOKEN = SRC.stoi[SRC.eos_token]

model = Seq2Seq(SRC_PAD_IDX, src_vocab_size, trg_vocab_size, device,
                TRG_EOS_TOKEN).to(device)

# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU also loads on a CPU-only host (previously the
# load had no map_location even though device may be the CPU).
model.load_state_dict(torch.load('tut4-model.pt', map_location=device))

# Translate one sample sentence and print the source and the result.
src = "ein pferd geht unter einer brücke neben einem boot ."
translation, attention = translate_sentence(model, src, SRC, TRG, device)
print(src)
print(translation)


#exit()
def display_attention(sentence, translation, attention):

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    attention = attention.squeeze(1).cpu().detach().numpy()

    cax = ax.matshow(attention, cmap='bone')

    ax.tick_params(labelsize=15)